In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Load the data
data = pd.read_csv('Final_Merged_Data.csv')

# Handle missing values
data = data.dropna()

# Convert 'timestamp_sentinel2' to datetime
data['timestamp_sentinel2'] = pd.to_datetime(data['timestamp_sentinel2'])

# Define the target variable (y)
target = 'Turbidity_Buoy_(NTU)'

# # Select features
# features = [
#     *[f'point{point}_B{band}' for band in [2, 3, 4, 8, 8, 8, 8, 11, 12] for point in range(1, 23)],
#     *[f'point{point}_cs' for point in range(1, 23)],
#     *[f'point{point}_cs_cdf' for point in range(1, 23)],
#     'B2_AVG', 'B3_AVG', 'B4_AVG', 'B8_AVG', 'B8A_AVG', 'B11_AVG', 'B12_AVG',
#     'cs_AVG', 'cs_cdf_AVG',
#     'Ground_Measurements_time_diff_(seconds)', 'Lake_Height_(m)', 
#     'PercentFull_Active_Lake_Storage_(%)', 'Snow_Volume_Opuha_Catchment_(mm)',
#     'WDir(Deg)', 'WSpd(m/s)', 'GustDir(Deg)', 'GustSpd(m/s)', 'WindRun(Km)', 
#     'Rain(mm)', 'Tdry(C)', 'TWet(C)', 'RH(%)', 'Tmax(C)', 'Tmin(C)', 
#     'Pmsl(hPa)', 'Pstn(hPa)', 'Water_Temp_Buoy_(degC)', 'Water_Temp_Platform_(degC)'
# ]

features = [
    'B2_AVG', 'B3_AVG', 'B4_AVG', 'B8_AVG', 'B8A_AVG', 'B11_AVG', 'B12_AVG',
    'cs_AVG', 'cs_cdf_AVG',
    'Ground_Measurements_time_diff_(seconds)', 'Lake_Height_(m)', 
    'PercentFull_Active_Lake_Storage_(%)', 'Snow_Volume_Opuha_Catchment_(mm)',
    'WDir(Deg)', 'WSpd(m/s)', 'GustDir(Deg)', 'GustSpd(m/s)', 'WindRun(Km)', 
    'Rain(mm)', 'Tdry(C)', 'TWet(C)', 'RH(%)', 'Tmax(C)', 'Tmin(C)', 
    'Pmsl(hPa)', 'Pstn(hPa)', 'Water_Temp_Buoy_(degC)', 'Water_Temp_Platform_(degC)'
]



X = data[features]
y = data[target]

# Normalise the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled = pd.DataFrame(X_scaled, columns=features)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define the neural network model
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=1)

# Make predictions
y_pred_nn = model.predict(X_test)

# Evaluate the model
mse_nn = mean_squared_error(y_test, y_pred_nn)
r2_nn = r2_score(y_test, y_pred_nn)

print(f"Neural Network Mean Squared Error: {mse_nn}")
print(f"Neural Network R-squared: {r2_nn}")


KeyError: 'timestamp_sentinel2'