# 1. Deep Learning

## 1.1 Preparing the notebook

Press *play* in the following cell to import the datasets from the GitHub repository.

In [None]:
! git clone https://github.com/vitoreno/StelleDataset.git
! unzip /content/StelleDataset/data.zip

Press *play* in the following cell to import the libraries needed to run the notebook.

In [3]:
%load_ext google.colab.data_table
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
from datetime import datetime
from sklearn.metrics import mean_squared_error
from keras.layers import Dense, Activation
from keras.models import Sequential
from sklearn.preprocessing import StandardScaler

## 1.2 Neural Network

We want to predict the soil moisture values on the coast from the sea temperatures at the nearest points, using a neural network.

The architecture of the network is very simple: it consists of two hidden layers with 8 and 16 neurons, respectively. The network is trained with a batch size of 100 for 5 epochs.

Select the regions to train on, and the regions to evaluate on a given date. Possible dates range from 2016-01-01 to 2016-12-31.

Finally press *play* to start the cell execution.

In [None]:
#@markdown Training
train_Adriatic = True #@param {type:"boolean"}
train_Ionian = True #@param {type:"boolean"}
train_Tyrrhenian = False #@param {type:"boolean"}
train_Labrador = False #@param {type:"boolean"}
train_Red = False #@param {type:"boolean"}
#@markdown Test
date_str = '2016-07-01' #@param {type:"date"}
test_Adriatic = False #@param {type:"boolean"}
test_Ionian = False #@param {type:"boolean"}
test_Tyrrhenian = True #@param {type:"boolean"}
test_Labrador = False #@param {type:"boolean"}
test_Red = False #@param {type:"boolean"}

# Pair every sea name with its checkbox, one table for training and one for
# test. Each train_* flag feeds only the training selection and each test_*
# flag only the test selection (in particular, train_Red drives "Red" in
# train_list and test_Red drives "Red" in test_list).
train_flags = {
  "Adriatic": train_Adriatic,
  "Ionian": train_Ionian,
  "Tyrrhenian": train_Tyrrhenian,
  "Labrador": train_Labrador,
  "Red": train_Red,
}
test_flags = {
  "Adriatic": test_Adriatic,
  "Ionian": test_Ionian,
  "Tyrrhenian": test_Tyrrhenian,
  "Labrador": test_Labrador,
  "Red": test_Red,
}

# Names of the ticked seas, in the order the checkboxes appear in the form.
train_list = [sea for sea, selected in train_flags.items() if selected]
test_list = [sea for sea, selected in test_flags.items() if selected]

# The selected day is anchored at 12:00:00; the rows of the dataset are later
# matched against this exact timestamp (presumably the dataset holds one noon
# observation per day -- confirm against the CSV).
timestamp_format = '%Y-%m-%d %H:%M:%S'
current_date = datetime.strptime(date_str + " 12:00:00", timestamp_format)

# Accept only dates inside the year covered by the dataset (bounds included).
first_valid_date = datetime.strptime("2016-01-01 12:00:00", timestamp_format)
last_valid_date = datetime.strptime("2016-12-31 12:00:00", timestamp_format)
if not (first_valid_date <= current_date <= last_valid_date):
  # Message (Italian): "Invalid date. Enter a date between 2016-01-01 and 2016-12-31"
  sys.exit("Data non valida. Inserire data compresa fra 2016-01-01 e 2016-12-31")

# Load the 2016 dataset and parse the `time` column so it can be compared
# with the datetime selected in the form.
data = pd.read_csv("/content/soil_moisture_2016.csv")
data.time = pd.to_datetime(data.time)

# Training uses every row of the selected seas; the test set is restricted
# to the selected seas at the chosen timestamp only.
train_data = data.loc[data['sea'].isin(train_list)]
test_data = data.loc[((data['sea'].isin(test_list)) & (data['time'] == current_date))]

# Fail early with an explicit message: with an empty selection the scaler
# below would otherwise stop with a generic "0 sample(s)" error.
if train_data.empty:
  raise ValueError("No training samples: select at least one training sea that is present in the dataset.")
if test_data.empty:
  raise ValueError("No test samples: select at least one test sea with observations at " + str(current_date) + ".")

# `sst` is the single input feature (reshaped to one column, as the scaler and
# the network expect 2-D input); `sm` is the regression target.
train_sst = train_data.sst.to_numpy().reshape(-1, 1)
train_sm = train_data.sm.to_numpy()
test_sst = test_data.sst.to_numpy().reshape(-1, 1)
test_sm = test_data.sm.to_numpy()

# Feature scaling: mean and variance are estimated on the training data only
# and then re-used unchanged on the test data.
sc = StandardScaler()
train_sst = sc.fit_transform(train_sst)
test_sst = sc.transform(test_sst)

# Model: 1 input feature -> 8 ReLU units -> 16 ReLU units -> 1 output unit
# (the last layer is given no activation argument).
model = Sequential([
  Dense(8, activation='relu', input_dim=1),
  Dense(16, activation='relu'),
  Dense(1),
])

# Adam optimizer minimising the mean squared error between target and output.
model.compile(loss='mean_squared_error', optimizer='adam')

# Training
model.fit(train_sst, train_sm, epochs=5, batch_size=100)

# Flatten the network output to 1-D so it lines up element-wise with test_sm.
prediction = model.predict(test_sst).reshape(-1)

# One dashed vertical segment per test point, joining the observed value to
# the predicted one so the size of each error is visible at a glance.
for sst_value, observed, predicted in zip(test_sst.reshape(-1), test_sm, prediction):
  plt.plot([sst_value, sst_value], [observed, predicted], '--b')
plt.scatter(test_sst, test_sm, color='black', label='Observation')
plt.scatter(test_sst, prediction, color='blue', label='Prediction')
# NOTE: test_sst holds the standardized temperatures produced by the scaler.
plt.xlabel('Temperature')
# The plotted target is `sm` (soil moisture), not albedo.
plt.ylabel('Soil moisture')
plt.legend()
plt.show()

# Per-sample table with the absolute error, plus the overall MSE on the test set.
results = pd.DataFrame({"Observed soil moisture": test_sm, "Predicted soil moisture": prediction, "Error": np.abs(test_sm - prediction)})
print("Mean Squared Error: ", mean_squared_error(test_sm, prediction))
results