In [89]:
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

In [90]:
data = pd.read_csv('simulated_nz_ocean_current_with_regions.csv')
data.head()

Unnamed: 0,SurveyId,Region,SurveyDate,SurveyAreaLatitudeStart,SurveyAreaLongitudeStart,Rubbish_Count,count_same,date average,Current_Speed,Current_Direction,Water_Temperature
0,1,Upper West Coast,05/22/2023,-37.55229,174.371686,114.507119,112,3.370723,0.549671,150.267961,17.013002
1,2,East Coast,01/24/2018,-40.802044,170.064949,133.914511,112,4.979226,0.486174,79.958812,17.453534
2,3,Upper East Coast,09/20/2020,-42.327318,170.038232,239.062053,112,2.805245,0.564769,43.151532,17.735343
3,4,Upper East Coast,07/03/2023,-45.374311,167.444834,169.7572,112,2.379577,0.652303,121.541462,20.720169
4,5,West Coast,02/23/2018,-39.411788,175.926444,38.818991,112,1.50605,0.476585,339.447493,14.625667


In [91]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 11 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   SurveyId                  100 non-null    int64  
 1   Region                    100 non-null    object 
 2   SurveyDate                100 non-null    object 
 3   SurveyAreaLatitudeStart   100 non-null    float64
 4   SurveyAreaLongitudeStart  100 non-null    float64
 5   Rubbish_Count             100 non-null    float64
 6   count_same                100 non-null    int64  
 7   date average              100 non-null    float64
 8   Current_Speed             100 non-null    float64
 9   Current_Direction         100 non-null    float64
 10  Water_Temperature         100 non-null    float64
dtypes: float64(7), int64(2), object(2)
memory usage: 8.7+ KB


In [92]:
#encode the behaviour column
le = LabelEncoder()
data['Region'] = le.fit_transform(data['Region'])

In [93]:
# Drop the 'SurveyId' and 'SurveyDate' columns
data = data.drop(columns=['SurveyId', 'SurveyDate', 'date average', 'count_same', 'SurveyAreaLatitudeStart', 'SurveyAreaLongitudeStart'])
X = data.drop('Rubbish_Count', axis=1)
y = data['Rubbish_Count']


In [94]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Region             100 non-null    int32  
 1   Current_Speed      100 non-null    float64
 2   Current_Direction  100 non-null    float64
 3   Water_Temperature  100 non-null    float64
dtypes: float64(3), int32(1)
memory usage: 2.9 KB


In [95]:
y.info()

<class 'pandas.core.series.Series'>
RangeIndex: 100 entries, 0 to 99
Series name: Rubbish_Count
Non-Null Count  Dtype  
--------------  -----  
100 non-null    float64
dtypes: float64(1)
memory usage: 928.0 bytes


In [96]:
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.15, random_state=42)
# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(xtrain)
X_test_scaled = scaler.transform(xtest)

In [98]:
model = Sequential()
model.add(Dense(256, input_dim=X_train_scaled.shape[1], activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(32, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(1, activation='linear'))

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])

model.fit(X_train_scaled, ytrain, epochs=2000, batch_size=32, validation_split=0.2)

model.evaluate(X_test_scaled, ytest)



Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

[361.7874450683594, 361.7874450683594]

In [99]:
model.save('model.h5')