In [53]:
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

In [54]:
# Read the simulated NZ ocean-current survey CSV and preview its first rows.
CSV_PATH = 'simulated_nz_ocean_current.csv'
data = pd.read_csv(CSV_PATH)
data.head()

Unnamed: 0,SurveyId,Region,SurveyDate,SurveyAreaLatitudeStart,SurveyAreaLongitudeStart,Rubbish_Count,count_same,date average,Current_Speed,Current_Direction,Water_Temperature
0,1,Northland,09/10/2023,-34.334773,173.398059,92,112,0.922314,0.549671,150.267961,15.026004
1,2,Gisborne,12/08/2021,-34.052894,166.825654,101,112,1.49798,0.486174,79.958812,17.907068
2,3,Wellington,08/27/2019,-46.273675,166.478684,80,112,1.549685,0.564769,43.151532,14.470686
3,4,Northland,03/04/2023,-37.418538,167.740078,87,112,1.986591,0.652303,121.541462,20.440338
4,5,Gisborne,03/19/2021,-39.903095,166.177736,112,112,2.133945,0.476585,339.447493,16.251335


In [55]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 11 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   SurveyId                  100 non-null    int64  
 1   Region                    100 non-null    object 
 2   SurveyDate                100 non-null    object 
 3   SurveyAreaLatitudeStart   100 non-null    float64
 4   SurveyAreaLongitudeStart  100 non-null    float64
 5   Rubbish_Count             100 non-null    int64  
 6   count_same                100 non-null    int64  
 7   date average              100 non-null    float64
 8   Current_Speed             100 non-null    float64
 9   Current_Direction         100 non-null    float64
 10  Water_Temperature         100 non-null    float64
dtypes: float64(6), int64(3), object(2)
memory usage: 8.7+ KB


In [56]:
# Encode the categorical 'Region' column as integer codes, overwriting the column in `data`.
# The fitted encoder is kept in `le` so the codes can be mapped back to region names.
# NOTE(review): the codes are arbitrary integers, so a model may read them as an
# ordering between regions — confirm this is intended (one-hot encoding avoids it).
le = LabelEncoder()
data['Region'] = le.fit_transform(data['Region'])

In [57]:
# Remove the columns that are not used as predictors: the survey identifier and
# date, the 'date average' and 'count_same' columns, and the start coordinates.
# Only columns that are still present are dropped, so re-running this cell after
# `data` has already been trimmed does not raise a KeyError.
COLUMNS_TO_DROP = [
    'SurveyId',
    'SurveyDate',
    'date average',
    'count_same',
    'SurveyAreaLatitudeStart',
    'SurveyAreaLongitudeStart',
]
data = data.drop(columns=[col for col in COLUMNS_TO_DROP if col in data.columns])

# Predictors are every remaining column except the target; the target is the rubbish count.
X = data.drop('Rubbish_Count', axis=1)
y = data['Rubbish_Count']


In [58]:
# Check which predictor columns remain in the feature matrix and their dtypes.
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Region             100 non-null    int32  
 1   Current_Speed      100 non-null    float64
 2   Current_Direction  100 non-null    float64
 3   Water_Temperature  100 non-null    float64
dtypes: float64(3), int32(1)
memory usage: 2.9 KB


In [59]:
# Check the target series (Rubbish_Count): non-null count and dtype.
y.info()

<class 'pandas.core.series.Series'>
RangeIndex: 100 entries, 0 to 99
Series name: Rubbish_Count
Non-Null Count  Dtype
--------------  -----
100 non-null    int64
dtypes: int64(1)
memory usage: 928.0 bytes


In [60]:
# Hold out 15% of the rows as a test set; the fixed random_state keeps the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)

# Standardise the features. The scaling statistics are learned from the training
# rows only and then applied unchanged to both partitions.
scaler = StandardScaler().fit(xtrain)
X_train_scaled = scaler.transform(xtrain)
X_test_scaled = scaler.transform(xtest)

In [61]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch shuffling
# give the same result on every Restart & Run All.
tf.keras.utils.set_random_seed(42)

MAX_EPOCHS = 5000          # upper bound only; early stopping normally ends training far sooner
EARLY_STOP_PATIENCE = 50   # epochs without val_loss improvement before training stops

# Fully connected regressor: six ReLU Dense layers of decreasing width, each
# followed by BatchNormalization, then a single linear output unit.
# NOTE(review): this is a very large network for the 85 training rows left after
# the 15% hold-out of a 100-row dataset — consider a much smaller model.
model = Sequential()
# Declare the input shape explicitly instead of the deprecated `input_dim` kwarg.
model.add(tf.keras.Input(shape=(X_train_scaled.shape[1],)))
for units in (512, 256, 128, 64, 32, 16):
    model.add(Dense(units, activation='relu'))
    model.add(BatchNormalization())
model.add(Dense(1, activation='linear'))

# MSE is already reported as the loss, so track MAE as the metric instead of
# repeating it; MAE is in the same units as Rubbish_Count.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

# Stop once the validation loss stops improving and roll back to the best weights,
# so the cell finishes on its own instead of needing a manual interrupt (which
# would also skip the evaluation below).
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=EARLY_STOP_PATIENCE,
    restore_best_weights=True,
)

# validation_split=0.2 holds out the last 20% of the training rows for validation.
history = model.fit(
    X_train_scaled,
    ytrain,
    epochs=MAX_EPOCHS,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Final score on the held-out test set: [loss (MSE), MAE].
model.evaluate(X_test_scaled, ytest)



Epoch 1/5000
Epoch 2/5000
Epoch 3/5000
Epoch 4/5000
Epoch 5/5000
Epoch 6/5000
Epoch 7/5000
Epoch 8/5000
Epoch 9/5000
Epoch 10/5000
Epoch 11/5000
Epoch 12/5000
Epoch 13/5000
Epoch 14/5000
Epoch 15/5000
Epoch 16/5000
Epoch 17/5000
Epoch 18/5000
Epoch 19/5000
Epoch 20/5000
Epoch 21/5000
Epoch 22/5000
Epoch 23/5000
Epoch 24/5000
Epoch 25/5000
Epoch 26/5000
Epoch 27/5000
Epoch 28/5000
Epoch 29/5000
Epoch 30/5000
Epoch 31/5000
Epoch 32/5000
Epoch 33/5000
Epoch 34/5000
Epoch 35/5000
Epoch 36/5000
Epoch 37/5000
Epoch 38/5000
Epoch 39/5000
Epoch 40/5000
Epoch 41/5000
Epoch 42/5000
Epoch 43/5000
Epoch 44/5000
Epoch 45/5000
Epoch 46/5000
Epoch 47/5000
Epoch 48/5000
Epoch 49/5000
Epoch 50/5000
Epoch 51/5000
Epoch 52/5000
Epoch 53/5000
Epoch 54/5000
Epoch 55/5000
Epoch 56/5000
Epoch 57/5000
Epoch 58/5000
Epoch 59/5000
Epoch 60/5000
Epoch 61/5000
Epoch 62/5000
Epoch 63/5000
Epoch 64/5000
Epoch 65/5000
Epoch 66/5000
Epoch 67/5000
Epoch 68/5000
Epoch 69/5000
Epoch 70/5000
Epoch 71/5000
Epoch 72/5000
E

KeyboardInterrupt: 