<a href="https://colab.research.google.com/github/vernonconnelldavies/20025299_DataAnalytics/blob/main/DNN_python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [43]:
# needed to create the data frame
import pandas as pd

# needed to help with speedy maths based calculations
import numpy as np

# Load the prepared DNN input table straight from the CSV hosted in the project's
# GitHub repository; the first CSV column becomes the row index.
url = "https://raw.githubusercontent.com/vernonconnelldavies/20025299_DataAnalytics/refs/heads/main/data_input_into_DNN.csv"
df = pd.read_csv(filepath_or_buffer=url, index_col=0)

In [44]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

# Record which TensorFlow build produced the outputs in this notebook.
print(tf.version.VERSION)

2.17.1


In [45]:
# Sanity check: preview the first five rows of the loaded table.
df.head()

Unnamed: 0,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,...,Mon,Sat,Sun,Thu,Tue,Wed,year,temp,dewp,NUM_COLLISIONS
1,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,2013,37.8,23.6,381
2,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,2013,27.1,10.5,480
3,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,2013,28.4,14.1,549
4,0,0,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,2013,33.4,18.6,505
5,0,0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,0,2013,36.1,18.7,389


In [46]:
# Columns handed to the DNN, in the order the model will see them:
# year, the two weather readings, the one-hot day-of-week and month flags,
# and finally the target (NUM_COLLISIONS).
headers = [
    "year", "temp", "dewp",
    "Sat", "Sun", "Mon", "Tue", "Wed", "Thu", "Fri",
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    "NUM_COLLISIONS",
]

# Build the list of Series from `headers` instead of typing the column names a
# second time, so the selected data and its labels can never drift out of sync.
dnn_input_data = [df[column] for column in headers]
df_dnn_input = pd.concat(dnn_input_data, axis=1, keys=headers)
df_dnn_input.head()

Unnamed: 0,year,temp,dewp,Sat,Sun,Mon,Tue,Wed,Thu,Fri,...,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,NUM_COLLISIONS
1,2013,37.8,23.6,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,381
2,2013,27.1,10.5,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,480
3,2013,28.4,14.1,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,549
4,2013,33.4,18.6,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,505
5,2013,36.1,18.7,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,389


In [47]:
# Randomly assign 80% of the rows to training (fixed random_state so the split is
# repeatable); every row whose index label was not sampled becomes test data.
training_dataset = df_dnn_input.sample(frac=0.8, random_state=0)
test_dataset = df_dnn_input.drop(index=training_dataset.index)

In [48]:
# Separate the target column from the model inputs. The split frames themselves
# (training_dataset / test_dataset) are left untouched.
training_labels = training_dataset['NUM_COLLISIONS'].copy()
test_labels = test_dataset['NUM_COLLISIONS'].copy()

training_features = training_dataset.drop(columns='NUM_COLLISIONS')
test_features = test_dataset.drop(columns='NUM_COLLISIONS')

In [49]:
# Divisor applied to NUM_COLLISIONS before training, and multiplied back onto the
# model's predictions. Scaling is not strictly required here, but keeping it as a
# named constant will be useful in the assignment.
SCALE_NUM_COLLISIONS = 1_000

In [50]:
# Scale the targets down by SCALE_NUM_COLLISIONS.
# The labels are derived from the untouched split frames rather than from
# training_labels/test_labels themselves, so re-running this cell always yields
# the same values instead of dividing the labels a second time.
training_labels = training_dataset['NUM_COLLISIONS'] / SCALE_NUM_COLLISIONS
test_labels = test_dataset['NUM_COLLISIONS'] / SCALE_NUM_COLLISIONS

In [51]:
# Adapt a normalisation layer (over the last axis, i.e. per feature column) to the
# training inputs only; the test rows are not used here.
feature_matrix = np.array(training_features)
normaliser = layers.Normalization(axis=-1)
normaliser.adapt(feature_matrix)

In [52]:
# Seed Python, NumPy and TensorFlow before any weights are created so that weight
# initialisation and the shuffling done during training are repeatable between
# runs (the data split above is already fixed with random_state=0).
tf.keras.utils.set_random_seed(0)

# This is the only difference from the single-layer model: the normalisation layer
# (22 inputs) feeds two hidden Dense layers of 48 ReLU units each, followed by a
# single-unit output. The 48 can be adjusted to improve the net.
dnn_model_1 = keras.Sequential([
    normaliser,
    layers.Dense(48, activation='relu'),
    layers.Dense(48, activation='relu'),
    layers.Dense(1),
])

# Train against mean absolute error with Adam at a learning rate of 0.001.
dnn_model_1.compile(
    loss='mean_absolute_error',
    optimizer=tf.keras.optimizers.Adam(0.001),
)

In [53]:
%%time
history = dnn_model_1.fit(
    training_features,
    training_labels,
    validation_split=0.2,
    verbose=0,
    epochs=100)

CPU times: user 17 s, sys: 814 ms, total: 17.8 s
Wall time: 17 s


In [54]:
# Loss on the held-out test rows: mean absolute error, expressed in units of
# SCALE_NUM_COLLISIONS collisions because the labels were scaled before training.
# We want to minimise this - the model with the lowest value is the best.
dnn_model_1_results = dnn_model_1.evaluate(x=test_features, y=test_labels, verbose=0)
print(dnn_model_1_results)

0.060171935707330704


In [55]:
# Three hand-built days to score with the model. Column names and their order
# must match the feature columns of the dataframe used for training.
n_rows = 3
calendar_columns = [
    'Sat', 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri',
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]

# Start every day-of-week / month flag at 0, then switch on the ones that apply.
calendar_flags = {column: [0] * n_rows for column in calendar_columns}
calendar_flags['Tue'] = [0, 1, 1]
calendar_flags['Thu'] = [1, 0, 0]
calendar_flags['Jan'] = [1, 0, 1]
calendar_flags['Feb'] = [0, 1, 0]

input_1 = pd.DataFrame({
    'year': [2020] * n_rows,
    'temp': [59.3, 32.6, 39.6],
    'dewp': [48.1, 10.9, 22.4],
    **calendar_flags,
})


In [56]:
# Score the hand-built rows with dnn_model_1 (despite the `linear_` prefix in the
# variable name), then multiply by SCALE_NUM_COLLISIONS to turn the scaled outputs
# back into collision counts.
scaled_predictions = dnn_model_1.predict(input_1[:3])
linear_day_predictions = scaled_predictions * SCALE_NUM_COLLISIONS
linear_day_predictions

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step


array([[582.0173 ],
       [536.7206 ],
       [498.93988]], dtype=float32)