In [1]:
%fs
ls FileStore/tables

path,name,size
dbfs:/FileStore/tables/c99temp_train_pseudo_snappy-76e66.parquet,c99temp_train_pseudo_snappy-76e66.parquet,37151588
dbfs:/FileStore/tables/c99temp_valid_pseudo_snappy-29080.parquet,c99temp_valid_pseudo_snappy-29080.parquet,13570913
dbfs:/FileStore/tables/trainingset.csv,trainingset.csv,0


In [2]:
import numpy as np  
import matplotlib.pyplot as plt  
import pandas as pd

In [3]:
# Load the training set that another notebook prepared and stored on DBFS.
# The file is semicolon-separated; only the first 50,000 rows are read.

training_complete = pd.read_csv(
    r'/dbfs/FileStore/tables/trainingset.csv',
    delimiter=';',
    nrows=50000,
)

In [4]:
# Randomly permute the rows of the training frame.
# A fixed random_state makes the permutation identical on every
# "Restart & Run All", so everything derived from the row order is reproducible.
from sklearn.utils import shuffle
training_complete = shuffle(training_complete, random_state=42)

In [5]:
# Preview the raw 'X' column of the (shuffled) training frame.
training_complete.loc[:, 'X']

In [6]:
# Parse the raw feature strings and get the global min/max temperature
# values for normalization.
#
# Each entry of training_complete['X'] is treated as text of the form
# "[v1, v2, ...]": the first and last characters are dropped and the rest is
# split on commas. `features` collects one list of value strings per row;
# the strings are converted with float() wherever numbers are needed.

# Start from +/- infinity so the bounds are correct for any value range
# (fixed start values of 999 and 0 break for all-negative readings or
# readings above 999).
temp_min = float('inf')
temp_max = float('-inf')
features = []

for line in training_complete['X']:
  # Split on ',' and strip, so both "1.0, 2.0" and "1.0,2.0" are accepted.
  # The name `list` is deliberately avoided: rebinding it would shadow the
  # builtin for the rest of the kernel session.
  row_values = [token.strip() for token in line[1:-1].split(',')]
  features.append(row_values)
  for token in row_values:
    reading = float(token)
    if reading > temp_max:
      temp_max = reading
    if reading < temp_min:
      temp_min = reading

# Without any rows the bounds above are still +/- infinity and unusable.
if not features:
  raise ValueError("training_complete['X'] is empty; cannot derive the temperature range")

In [7]:
# Show the lower bound that the feature normalization will use.
temp_min

In [8]:
# Show the upper bound that the feature normalization will use.
temp_max

In [9]:
# Min-max normalization of the features:
#   normalized = (value - temp_min) / (temp_max - temp_min)
#
# NOTE: this overwrites `features`. Re-running this cell without re-running
# the parsing cell first would normalize already-normalized values.

temp_diff = (temp_max - temp_min)

# If every reading is identical the range is 0; divide by 1 instead so all
# values become 0.0 rather than raising ZeroDivisionError.
temp_scale = temp_diff if temp_diff != 0 else 1.0

features = [
  [(float(raw) - temp_min) / temp_scale for raw in row]
  for row in features
]

# Show only the first rows; displaying every sample floods the output.
features[:5]

In [10]:
# Build the label list: every entry of the 'Y' column converted to int.

labels = [int(raw_label) for raw_label in training_complete['Y']]

labels

In [11]:
# Get min/max remaining-lifetime values for normalization.
#
# The bounds are taken directly from the data. Starting from fixed values
# (999 for the minimum, 0 for the maximum) gives wrong bounds whenever every
# label is above 999 or below 0.
# min()/max() raise ValueError if `labels` is empty.

days_min = min(labels)
days_max = max(labels)

In [12]:
# Show the lower bound that the label normalization will use.
days_min

In [13]:
# Show the upper bound that the label normalization will use.
days_max

In [14]:
# Min-max normalization of the labels:
#   normalized = (label - days_min) / (days_max - days_min)
#
# NOTE: this overwrites `labels`. Re-running this cell without re-reading
# the labels first would normalize already-normalized values.

days_diff = (days_max - days_min)

# If every label is identical the range is 0; divide by 1 instead so all
# labels become 0.0 rather than raising ZeroDivisionError.
days_scale = days_diff if days_diff != 0 else 1

labels = [(label - days_min) / days_scale for label in labels]

# Show only the first entries; displaying every label floods the output.
labels[:5]

In [15]:
# Convert features and labels to NumPy arrays for training.

features = np.array(features)
labels = np.array(labels)

# Same data with a trailing axis of length 1 appended, i.e. shape
# (features.shape[0], features.shape[1], 1); this is what the LSTM is fed.
ltsm_features = features.reshape((features.shape[0], features.shape[1], 1))

In [16]:
from tensorflow.keras.models import Sequential  
from tensorflow.keras.layers import Dense  
from tensorflow.keras.layers import LSTM  
from tensorflow.keras.layers import Dropout 
from tensorflow.keras.layers import BatchNormalization

In [17]:
# LSTM model

model = Sequential() 

model.add(LSTM(units=50, return_sequences=True, input_shape=(features.shape[1], 1))) 
model.add(Dropout(0.2))

model.add(LSTM(units=50, return_sequences=True))  
model.add(Dropout(0.2))

model.add(LSTM(units=50, return_sequences=True))  
model.add(Dropout(0.2))

model.add(LSTM(units=50))  
model.add(Dropout(0.2))

model.add(Dense(units = 1)) 

model.compile(optimizer = 'adam', loss = 'mean_squared_error')  

In [18]:
# MLP model

model = Sequential()

model.add(BatchNormalization(input_shape=tuple([features.shape[1]])))
model.add(Dense(20, activation='relu'))
model.add(Dropout(rate=0.5))

model.add(BatchNormalization())
model.add(Dense(20, activation='relu'))
model.add(Dropout(rate=0.5))

model.add(BatchNormalization())
model.add(Dense(20, activation='relu'))
model.add(Dropout(rate=0.5))

model.add(Dense(1, activation='sigmoid'))
model.compile(loss="mean_squared_error", optimizer="adam",metrics=["accuracy"])

In [19]:
# fit lstm

model.fit(x=ltsm_features, y=labels, epochs=20, batch_size=256, verbose=2)

In [20]:
# fit mlp

model.fit(x=features, y=labels, epochs=20, batch_size=1024, verbose=2)

In [21]:
model.save('/dbfs/FileStore/tables/model.h5')