# Main Notebook

In [86]:
import os
import gc
import pytz
import operator
import numpy as np
import pickle as pkl
from time import sleep
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing
import tensorflow as tf
from tensorflow import math
import keras
from keras import layers
from keras.models import Sequential
from keras.activations import relu, elu
from keras.layers import Dense, Dropout
from talos.model import early_stopper
from talos.utils.best_model import activate_model
from talos import Evaluate
import talos

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings(action = 'ignore', category = FutureWarning)
warnings.filterwarnings(action = 'ignore', category = DeprecationWarning)

import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

import sys
sys.path.append('..')

time = datetime.now(pytz.timezone('Europe/Oslo')).strftime('%m.%d.%Y_%H.%M.%S')
print(f'Notebook initialized execution at {time}.')
#import xgboost as xgb

Notebook initialized execution at 03.11.2020_13.50.54.


## General Methods

In [4]:
def memory_optimization(dfs):
    for df in dfs:
        del df
    gc.collect()

## Prepare Data

In [5]:
import datasets

start_time = datetime.now()

scaler = MinMaxScaler()

train_x, train_y, validation_x, validation_y, test_x, test_y, scaler = datasets.load(f'../input/finn_Trondheim.csv', scaler)

print(train_x)

      Felleskost/mnd.  Soverom  Primærrom  Bruksareal    Etasje   Byggeår  \
593          0.000000      0.5   0.571429    0.517391  0.222222  0.846626   
891          0.083292      0.1   0.081633    0.069565  0.333333  0.711656   
737          0.163917      0.2   0.181122    0.158696  0.611111  0.938650   
679          0.173833      0.4   0.426020    0.384783  0.222222  0.957055   
775          0.155792      0.2   0.183673    0.156522  0.333333  0.993865   
...               ...      ...        ...         ...       ...       ...   
835          0.156125      0.1   0.132653    0.113043  0.333333  0.484663   
1216         0.000000      0.4   0.635204    0.584783  0.222222  0.595092   
1653         0.188708      0.3   0.224490    0.200000  0.277778  0.981595   
559          0.252542      0.3   0.242347    0.206522  0.277778  0.773006   
684          0.117833      0.2   0.154337    0.131522  0.388889  0.993865   

           Rom  parkering  fiber  kabel-tv  tg 0  tg 1  tg 2  vedovn  \
593

# Optimization Parameters

In [6]:
parameters = {'activation_1':['relu', 'elu'],
     'activation_2':['relu', 'elu'],
     'activation_3':['relu', 'elu'],
     'optimizer': ['Adam', "RMSprop"],
     'loss-functions': ['mse'],
     'neurons_HL1': [50, 100, 200, 400],
     'neurons_HL2': [40, 80, 160, 320],
     'neurons_HL3': [40, 80, 160, 320, None],
     'dropout1': [0.1, 0.2, 0.3],
     'dropout2': [0.1, 0.2, 0.3],
     'batch_size': [100, 250, 500],
     'epochs': [400, 900]
}

In [132]:
def talolos(x_train, y_train, x_val, y_val, parameters):
    model = Sequential()

    model.add(Dense(parameters['neurons_HL1'], 
    input_shape=(train_x.shape[1],), 
    activation=parameters['activation_1'],use_bias=True))

    model.add(Dropout(parameters['dropout1']))

    model.add(Dense(parameters['neurons_HL2'], 
    activation=parameters['activation_2'], use_bias=True))

    model.add(Dropout(parameters['dropout1']))
    
    if parameters['neurons_HL3']:
        model.add(Dense(parameters['neurons_HL3'], 
        activation=parameters['activation_3'], use_bias=True))


    model.add(Dense(1, activation='relu'))

    model.compile(optimizer=parameters['optimizer'], loss=parameters['loss-functions'], 
    metrics=['mse', 'mae'])

    history = model.fit(x_train, y_train,
            batch_size=parameters['batch_size'],epochs=parameters['epochs'],
            verbose=0,validation_data=[x_val, y_val],
            callbacks=[early_stopper(epochs=parameters['epochs'], 
            mode='moderate',monitor='val_loss', patience=25)])
    
    return history, model

## Specify model

In [None]:
t = talos.Scan(x=np.array(train_x),
               y=np.array(train_y),
               x_val=np.array(validation_x),
               y_val=np.array(validation_y),
               model=talolos,
               params=parameters,
               experiment_name='oloo',
               round_limit=50)

 44%|████▍     | 22/50 [02:20<03:07,  6.68s/it]

In [104]:
def inverse_transform(scaler, value):
    mat = np.zeros((1, scaler.scale_.shape[0]))
    mat[0, 0] = value
    return scaler.inverse_transform(mat)[:,0]

for index in t.data.index:
    t.data.at[index, 'real_error'] = inverse_transform(scaler, t.data.at[index, 'mse'])

In [105]:

t.data.sort_values(by=['real_error'])

Unnamed: 0,round_epochs,val_loss,val_mse,val_mae,loss,mse,mae,activation_1,activation_2,activation_3,batch_size,dropout1,dropout2,epochs,loss-functions,neurons_HL1,neurons_HL2,neurons_HL3,optimizer,real_error
3,533,0.003128,0.003128,0.035466,0.000873,0.000873,0.02027,relu,relu,elu,500,0.1,0.3,900,mse,50,80,,Adam,1056239.0
9,618,0.003794,0.003794,0.042056,0.00131,0.00131,0.026641,elu,elu,relu,250,0.2,0.1,900,mse,200,40,320.0,RMSprop,1069002.0
7,238,0.003265,0.003265,0.036682,0.001649,0.001649,0.028618,relu,relu,relu,500,0.1,0.1,400,mse,50,80,80.0,RMSprop,1078917.0
1,106,0.003213,0.003213,0.035683,0.00189,0.00189,0.031368,relu,elu,relu,250,0.2,0.1,400,mse,200,40,80.0,Adam,1085941.0
4,151,0.005641,0.005641,0.053841,0.002864,0.002864,0.037099,relu,relu,relu,500,0.1,0.1,900,mse,400,320,80.0,RMSprop,1114414.0
0,141,0.004392,0.004392,0.040457,0.004903,0.004903,0.051422,relu,relu,elu,500,0.2,0.1,400,mse,400,40,40.0,RMSprop,1173970.0
2,91,0.039013,0.039013,0.169144,0.048708,0.048708,0.180886,elu,relu,elu,500,0.1,0.2,900,mse,100,320,,Adam,2453889.0
6,91,0.039013,0.039013,0.169144,0.048708,0.048708,0.180886,elu,relu,elu,500,0.3,0.1,900,mse,200,80,320.0,Adam,2453889.0
5,91,0.039013,0.039013,0.169144,0.048708,0.048708,0.180886,elu,elu,elu,500,0.2,0.2,900,mse,100,160,320.0,RMSprop,2453889.0
8,41,0.039013,0.039013,0.169144,0.048708,0.048708,0.180886,relu,elu,elu,250,0.1,0.3,400,mse,200,160,320.0,RMSprop,2453889.0


## Evaluate


In [131]:
r = talos.Reporting('oloo/031120112150.csv')
r.data
p = talos.Predict(t)
model = t.best_model(metric='round_epochs')
e = Evaluate(t)
results = e.evaluate(np.array(test_x), np.array(test_y), task='continuous',folds=10, metric='loss')
t.data['test_mae'] = np.array([inverse_transform(scaler,result) for result in results])
t.data['test_mae']

0    6.853775e+06
1    5.395104e+06
2    5.970299e+06
3    6.875108e+06
4    5.295096e+06
5    6.666209e+06
6    6.699915e+06
7    6.082677e+06
8    6.598261e+06
9    6.203466e+06
Name: test_mae, dtype: float64

## Results

In [None]:
results = model.evaluate(validation_x, validation_y, batch_size=64)
print(results.shape)

In [None]:
print(results*train_y_scale)

ValueError: setting an array element with a sequence.