# Main Notebook

In [1]:
import os
import gc
import pytz
import operator
import numpy as np
import pickle as pkl
from time import sleep
from datetime import datetime
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import tensorflow as tf
from tensorflow import math
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.activations import relu, elu
from tensorflow.keras.layers import Dense, Dropout
from talos.model import early_stopper
import talos
from talos.utils.gpu_utils import parallel_gpu_jobs

import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings(action = 'ignore', category = FutureWarning)
warnings.filterwarnings(action = 'ignore', category = DeprecationWarning)

import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

import sys
sys.path.append('..')

time = datetime.now(pytz.timezone('Europe/Oslo')).strftime('%m.%d.%Y_%H.%M.%S')
print(f'Notebook initialized execution at {time}.')
#import xgboost as xgb

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


Notebook initialized execution at 03.18.2020_14.01.09.


## General Methods

In [2]:
def memory_optimization(dfs):
    for df in dfs:
        del df
    gc.collect()

## Prepare Data

In [3]:
import datasets

start_time = datetime.now()

scaler = MinMaxScaler()

train_x, train_y, validation_x, validation_y, test_x, test_y, scaler = datasets.load(f'../input/hele_norge.csv', scaler)

print('Train', train_x.shape)
print('validate', validation_x.shape)
print('Test', test_x.shape)
print(train_x)

Train (10802, 107)
validate (2315, 107)
Test (2315, 107)
        soverom  primaerrom  bruksareal  byggeaar       rom  parkering  fiber  \
9259   0.002132    0.001878    0.245726  0.843243  0.002132        1.0    0.0   
10670  0.000426    0.000150    0.013355  0.778378  0.000000        0.0    0.0   
6042   0.001706    0.000939    0.091346  0.778378  0.001706        1.0    0.0   
7539   0.001279    0.000820    0.073184  0.975676  0.001279        1.0    0.0   
6411   0.001279    0.000628    0.119124  0.854054  0.001279        1.0    0.0   
...         ...         ...         ...       ...       ...        ...    ...   
13123  0.001706    0.003081    0.357906  0.786486  0.001706        1.0    0.0   
3264   0.000426    0.000951    0.088141  0.872973  0.000426        1.0    0.0   
9845   0.001706    0.001202    0.111111  0.964865  0.002132        0.0    0.0   
10799  0.000426    0.000329    0.029380  0.821622  0.000426        1.0    0.0   
2732   0.001279    0.000700    0.070513  0.964865  0

# Optimization Parameters

In [4]:
parameters = {'activation_1':['relu', 'elu'],
     'activation_2':['relu', 'elu'],
     'activation_3':['relu', 'elu'],
     'optimizer': ['Adam', "RMSprop"],
     'loss-functions': ['mse'],
     'neurons_HL1': [50, 100, 200, 400],
     'neurons_HL2': [40, 80, 160, 320],
     'neurons_HL3': [40, 80, 160, 320, None],
     'dropout1': [0.1, 0.2, 0.3],
     'dropout2': [0.1, 0.2, 0.3],
     'batch_size': [100, 250, 500],
     'epochs': [400, 900]
}

In [5]:
def talolos(x_train, y_train, x_val, y_val, parameters):
    model = Sequential()

    model.add(Dense(parameters['neurons_HL1'], 
    input_shape=(train_x.shape[1],), 
    activation=parameters['activation_1'],use_bias=True))

    model.add(Dropout(parameters['dropout1']))

    model.add(Dense(parameters['neurons_HL2'], 
    activation=parameters['activation_2'], use_bias=True))

    model.add(Dropout(parameters['dropout1']))
    
    if parameters['neurons_HL3']:
        model.add(Dense(parameters['neurons_HL3'], 
        activation=parameters['activation_3'], use_bias=True))


    model.add(Dense(1, activation='relu'))

    model.compile(optimizer=parameters['optimizer'], loss=parameters['loss-functions'], 
    metrics=['mse', 'mae'])

    history = model.fit(x_train, y_train,
            batch_size=parameters['batch_size'],epochs=parameters['epochs'],
            verbose=0,validation_data=[x_val, y_val],
            callbacks=[early_stopper(epochs=parameters['epochs'], 
            mode='moderate',monitor='val_loss')])
    
    return history, model

## Specify model

In [6]:
t = talos.Scan(x=np.array(train_x),
               y=np.array(train_y),
               x_val=np.array(validation_x),
               y_val=np.array(validation_y),
               model=talolos,
               params=parameters,
               experiment_name='oloo',
               round_limit=50)

  0%|                                                                                                                                                                                                                                                                                                                                      | 0/50 [00:00<?, ?it/s]

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [14:47<00:00, 17.75s/it]


In [7]:
for index in t.data.index:
    mat = np.zeros((1, 117))
    mat[0, 0] = t.data.at[index, 'mean_absolute_error']
    t.data.at[index, 'real_error'] = scaler.inverse_transform(mat)[:,0]

ValueError: operands could not be broadcast together with shapes (1,117) (108,) (1,117) 

In [None]:
print(t.data)

## Train model

In [None]:
print('# Fit model on training data')
history = model.fit(train_x, train_y,
                    batch_size=64,
                    epochs=3,
                    validation_data=(validation_x, validation_y))

print('\nhistory dict:', history.history)

In [None]:
print(t.data)

## Results

In [None]:
results = model.evaluate(validation_x, validation_y, batch_size=64)
print(results.shape)

In [None]:
print(results*train_y_scale)