# Тема: TensorFlow

In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model

warnings.simplefilter("ignore")

### 1. Постройте нейронную сеть (берем несложную полносвязную сеть, меняем число слоев, число нейронов, типы активации, тип оптимизатора) на датасете from sklearn.datasets import load_boston.

In [2]:
# Load the Boston housing data: features go to X, the target to y,
# and `data` holds both side by side for inspection.
# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases -- confirm the pinned version before re-running.
boston = load_boston()
X = pd.DataFrame(data=boston['data'], columns=boston['feature_names'])
y = pd.DataFrame(data=boston['target'], columns=['target'])
data = pd.concat((X, y), axis='columns')
data

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0


In [3]:
# Feature scaling: every column of X is divided by its own maximum,
# and the min/max before and after are printed for each feature.
# NOTE(review): the maxima are computed on the full dataset, i.e. before the
# train/test split performed in a later cell -- confirm this is intended.
colunms = X.columns.to_list()

for column in colunms:
    before_min, before_max = X[column].min(), X[column].max()
    print(f'{"*" * 7} {column}')
    print(f'До: min={before_min}, max={before_max}')
    X[column] = X[column] / before_max
    print(f'После: min={X[column].min()}, max={X[column].max()}')

******* CRIM
До: min=0.00632, max=88.9762
После: min=7.103023055603633e-05, max=1.0
******* ZN
До: min=0.0, max=100.0
После: min=0.0, max=1.0
******* INDUS
До: min=0.46, max=27.74
После: min=0.016582552271088683, max=1.0
******* CHAS
До: min=0.0, max=1.0
После: min=0.0, max=1.0
******* NOX
До: min=0.385, max=0.871
После: min=0.44202066590126293, max=1.0
******* RM
До: min=3.561, max=8.78
После: min=0.40558086560364465, max=1.0
******* AGE
До: min=2.9, max=100.0
После: min=0.028999999999999998, max=1.0
******* DIS
До: min=1.1296, max=12.1265
После: min=0.09315136271801426, max=1.0
******* RAD
До: min=1.0, max=24.0
После: min=0.041666666666666664, max=1.0
******* TAX
До: min=187.0, max=711.0
После: min=0.2630098452883263, max=1.0
******* PTRATIO
До: min=12.6, max=22.0
После: min=0.5727272727272728, max=1.0
******* B
До: min=0.32, max=396.9
После: min=0.0008062484252960444, max=1.0
******* LSTAT
До: min=1.73, max=37.97
После: min=0.04556228601527522, max=1.0


In [4]:
# Hold out 30% of the rows for testing. The random_state is fixed so that the
# split (and every metric computed from it below) is reproducible after
# Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((354, 13), (152, 13), (354, 1), (152, 1))

### 2. Измените функцию потерь и метрику для этой задачи. Постройте 10-15 вариантов и сведите результаты их работы в таблицу. Опишите, какого результата вы добились от нейросети? Что помогло вам улучшить ее точность?

In [6]:
# Result table: one row per trained configuration, appended by the experiment loops.
stats = pd.DataFrame(columns=['Слои', 'Нейроны', 'Оптимизатор', 'Train', 'Test'])

# Lowest test MAE seen so far. Starting from +inf (instead of a magic 100)
# guarantees that the first evaluated model is always saved as the best one,
# whatever the scale of the target is.
best_metric = float('inf')

In [7]:
def model_two_layers(neuron, optimizer):
    """Build, compile and train a network with a single hidden layer.

    Args:
        neuron: number of units in the hidden ReLU layer.
        optimizer: optimizer forwarded to ``Model.compile`` (the notebook
            passes Keras optimizer names such as ``'Adam'``).

    Returns:
        The Keras ``Model`` after fitting on the notebook-level
        ``X_train`` / ``y_train`` (100 epochs, batch size 10, MAE loss).
    """
    # ``shape`` has to be a tuple -- ``(13)`` is just the integer 13.
    # The width is read from the training data instead of being hard-coded.
    inputs = Input(shape=(X_train.shape[1],))
    hidden = Dense(neuron, activation='relu')(inputs)
    # Single unit without activation: a plain regression output.
    outputs = Dense(1)(hidden)

    model = Model(inputs, outputs)
    model.compile(optimizer=optimizer, loss='mae', metrics=['mae'])

    model.fit(X_train, y_train, epochs=100, batch_size=10, verbose=0)

    return model

In [8]:
# Grid for the one-hidden-layer network: hidden width x optimizer.
neurons = [64, 128, 256]
optimizers = ['SGD', 'RMSprop', 'Adam']

# Saving the checkpoint fails if the target directory is missing.
os.makedirs('models', exist_ok=True)

for neuron in neurons:
    for optimizer in optimizers:

        model2 = model_two_layers(neuron, optimizer)

        # evaluate() returns [loss, metric]; both are MAE for these models.
        loss_train, metric_train = model2.evaluate(X_train, y_train, batch_size=10)
        loss_test, metric_test = model2.evaluate(X_test, y_test, batch_size=10)

        # Keep on disk only the model with the lowest test MAE so far.
        # NOTE(review): the best model is chosen by its test-set score, so the
        # reported test MAE of that model is optimistic -- consider a validation split.
        if metric_test < best_metric:
            best_metric = metric_test
            model2.save('models/03_dz_best_model.h5')

        new_row = {
            'Слои': '2',
            'Нейроны': neuron,
            'Оптимизатор': optimizer,
            'Train': round(metric_train, 4),
            'Test': round(metric_test, 4),
        }

        # DataFrame.append no longer exists in pandas 2.x; pd.concat is the replacement.
        stats = pd.concat([stats, pd.DataFrame([new_row])], ignore_index=True)



In [9]:
def model_three_layers(neuron, optimizer):
    """Build, compile and train a network with two hidden layers.

    Args:
        neuron: sequence whose first two items are the unit counts of the
            first and second hidden ReLU layers.
        optimizer: optimizer forwarded to ``Model.compile`` (the notebook
            passes Keras optimizer names such as ``'Adam'``).

    Returns:
        The Keras ``Model`` after fitting on the notebook-level
        ``X_train`` / ``y_train`` (100 epochs, batch size 10, MAE loss).
    """
    # ``shape`` has to be a tuple -- ``(13)`` is just the integer 13.
    # The width is read from the training data instead of being hard-coded.
    inputs = Input(shape=(X_train.shape[1],))
    hidden_1 = Dense(neuron[0], activation='relu')(inputs)
    hidden_2 = Dense(neuron[1], activation='relu')(hidden_1)
    # Single unit without activation: a plain regression output.
    outputs = Dense(1)(hidden_2)

    model = Model(inputs, outputs)
    model.compile(optimizer=optimizer, loss='mae', metrics=['mae'])

    model.fit(X_train, y_train, epochs=100, batch_size=10, verbose=0)

    return model

In [10]:
# Grid for the two-hidden-layer network: [first width, second width] x optimizer.
neurons = [[64, 16], [128, 32], [256, 64]]
optimizers = ['SGD', 'RMSprop', 'Adam']

# Saving the checkpoint fails if the target directory is missing.
os.makedirs('models', exist_ok=True)

for neuron in neurons:
    for optimizer in optimizers:

        model3 = model_three_layers(neuron, optimizer)

        # evaluate() returns [loss, metric]; both are MAE for these models.
        loss_train, metric_train = model3.evaluate(X_train, y_train, batch_size=10)
        loss_test, metric_test = model3.evaluate(X_test, y_test, batch_size=10)

        # Keep on disk only the model with the lowest test MAE so far.
        # NOTE(review): the best model is chosen by its test-set score, so the
        # reported test MAE of that model is optimistic -- consider a validation split.
        if metric_test < best_metric:
            best_metric = metric_test
            model3.save('models/03_dz_best_model.h5')

        new_row = {
            'Слои': '3',
            'Нейроны': neuron,
            'Оптимизатор': optimizer,
            'Train': round(metric_train, 4),
            'Test': round(metric_test, 4),
        }

        # DataFrame.append no longer exists in pandas 2.x; pd.concat is the replacement.
        stats = pd.concat([stats, pd.DataFrame([new_row])], ignore_index=True)



In [11]:
def model_five_layers(neuron, optimizer):
    """Build, compile and train a network with four hidden layers.

    Args:
        neuron: sequence whose first four items are the unit counts of the
            hidden ReLU layers, in order from input to output.
        optimizer: optimizer forwarded to ``Model.compile`` (the notebook
            passes Keras optimizer names such as ``'Adam'``).

    Returns:
        The Keras ``Model`` after fitting on the notebook-level
        ``X_train`` / ``y_train`` (100 epochs, batch size 10, MAE loss).
    """
    # ``shape`` has to be a tuple -- ``(13)`` is just the integer 13.
    # The width is read from the training data instead of being hard-coded.
    inputs = Input(shape=(X_train.shape[1],))
    hidden_1 = Dense(neuron[0], activation='relu')(inputs)
    hidden_2 = Dense(neuron[1], activation='relu')(hidden_1)
    hidden_3 = Dense(neuron[2], activation='relu')(hidden_2)
    hidden_4 = Dense(neuron[3], activation='relu')(hidden_3)
    # Single unit without activation: a plain regression output.
    outputs = Dense(1)(hidden_4)

    model = Model(inputs, outputs)
    model.compile(optimizer=optimizer, loss='mae', metrics=['mae'])

    model.fit(X_train, y_train, epochs=100, batch_size=10, verbose=0)

    return model

In [12]:
# Grid for the four-hidden-layer network: list of hidden widths x optimizer.
neurons = [[64, 32, 32, 16], [128, 64, 32, 16], [256, 128, 64, 32]]
optimizers = ['SGD', 'RMSprop', 'Adam']

# Saving the checkpoint fails if the target directory is missing.
os.makedirs('models', exist_ok=True)

for neuron in neurons:
    for optimizer in optimizers:

        model5 = model_five_layers(neuron, optimizer)

        # evaluate() returns [loss, metric]; both are MAE for these models.
        loss_train, metric_train = model5.evaluate(X_train, y_train, batch_size=10)
        loss_test, metric_test = model5.evaluate(X_test, y_test, batch_size=10)

        # Keep on disk only the model with the lowest test MAE so far.
        # NOTE(review): the best model is chosen by its test-set score, so the
        # reported test MAE of that model is optimistic -- consider a validation split.
        if metric_test < best_metric:
            best_metric = metric_test
            model5.save('models/03_dz_best_model.h5')

        new_row = {
            'Слои': '5',
            'Нейроны': neuron,
            'Оптимизатор': optimizer,
            'Train': round(metric_train, 4),
            'Test': round(metric_test, 4),
        }

        # DataFrame.append no longer exists in pandas 2.x; pd.concat is the replacement.
        stats = pd.concat([stats, pd.DataFrame([new_row])], ignore_index=True)



In [13]:
# Display the table of train/test MAE collected by the experiment loops above.
stats

Unnamed: 0,Слои,Нейроны,Оптимизатор,Train,Test
0,2,64,SGD,2.993,3.3394
1,2,64,RMSprop,3.6678,3.9828
2,2,64,Adam,3.6142,3.9101
3,2,128,SGD,3.024,3.4041
4,2,128,RMSprop,3.357,3.7052
5,2,128,Adam,3.355,3.6671
6,2,256,SGD,2.9922,3.2092
7,2,256,RMSprop,3.2147,3.4406
8,2,256,Adam,3.0807,3.3948
9,3,"[64, 16]",SGD,2.5457,2.6413


In [14]:
# The multi-layer rows store the layer widths as lists, which cannot be used
# as group keys, so the column is converted to strings first.
# The builtin ``str`` is used because the ``np.str`` alias has been removed from NumPy.
stats['Нейроны'] = stats['Нейроны'].astype(str)

# Mean train/test MAE per layer-width configuration, best test score first.
stats_neurons = (
    stats[['Нейроны', 'Train', 'Test']]
    .groupby(['Нейроны'], as_index=False)
    .mean()
    .sort_values('Test')
)
stats_neurons

Unnamed: 0,Нейроны,Train,Test
6,"[256, 64]",2.3512,2.526167
8,"[64, 32, 32, 16]",2.379033,2.526333
3,"[128, 32]",2.494233,2.676767
4,"[128, 64, 32, 16]",2.5076,2.818667
7,"[64, 16]",2.7297,2.956867
5,"[256, 128, 64, 32]",2.6289,3.048267
1,256,3.095867,3.3482
0,128,3.245333,3.592133
2,64,3.425,3.7441


**Вывод:**
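- в сетях с одним скрытым слоем увеличение числа нейронов улучшает качество: средняя MAE на тесте снижается с 3.74 (64 нейрона) до 3.59 (128) и 3.35 (256)
- в более глубоких сетях увеличение числа нейронов не гарантирует улучшения качества модели: например, конфигурация [64, 32, 32, 16] (MAE 2.53) оказалась лучше, чем [256, 128, 64, 32] (MAE 3.05)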

In [15]:
# Mean train/test MAE per network depth, best test score first.
stats_layers = (
    stats[['Слои', 'Train', 'Test']]
    .groupby(['Слои'], as_index=False)
    .mean()
    .sort_values('Test')
)
stats_layers

Unnamed: 0,Слои,Train,Test
1,3,2.525044,2.719933
2,5,2.505178,2.797756
0,2,3.2554,3.561478


**Вывод:**
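- переход от 2 к 3 слоям заметно улучшает качество (средняя MAE на тесте снижается с 3.56 до 2.72); дальнейшее увеличение до 5 слоев улучшения на тесте не дает (MAE 2.80)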

In [16]:
# Mean train/test MAE per optimizer, best test score first.
stats_optimizers = (
    stats[['Оптимизатор', 'Train', 'Test']]
    .groupby(['Оптимизатор'], as_index=False)
    .mean()
    .sort_values('Test')
)
stats_optimizers

Unnamed: 0,Оптимизатор,Train,Test
0,Adam,2.649344,2.961556
2,SGD,2.827289,3.018611
1,RMSprop,2.808989,3.099


**Вывод:**
- лучший средний результат на тесте показал оптимизатор Adam (MAE ≈ 2.96), однако отрыв от SGD (≈ 3.02) и RMSprop (≈ 3.10) невелик

In [17]:
# Reload the checkpoint written by the experiment loops (the model with the
# lowest test MAE among all configurations) and print its architecture.
best_model = load_model('models/03_dz_best_model.h5')
best_model.summary()

Model: "model_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_19 (InputLayer)        [(None, 13)]              0         
_________________________________________________________________
dense_45 (Dense)             (None, 64)                896       
_________________________________________________________________
dense_46 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_47 (Dense)             (None, 32)                1056      
_________________________________________________________________
dense_48 (Dense)             (None, 16)                528       
_________________________________________________________________
dense_49 (Dense)             (None, 1)                 17        
Total params: 4,577
Trainable params: 4,577
Non-trainable params: 0
________________________________________________________

In [18]:
# Predict for every row of the scaled feature matrix X and store the result
# next to the true target for a side-by-side comparison.
# NOTE(review): X contains both the training and the test rows, so this column
# is not an out-of-sample estimate of the model's quality.
data['pred'] = best_model.predict(X)
data

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target,pred
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,24.0,31.829748
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,21.6,22.949078
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,33.668953
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,32.514633
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,36.2,30.242500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4,22.573614
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,20.6,21.598684
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,23.9,27.288023
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0,25.052336
