In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.datasets import boston_housing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras.utils import to_categorical
import tensorflow_addons as tfa

from sklearn.metrics import r2_score

In [2]:
# Unpack the train and test splits (features, target) returned by boston_housing.load_data().
(X_train, y_train), (X_test, y_test) = boston_housing.load_data()

In [3]:
# Display the dimensions of the training feature matrix (rows, columns).
X_train.shape

(404, 13)

- CRIM     per capita crime rate by town
- ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
- INDUS    proportion of non-retail business acres per town
- CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
- NOX      nitric oxides concentration (parts per 10 million)
- RM       average number of rooms per dwelling
- AGE      proportion of owner-occupied units built prior to 1940
- DIS      weighted distances to five Boston employment centres
- RAD      index of accessibility to radial highways
- TAX      full-value property-tax rate per $10,000
- PTRATIO  pupil-teacher ratio by town
- B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
- LSTAT    % lower status of the population
- MEDV     Median value of owner-occupied homes in $1000's

In [4]:
# Scratch note: the built-in activation functions live under tf.keras.activations

In [5]:
def _build_regressor(n_features, shape, activation, output_activation):
    '''Build an unfitted dense network with a single-unit output layer.'''
    model = Sequential()
    model.add(Dense(shape[0], activation=activation, input_shape=(n_features,)))
    # Hidden layers come from shape[1:-1]; shape[-1] itself is not read because
    # the output layer is always a single unit.
    for units in shape[1:-1]:
        model.add(Dense(units, activation=activation))
    model.add(Dense(1, activation=output_activation))
    return model


def _fit_and_score(model, X_train, y_train, X_eval, y_eval, optimizer, loss,
                   epochs, batch_size, validation_split, diverged_score):
    '''Compile and fit ``model``, then return its R2 on the evaluation data.'''
    model.compile(optimizer=optimizer, loss=loss, metrics=[loss])
    model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=0,
    )
    y_pred = model.predict(X_eval).flatten()
    # A diverged network can emit NaN *or* +/-inf. Checking only the sum for NaN
    # lets infinite predictions through to r2_score, so test every element.
    if not np.all(np.isfinite(y_pred)):
        return diverged_score
    return r2_score(y_eval, y_pred)


def fit_nn(X_train, y_train, cv=5, activators=('relu',), losses=('mae',), optimizers=('Adam',),
           shapes=((13, 6, 1),), X_eval=None, y_eval=None, epochs=200, batch_size=20,
           validation_split=0.2, output_activation=None, diverged_score=-10):
    '''
    Train a dense regression network for every combination of optimizer,
    activation, loss and layer layout, and summarise the R2 score of each one.

    Input:
        X_train, y_train - training features (2-D, one column per feature) and targets
        cv - number of independent fits per configuration; must be >= 1
        activators - iterable of activation names
        losses - iterable of loss names (each is also reported as the training metric)
        optimizers - iterable of optimizer names
        shapes - iterable of layer layouts; shape[0] is the width of the first layer,
            shape[1:-1] are the following hidden layers; the last entry is not read
            because the output layer always has exactly one unit
        X_eval, y_eval - data used to compute R2; must be given together. When both are
            omitted, the module-level X_test / y_test are used (the original behaviour)
        epochs, batch_size, validation_split - forwarded to model.fit
        output_activation - activation of the output layer; None reuses the hidden-layer
            activation (the original behaviour). Pass 'linear' for an unbounded output
        diverged_score - score recorded for a run whose predictions are not all finite
    Output:
        global_result - one row per configuration, in the order
            optimizer -> activation -> loss -> shape:
            [[optimizer, activation, loss, shape, (R2_mean, R2_std)]]
    Raises:
        ValueError - if cv < 1 or only one of X_eval / y_eval is supplied
    '''
    from itertools import product

    if cv < 1:
        raise ValueError(f'cv must be >= 1, got {cv}')
    if (X_eval is None) != (y_eval is None):
        raise ValueError('X_eval and y_eval must be provided together')
    if X_eval is None:
        # Backward-compatible fallback for callers that rely on the notebook globals.
        X_eval, y_eval = X_test, y_test

    # Derive the input width from the data instead of hard-coding 13 features.
    n_features = int(X_train.shape[1])

    global_result = []
    # product() iterates in the same order as the equivalent nested loops.
    for optimizer, activation, loss, shape in product(optimizers, activators, losses, shapes):
        print(optimizer, activation, loss, shape)
        final_activation = activation if output_activation is None else output_activation
        scores = []
        for _ in range(cv):
            # Many models are created in this loop; drop Keras' accumulated global
            # state before each one so memory use does not keep growing.
            tf.keras.backend.clear_session()
            model_reg = _build_regressor(n_features, shape, activation, final_activation)
            scores.append(
                _fit_and_score(
                    model_reg, X_train, y_train, X_eval, y_eval, optimizer, loss,
                    epochs, batch_size, validation_split, diverged_score,
                )
            )
        global_result.append(
            [optimizer, activation, loss, list(shape), (np.mean(scores), np.std(scores))]
        )
    return global_result


In [6]:
# Fit the normalisation statistics on the training split only, then apply the
# same transform to both splits.
# NOTE: this rebinds X_train / X_test to the normalised values, so re-running
# the cell normalises data that has already been normalised.
normalizer = Normalization()
normalizer.adapt(X_train)
X_train, X_test = normalizer(X_train), normalizer(X_test)


In [7]:
# Baseline network: 13 -> 6 -> 1 dense units, ReLU on every layer (output included).
model_reg = Sequential()
model_reg.add(Dense(13, activation='relu', input_shape=(13,)))
model_reg.add(Dense(6, activation='relu'))
model_reg.add(Dense(1, activation='relu'))

In [8]:
# Train with SGD on mean squared error; MSE is also tracked as the metric.
model_reg.compile(
    optimizer='SGD',
    loss='mse',
    metrics=['mse'],
)

In [9]:
# Print the layer-by-layer architecture and parameter counts of the baseline model.
model_reg.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 13)                182       
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 84        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 7         
Total params: 273
Trainable params: 273
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Quick smoke-test fit: a single epoch, mini-batches of 20, validation_split of 0.2.
model_reg.fit(
    X_train,
    y_train,
    epochs=1,
    batch_size=20,
    validation_split=0.2,
)



<tensorflow.python.keras.callbacks.History at 0x1ed29032d30>

In [11]:
# Predict with the baseline model on the test features.
y_pred = model_reg.predict(X_test)

In [12]:
# Collapse the predictions to a 1-D array before scoring them against y_test.
y_pred = y_pred.flatten()

In [13]:
# Divergence check: the sum is NaN as soon as any single prediction is NaN.
np.isnan(y_pred.sum())

False

In [14]:
from sklearn.metrics import r2_score

In [15]:
# R2 of the baseline model (trained for one epoch only) on the test split.
r2_score(y_test, y_pred)

-0.41795448539080104

In [16]:
%%time
models_result = fit_nn(
    X_train, 
    y_train,
    cv=5,
    activators=['relu', 'selu', 'linear'], 
    optimizers=['Adam', 'RMSprop', 'SGD'],
    losses=['mae', 'mse'],
    shapes=[[13,6,1], [50,30,1],[13,8,5,3,1]],
    )

Adam relu mae [13, 6, 1]
Adam relu mae [50, 30, 1]
Adam relu mae [13, 8, 5, 3, 1]
Adam relu mse [13, 6, 1]
Adam relu mse [50, 30, 1]
Adam relu mse [13, 8, 5, 3, 1]
Adam selu mae [13, 6, 1]
Adam selu mae [50, 30, 1]
Adam selu mae [13, 8, 5, 3, 1]
Adam selu mse [13, 6, 1]
Adam selu mse [50, 30, 1]
Adam selu mse [13, 8, 5, 3, 1]
Adam linear mae [13, 6, 1]
Adam linear mae [50, 30, 1]
Adam linear mae [13, 8, 5, 3, 1]
Adam linear mse [13, 6, 1]
Adam linear mse [50, 30, 1]
Adam linear mse [13, 8, 5, 3, 1]
RMSprop relu mae [13, 6, 1]
RMSprop relu mae [50, 30, 1]
RMSprop relu mae [13, 8, 5, 3, 1]
RMSprop relu mse [13, 6, 1]
RMSprop relu mse [50, 30, 1]
RMSprop relu mse [13, 8, 5, 3, 1]
RMSprop selu mae [13, 6, 1]
RMSprop selu mae [50, 30, 1]
RMSprop selu mae [13, 8, 5, 3, 1]
RMSprop selu mse [13, 6, 1]
RMSprop selu mse [50, 30, 1]
RMSprop selu mse [13, 8, 5, 3, 1]
RMSprop linear mae [13, 6, 1]
RMSprop linear mae [50, 30, 1]
RMSprop linear mae [13, 8, 5, 3, 1]
RMSprop linear mse [13, 6, 1]
RMSpr

In [19]:
# Tabulate the grid-search rows and rank them by the (R2 mean, R2 std) tuple,
# best first; tuples compare element-wise, so the mean decides the order.
columns = ['optimizer', 'activation', 'loss', 'layers_shape', 'R2_score, R2_std']
(
    pd.DataFrame(models_result, columns=columns)
    .sort_values(by='R2_score, R2_std', ascending=False)
)

Unnamed: 0,optimizer,activation,loss,layers_shape,"R2_score, R2_std"
24,RMSprop,selu,mae,"[13, 6, 1]","(0.7971420146721825, 0.023071709866071273)"
18,RMSprop,relu,mae,"[13, 6, 1]","(0.7905222860665662, 0.00666282304786063)"
46,SGD,selu,mse,"[50, 30, 1]","(0.7889368367335818, 0.04818958757485916)"
0,Adam,relu,mae,"[13, 6, 1]","(0.7873722926195287, 0.018833544419624967)"
49,SGD,linear,mae,"[50, 30, 1]","(0.7810966945571922, 0.00767961795034878)"
26,RMSprop,selu,mae,"[13, 8, 5, 3, 1]","(0.7775766646973096, 0.0491364572776173)"
14,Adam,linear,mae,"[13, 8, 5, 3, 1]","(0.7714796512416612, 0.0037570931662666562)"
30,RMSprop,linear,mae,"[13, 6, 1]","(0.7710631248033433, 0.01189962406137452)"
31,RMSprop,linear,mae,"[50, 30, 1]","(0.7688738094850047, 0.013713705773164435)"
32,RMSprop,linear,mae,"[13, 8, 5, 3, 1]","(0.7687810269385315, 0.010028637921486842)"


Достаточно разные сети хорошо себя показывают: разные оптимизаторы, активаторы, функции потерь и даже весьма разные конфигурации слоёв дают хорошие результаты. Поэтому сложно сказать про какую-то одну структуру, что она работает лучше остальных. Некоторые структуры могут быть неплохи, но периодически расходятся. Если сеть расходилась (в предсказаниях появлялся NaN), то за этот запуск она получала R2 = -10, и её средний R2 становился отрицательным. По какой-то причине SGD + linear + mse вообще не справился с задачей: не сошёлся ни разу. И в принципе SGD проявил себя хуже всех — чаще всего расходился. Также мы не использовали регуляризацию, и наши модели всегда обучались ровно 200 эпох. Мы никак не застраховали себя от недообучения и переобучения, так что имеем сферического коня в вакууме.

Кроме tensorflow.keras.layers.experimental.preprocessing.Normalization ничего не нашёл, да и странно искать то, что пока не нужно. Просмотрел многие функции и классы, но даже разобраться, что они делают, достаточно сложно, не понимая всей структуры TensorFlow.