In [1]:
# Зависимости
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

from sklearn.neural_network import MLPRegressor, MLPClassifier

from sklearn.metrics import mean_squared_error, f1_score

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout

Using TensorFlow backend.


In [2]:
# Инициализируем все известные генераторы случайных чисел / Setting all known random seeds
my_code = "Johnson"
seed_limit = 2 ** 32
my_seed = int.from_bytes(my_code.encode(), "little") % seed_limit

os.environ['PYTHONHASHSEED']=str(my_seed)

random.seed(my_seed)

np.random.seed(my_seed)

tf.compat.v1.set_random_seed(my_seed)

session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)

In [3]:
# Читаем данные из файла
example_data = pd.read_csv("datasets/Fish.csv")

In [4]:
example_data.head()

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
0,Bream,242.0,23.2,25.4,30.0,11.52,4.02
1,Bream,290.0,24.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,Bream,363.0,26.3,29.0,33.5,12.73,4.4555
4,Bream,430.0,26.5,29.0,34.0,12.444,5.134


In [5]:
# Определим размер валидационной и тестовой выборок
val_test_size = round(0.2*len(example_data))
print(val_test_size)

32


In [6]:
# Создадим обучающую, валидационную и тестовую выборки
random_state = my_seed
train_val, test = train_test_split(example_data, test_size=val_test_size, random_state=random_state)
train, val = train_test_split(train_val, test_size=val_test_size, random_state=random_state)
print(len(train), len(val), len(test))

95 32 32


In [7]:
# Значения в числовых столбцах преобразуем к отрезку [0,1].
# Для настройки скалировщика используем только обучающую выборку.
num_columns = ['Weight', 'Length1', 'Length2', 'Length3', 'Height', 'Width']
ord_columns = ['Species']

ct = ColumnTransformer(transformers=[
    ('numerical', MinMaxScaler(), num_columns), 
    ('ordinal', OneHotEncoder(), ord_columns)])

ct.fit(train)

ColumnTransformer(transformers=[('numerical', MinMaxScaler(),
                                 ['Weight', 'Length1', 'Length2', 'Length3',
                                  'Height', 'Width']),
                                ('ordinal', OneHotEncoder(), ['Species'])])

In [8]:
# Преобразуем значения, тип данных приводим к DataFrame
sc_train = pd.DataFrame(ct.transform(train))
sc_test = pd.DataFrame(ct.transform(test))
sc_val = pd.DataFrame(ct.transform(val))

In [11]:
sc_train

Unnamed: 0,Weight,Length1,Length2,Length3,Height,Width,0,1,2,3,4,5,6
0,0.378960,0.482474,0.486434,0.539855,0.806948,0.575975,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.282354,0.670103,0.660853,0.664855,0.321822,0.461618,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,0.121761,0.463918,0.463178,0.471014,0.222393,0.328146,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,0.114234,0.311340,0.313953,0.315217,0.290077,0.439530,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.623612,0.614433,0.612403,0.628623,0.616510,0.772074,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,0.008782,0.129897,0.127907,0.134058,0.069310,0.117219,0.0,0.0,0.0,0.0,0.0,1.0,0.0
91,0.074713,0.237113,0.244186,0.248188,0.229623,0.369291,0.0,0.0,1.0,0.0,0.0,0.0,0.0
92,0.084123,0.278351,0.273256,0.293478,0.279425,0.321014,0.0,0.0,0.0,0.0,1.0,0.0,0.0
93,0.003952,0.082474,0.073643,0.083333,0.020420,0.048771,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [10]:
# Устанавливаем названия столбцов
column_names = num_columns + list(range(7))
sc_train.columns = column_names
sc_test.columns = column_names
sc_val.columns = column_names

In [12]:
# В качестве входных параметров используем первые 5 числовых параметров,
# в качестве выходного - шестой числовой параметр.
x_labels = num_columns[:-1]
y_labels = num_columns[-1]
print(x_labels)
print(y_labels)

['Weight', 'Length1', 'Length2', 'Length3', 'Height']
Width


In [13]:
# Отберем необходимые параметры
x_train = sc_train[x_labels]
x_test = sc_test[x_labels]
x_val = sc_val[x_labels]

y_train = sc_train[y_labels]
y_test = sc_test[y_labels]
y_val = sc_val[y_labels]

In [14]:
# Создадим нейроннную сеть для решения задачи регрессии на базе библиотеки sklearn
reg = MLPRegressor(alpha=0.0, batch_size=16, epsilon=1e-07, max_iter=50)
reg.get_params()

{'activation': 'relu',
 'alpha': 0.0,
 'batch_size': 16,
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-07,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 50,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [15]:
# Обучим нейронную сеть
reg.fit(x_train, y_train)

MLPRegressor(alpha=0.0, batch_size=16, epsilon=1e-07, max_iter=50)

In [16]:
# Проверим работу обученной нейронной сети на валидационной выборке
pred_val = reg.predict(x_val)
mse1 = mean_squared_error(y_val, pred_val)
print(mse1)

0.0028190107394401795


In [17]:
# Создадим нейронную сеть на базе библиотеки keras
model = Sequential()

model.add(Dense(100, input_dim=5, activation='relu', use_bias=False))

model.add(Dense(1, activation='linear', use_bias=False))

model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               500       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 100       
Total params: 600
Trainable params: 600
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Обучим нейронную сеть
history = model.fit(x_train, y_train, validation_data = (x_val, y_val), epochs=50, batch_size=16, verbose=0)