# Задача:
ДЗ - обучить при помощи tf (keras) API любое ДЗ по обучению классических моделей. 
Можно сдавать на торче.

In [158]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option('display.notebook_repr_html', True)
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 20)
pd.set_option('display.width', 80)

In [182]:
# Прочитаем файл данных
df = pd.read_csv("./winequality-white.csv", encoding='ANSI', on_bad_lines = 'skip',  sep=';')
df

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,6.3,0.30,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,8.1,0.28,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...
4893,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6
4894,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5
4895,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6
4896,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7


In [183]:
# Разделение на признаки и целевую переменную
X = df.drop('quality', axis=1)
y = df['quality']

In [184]:
# Преобразование целевой переменной в one-hot encoding
from tensorflow.keras.utils import to_categorical
y = y - y.min()
num_classes = y.max() - y.min() + 1
y = to_categorical(y, num_classes=num_classes)


In [185]:
# Разделение данных на обучающую, тестовую и валидационную выборки
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, test_size=0.2, random_state=42)

# Масштабирование данных
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)


In [186]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Dense, Dropout
# Создание модели для задачи регрессии
model = Sequential()
model.add(Dense(1400, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.1))
model.add(Dense(140, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.1))
model.add(Dense(num_classes, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.1))
model.add(Dense(1)) 

In [187]:
import tensorflow as tf
# Обучение модели
opt = tf.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_valid, y_valid))

Epoch 1/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 1.8799 - val_loss: 0.2592
Epoch 2/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2061 - val_loss: 0.1460
Epoch 3/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1416 - val_loss: 0.1307
Epoch 4/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1292 - val_loss: 0.1251
Epoch 5/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1246 - val_loss: 0.1232
Epoch 6/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1231 - val_loss: 0.1227
Epoch 7/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1227 - val_loss: 0.1225
Epoch 8/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1225 - val_loss: 0.1225
Epoch 9/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x1c71169c800>

In [188]:
# Оценка модели на тестовых данных
loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1225 
Test Loss: 0.12245023995637894


In [201]:
# Создание модели для задачи классификации
model_2 = Sequential()
model_2.add(Dense(4898, activation='relu', kernel_regularizer=l2(0.01)))
model_2.add(Dropout(0.2))
model_2.add(Dense(70, activation='relu', kernel_regularizer=l2(0.01)))
model_2.add(Dropout(0.2))
model_2.add(Dense(num_classes, activation='softmax'))

In [203]:
# Обучение модели
from tensorflow.keras.optimizers import Adam
model_2.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
model_2.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/50
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5312 - loss: 1.1676 - val_accuracy: 0.5286 - val_loss: 1.1502
Epoch 2/50
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5234 - loss: 1.1638 - val_accuracy: 0.5224 - val_loss: 1.1582
Epoch 3/50
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5334 - loss: 1.1532 - val_accuracy: 0.5245 - val_loss: 1.1411
Epoch 4/50
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5355 - loss: 1.1513 - val_accuracy: 0.5122 - val_loss: 1.1564
Epoch 5/50
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5582 - loss: 1.1380 - val_accuracy: 0.5327 - val_loss: 1.1401
Epoch 6/50
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5459 - loss: 1.1375 - val_accuracy: 0.5265 - val_loss: 1.1422
Epoch 7/50
[1m111/111[0m 

<keras.src.callbacks.history.History at 0x1c7ab0783b0>

In [204]:
# Оценка модели на тестовых данных
loss, accuracy = model_2.evaluate(X_valid, y_valid)
print(f'Test Loss: {loss}')
print(f'Test Accuracy: {accuracy}')

[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5278 - loss: 1.1412
Test Loss: 1.0814614295959473
Test Accuracy: 0.5600907206535339


# Вывод:
Модель регрессии дала результат Test Loss: 0.12245023995637894, что является достаточно хорошим показателем при у = 0...6.
Модель классификации: Test Loss: 1.0814614295959473
Test Accuracy: 0.5600907206535339, что означает угадывание класса в 56% тестовых данных.
