In [1]:
import keras

In [2]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint

from sklearn.preprocessing import StandardScaler

SEED = 2017

In [3]:
data = pd.read_csv('Data/winequality-red.csv', sep=';')
y = data['quality']
X = data.drop(['quality'], axis=1)
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

In [5]:
#normalize data to sample mean = 0, sample variance = 1
scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train))
X_test = pd.DataFrame(scaler.transform(X_test))


In [11]:
batch_size = 64
n_epochs = 200
lr = 0.01

model = Sequential()
# First hidden layer with 100 hidden units
model.add(Dense(512, input_dim=X_train.shape[1], activation='relu')) 
# Output layer
model.add(Dense(1, activation='linear'))

# Compile model
model.compile(loss='mse', optimizer="adam") 

In [12]:
callbacks = [
             EarlyStopping(monitor='val_loss', patience=20, verbose=2)
            ]

In [13]:
model.fit(X_train.values, y_train, batch_size=64, epochs=n_epochs, validation_split=0.2,     
             verbose=2, callbacks=callbacks)

Epoch 1/200
16/16 - 0s - loss: 27.0789 - val_loss: 19.6190 - 443ms/epoch - 28ms/step
Epoch 2/200
16/16 - 0s - loss: 15.0869 - val_loss: 10.3344 - 51ms/epoch - 3ms/step
Epoch 3/200
16/16 - 0s - loss: 7.3438 - val_loss: 4.7886 - 52ms/epoch - 3ms/step
Epoch 4/200
16/16 - 0s - loss: 3.6602 - val_loss: 2.6499 - 51ms/epoch - 3ms/step
Epoch 5/200
16/16 - 0s - loss: 2.5393 - val_loss: 2.0942 - 52ms/epoch - 3ms/step
Epoch 6/200
16/16 - 0s - loss: 2.1836 - val_loss: 1.8311 - 44ms/epoch - 3ms/step
Epoch 7/200
16/16 - 0s - loss: 1.9427 - val_loss: 1.6932 - 45ms/epoch - 3ms/step
Epoch 8/200
16/16 - 0s - loss: 1.7853 - val_loss: 1.5771 - 43ms/epoch - 3ms/step
Epoch 9/200
16/16 - 0s - loss: 1.6632 - val_loss: 1.4685 - 44ms/epoch - 3ms/step
Epoch 10/200
16/16 - 0s - loss: 1.5516 - val_loss: 1.3897 - 46ms/epoch - 3ms/step
Epoch 11/200
16/16 - 0s - loss: 1.4515 - val_loss: 1.3107 - 44ms/epoch - 3ms/step
Epoch 12/200
16/16 - 0s - loss: 1.3604 - val_loss: 1.2381 - 49ms/epoch - 3ms/step
Epoch 13/200
16/16 

Epoch 101/200
16/16 - 0s - loss: 0.2826 - val_loss: 0.3869 - 44ms/epoch - 3ms/step
Epoch 102/200
16/16 - 0s - loss: 0.2826 - val_loss: 0.4181 - 44ms/epoch - 3ms/step
Epoch 103/200
16/16 - 0s - loss: 0.2822 - val_loss: 0.4010 - 44ms/epoch - 3ms/step
Epoch 104/200
16/16 - 0s - loss: 0.2762 - val_loss: 0.3846 - 44ms/epoch - 3ms/step
Epoch 105/200
16/16 - 0s - loss: 0.2690 - val_loss: 0.3913 - 44ms/epoch - 3ms/step
Epoch 106/200
16/16 - 0s - loss: 0.2711 - val_loss: 0.4047 - 46ms/epoch - 3ms/step
Epoch 107/200
16/16 - 0s - loss: 0.2686 - val_loss: 0.3753 - 47ms/epoch - 3ms/step
Epoch 108/200
16/16 - 0s - loss: 0.2738 - val_loss: 0.4079 - 48ms/epoch - 3ms/step
Epoch 109/200
16/16 - 0s - loss: 0.2710 - val_loss: 0.3970 - 43ms/epoch - 3ms/step
Epoch 110/200
16/16 - 0s - loss: 0.2703 - val_loss: 0.3845 - 43ms/epoch - 3ms/step
Epoch 111/200
16/16 - 0s - loss: 0.2656 - val_loss: 0.4022 - 45ms/epoch - 3ms/step
Epoch 112/200
16/16 - 0s - loss: 0.2703 - val_loss: 0.3825 - 47ms/epoch - 3ms/step
Epoc

<keras.callbacks.History at 0x7f7b20569040>

In [14]:
score = model.evaluate(X_test.values, y_test, verbose=1)
# print('Test accuracy: %.2f%%' % (score[1]*100))
print(y_test[:4])

457    5
350    6
904    7
759    5
Name: quality, dtype: int64


In [15]:
test_prediction = model.predict(X_test[:4])
print(test_prediction)


[[5.1889544]
 [5.4060616]
 [6.0121775]
 [5.0413194]]


In [16]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 512)               6144      
                                                                 
 dense_3 (Dense)             (None, 1)                 513       
                                                                 
Total params: 6,657
Trainable params: 6,657
Non-trainable params: 0
_________________________________________________________________
