In [1]:
from tensorflow.keras import layers, models, Model
import numpy as np
import pandas as pd

In [2]:
# Import the data
path = 'https://static.bc-edx.com/ai/ail-v-1-0/m19/lesson_3/datasets/wine_quality.csv'
df = pd.read_csv(path)
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,color
0,7.2,0.39,0.63,11.0,0.044,55.0,156.0,0.9974,3.09,0.44,8.7,ok,white
1,6.9,0.63,0.02,1.9,0.078,18.0,30.0,0.99712,3.4,0.75,9.8,ok,red
2,6.9,0.3,0.33,4.1,0.035,26.0,155.0,0.9925,3.25,0.79,12.3,good,white
3,7.3,0.42,0.38,6.8,0.045,29.0,122.0,0.9925,3.19,0.37,12.6,good,white
4,6.9,0.18,0.38,8.1,0.049,44.0,176.0,0.9958,3.3,0.54,9.8,ok,white


In [3]:
# Preprocess y
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Preprocess "quality" column (one-hot encoding)
quality_encoder = OneHotEncoder(sparse_output=False)
quality_encoded = quality_encoder.fit_transform(df[['quality']])
quality_columns = quality_encoder.get_feature_names_out(['quality'])
df_quality_encoded = pd.DataFrame(quality_encoded, columns=quality_columns)

# Preprocess "color" column (label encoding for binary; one-hot encoding for multiple categories)
color_encoder = LabelEncoder()
df['color_encoded'] = color_encoder.fit_transform(df['color'])

# Concatenate the encoded columns to the original DataFrame
df_processed = pd.concat([df, df_quality_encoded], axis=1)

# Drop the original "quality" and "color" columns
df_processed = df_processed.drop(['quality', 'color'], axis=1)

df_processed.head()


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,color_encoded,quality_bad,quality_good,quality_ok
0,7.2,0.39,0.63,11.0,0.044,55.0,156.0,0.9974,3.09,0.44,8.7,1,0.0,0.0,1.0
1,6.9,0.63,0.02,1.9,0.078,18.0,30.0,0.99712,3.4,0.75,9.8,0,0.0,0.0,1.0
2,6.9,0.3,0.33,4.1,0.035,26.0,155.0,0.9925,3.25,0.79,12.3,1,0.0,1.0,0.0
3,7.3,0.42,0.38,6.8,0.045,29.0,122.0,0.9925,3.19,0.37,12.6,1,0.0,1.0,0.0
4,6.9,0.18,0.38,8.1,0.049,44.0,176.0,0.9958,3.3,0.54,9.8,1,0.0,0.0,1.0


In [4]:
# Split data into X and two separate y variables
X = df_processed.drop(columns=['quality_good', 'quality_ok', 'quality_bad', 'color_encoded'])

y_color = df_processed['color_encoded']

y_quality = df_processed[['quality_good', 'quality_ok', 'quality_bad']]

# Split data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_color_train, y_color_test, y_quality_train, y_quality_test = train_test_split(X, y_color, y_quality)

In [5]:
# Create the shared layers of the model

# Input layer
input_layer = layers.Input(shape=(X.shape[1],), name='input_features')

# Shared hidden layers
shared_layer1 = layers.Dense(64, activation='relu')(input_layer)
shared_layer2 = layers.Dense(32, activation='relu')(shared_layer1)

In [6]:
# Branch for quality prediction
quality_output = layers.Dense(3, activation='softmax', name='quality_output')(shared_layer2)

# Branch for color prediction
color_output = layers.Dense(1, activation='sigmoid', name='color_output')(shared_layer2)

In [7]:
# Create the model
model = Model(inputs=input_layer, outputs=[quality_output, color_output])

# Compile the model
model.compile(optimizer='adam',
              loss={'quality_output': 'categorical_crossentropy', 'color_output': 'binary_crossentropy'},
              metrics={'quality_output': 'accuracy', 'color_output': 'accuracy'})

# Display the model summary
model.summary()

To fit the model to the data, we specify X_train as normal, but pass a dictionary for the y_data.

In [8]:
# Fit the model
model.fit(
    X,
    {'quality_output': y_quality, 'color_output': y_color},
    epochs=10,
    batch_size=32,
    validation_split=0.2
)

Epoch 1/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - color_output_accuracy: 0.7824 - loss: 1.6191 - quality_output_accuracy: 0.6929 - val_color_output_accuracy: 0.9323 - val_loss: 0.8604 - val_quality_output_accuracy: 0.7469
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - color_output_accuracy: 0.9379 - loss: 0.8045 - quality_output_accuracy: 0.7727 - val_color_output_accuracy: 0.9215 - val_loss: 0.8843 - val_quality_output_accuracy: 0.7500
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - color_output_accuracy: 0.9391 - loss: 0.8100 - quality_output_accuracy: 0.7615 - val_color_output_accuracy: 0.9369 - val_loss: 0.8343 - val_quality_output_accuracy: 0.7469
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - color_output_accuracy: 0.9406 - loss: 0.7912 - quality_output_accuracy: 0.7669 - val_color_output_accuracy: 0.9408 - val_loss: 0.829

<keras.src.callbacks.history.History at 0x11f5abb1f50>

In [9]:
# Evaluate the model with the testing data
test_results = model.evaluate(X_test, {'quality_output': y_quality_test, 'color_output': y_color_test})
test_results

[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - color_output_accuracy: 0.9526 - loss: 0.6609 - quality_output_accuracy: 0.7735


[0.6695435643196106, 0.9538461565971375, 0.780923068523407]