In [200]:
import random

import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed, to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from deap import creator, base, tools, algorithms

from data_preprocessing import read_dataset, categorical_to_numeric

In [201]:
# Fraction of the rows held out as the test set when the data is split.
TEST_DATA_AMOUNT = 0.2

In [202]:
# Load the dataset through the project's read_dataset helper and display it.
raw_df = read_dataset()
raw_df

Unnamed: 0,Age,Gender,Weight (kg),Height (m),Max_BPM,Avg_BPM,Resting_BPM,Session_Duration (hours),Calories_Burned,Workout_Type,Fat_Percentage,Water_Intake (liters),Workout_Frequency (days/week),Experience_Level,BMI
0,56,Male,88.3,1.71,180,157,60,1.69,1313.0,Yoga,12.6,3.5,4,3,30.20
1,46,Female,74.9,1.53,179,151,66,1.30,883.0,HIIT,33.9,2.1,4,2,32.00
2,32,Female,68.1,1.66,167,122,54,1.11,677.0,Cardio,33.4,2.3,4,2,24.71
3,25,Male,53.2,1.70,190,164,56,0.59,532.0,Strength,28.8,2.1,3,1,18.41
4,38,Male,46.1,1.79,188,158,68,0.64,556.0,Strength,29.2,2.8,3,1,14.39
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
968,24,Male,87.1,1.74,187,158,67,1.57,1364.0,Strength,10.0,3.5,4,3,28.77
969,25,Male,66.6,1.61,184,166,56,1.38,1260.0,Strength,25.0,3.0,2,1,25.69
970,59,Female,60.4,1.76,194,120,53,1.72,929.0,Cardio,18.8,2.7,5,3,19.50
971,32,Male,126.4,1.83,198,146,62,1.10,883.0,HIIT,28.2,2.1,3,2,37.74


In [203]:
# Summarize column dtypes and non-null counts to spot missing values and string columns.
raw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 973 entries, 0 to 972
Data columns (total 15 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Age                            973 non-null    int64  
 1   Gender                         973 non-null    object 
 2   Weight (kg)                    973 non-null    float64
 3   Height (m)                     973 non-null    float64
 4   Max_BPM                        973 non-null    int64  
 5   Avg_BPM                        973 non-null    int64  
 6   Resting_BPM                    973 non-null    int64  
 7   Session_Duration (hours)       973 non-null    float64
 8   Calories_Burned                973 non-null    float64
 9   Workout_Type                   973 non-null    object 
 10  Fat_Percentage                 973 non-null    float64
 11  Water_Intake (liters)          973 non-null    float64
 12  Workout_Frequency (days/week)  973 non-null    int

In [204]:
# String-typed columns that need a numeric encoding before modelling.
categorical_columns = ['Gender', 'Workout_Type']

# Print how many rows fall into each category, one column at a time.
for column in categorical_columns:
    print(raw_df[column].value_counts(), '\n')

Gender
Male      511
Female    462
Name: count, dtype: int64 

Workout_Type
Strength    258
Cardio      255
Yoga        239
HIIT        221
Name: count, dtype: int64 



### As we can see, there are two string columns (`Gender` and `Workout_Type`), and both are categorical, so we can easily map them to numeric values.
### Let's also define the target variable here: `Gender`.

In [205]:
# Opt in to pandas' future "no silent downcasting" behaviour.
pd.set_option('future.no_silent_downcasting', True)
# Name of the column used as the prediction target further down.
target_variable_name = 'Gender'
# Replace each categorical column's labels with the values returned by categorical_to_numeric.
# NOTE(review): this overwrites raw_df in place, so re-running the cell feeds already-encoded
# values back into the helper — confirm categorical_to_numeric tolerates that.
# NOTE(review): the later describe() output omits these columns, which suggests they keep a
# non-numeric dtype after the assignment — confirm, and cast explicitly if numeric dtype is wanted.
for column in categorical_columns:
    raw_df.loc[:, column] = categorical_to_numeric(raw_df.loc[:, column])

For column - Gender, created mapping - {'Male': 0, 'Female': 1}
For column - Workout_Type, created mapping - {'Yoga': 0, 'HIIT': 1, 'Cardio': 2, 'Strength': 3}


In [206]:
# Re-print the per-category row counts, now keyed by the numeric codes.
for column in categorical_columns:
    print(raw_df[column].value_counts(), '\n')

Gender
0    511
1    462
Name: count, dtype: int64 

Workout_Type
3    258
2    255
0    239
1    221
Name: count, dtype: int64 



### Data normalization

In [207]:
# Inspect per-column summary statistics (min/max in particular) before scaling.
raw_df.describe()

Unnamed: 0,Age,Weight (kg),Height (m),Max_BPM,Avg_BPM,Resting_BPM,Session_Duration (hours),Calories_Burned,Fat_Percentage,Water_Intake (liters),Workout_Frequency (days/week),Experience_Level,BMI
count,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0
mean,38.683453,73.854676,1.72258,179.883864,143.766701,62.223022,1.256423,905.422405,24.976773,2.626619,3.321686,1.809866,24.912127
std,12.180928,21.2075,0.12772,11.525686,14.345101,7.32706,0.343033,272.641516,6.259419,0.600172,0.913047,0.739693,6.660879
min,18.0,40.0,1.5,160.0,120.0,50.0,0.5,303.0,10.0,1.5,2.0,1.0,12.32
25%,28.0,58.1,1.62,170.0,131.0,56.0,1.04,720.0,21.3,2.2,3.0,1.0,20.11
50%,40.0,70.0,1.71,180.0,143.0,62.0,1.26,893.0,26.2,2.6,3.0,2.0,24.16
75%,49.0,86.0,1.8,190.0,156.0,68.0,1.46,1076.0,29.3,3.1,4.0,2.0,28.56
max,59.0,129.9,2.0,199.0,169.0,74.0,2.0,1783.0,35.0,3.7,5.0,3.0,49.84


In [208]:
# Separate the predictors from the target column.
features = raw_df.drop(columns=[target_variable_name])
target = raw_df[target_variable_name]

# Rescale every feature column to the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows, before the train/test split, so test-set
# minima/maxima influence the training features — consider fitting on the training rows only.
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(features)
# Keep the original row labels: pd.concat aligns on the index, so a fresh 0..n-1 index would
# misalign (and NaN-pad) features and target whenever raw_df is not default-indexed.
scaled_df = pd.DataFrame(scaled_features, columns=features.columns, index=features.index)

# Re-attach the unscaled target and check that all scaled columns now span 0..1.
preprocessed_df = pd.concat([scaled_df, target], axis=1)
preprocessed_df.describe()

Unnamed: 0,Age,Weight (kg),Height (m),Max_BPM,Avg_BPM,Resting_BPM,Session_Duration (hours),Calories_Burned,Workout_Type,Fat_Percentage,Water_Intake (liters),Workout_Frequency (days/week),Experience_Level,BMI
count,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0
mean,0.504474,0.376581,0.445159,0.509843,0.485035,0.509293,0.504282,0.407042,0.515588,0.599071,0.512099,0.440562,0.404933,0.335611
std,0.297096,0.235901,0.25544,0.29553,0.292757,0.305294,0.228689,0.184217,0.375751,0.250377,0.272805,0.304349,0.369846,0.177529
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.243902,0.201335,0.24,0.25641,0.22449,0.25,0.36,0.281757,0.333333,0.452,0.318182,0.333333,0.0,0.207623
50%,0.536585,0.333704,0.42,0.512821,0.469388,0.5,0.506667,0.398649,0.666667,0.648,0.5,0.333333,0.5,0.315565
75%,0.756098,0.51168,0.6,0.769231,0.734694,0.75,0.64,0.522297,1.0,0.772,0.727273,0.666667,0.5,0.432836
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


### Split the input dataset

In [209]:
# Feature matrix and label vector as plain arrays.
X = preprocessed_df.drop(columns=[target_variable_name]).values
y = preprocessed_df[target_variable_name].values

# Hold out TEST_DATA_AMOUNT of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_DATA_AMOUNT,
    random_state=1,
)

# One-hot encode both label vectors with one column per distinct class in y.
num_classes = len(pd.unique(y))
y_train, y_test = (
    to_categorical(labels, num_classes=num_classes)
    for labels in (y_train, y_test)
)

### Generalized model creation function

In [210]:
def create_nn_model(input_dim, layers_config, activation_functions, loss_function, metrics):
    """
    Build and compile a fully connected Keras ``Sequential`` model.

    Args:
    - input_dim: int, number of input features.
    - layers_config: list of ints, number of neurons in each Dense layer, in order.
    - activation_functions: list of strings, activation function for each Dense layer
      (same length and order as ``layers_config``).
    - loss_function: string, loss passed to ``model.compile``.
    - metrics: list of strings, metrics passed to ``model.compile``.

    Returns:
    - model: Compiled Keras model using a default-constructed Adam optimizer.

    Raises:
    - AssertionError: if ``layers_config`` and ``activation_functions`` differ in length.
    """
    assert len(layers_config) == len(activation_functions), "Mismatch in layers and activations"

    model = Sequential()
    model.add(Input((input_dim, )))
    # One Dense layer per (width, activation) pair; the last pair becomes the output layer.
    for neurons, activation in zip(layers_config, activation_functions):
        model.add(Dense(neurons, activation=activation))

    model.compile(optimizer=Adam(), loss=loss_function, metrics=metrics)
    return model

### Example configuration

In [211]:
# Example architecture: two ReLU layers (64 and 32 units) followed by a softmax layer
# with one unit per class, trained with categorical cross-entropy.
input_dim = X_train.shape[1]  # Number of input features
layers_config = [64, 32, num_classes]  # Number of neurons in each layer
activation_functions = ['relu', 'relu', 'softmax']  # Activation functions for each layer
loss_function = 'categorical_crossentropy'
metrics = ['accuracy']

In [212]:
# Seed the Python, NumPy and backend RNGs (same seed value as the train/test split) so that
# weight initialisation and batch shuffling repeat across "Restart & Run All".
# Note: this also seeds the stdlib `random` module for any later cell that uses it.
set_random_seed(1)

model = create_nn_model(input_dim, layers_config, activation_functions, loss_function, metrics)

epochs = 20
batch_size = 32
# Keras holds out the last 20% of the training rows for per-epoch validation.
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2)

# Final score on the held-out test split.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

Epoch 1/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.6241 - loss: 0.6702 - val_accuracy: 0.7372 - val_loss: 0.6052
Epoch 2/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6564 - loss: 0.5885 - val_accuracy: 0.8782 - val_loss: 0.5242
Epoch 3/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8958 - loss: 0.5041 - val_accuracy: 0.9359 - val_loss: 0.4207
Epoch 4/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8907 - loss: 0.4133 - val_accuracy: 0.9551 - val_loss: 0.3106
Epoch 5/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8995 - loss: 0.3260 - val_accuracy: 0.9744 - val_loss: 0.2338
Epoch 6/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9193 - loss: 0.2563 - val_accuracy: 0.9808 - val_loss: 0.1927
Epoch 7/20
[1m20/20[0m [32m━━━━━━━━━

### Evolutionary algorithm