In [1]:
# Import libraries
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# Load the dataset
df = pd.read_csv("obesity_dataset.csv")
df.head()

Unnamed: 0,Gender,Height,Age,Meals_Per_Day,Frequency_Physical_Activity,Water_Intake,Vegetable_Intake,Technology_Use,Weight,family_history_with_overweight,High_Caloric_Food,Mode_Transportation,Food_Between_Meals,Alcohol_Intake,SMOKE,Monitor_Calories,Obesity_Level,Obese/Overweight
0,Female,1.62,21.0,3.0,0.0,2.0,2.0,1.0,64.0,yes,no,Public_Transportation,Sometimes,no,no,no,Normal_Weight,No
1,Female,1.52,21.0,3.0,3.0,3.0,3.0,0.0,56.0,yes,no,Public_Transportation,Sometimes,Sometimes,yes,yes,Normal_Weight,No
2,Male,1.8,23.0,3.0,2.0,2.0,2.0,1.0,77.0,yes,no,Public_Transportation,Sometimes,Frequently,no,no,Normal_Weight,No
3,Male,1.8,27.0,3.0,2.0,2.0,3.0,0.0,87.0,no,no,Walking,Sometimes,Frequently,no,no,Overweight_Level_I,Yes
4,Male,1.78,22.0,1.0,0.0,2.0,2.0,0.0,89.8,no,no,Public_Transportation,Sometimes,Sometimes,no,no,Overweight_Level_II,Yes


## 1. Food between meals as predictor of obesity and overweight

In [3]:
# Select columns 'Food_Between_Meals' and 'Obese/Overweight'
data = df[['Food_Between_Meals', 'Obese/Overweight']]
data.head()

Unnamed: 0,Food_Between_Meals,Obese/Overweight
0,Sometimes,No
1,Sometimes,No
2,Sometimes,No
3,Sometimes,Yes
4,Sometimes,Yes


In [4]:
# Encode categorical data
label_encoder = LabelEncoder()
data['Food_Between_Meals'] = label_encoder.fit_transform(data['Food_Between_Meals'])
data['Obese/Overweight'] = label_encoder.fit_transform(data['Obese/Overweight'])

In [5]:
# Define features (X) and target (y)
X = data[['Food_Between_Meals']] 
y = data['Obese/Overweight']   

In [6]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42, test_size = 0.2, stratify = y)

In [8]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [9]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features = len(X_train_scaled[0])

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units = 20, activation = "relu", input_dim = input_features))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units = 10, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units = 1, activation = "sigmoid"))

# Check the structure of the model
nn.summary()

In [10]:
# Compile the neural network
nn.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=['accuracy'])

In [11]:
# Train the neural network
nn.fit(X_train_scaled, y_train, epochs = 100, verbose = 1, validation_split = 0.2)

Epoch 1/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6747 - loss: 0.6764 - val_accuracy: 0.8373 - val_loss: 0.6377
Epoch 2/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 749us/step - accuracy: 0.8117 - loss: 0.6278 - val_accuracy: 0.8373 - val_loss: 0.5848
Epoch 3/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 734us/step - accuracy: 0.8119 - loss: 0.5807 - val_accuracy: 0.8373 - val_loss: 0.5252
Epoch 4/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 722us/step - accuracy: 0.8323 - loss: 0.5127 - val_accuracy: 0.8373 - val_loss: 0.4832
Epoch 5/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 706us/step - accuracy: 0.8084 - loss: 0.5017 - val_accuracy: 0.8373 - val_loss: 0.4655
Epoch 6/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 704us/step - accuracy: 0.8263 - loss: 0.4738 - val_accuracy: 0.8373 - val_loss: 0.4622
Epoch 7/100
[1m43/43[0

<keras.src.callbacks.history.History at 0x1776f7d70>

In [12]:
# Evaluate the neural network
nn_loss, nn_accuracy = nn.evaluate(X_test_scaled, y_test)
print(f"Neural Network Accuracy: {nn_accuracy}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8436 - loss: 0.4377 
Neural Network Accuracy: 0.8368794322013855


## 2. Mode of transportation as predictor of obesity/overweight

In [13]:
# Select columns
data2 = df[['Mode_Transportation', 'Obese/Overweight']]
data2.head()

Unnamed: 0,Mode_Transportation,Obese/Overweight
0,Public_Transportation,No
1,Public_Transportation,No
2,Public_Transportation,No
3,Walking,Yes
4,Public_Transportation,Yes


In [14]:
# Encode categorical data
label_encoder = LabelEncoder()
data2['Mode_Transportation'] = label_encoder.fit_transform(data2['Mode_Transportation'])
data2['Obese/Overweight'] = label_encoder.fit_transform(data2['Obese/Overweight'])

In [15]:
# Define features (X) and target (y)
X = data2[['Mode_Transportation']]
y = data2['Obese/Overweight']   

In [16]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42, test_size = 0.2, stratify = y)

In [17]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [18]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features = len(X_train_scaled[0])

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units = 20, activation = "relu", input_dim = input_features))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units = 10, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units = 1, activation = "sigmoid"))

# Check the structure of the model
nn.summary()

In [19]:
# Compile the neural network
nn.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=['accuracy'])

In [20]:
# Train the neural network
nn.fit(X_train_scaled, y_train, epochs = 100, verbose = 1, validation_split = 0.2)

Epoch 1/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7512 - loss: 0.6603 - val_accuracy: 0.7544 - val_loss: 0.6080
Epoch 2/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7448 - loss: 0.6085 - val_accuracy: 0.7544 - val_loss: 0.5735
Epoch 3/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 722us/step - accuracy: 0.7283 - loss: 0.5979 - val_accuracy: 0.7544 - val_loss: 0.5634
Epoch 4/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 701us/step - accuracy: 0.7442 - loss: 0.5829 - val_accuracy: 0.7544 - val_loss: 0.5603
Epoch 5/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 693us/step - accuracy: 0.7282 - loss: 0.5895 - val_accuracy: 0.7544 - val_loss: 0.5588
Epoch 6/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 731us/step - accuracy: 0.7337 - loss: 0.5827 - val_accuracy: 0.7544 - val_loss: 0.5575
Epoch 7/100
[1m43/43[0m 

<keras.src.callbacks.history.History at 0x1502f2d50>

In [21]:
# Evaluate the neural network
nn_loss, nn_accuracy = nn.evaluate(X_test_scaled, y_test)
print(f"Neural Network Accuracy: {nn_accuracy}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7567 - loss: 0.5499 
Neural Network Accuracy: 0.73758864402771


## 3. Alcohol intake as predictor of obesity/overweight

In [27]:
# Select columns
data3 = df[['Alcohol_Intake', 'Obese/Overweight']]
data3.head()

Unnamed: 0,Alcohol_Intake,Obese/Overweight
0,no,No
1,Sometimes,No
2,Frequently,No
3,Frequently,Yes
4,Sometimes,Yes


In [29]:
# Encode categorical data
label_encoder = LabelEncoder()
data3['Alcohol_Intake'] = label_encoder.fit_transform(data3['Alcohol_Intake'])
data3['Obese/Overweight'] = label_encoder.fit_transform(data3['Obese/Overweight'])

In [30]:
# Define features (X) and target (y)
X = data3[['Alcohol_Intake']]
y = data3['Obese/Overweight']   

In [31]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42, test_size = 0.2, stratify = y)

In [32]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [33]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features = len(X_train_scaled[0])

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units = 20, activation = "relu", input_dim = input_features))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units = 10, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units = 1, activation = "sigmoid"))

# Check the structure of the model
nn.summary()

In [34]:
# Compile the neural network
nn.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=['accuracy'])

In [35]:
# Train the neural network
nn.fit(X_train_scaled, y_train, epochs = 100, verbose = 1, validation_split = 0.2)

Epoch 1/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6192 - loss: 0.6953 - val_accuracy: 0.6538 - val_loss: 0.6266
Epoch 2/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 712us/step - accuracy: 0.6508 - loss: 0.6264 - val_accuracy: 0.7544 - val_loss: 0.5845
Epoch 3/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 650us/step - accuracy: 0.7262 - loss: 0.5933 - val_accuracy: 0.7544 - val_loss: 0.5698
Epoch 4/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 708us/step - accuracy: 0.7299 - loss: 0.5792 - val_accuracy: 0.7544 - val_loss: 0.5646
Epoch 5/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 701us/step - accuracy: 0.7276 - loss: 0.5898 - val_accuracy: 0.7544 - val_loss: 0.5616
Epoch 6/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 722us/step - accuracy: 0.7381 - loss: 0.5765 - val_accuracy: 0.7544 - val_loss: 0.5596
Epoch 7/100
[1m43/43[0

<keras.src.callbacks.history.History at 0x1508c9fa0>

In [36]:
# Evaluate the neural network
nn_loss, nn_accuracy = nn.evaluate(X_test_scaled, y_test)
print(f"Neural Network Accuracy: {nn_accuracy}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7515 - loss: 0.5563 
Neural Network Accuracy: 0.7352246046066284


## 4. Smoking as predictor of obesity/overweight

In [37]:
# Select columns
data4 = df[['SMOKE', 'Obese/Overweight']]
data4.head()

Unnamed: 0,SMOKE,Obese/Overweight
0,no,No
1,yes,No
2,no,No
3,no,Yes
4,no,Yes


In [38]:
# Encode categorical data
label_encoder = LabelEncoder()
data4['SMOKE'] = label_encoder.fit_transform(data4['SMOKE'])
data4['Obese/Overweight'] = label_encoder.fit_transform(data4['Obese/Overweight'])

In [39]:
# Define features (X) and target (y)
X = data4[['SMOKE']]
y = data4['Obese/Overweight']   

In [40]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42, test_size = 0.2, stratify = y)

In [41]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [42]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features = len(X_train_scaled[0])

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units = 20, activation = "relu", input_dim = input_features))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units = 10, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units = 1, activation = "sigmoid"))

# Check the structure of the model
nn.summary()

In [43]:
# Compile the neural network
nn.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=['accuracy'])

In [44]:
# Train the neural network
nn.fit(X_train_scaled, y_train, epochs = 100, verbose = 1, validation_split = 0.2)

Epoch 1/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5074 - loss: 0.6931 - val_accuracy: 0.7544 - val_loss: 0.6672
Epoch 2/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 723us/step - accuracy: 0.7220 - loss: 0.6641 - val_accuracy: 0.7544 - val_loss: 0.6372
Epoch 3/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 702us/step - accuracy: 0.7268 - loss: 0.6359 - val_accuracy: 0.7544 - val_loss: 0.6028
Epoch 4/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 731us/step - accuracy: 0.7222 - loss: 0.6115 - val_accuracy: 0.7544 - val_loss: 0.5751
Epoch 5/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 709us/step - accuracy: 0.7079 - loss: 0.6056 - val_accuracy: 0.7544 - val_loss: 0.5631
Epoch 6/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 707us/step - accuracy: 0.7242 - loss: 0.5891 - val_accuracy: 0.7544 - val_loss: 0.5590
Epoch 7/100
[1m43/43[0

<keras.src.callbacks.history.History at 0x15112fc20>

In [45]:
# Evaluate the neural network
nn_loss, nn_accuracy = nn.evaluate(X_test_scaled, y_test)
print(f"Neural Network Accuracy: {nn_accuracy}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7515 - loss: 0.5615 
Neural Network Accuracy: 0.7352246046066284


## 5. Family history as predictor of obesity/overweight

In [46]:
# Select columns
data5 = df[['family_history_with_overweight', 'Obese/Overweight']]
data5.head()

Unnamed: 0,family_history_with_overweight,Obese/Overweight
0,yes,No
1,yes,No
2,yes,No
3,no,Yes
4,no,Yes


In [47]:
# Encode categorical data
label_encoder = LabelEncoder()
data5['family_history_with_overweight'] = label_encoder.fit_transform(data5['family_history_with_overweight'])
data5['Obese/Overweight'] = label_encoder.fit_transform(data5['Obese/Overweight'])

In [48]:
# Define features (X) and target (y)
X = data5[['family_history_with_overweight']]
y = data5['Obese/Overweight']   

In [49]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42, test_size = 0.2, stratify = y)

In [50]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [51]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features = len(X_train_scaled[0])

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units = 20, activation = "relu", input_dim = input_features))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units = 10, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units = 1, activation = "sigmoid"))

# Check the structure of the model
nn.summary()

In [52]:
# Compile the neural network
nn.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=['accuracy'])

In [53]:
# Train the neural network
nn.fit(X_train_scaled, y_train, epochs = 100, verbose = 1, validation_split = 0.2)

Epoch 1/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7726 - loss: 0.6338 - val_accuracy: 0.8047 - val_loss: 0.5579
Epoch 2/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 725us/step - accuracy: 0.8189 - loss: 0.5337 - val_accuracy: 0.8047 - val_loss: 0.5050
Epoch 3/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 657us/step - accuracy: 0.8236 - loss: 0.4798 - val_accuracy: 0.8047 - val_loss: 0.4890
Epoch 4/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 660us/step - accuracy: 0.8342 - loss: 0.4509 - val_accuracy: 0.8047 - val_loss: 0.4870
Epoch 5/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 650us/step - accuracy: 0.8146 - loss: 0.4719 - val_accuracy: 0.8047 - val_loss: 0.4871
Epoch 6/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 662us/step - accuracy: 0.8072 - loss: 0.4842 - val_accuracy: 0.8047 - val_loss: 0.4873
Epoch 7/100
[1m43/43[0

<keras.src.callbacks.history.History at 0x1690b5850>

In [54]:
# Evaluate the neural network
nn_loss, nn_accuracy = nn.evaluate(X_test_scaled, y_test)
print(f"Neural Network Accuracy: {nn_accuracy}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8514 - loss: 0.4192 
Neural Network Accuracy: 0.8368794322013855


## 6. High-caloric food as predictor of obesity/overweight

In [55]:
# Select columns
data6 = df[['High_Caloric_Food', 'Obese/Overweight']]
data6.head()

Unnamed: 0,High_Caloric_Food,Obese/Overweight
0,no,No
1,no,No
2,no,No
3,no,Yes
4,no,Yes


In [58]:
# Encode categorical data
label_encoder = LabelEncoder()
data6['High_Caloric_Food'] = label_encoder.fit_transform(data6['High_Caloric_Food'])
data6['Obese/Overweight'] = label_encoder.fit_transform(data6['Obese/Overweight'])

In [60]:
# Define features (X) and target (y)
X = data6[['High_Caloric_Food']]
y = data6['Obese/Overweight']   

In [61]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42, test_size = 0.2, stratify = y)

In [62]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [63]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features = len(X_train_scaled[0])

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units = 20, activation = "relu", input_dim = input_features))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units = 10, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units = 1, activation = "sigmoid"))

# Check the structure of the model
nn.summary()

In [65]:
# Compile the neural network
nn.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=['accuracy'])

In [66]:
# Train the neural network
nn.fit(X_train_scaled, y_train, epochs = 100, verbose = 1, validation_split = 0.2)

Epoch 1/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7211 - loss: 0.6614 - val_accuracy: 0.7544 - val_loss: 0.6091
Epoch 2/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 745us/step - accuracy: 0.7206 - loss: 0.6175 - val_accuracy: 0.7544 - val_loss: 0.5605
Epoch 3/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 664us/step - accuracy: 0.7264 - loss: 0.5856 - val_accuracy: 0.7544 - val_loss: 0.5375
Epoch 4/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 658us/step - accuracy: 0.7211 - loss: 0.5764 - val_accuracy: 0.7544 - val_loss: 0.5302
Epoch 5/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 664us/step - accuracy: 0.7276 - loss: 0.5731 - val_accuracy: 0.7544 - val_loss: 0.5283
Epoch 6/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 662us/step - accuracy: 0.7219 - loss: 0.5714 - val_accuracy: 0.7544 - val_loss: 0.5285
Epoch 7/100
[1m43/43[0

<keras.src.callbacks.history.History at 0x15206be90>

In [67]:
# Evaluate the neural network
nn_loss, nn_accuracy = nn.evaluate(X_test_scaled, y_test)
print(f"Neural Network Accuracy: {nn_accuracy}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7515 - loss: 0.5341 
Neural Network Accuracy: 0.7352246046066284


## 7. Monitoring calories as predictor of obesity/overweight

In [68]:
# Select columns
data7 = df[['Monitor_Calories', 'Obese/Overweight']]
data7.head()

Unnamed: 0,Monitor_Calories,Obese/Overweight
0,no,No
1,yes,No
2,no,No
3,no,Yes
4,no,Yes


In [69]:
# Encode categorical data
label_encoder = LabelEncoder()
data7['Monitor_Calories'] = label_encoder.fit_transform(data7['Monitor_Calories'])
data7['Obese/Overweight'] = label_encoder.fit_transform(data7['Obese/Overweight'])

In [70]:
# Define features (X) and target (y)
X = data7[['Monitor_Calories']]
y = data7['Obese/Overweight']   

In [71]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42, test_size = 0.2, stratify = y)

In [72]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [73]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features = len(X_train_scaled[0])

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units = 20, activation = "relu", input_dim = input_features))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units = 10, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units = 1, activation = "sigmoid"))

# Check the structure of the model
nn.summary()

In [74]:
# Compile the neural network
nn.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=['accuracy'])

In [75]:
# Train the neural network
nn.fit(X_train_scaled, y_train, epochs = 100, verbose = 1, validation_split = 0.2)

Epoch 1/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7411 - loss: 0.6792 - val_accuracy: 0.7544 - val_loss: 0.6248
Epoch 2/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 740us/step - accuracy: 0.7206 - loss: 0.6310 - val_accuracy: 0.7544 - val_loss: 0.5793
Epoch 3/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 651us/step - accuracy: 0.7436 - loss: 0.5802 - val_accuracy: 0.7544 - val_loss: 0.5600
Epoch 4/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 642us/step - accuracy: 0.7077 - loss: 0.5981 - val_accuracy: 0.7544 - val_loss: 0.5577
Epoch 5/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 656us/step - accuracy: 0.7321 - loss: 0.5730 - val_accuracy: 0.7426 - val_loss: 0.5569
Epoch 6/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 662us/step - accuracy: 0.7373 - loss: 0.5730 - val_accuracy: 0.7426 - val_loss: 0.5584
Epoch 7/100
[1m43/43[0

<keras.src.callbacks.history.History at 0x152069730>

In [76]:
# Evaluate the neural network
nn_loss, nn_accuracy = nn.evaluate(X_test_scaled, y_test)
print(f"Neural Network Accuracy: {nn_accuracy}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7547 - loss: 0.5534 
Neural Network Accuracy: 0.7494089603424072
