<a href="https://www.kaggle.com/code/vtrackstar/marvel-health-and-wellness-predictive-modeling?scriptVersionId=238668858" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
sheet_name = 'Food_Diary_Marvel'
sheet_id = '1t2eBcYGvtNocz_-20akb0fsZPuRscWFr6l0yjQuyYLw'
url = "https://docs.google.com/spreadsheets/d/1t2eBcYGvtNocz_-20akb0fsZPuRscWFr6l0yjQuyYLw/gviz/tq?tqx=out:csv&gid=1380274994"

In [3]:
df = pd.read_csv(url)
df[df.columns[:5]].head()

Unnamed: 0,Daily Totals,Calories (kcal),Fat (g),Carbs (g),Protein (g)
0,3-Mar-2025,3457.519736,172.62,219.3,277.64
1,4-Mar-2025,2515.878522,119.78,145.19,224.05
2,5-Mar-2025,3096.439487,144.13,164.59,295.72
3,6-Mar-2025,2428.879383,121.31,80.43,258.7
4,7-Mar-2025,2358.335948,89.93,132.9,254.58


In [4]:
df.columns

Index(['Daily Totals', 'Calories (kcal)', 'Fat (g)', 'Carbs (g)',
       'Protein (g)', 'Calories Burned (Apple Watch)',
       'Calories Burned (WHOOP)', 'BMR', 'Caloric Deficit (Apple Watch)',
       'Caloric Deficit (WHOOP)', 'Weight (lbs.)', 'Unnamed: 11',
       'Diet Adherence Predictive Model External Link', 'Unnamed: 13',
       'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17'],
      dtype='object')

In [5]:
df_clean = df[['Daily Totals', 'Calories (kcal)', 'Fat (g)', 'Carbs (g)',
       'Protein (g)']]
df_a = df_clean.dropna()
df_a

Unnamed: 0,Daily Totals,Calories (kcal),Fat (g),Carbs (g),Protein (g)
0,3-Mar-2025,3457.519736,172.62,219.30,277.64
1,4-Mar-2025,2515.878522,119.78,145.19,224.05
2,5-Mar-2025,3096.439487,144.13,164.59,295.72
3,6-Mar-2025,2428.879383,121.31,80.43,258.70
4,7-Mar-2025,2358.335948,89.93,132.90,254.58
...,...,...,...,...,...
243,1-Nov-2025,0.000000,-,-,-
244,2-Nov-2025,0.000000,-,-,-
245,3-Nov-2025,0.000000,-,-,-
246,4-Nov-2025,0.000000,-,-,-


In [6]:
print(df_a.isnull().sum())
print(df_a.info())

Daily Totals       0
Calories (kcal)    0
Fat (g)            0
Carbs (g)          0
Protein (g)        0
dtype: int64
<class 'pandas.core.frame.DataFrame'>
Index: 248 entries, 0 to 247
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Daily Totals     248 non-null    object 
 1   Calories (kcal)  248 non-null    float64
 2   Fat (g)          248 non-null    object 
 3   Carbs (g)        248 non-null    object 
 4   Protein (g)      248 non-null    object 
dtypes: float64(1), object(4)
memory usage: 11.6+ KB
None


In [7]:
df_c = df_a.copy()
df_c['Calories (kcal)'] = pd.to_numeric(df_c['Calories (kcal)'], errors='coerce')
df_c['Protein (g)'] = pd.to_numeric(df_c['Protein (g)'], errors='coerce')
df_c['Fat (g)'] = pd.to_numeric(df_c['Fat (g)'], errors='coerce')
df_c['Carbs (g)'] = pd.to_numeric(df_c['Carbs (g)'], errors='coerce')

print(df_c.info())

<class 'pandas.core.frame.DataFrame'>
Index: 248 entries, 0 to 247
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Daily Totals     248 non-null    object 
 1   Calories (kcal)  248 non-null    float64
 2   Fat (g)          21 non-null     float64
 3   Carbs (g)        21 non-null     float64
 4   Protein (g)      21 non-null     float64
dtypes: float64(4), object(1)
memory usage: 11.6+ KB
None


In [8]:
BMR = 66.47 + (6.24*257) + (12.7*78) - (6.76*28)
TDEE = BMR*1.55 ## moderately active male activity level multiplier

calorie_goals = {
    'super_cut': TDEE - 750,
    'cut': TDEE - 500,
    'maintain': TDEE,
    'bulk': TDEE + 500,
    'fast_bulk': TDEE + 750
}

tdee_df = pd.DataFrame([calorie_goals])
tdee_df.insert(0, 'BMR', BMR)
tdee_df.insert(1, 'TDEE', TDEE)

print("BMR, TDEE, Daily Caloric Need Based on Goal")
tdee_df

BMR, TDEE, Daily Caloric Need Based on Goal


Unnamed: 0,BMR,TDEE,super_cut,cut,maintain,bulk,fast_bulk
0,2471.47,3830.7785,3080.7785,3330.7785,3830.7785,4330.7785,4580.7785


In [9]:
df1 = df_c.copy()
df1['calories_needed_maintainence'] = df1['Calories (kcal)'] - calorie_goals['maintain']
df1['caloric_deficit_bulk'] = df1['Calories (kcal)'] - calorie_goals['bulk']
df1['caloric_deficit_super_cut'] = df1['Calories (kcal)'] - calorie_goals['super_cut']
df1['protein_in_grams_needed'] = (0.8*257) - df1['Protein (g)']
print("Daily Caloric and Protein Surplus/Deficit Based On Goal")
df1.head()

Daily Caloric and Protein Surplus/Deficit Based On Goal


Unnamed: 0,Daily Totals,Calories (kcal),Fat (g),Carbs (g),Protein (g),calories_needed_maintainence,caloric_deficit_bulk,caloric_deficit_super_cut,protein_in_grams_needed
0,3-Mar-2025,3457.519736,172.62,219.3,277.64,-373.258764,-873.258764,376.741236,-72.04
1,4-Mar-2025,2515.878522,119.78,145.19,224.05,-1314.899978,-1814.899978,-564.899978,-18.45
2,5-Mar-2025,3096.439487,144.13,164.59,295.72,-734.339013,-1234.339013,15.660987,-90.12
3,6-Mar-2025,2428.879383,121.31,80.43,258.7,-1401.899117,-1901.899117,-651.899117,-53.1
4,7-Mar-2025,2358.335948,89.93,132.9,254.58,-1472.442552,-1972.442552,-722.442552,-48.98


Future Adherence to Diet Goal (Cut)

In [10]:
dfa = df1.copy()
dfa['adherence'] = ((dfa['Calories (kcal)'] <= calorie_goals['cut']) & (dfa['protein_in_grams_needed'] < 0)).astype(int)
dfa[['Daily Totals', 'Calories (kcal)', 'adherence', 'protein_in_grams_needed']].head()

  return op(a, b)


Unnamed: 0,Daily Totals,Calories (kcal),adherence,protein_in_grams_needed
0,3-Mar-2025,3457.519736,0,-72.04
1,4-Mar-2025,2515.878522,1,-18.45
2,5-Mar-2025,3096.439487,1,-90.12
3,6-Mar-2025,2428.879383,1,-53.1
4,7-Mar-2025,2358.335948,1,-48.98


Random Forest Classification

In [11]:
dfba = dfa.copy()
dfb = dfba.dropna()
X = dfb[['Calories (kcal)', 'Fat (g)', 'Carbs (g)',
       'Protein (g)']]
y = dfb['adherence']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Model Accuracy: {accuracy}")

Model Accuracy: 0.8


## Daily Diet Adherence Predictive Model for Cut Goal (500 Calorie Deficit and 205 g Protein Minimum)

In [12]:
dfb.columns

Index(['Daily Totals', 'Calories (kcal)', 'Fat (g)', 'Carbs (g)',
       'Protein (g)', 'calories_needed_maintainence', 'caloric_deficit_bulk',
       'caloric_deficit_super_cut', 'protein_in_grams_needed', 'adherence'],
      dtype='object')

In [13]:
dfca = dfb[['Daily Totals', 'Calories (kcal)', 'Fat (g)', 'Carbs (g)', 'Protein (g)']]
dfc = dfca.copy()
dfc['adherence_prediction'] = model.predict(dfc[['Calories (kcal)', 'Fat (g)', 'Carbs (g)', 'Protein (g)']])
dfc['adherence_status'] = dfc['adherence_prediction'].map({1: '✅ On Track', 0: '⚠️ Off Track'})
print("Diet Adherence for Cut Goal (500 Calorie Deficit and 205 g Protein Minimum (Last Five Entries)")
dfc[['Daily Totals', 'Calories (kcal)', 'Fat (g)', 'Carbs (g)', 'Protein (g)', 'adherence_status']].tail()

Diet Adherence for Cut Goal (500 Calorie Deficit and 205 g Protein Minimum (Last Five Entries)


Unnamed: 0,Daily Totals,Calories (kcal),Fat (g),Carbs (g),Protein (g),adherence_status
16,19-Mar-2025,2294.334552,93.77,131.32,238.55,✅ On Track
17,20-Mar-2025,2699.114477,109.32,208.25,225.68,✅ On Track
18,21-Mar-2025,2583.277311,146.33,212.78,113.49,⚠️ Off Track
21,24-Mar-2025,826.5,51.4,17.8,69.9,⚠️ Off Track
28,31-Mar-2025,231.0,15.9,1.8,18.9,⚠️ Off Track


In [14]:
adherence_summary = dfc['adherence_prediction'].value_counts().rename_axis('adherence_status').reset_index(name='count')
adherence_summary['adherence_status'] = adherence_summary['adherence_status'].map({1: '✅ On Track', 0: '⚠️ Off Track'})
print("\n🔹 Diet (Cut) Adherence Summary Report 🔹\n")
adherence_summary


🔹 Diet (Cut) Adherence Summary Report 🔹



Unnamed: 0,adherence_status,count
0,✅ On Track,12
1,⚠️ Off Track,9
