fitness_analysis.ipynb

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
%matplotlib inline

# Default canvas for every figure in this notebook: wide and short (12 x 4 inches).
plt.rcParams.update({'figure.figsize': (12, 4)})

In [None]:

# Read the fitness log from the workbook (path is relative to the working directory).
df = pd.read_excel('fitness_data.xlsx')

# errors='coerce' turns unparseable dates into NaT instead of raising;
# those rows are then counted in the missing-value summary below.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

display(df.head())
# DataFrame.info() prints its report and returns None, so wrapping it in
# display() only adds a stray "None" to the cell output.
df.info()
display(df.isna().sum())

In [None]:
# One line chart per tracked metric, plotted against the date.
# A metric whose column is absent from the sheet is skipped rather than raising.
for metric in ('Calories Burned', 'Sleep Duration', 'Workout Duration'):
    if metric not in df.columns:
        continue
    plt.figure()
    plt.plot(df['Date'], df[metric], marker='o')
    plt.title(f"{metric} Over Time")
    # Axis labels so each figure can be read on its own.
    plt.xlabel("Date")
    plt.ylabel(metric)
    plt.xticks(rotation=45)
    plt.show()

In [None]:
# Pairwise correlations between whichever of the expected metrics are present.
num_cols = [c for c in ['Workout Duration','Calories Burned','Sleep Duration','Steps Count'] if c in df.columns]
corr = df[num_cols].corr()
display(corr)

if len(num_cols) >= 2:
    # Pin the colour scale to the full correlation range, centred on zero, so
    # sign and strength read the same way regardless of the values in this data.
    sns.heatmap(corr, annot=True, linewidths=0.5, vmin=-1, vmax=1, center=0)
    plt.title("Correlation Heatmap")
    plt.show()
else:
    # With no columns the matrix is empty and heatmap() would raise; with a
    # single column it is a trivial 1x1 matrix. Either way there is nothing to draw.
    print("Correlation heatmap skipped: fewer than two of the expected metric columns are present.")

In [None]:
print("HYPOTHESIS TESTING\n")

# H1: Pearson correlation Sleep vs Calories
if {'Sleep Duration','Calories Burned'}.issubset(df.columns):
    # Drop rows where EITHER value is missing. Dropping NaNs from each column
    # separately would pair values from different days, or raise when the two
    # columns end up with different lengths.
    paired = df[['Sleep Duration', 'Calories Burned']].dropna()
    if len(paired) >= 2:
        r, p = stats.pearsonr(paired['Sleep Duration'], paired['Calories Burned'])
        # The arrow was mis-encoded (UTF-8 bytes read as cp1252) in the original label.
        print(f"H1 Sleep ↔ Calories: r={r:.3f}, p={p:.4f}")
    else:
        # pearsonr needs at least two complete observations.
        print("H1 Sleep ↔ Calories: skipped (fewer than 2 complete observations)")

# H2/H3: Welch t-tests comparing workout days with rest days
if 'Workout Duration' in df.columns:
    df['Workout Day'] = df['Workout Duration'] > 0
    # NaN > 0 evaluates to False, so a day with no recorded duration would be
    # flagged as a rest day above. Keep such days out of both groups.
    duration_known = df['Workout Duration'].notna()
else:
    # No duration column: any 'Workout Day' column already in the data is used as-is.
    duration_known = pd.Series(True, index=df.index)

if 'Workout Day' in df.columns:
    workout_rows = duration_known & (df['Workout Day'] == True)
    rest_rows = duration_known & (df['Workout Day'] == False)

    # The same comparison is run for each outcome; the label is the printed prefix.
    for label, outcome in (('H2 Calories', 'Calories Burned'), ('H3 Sleep', 'Sleep Duration')):
        if outcome not in df.columns:
            continue
        workout_vals = df.loc[workout_rows, outcome].dropna()
        rest_vals = df.loc[rest_rows, outcome].dropna()
        if len(workout_vals) > 1 and len(rest_vals) > 1:
            # equal_var=False selects Welch's test (no equal-variance assumption).
            t, p = stats.ttest_ind(workout_vals, rest_vals, equal_var=False)
            print(f"{label} Workout vs Rest: t={t:.3f}, p={p:.4f}")
        else:
            print(f"{label} Workout vs Rest: skipped (need at least 2 observations in each group)")

# H4: ANOVA by weekday
if 'Date' in df.columns and 'Calories Burned' in df.columns:
    df['Weekday'] = df['Date'].dt.day_name()
    # Remove missing calorie values before grouping: a single NaN inside any
    # group makes f_oneway return nan, and NaN rows should not count towards
    # the "more than one observation" requirement either.
    calories_known = df.dropna(subset=['Calories Burned'])
    groups = [
        day_rows['Calories Burned'].values
        for _, day_rows in calories_known.groupby('Weekday')
        if len(day_rows) > 1
    ]
    if len(groups) > 1:
        F, p = stats.f_oneway(*groups)
        print(f"H4 Calories by Weekday ANOVA: F={F:.3f}, p={p:.4f}")
    else:
        print("H4 Calories by Weekday ANOVA: skipped (need at least 2 weekdays with more than 1 observation)")

## Results
- Findings will be written here.

## Next Steps
- Missing data handling, feature engineering, machine learning.