## Calculate calories

The goal is to use attendance records and invoices to calculate the number of calories available per person per day.

## Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Code

In [None]:
## Read the data to join
# Kitchen metadata, daily attendances and the monthly attendance comparison
df_kitchen = pd.read_excel('../output/cleaned_metadata.xlsx')
df_attand = pd.read_excel('../output/attendances_completed_imputed.xlsx')
df_attand_over_month = pd.read_excel('../output/attendances_comparison_kitchens.xlsx')

# Add a Month-Year column (monthly period of 'date', rendered as text)
df_kitchen['Month-Year'] = df_kitchen['date'].dt.to_period('M').astype(str)

# Keep only kitchens whose id contains 'KH/JA' or 'KH/MA' (regex alternation).
# na=False makes a missing id count as "no match"; without it the mask
# contains NaN and .loc refuses to index with it.
df_kitchen = df_kitchen.loc[df_kitchen['kitchen_ids'].str.contains('KH/JA|KH/MA', na=False)]

## Expand kitchen data over time - one row per kitchen record and calendar day
import ast

# Columns of the expanded frames, in output order
_EXPANDED_COLUMNS = ['kitchen_ids', 'kitchen_cluster', 'Month-Year', 'date',
                     'total_calories_per_days']


def _expand_over_dates(kitchens, end_col, calories_col):
    """Repeat every row of ``kitchens`` for each day of its date range.

    The range runs from the row's 'start_date' to its ``end_col`` value
    (both ends included, as ``pd.date_range`` does by default). The value of
    ``calories_col`` is copied into 'total_calories_per_days'.

    Returns a DataFrame with the columns listed in ``_EXPANDED_COLUMNS``.
    """
    expanded_rows = []
    for _, row in kitchens.iterrows():
        for day in pd.date_range(start=row['start_date'], end=row[end_col]):
            expanded_rows.append({
                'kitchen_ids': row['kitchen_ids'],
                'kitchen_cluster': row['kitchen_cluster'],
                # NOTE(review): this is the month of the record's own 'date',
                # not of the expanded day - confirm that is intended for
                # ranges that cross a month boundary.
                'Month-Year': row['Month-Year'],
                'date': day,
                'total_calories_per_days': row[calories_col],
            })
    # Passing the columns keeps the schema even when nothing was expanded
    return pd.DataFrame(expanded_rows, columns=_EXPANDED_COLUMNS)


# Lower estimate: minimum daily calories, expanded up to 'max_end_date'.
# Upper estimate: maximum daily calories, expanded up to 'min_end_date'.
# NOTE(review): the min-calories/max-end-date pairing (and its mirror) is
# kept exactly as written; presumably deliberate - confirm.
expanded_df_min_cal = _expand_over_dates(df_kitchen, 'max_end_date', 'min_total_calories_per_days')
expanded_df_max_cal = _expand_over_dates(df_kitchen, 'min_end_date', 'max_total_calories_per_days')

# 'kitchen_cluster' arrives as text and is parsed back into a Python object.
# ast.literal_eval only accepts literals, so unlike eval() a crafted cell in
# the spreadsheet cannot execute code.
expanded_df_max_cal['kitchen_cluster'] = expanded_df_max_cal['kitchen_cluster'].apply(ast.literal_eval)
expanded_df_min_cal['kitchen_cluster'] = expanded_df_min_cal['kitchen_cluster'].apply(ast.literal_eval)

## Calculate the total attendance of a kitchen cluster for one month
def get_total_attendance(row):
    """Return the rounded 'est_benef' total of the row's cluster and month.

    ``row`` must provide 'Month-Year' and 'kitchen_cluster' (an iterable of
    kitchen codes). The lookup is done in the module-level
    ``df_attand_over_month`` frame: rows whose 'kitchen_code' belongs to the
    cluster and whose 'Month-Year' equals the row's are summed (NaN skipped)
    and the sum is rounded to 0 decimals.
    """
    target_month = row['Month-Year']
    cluster_codes = list(row['kitchen_cluster'])

    in_cluster = df_attand_over_month['kitchen_code'].isin(cluster_codes)
    same_month = df_attand_over_month['Month-Year'] == target_month

    monthly_total = df_attand_over_month.loc[in_cluster & same_month, 'est_benef'].sum(skipna=True)
    return round(monthly_total, 0)

## Calories per person per day for each kitchen and date
# Keys of one kitchen-day record after the attendance merge. 'Month', 'Year',
# 'nb_meals' and 'est_benef' are not in the expanded frames, so they have to
# be supplied by df_attand through the merge.
_PER_PERSON_KEYS = ['kitchen_ids', 'date', 'Month',
                    'total_calories_per_days', 'total_att_clust',
                    'Year', 'nb_meals', 'est_benef', 'prop_att']


def _per_person_calories(expanded, out_col):
    """Attach attendances to ``expanded`` and derive calories per person.

    Steps: monthly cluster attendance ('total_att_clust'), left merge with
    ``df_attand`` on kitchen and date, the kitchen's share of the cluster
    attendance ('prop_att'), then ``out_col`` = daily calories * share /
    'est_benef'. The result is summed per ``_PER_PERSON_KEYS``; rows with a
    missing key (e.g. no attendance match) are dropped by the groupby.
    """
    expanded['total_att_clust'] = expanded.apply(get_total_attendance, axis=1)
    expanded = pd.merge(expanded, df_attand,
                        left_on=['kitchen_ids', 'date'],
                        right_on=['kitchen_code', 'date'], how='left')
    expanded['prop_att'] = expanded['est_benef'] / expanded['total_att_clust']
    expanded[out_col] = (expanded['total_calories_per_days'] * expanded['prop_att']
                         / expanded['est_benef'])
    # The first positional argument of GroupBy.sum is numeric_only, so the
    # column name that used to be passed here only ever acted as a truthy
    # flag. Spelling it out keeps the result and states what happens.
    return expanded.groupby(_PER_PERSON_KEYS).sum(numeric_only=True).reset_index()


expanded_df_max_cal = _per_person_calories(expanded_df_max_cal, 'max_tot_calories_per_p')
expanded_df_min_cal = _per_person_calories(expanded_df_min_cal, 'min_tot_calories_per_p')

# Put the upper and lower estimates side by side.
# NOTE(review): if a kitchen has several records for the same date (different
# 'total_calories_per_days'), this merge pairs every max row with every min
# row - confirm the inputs are unique per kitchen and date.
merged_data = pd.merge(expanded_df_max_cal[['kitchen_ids', 'Month', 'Year', 'max_tot_calories_per_p', 'date']],
                       expanded_df_min_cal[['kitchen_ids', 'Month', 'Year', 'min_tot_calories_per_p', 'date']],
                       on=['kitchen_ids', 'Month', 'date', 'Year'],
                       how='left')
merged_data = merged_data.dropna(subset=['max_tot_calories_per_p'])

# Keep rows with a strictly positive lower estimate (NaN fails the test too)
filtered_df = merged_data.loc[merged_data['min_tot_calories_per_p'] > 0]

# Sort by kitchen and date so the rolling window follows time order
filtered_df = filtered_df.sort_values(by=['kitchen_ids', 'date'])

# Rolling sum per kitchen over the current and the previous record
# (window=2 rows, not calendar days; the first record stands alone).
for col in ('min_tot_calories_per_p', 'max_tot_calories_per_p'):
    filtered_df[col] = (filtered_df.groupby('kitchen_ids')[col]
                        .rolling(window=2, min_periods=1).sum()
                        .reset_index(level=0, drop=True))

# One row per kitchen and date, keeping the last rolling value of each group
grouped_df = filtered_df.groupby(['kitchen_ids', 'date', 'Month', 'Year'], as_index=False).agg({
    'min_tot_calories_per_p': 'last',
    'max_tot_calories_per_p': 'last'
})

# Drop rows whose estimates are not above 200.
# NOTE(review): this runs after grouped_df was built, so it does not affect
# the exported file - confirm whether it was meant to come before grouping.
filtered_df = filtered_df.loc[filtered_df['min_tot_calories_per_p'] > 200]
filtered_df = filtered_df.loc[filtered_df['max_tot_calories_per_p'] > 200]

grouped_df.to_excel('../output/first_draft_cal_per_person.xlsx', index=False)

In [None]:
# Monthly summary of the lower and upper per-person calorie estimates:
# mean, standard deviation, record count, standard error and 95% CI
group_keys = ['Month', 'Year']
value_cols = ['min_tot_calories_per_p', 'max_tot_calories_per_p']

# Group the data by month and calculate the mean
grouped_df_date = grouped_df.groupby(group_keys, as_index=False).agg(dict.fromkeys(value_cols, 'mean'))
grouped_df_date = grouped_df_date.rename(columns={col: col + '_mean' for col in value_cols})

# Standard deviation and count for each month. Both statistics are requested
# as a list per column: a dict literal that repeats a key keeps only its last
# value, which would silently replace 'std' with 'count'.
grouped_df_std = grouped_df.groupby(group_keys).agg({col: ['std', 'count'] for col in value_cols})
grouped_df_std.columns = [col + '_' + stat for col, stat in grouped_df_std.columns]
grouped_df_std = grouped_df_std.reset_index()

# Merge the calculated std and counts with the grouped means
grouped_df_date = pd.merge(grouped_df_date, grouped_df_std, on=group_keys)

# Standard error = std / sqrt(number of records in the month), then the
# normal-approximation 95% confidence interval around the mean
z_value = 1.96  # For 95% confidence
for col in value_cols:
    grouped_df_date[col + '_se'] = grouped_df_date[col + '_std'] / np.sqrt(grouped_df_date[col + '_count'])
for col in value_cols:
    margin = z_value * grouped_df_date[col + '_se']
    grouped_df_date[col + '_ci_lower'] = grouped_df_date[col + '_mean'] - margin
    grouped_df_date[col + '_ci_upper'] = grouped_df_date[col + '_mean'] + margin

# Save the final results to Excel
grouped_df_date.to_excel('../output/first_draft_cal_per_person_grouped_date_with_ci.xlsx', index=False)

In [None]:
# Half-month statistics (mean, std, count, SE, 95% CI) of the per-person
# calorie estimates, with and without an own-resources supplement
grouped_df = pd.read_excel('../output/first_draft_cal_per_person.xlsx')
grouped_df['date'] = pd.to_datetime(grouped_df['date'])

# 'Half' is 1 when the day of month is <= 15 and 2 otherwise
grouped_df['Half'] = 2 - (grouped_df['date'].dt.day <= 15).astype('int64')

# Scenario columns: base estimate plus a fixed share of 2247
supplements = [
    ('min_20_mean_cal', 'min_tot_calories_per_p', 0.2),
    ('max_20_mean_cal', 'max_tot_calories_per_p', 0.2),
    ('min_50_mean_cal', 'min_tot_calories_per_p', 0.5),
    ('max_50_mean_cal', 'max_tot_calories_per_p', 0.5),
]
for scenario_col, base_col, share in supplements:
    grouped_df[scenario_col] = grouped_df[base_col] + 2247*share

period_keys = ['Month', 'Year', 'Half']

# One entry per summarised column:
# (value column, mean column, SE column, CI lower column, CI upper column)
interval_specs = [
    ('min_tot_calories_per_p', 'min_tot_calories_per_p_mean', 'min_tot_calories_per_p_se',
     'min_tot_calories_per_p_ci_lower', 'min_tot_calories_per_p_ci_upper'),
    ('max_tot_calories_per_p', 'max_tot_calories_per_p_mean', 'max_tot_calories_per_p_se',
     'max_tot_calories_per_p_ci_lower', 'max_tot_calories_per_p_ci_upper'),
    ('min_20_mean_cal', 'min_20_mean_cal', 'min_20_cal_se', 'min_20_cal_lower', 'min_20_cal_upper'),
    ('max_20_mean_cal', 'max_20_mean_cal', 'max_20_cal_se', 'max_20_cal_lower', 'max_20_cal_upper'),
    ('min_50_mean_cal', 'min_50_mean_cal', 'min_50_cal_se', 'min_50_cal_lower', 'max_50_cal_placeholder'),
    ('max_50_mean_cal', 'max_50_mean_cal', 'max_50_cal_se', 'max_50_cal_lower', 'max_50_cal_upper'),
]
# Keep the fifth spec's upper-bound name exactly as the export expects
interval_specs[4] = ('min_50_mean_cal', 'min_50_mean_cal', 'min_50_cal_se',
                     'min_50_cal_lower', 'min_50_cal_upper')
value_cols = [spec[0] for spec in interval_specs]

# Means per (Month, Year, Half); only the two base columns get a '_mean' suffix
grouped_df_date = grouped_df.groupby(period_keys, as_index=False).agg(dict.fromkeys(value_cols, 'mean'))
grouped_df_date = grouped_df_date.rename(columns={
    'min_tot_calories_per_p': 'min_tot_calories_per_p_mean',
    'max_tot_calories_per_p': 'max_tot_calories_per_p_mean'
})

# Standard deviation and count per period, flattened to '<column>_<stat>'
grouped_df_std = grouped_df.groupby(period_keys).agg(
    {value_col: ['std', 'count'] for value_col in value_cols}
).reset_index()
flat_names = list(period_keys)
for value_col in value_cols:
    flat_names.extend([value_col + '_std', value_col + '_count'])
grouped_df_std.columns = flat_names

# Bring the dispersion statistics next to the means
grouped_df_date = grouped_df_date.merge(grouped_df_std, on=period_keys)

# Standard error: std divided by the square root of the count
for value_col, mean_col, se_col, lower_col, upper_col in interval_specs:
    grouped_df_date[se_col] = grouped_df_date[value_col + '_std'] / np.sqrt(grouped_df_date[value_col + '_count'])

# 95% confidence interval: mean -/+ 1.96 standard errors
z_value = 1.96
for value_col, mean_col, se_col, lower_col, upper_col in interval_specs:
    grouped_df_date[lower_col] = grouped_df_date[mean_col] - z_value * grouped_df_date[se_col]
    grouped_df_date[upper_col] = grouped_df_date[mean_col] + z_value * grouped_df_date[se_col]

# Save the final results to Excel
grouped_df_date.to_excel('../output/first_draft_cal_per_person_grouped_half_month.xlsx', index=False)

In [None]:
## Clean excel for nutrition model
grouped_df_date = pd.read_excel('../output/first_draft_cal_per_person_grouped_half_month.xlsx')

# Format dates: first day of each (Year, Month). Year is cast to int before
# str, as the plotting cells of this notebook do, so a year read back as a
# float (2024.0) still matches the '%Y-%m-%d' format.
start_dates = pd.to_datetime(
    grouped_df_date['Year'].astype(int).astype(str) + '-' + grouped_df_date['Month'].astype(str) + '-01',
    format='%Y-%m-%d')
half_months = grouped_df_date['Half']

# Start of the half-month: the 1st for Half 1, the 16th (1st + 15 days) for Half 2
dates = start_dates + pd.to_timedelta((half_months - 1) * 15, unit='D')
grouped_df_date['half_date'] = dates

# Calculate the scenario means as the midpoint of the min and max estimates
grouped_df_date['scen_0_mean_tot_cal_per_p'] = grouped_df_date[['max_tot_calories_per_p_mean', 'min_tot_calories_per_p_mean']].mean(axis=1)
grouped_df_date['scen_20_mean_tot_cal_per_p'] = grouped_df_date[['min_20_mean_cal', 'max_20_mean_cal']].mean(axis=1)
grouped_df_date['scen_50_mean_tot_cal_per_p'] = grouped_df_date[['min_50_mean_cal', 'max_50_mean_cal']].mean(axis=1)

# Select the right columns: each scenario keeps the CI lower bound of its
# min estimate and the CI upper bound of its max estimate
grouped_df_date = grouped_df_date[['Month', 'half_date', 'scen_0_mean_tot_cal_per_p',
                                   'scen_20_mean_tot_cal_per_p', 'scen_50_mean_tot_cal_per_p',
                                   'min_tot_calories_per_p_ci_lower', 'max_tot_calories_per_p_ci_upper',
                                   'min_20_cal_lower', 'max_20_cal_upper',
                                   'min_50_cal_lower', 'max_50_cal_upper']]

grouped_df_date = grouped_df_date.rename(columns={
    "min_tot_calories_per_p_ci_lower": "scen_0_cal_lower_ci",
    "max_tot_calories_per_p_ci_upper": "scen_0_cal_upper_ci",
    "min_20_cal_lower": "scen_20_cal_lower_ci",
    "max_20_cal_upper": "scen_20_cal_upper_ci",
    "min_50_cal_lower": "scen_50_cal_lower_ci",
    "max_50_cal_upper": "scen_50_cal_upper_ci",
})

grouped_df_date = grouped_df_date.sort_values('half_date')
# Save the final results to Excel
grouped_df_date.to_excel('../output/final_nut_val_for_malnutrition.xlsx', index=False)

## Visualization 

In [None]:
### Calories per person per day, by month
# Line: midpoint of the min and max monthly means.
# Band: from the CI lower bound of the min estimate to the CI upper bound of
# the max estimate.
grouped_df_date = pd.read_excel('../output/first_draft_cal_per_person_grouped_date_with_ci.xlsx')
fig, ax = plt.subplots(figsize=(10, 3))

# NOTE(review): the x axis uses 'Month' only; if the data spans several years
# the points of different years share the same x positions - confirm.
months = grouped_df_date['Month'].to_numpy()
ax.plot(months,
        grouped_df_date[['max_tot_calories_per_p_mean',
                         'min_tot_calories_per_p_mean']].mean(axis=1).to_numpy(),
        label='Kcal (average)', color='orange', linestyle='--', marker='x')
ax.fill_between(months,
                grouped_df_date['min_tot_calories_per_p_ci_lower'].to_numpy(),
                grouped_df_date['max_tot_calories_per_p_ci_upper'].to_numpy(),
                color='orange', alpha=0.2, label='95% confidence interval for funding')

# Reference lines. The labels follow the other plotting cells of this
# notebook, which describe 1,050 kcal as 50% and 525 kcal as 25%.
min_calories = 1050
ax.axhline(y=min_calories, color='black', linestyle='dashed', linewidth=1.5, label='Min calories - WFP 50%')
half_calories = 525
ax.axhline(y=half_calories, color='grey', linestyle='dashed', linewidth=1.5, label='Min calories - WFP 25%')

ax.set_ylabel('Calories per person per day')
ax.grid(axis='x')

# Save the figure to a PNG file.
# NOTE(review): the half-month plotting cells write to this same path.
plt.savefig('../visualization/calories_trends.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
import datetime

# Half-month statistics, ordered chronologically
grouped_df_date = pd.read_excel(
    '../output/first_draft_cal_per_person_grouped_half_month.xlsx'
).sort_values(['Year', 'Month', 'Half'])

# First day of each (Year, Month), built from the text 'YYYY-M-01'
year_text = grouped_df_date['Year'].astype(int).astype(str)
month_text = grouped_df_date['Month'].astype(str)
start_dates = pd.to_datetime(year_text + '-' + month_text + '-01', format='%Y-%m-%d')
half_months = grouped_df_date['Half']

# Half 1 stays on the 1st; Half 2 is shifted by 15 days
dates = start_dates + pd.to_timedelta((half_months - 1) * 15, unit='D')
x_values = dates.to_numpy()

# Midpoint of the min and max means, and the outer bounds of their CIs
mean_kcal = grouped_df_date[['max_tot_calories_per_p_mean', 'min_tot_calories_per_p_mean']].mean(axis=1).to_numpy()
band_low = grouped_df_date['min_tot_calories_per_p_ci_lower'].to_numpy()
band_high = grouped_df_date['max_tot_calories_per_p_ci_upper'].to_numpy()

fig, ax = plt.subplots(figsize=(12, 5))

# Mean line and shaded confidence band
ax.plot(x_values, mean_kcal,
        label='Kcal (average)', color='orange', linestyle='--', marker='x')
ax.fill_between(x_values, band_low, band_high,
                color='orange', alpha=0.2, label='95% CI')

# Horizontal reference thresholds
for level, line_colour, caption in (
        (1050, 'black', 'Min calories - WFP 50%'),
        (525, 'grey', 'Min calories - WFP 25%')):
    ax.axhline(y=level, color=line_colour, linestyle='dashed', linewidth=1.5, label=caption)

# One tick per half-month, labelled day/month/year
ax.set_xticks(dates)
ax.set_xticklabels(dates.dt.strftime('%d/%m/%Y'), rotation=45, ha='right')

# Axis titles, legend and horizontal grid lines
ax.set_ylabel('Calories per person per day')
ax.set_xlabel('Date (Half-Month Period)')
ax.legend(title='Caloric Intake Trends')
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Write the PNG, then display the figure
plt.savefig('../visualization/calories_trends.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Half-month statistics, ordered chronologically
grouped_df_date = pd.read_excel(
    '../output/first_draft_cal_per_person_grouped_half_month.xlsx'
).sort_values(['Year', 'Month', 'Half'])

# First day of each (Year, Month), built from the text 'YYYY-M-01'
year_text = grouped_df_date['Year'].astype(int).astype(str)
month_text = grouped_df_date['Month'].astype(str)
start_dates = pd.to_datetime(year_text + '-' + month_text + '-01', format='%Y-%m-%d')

# 1 for the first half of the month, 2 for the second half
half_months = grouped_df_date['Half']

# Half 1 stays on the 1st; Half 2 is shifted by 15 days
dates = start_dates + pd.to_timedelta((half_months - 1) * 15, unit='D')
x_values = dates.to_numpy()

fig, ax = plt.subplots(figsize=(10, 6))

# Each scenario: (columns averaged for the line, CI lower column,
# CI upper column, colour, legend text of the line)
scenarios = [
    (['max_tot_calories_per_p_mean', 'min_tot_calories_per_p_mean'],
     'min_tot_calories_per_p_ci_lower', 'max_tot_calories_per_p_ci_upper',
     'darkred',
     'Kcal estimation assuming pop. relies only on \n community kitchen'),
    (['min_20_mean_cal', 'max_20_mean_cal'],
     'min_20_cal_lower', 'max_20_cal_upper',
     'orange',
     'Kcal estimation assuming pop. relies on \n community kitchen and can afford 20% \n of the 2014 caloric intake (2,247 kcal) \n through own resources'),
    (['min_50_mean_cal', 'max_50_mean_cal'],
     'min_50_cal_lower', 'max_50_cal_upper',
     'peachpuff',
     'Kcal estimation assuming pop. relies on \n community kitchen and can afford 50% \n of the 2014 caloric intake (2,247 kcal) \n through own resources'),
]

# Draw every scenario as a dashed line followed by its shaded CI band
for mean_cols, lower_col, upper_col, colour, caption in scenarios:
    ax.plot(x_values,
            grouped_df_date[mean_cols].mean(axis=1).to_numpy(),
            label=caption,
            color=colour, linestyle='--', marker='x', linewidth=3, markersize=10)
    ax.fill_between(x_values,
                    grouped_df_date[lower_col].to_numpy(),
                    grouped_df_date[upper_col].to_numpy(),
                    color=colour, alpha=0.2, label='95% CI')

# Horizontal reference lines for the nutritional requirement levels
thresholds = [
    (1050, 'darkgreen', '50% of nutritional requirement (1,050 kcal)'),
    (1470, 'forestgreen', '70% of nutritional requirement (1,470 kcal)'),
    (2100, 'yellowgreen', 'Minimum nutritional requirement (2,100 kcal)'),
]
for level, line_colour, caption in thresholds:
    ax.axhline(y=level, color=line_colour, linestyle='dashed', linewidth=1.5, label=caption)

# One tick per half-month, labelled day/month/year
ax.set_xticks(dates)
ax.set_xticklabels(dates.dt.strftime('%d/%m/%Y'), rotation=45, ha='right', fontsize=16)

# Axis titles and tick size
ax.set_ylabel('Estimation of Kcal per person per day', fontsize=16)
ax.set_xlabel('Date', fontsize=16)
ax.tick_params(axis='y', labelsize=16)

# Legend placed outside the axes on the right, plus horizontal grid lines
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=16, labelspacing=0.7)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Write the PNG, then display the figure
plt.savefig('../visualization/calories_trends.png', dpi=300, bbox_inches='tight')
plt.show()
