In [11]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import pickle


In [2]:
# Load dataset
# Read the raw calorie table and keep only the rows whose `per100grams`
# value is exactly "100g".
df = pd.read_csv("calories.csv")
# .copy() makes the filtered frame independent of the raw one, so the column
# assignments below do not write into a slice (avoids SettingWithCopyWarning).
df = df[df["per100grams"] == "100g"].copy()
# Remove the literal " cal" substring and convert to float. regex=False makes
# the plain-substring match explicit regardless of the pandas version default.
# NOTE(review): presumably values look like "62 cal" - confirm against the CSV.
df["Cals_per100grams"] = (
    df["Cals_per100grams"].str.replace(" cal", "", regex=False).astype(float)
)
# Normalise item names: trim surrounding whitespace and lower-case them.
df["FoodItem"] = df["FoodItem"].str.strip().str.lower()

In [3]:
# Generate synthetic data
# For every food item, draw SAMPLES_PER_FOOD random portion sizes and compute
# the matching calorie total from the per-100g value. Each record is
# [FoodItem, Grams, TotalCalories].
SAMPLES_PER_FOOD = 30

# Seeded generator so that Restart & Run All reproduces the same dataset
# (the seed matches the random_state used for the model).
rng = np.random.default_rng(42)

synthetic_data = []
for food_item, cals_per_100g in zip(df["FoodItem"], df["Cals_per100grams"]):
    # integers(50, 501) draws from 50..500 inclusive (upper bound is exclusive).
    for grams in rng.integers(50, 501, size=SAMPLES_PER_FOOD):
        grams = int(grams)  # plain Python int, as in the original records
        total_cal = (cals_per_100g * grams) / 100
        synthetic_data.append([food_item, grams, total_cal])


In [4]:
# Build the modelling frame from the generated [FoodItem, Grams, TotalCalories] records.
synthetic_df = pd.DataFrame(synthetic_data, columns=["FoodItem", "Grams", "TotalCalories"])

# One-hot encoding
# handle_unknown="ignore" is set so transform() does not raise on food items
# that were not present when the encoder was fitted.
encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
X_encoded = encoder.fit_transform(synthetic_df[["FoodItem"]])
# Feature layout: one-hot food columns first, raw gram weight as the last column.
X = np.concatenate([X_encoded, synthetic_df[["Grams"]].values], axis=1)
y = synthetic_df["TotalCalories"]

# Train/test split
# 80/20 split; random_state is pinned (same seed as the model) so the split is
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [7]:
# Train MLPRegressor (Neural Network)
# Three hidden layers (128 -> 64 -> 32), ReLU activations, Adam optimiser.
# verbose=1 prints the per-iteration loss; with early_stopping=True the log
# also reports a validation score each iteration.
# NOTE(review): the captured training log shows the validation score levelling
# off around 0.32 - worth investigating before relying on this model.
model = MLPRegressor(
    hidden_layer_sizes=(128, 64, 32),  # More neurons/layers
    activation='relu',
    solver='adam',
    alpha=0.001,
    batch_size='auto',
    learning_rate_init=0.0001,
    max_iter=1000,
    early_stopping=True,
    n_iter_no_change=10,
    verbose=1,
    random_state=42,
).fit(X_train, y_train)

# Save model and encoder
# Both objects are stored as a single (model, encoder) tuple in one pickle file.
with open("calorie_model.pkl", "wb") as model_file:
    pickle.dump((model, encoder), model_file)

Iteration 1, loss = 368250.06258227
Validation score: -1.067259
Iteration 2, loss = 248590.97068696
Validation score: 0.065746
Iteration 3, loss = 120183.60670334
Validation score: 0.315786
Iteration 4, loss = 111698.58576352
Validation score: 0.316424
Iteration 5, loss = 111622.20807833
Validation score: 0.315993
Iteration 6, loss = 111560.56479642
Validation score: 0.317151
Iteration 7, loss = 111503.45070473
Validation score: 0.316872
Iteration 8, loss = 111424.23583084
Validation score: 0.316284
Iteration 9, loss = 111315.99123498
Validation score: 0.319116
Iteration 10, loss = 111279.02919382
Validation score: 0.318033
Iteration 11, loss = 111222.78557402
Validation score: 0.319332
Iteration 12, loss = 111137.46127977
Validation score: 0.319209
Iteration 13, loss = 111040.93994930
Validation score: 0.319971
Iteration 14, loss = 110962.93696281
Validation score: 0.320523
Iteration 15, loss = 110884.31532033
Validation score: 0.321679
Iteration 16, loss = 110784.53913532
Validation 

In [13]:
# Write the same (model, encoder) tuple to a second pickle file.
# NOTE(review): "calorie_model.pkl" is written earlier with identical content;
# confirm which filename consumers actually load.
with open('calorie_prediction_model.pkl', 'wb') as model_file:
    pickle.dump((model, encoder), model_file)