# In [None]:
# Cell 1: Import Libraries
import pandas as pd
from src.data_preprocessing import preprocess_recipes
from src.recommendation_model import ContentBasedRecommender
import os

# Cell 2: Load Data
# The CSV path is resolved relative to the current working directory; adjust
# it if the notebook is not run from a folder that sits next to 'data'.
df = pd.read_csv('../data/recipes.csv')
# Header and preview are emitted by a single print call, one per line.
print("Original DataFrame head:", df.head(), sep="\n")

# Cell 3: Preprocess Data
# preprocess_recipes() is given a copy of `df`, not `df` itself.
processed_df = preprocess_recipes(df.copy())
# Header and preview are emitted by a single print call, one per line.
print(
    "\nProcessed DataFrame head (with cleaned_ingredients):",
    processed_df.head(),
    sep="\n",
)

# Cell 4: Initialize and Fit Recommender Model
recommender = ContentBasedRecommender()
recommender.fit(processed_df)

# Cell 5: Save the Trained Model
# Output location, relative to the current working directory.
model_dir = '../models'
model_path = '../models/content_based_model.pkl'
# Create the target directory before saving; exist_ok=True makes this a
# no-op when the directory is already there.
os.makedirs(model_dir, exist_ok=True)
recommender.save_model(path=model_path)

# Cell 6: Load the Model (Demonstration)
# Load the model file written in Cell 5 into a fresh recommender instance.
loaded_recommender = ContentBasedRecommender()
if loaded_recommender.load_model(path='../models/content_based_model.pkl'):
    print("\nModel loaded successfully for testing.")
else:
    # load_model() returned a falsy value. Without this branch the cells below
    # would silently call recommend() on a freshly constructed instance that
    # was never loaded. Report the problem and reuse the recommender fitted in
    # Cell 4 so the remaining cells still run against a fitted model.
    print("\nWarning: could not load '../models/content_based_model.pkl'; "
          "falling back to the in-memory recommender.")
    loaded_recommender = recommender

# Cell 7: Get and Display Recommendations (Example 1)
def _show_recommendations(recipe_name, recipe_id, num_recommendations=3):
    """Print a header, query `loaded_recommender`, print and return the result.

    The header is printed before recommend() is called, so anything that
    recommend() itself prints appears between the header and the result.
    """
    print(f"\nRecommendations for '{recipe_name}' (recipe_id={recipe_id}):")
    results = loaded_recommender.recommend(
        recipe_id=recipe_id,
        num_recommendations=num_recommendations,
    )
    print(results)
    return results


# Spaghetti Carbonara (recipe_id=1)
recommendations_carbonara = _show_recommendations('Spaghetti Carbonara', 1)

# Cell 8: Get and Display Recommendations (Example 2)
# Chicken Tikka Masala (recipe_id=2)
recommendations_tikka = _show_recommendations('Chicken Tikka Masala', 2)

# Cell 9: Get and Display Recommendations (Example 3)
# Vegetable Stir-Fry (recipe_id=4)
recommendations_stirfry = _show_recommendations('Vegetable Stir-Fry', 4)

# Cell 10: Further EDA and Visualization (Optional)
# This is where you would do more data analysis, e.g.,
# import matplotlib.pyplot as plt
# import seaborn as sns
# plt.figure(figsize=(10, 6))
# sns.countplot(data=df, y='cuisine', order=df['cuisine'].value_counts().index)
# plt.title('Distribution of Cuisines')
# plt.show()

# You could also explore the TF-IDF features:
# print("\nFirst 10 TF-IDF feature names (ingredients):")
# feature_names = recommender.tfidf_vectorizer.get_feature_names_out()
# # NOTE: this only lists feature names; it does not rank them by TF-IDF score.
# # For demonstration, let's just show the first few feature names.
# print(feature_names[:10])