In [None]:
%pip install isodate

In [None]:
import isodate
# Function to convert ISO 8601 duration to human-readable format
def convert_iso_duration(iso_duration):
    """Convert an ISO 8601 duration string into a human-readable phrase.

    Parameters
    ----------
    iso_duration : str
        Duration text such as ``"PT1H30M"``. Any non-string value
        (e.g. ``None`` or ``NaN``) is treated as missing.

    Returns
    -------
    str
        Comma-separated parts, e.g. ``"1 hour, 30 minutes"``. A zero-length
        duration yields ``"0 seconds"`` (previously an empty string), a
        negative duration is prefixed with ``"-"``, and missing or
        unparseable input yields ``"unknown"``.
    """
    # Handle non-string values (e.g., None or NaN)
    if not isinstance(iso_duration, str):
        return "unknown"

    try:
        duration = isodate.parse_duration(iso_duration)
        total_seconds = int(duration.total_seconds())
    except Exception:
        # Best effort by design: a malformed string, or a parse result that
        # does not expose total_seconds(), is reported as "unknown".
        return "unknown"

    # Format the magnitude and re-attach the sign afterwards; divmod on a
    # negative total would otherwise produce misleading positive parts.
    sign = "-" if total_seconds < 0 else ""
    remaining = abs(total_seconds)

    parts = []
    for unit_name, unit_seconds in (("day", 86400), ("hour", 3600), ("minute", 60), ("second", 1)):
        count, remaining = divmod(remaining, unit_seconds)
        if count:
            parts.append(f"{count} {unit_name}{'s' if count != 1 else ''}")

    if not parts:
        return "0 seconds"
    return sign + ", ".join(parts)
# Example usage: pass the sample string itself (the call used to pass None,
# which always prints "unknown").
iso_duration = "PT24H"  # 24 hours
print(convert_iso_duration(iso_duration))  # Output: 1 day

In [None]:
# Define a cleaning function
def clean_ingredients(ingredient_str):
    """Flatten an R-style ``c("a", "b")`` vector literal into plain text.

    Every ``c(`` and ``)`` occurrence and every double quote is removed, so
    ``'c("flour", "salt")'`` becomes ``'flour, salt'``.

    Parameters
    ----------
    ingredient_str : str
        Raw cell value. Non-string values (e.g. ``NaN`` / ``None`` for missing
        cells) are returned unchanged instead of raising ``AttributeError``.

    Returns
    -------
    str
        The cleaned text; ``""`` for ``character(0)``, which ``parse_list``
        in this notebook also treats as an empty vector.
    """
    if not isinstance(ingredient_str, str):
        return ingredient_str
    if ingredient_str.strip() == "character(0)":
        return ""
    ingredient_str = ingredient_str.replace('c(', '').replace(')', '')  # Remove 'c(' and ')'
    ingredient_str = ingredient_str.replace('"', '')  # Remove double quotes
    return ingredient_str

In [None]:
#convert RecipeIngredientParts and RecipeIngredientQuantities to a list
import ast

def parse_list(r_string):
    """Parse an R-style ``c("a", "b")`` vector literal into a Python list.

    Parameters
    ----------
    r_string : str
        Raw cell text. ``character(0)`` denotes an empty vector. A value that
        is already a list/tuple is returned as a list, so re-running the
        parsing cell does not wipe rows that were parsed before.

    Returns
    -------
    list
        The parsed elements. A bare scalar literal (e.g. ``'"1"'``) is wrapped
        in a one-element list. Returns ``[]`` for missing values and for text
        that is not a valid literal (this includes vectors containing a bare
        ``NA`` token).
    """
    if isinstance(r_string, (list, tuple)):
        return list(r_string)
    if not isinstance(r_string, str):
        return []  # Missing values (NaN/None) become an empty list

    text = r_string.strip()
    if text == "character(0)":
        return []  # Handle character(0) as an empty list

    # Swap only the outer c( ... ) wrapper for brackets; replacing every ')'
    # would corrupt parentheses that belong to the quoted strings.
    if text.startswith("c(") and text.endswith(")"):
        text = "[" + text[2:-1] + "]"

    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return []  # Return an empty list for invalid rows

    if isinstance(parsed, (list, tuple)):
        return list(parsed)
    return [parsed]


In [None]:
from fractions import Fraction

# Function to convert a string to a float, handling fractions like "1/4"
def convert_to_float(value):
    """Convert a quantity such as ``"1/4"``, ``"2"`` or ``"1.5"`` to a float.

    Parameters
    ----------
    value : str or number
        Text (or number) accepted by ``fractions.Fraction``.

    Returns
    -------
    float
        The numeric value, or ``0.0`` when ``value`` cannot be converted:
        malformed text, ``None``, a zero denominator, ``NaN`` or infinity.
    """
    try:
        # Fraction understands "1/4" as well as plain integers and decimals
        return float(Fraction(value))
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        # ValueError: malformed text or NaN; TypeError: None and other
        # unsupported types; ZeroDivisionError: "1/0"; OverflowError: infinity.
        return 0.0
    
# Convert Quantities from strings to floats (handling fractions and decimals)
#data_sup['Quantities'] = data_sup['Quantities'].apply(
#    lambda x: [convert_to_float(q) for q in x]  # Convert each string in the list
#)

1. Getting the Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Render floats with two decimals in every DataFrame display
pd.set_option("display.float_format", '{:.2f}'.format)

# Read the raw recipes table from the working directory
data = pd.read_csv('recipes.csv')
print("Dataset Loaded Successfully")


2. Exploring the Data

In [None]:
# Overview of the dataset: info() prints its per-column summary directly
print("Dataset Information:")
data.info()

In [None]:
# Shape and statistical description
# include='all' asks describe() to cover non-numeric columns as well
print("\nDataset Shape:", data.shape)
print("\nStatistical Summary:")
print(data.describe(include='all'))

In [None]:
# Check for missing values
missing_values = data.isnull().sum()
missing_percentage = (missing_values / len(data)) * 100

# The heading announces count *and* percentage, so show both side by side
# (only the percentage used to be printed).
missing_summary = pd.DataFrame(
    {"missing_count": missing_values, "missing_percentage": missing_percentage}
)
print("\nMissing Values Count and Percentage:")
print(missing_summary)

In [None]:
# Preview the first ten rows of the raw dataset
data.head(10)

3. Data Cleaning and Preprocessing

In [None]:
# Feature extraction: keep only the columns listed below
selected_columns = [
    'RecipeId',
    'Name',
    'CookTime',
    'RecipeServings',
    'RecipeCategory',
    'RecipeIngredientQuantities',
    'RecipeIngredientParts',
    'AggregatedRating',
    'Calories',
    'FatContent',
    'SaturatedFatContent',
    'CholesterolContent',
    'SodiumContent',
    'CarbohydrateContent',
    'FiberContent',
    'SugarContent',
    'ProteinContent',
    'RecipeInstructions',
]
data_extracted = data[selected_columns]
print("\nData Extracted Completed. Preview:")
# The cell output is the (rows, columns) shape of the extracted frame
data_extracted.shape

In [None]:
# Number of missing values per selected column
data_extracted.isna().sum()

In [None]:
# Work on an independent copy so data_extracted itself is never modified
data_cleaned = data_extracted.copy()

# Replace the CookTime values with readable text; convert_iso_duration
# returns "unknown" for anything that is not a string
data_cleaned["CookTime"] = data_cleaned["CookTime"].apply(convert_iso_duration)
data_cleaned["CookTime"].head(20)

In [None]:
# Discard recipes that have no category
data_cleaned = data_cleaned.dropna(axis=0, how="any", subset=["RecipeCategory"])

In [None]:
# Discard recipes without ingredient quantities
data_cleaned = data_cleaned.dropna(axis=0, how="any", subset=["RecipeIngredientQuantities"])

# Strip the R vector syntax from the ingredient names
data_cleaned['RecipeIngredientParts'] = data_cleaned['RecipeIngredientParts'].map(clean_ingredients)


In [None]:
# 'Description' is not among selected_columns, so data_cleaned normally lacks it
# and the direct lookup raised KeyError. When absent, take it from the raw frame
# for the rows that are still present.
# NOTE(review): assumes recipes.csv provides a 'Description' column — confirm.
if "Description" in data_cleaned.columns:
    description = data_cleaned["Description"]
else:
    description = data.loc[data_cleaned.index, "Description"]
data_cleaned["Description"] = description.fillna("No Description")

In [None]:
# Replace missing ratings with the mean of the ratings that are present
mean_rating = data_cleaned["AggregatedRating"].mean()
data_cleaned["AggregatedRating"] = data_cleaned["AggregatedRating"].fillna(mean_rating)

In [None]:
# Turn the R-style vector strings into Python lists (parse_list yields []
# for rows it cannot parse)
data_cleaned["RecipeInstructions"] = data_cleaned["RecipeInstructions"].map(parse_list)
data_cleaned["RecipeIngredientQuantities"] = data_cleaned["RecipeIngredientQuantities"].map(parse_list)


In [None]:
# Non-numeric serving counts become NaN instead of raising
data_cleaned["RecipeServings"] = pd.to_numeric(
    data_cleaned["RecipeServings"],
    errors='coerce',
)

In [None]:
# Rows whose serving count is missing
data_RecipeServings_nan = data_cleaned.loc[data_cleaned['RecipeServings'].isna()]
data_RecipeServings_nan.head()


In [None]:
# Nutritional maximum thresholds for a single meal (assuming 3 meals a day):
# each daily amount below is divided by 3.
max_thresholds_per_meal = {
    nutrient: daily_amount / 3
    for nutrient, daily_amount in {
        'Calories': 2000,
        'FatContent': 70,
        'SaturatedFatContent': 22,
        'CholesterolContent': 300,
        'SodiumContent': 2300,
        'CarbohydrateContent': 325,
        'FiberContent': 25,
        'SugarContent': 50,
        'ProteinContent': 175,
    }.items()
}

data_cleaned_copy = data_cleaned.copy()


# Filter on the per-meal nutritional thresholds
def clean_recipe(row):
    """Decide whether a recipe with unknown servings can count as one serving.

    Parameters
    ----------
    row : pandas.Series
        One recipe row.

    Returns
    -------
    pandas.Series or None
        ``None`` when any nutrient listed in ``max_thresholds_per_meal`` reaches
        or exceeds its per-meal maximum; otherwise a copy of the row with
        ``RecipeServings`` set to 1.
    """
    for column, maximum in max_thresholds_per_meal.items():
        if column in row and row[column] >= maximum:
            return None  # A value reaches the threshold: this row is to be removed
    # Every value is below its threshold: treat the recipe as a single serving
    row = row.copy()
    row['RecipeServings'] = 1
    return row


# Only rows without a serving count are evaluated.
missing_servings = data_cleaned_copy['RecipeServings'].isnull()
if missing_servings.any():
    # The apply() result used to be discarded, so no row ever received
    # RecipeServings = 1 and the dropna below removed every such row. Record the
    # verdict per row and write it back explicitly.
    within_limits = data_cleaned_copy[missing_servings].apply(
        lambda row: clean_recipe(row) is not None, axis=1
    ).astype(bool)
    data_cleaned_copy.loc[within_limits[within_limits].index, 'RecipeServings'] = 1

# Remove the rows that still have no serving count (those rejected by clean_recipe)
data_cleaned_copy = data_cleaned_copy.dropna(subset=['RecipeServings'])


# Show the first rows of the cleaned DataFrame
data_cleaned_copy.head()


In [None]:
# (rows, columns) remaining after the serving-count cleanup
data_cleaned_copy.shape


In [None]:
# Remaining missing values per column
data_cleaned_copy.isna().sum()

In [None]:
# Show the first rows of the DataFrame.
# NOTE(review): the original comment said "after division", but no division step
# is visible in this notebook — confirm whether a cell is missing.
data_cleaned_copy.head()

In [None]:
# Print the (rows, columns) shape of the cleaned frame
print(data_cleaned_copy.shape)

4. Filtering Nutritional Information for Recommendations

In [None]:
# Nutritional maximum thresholds for a single meal (assuming 3 meals a day);
# every entry is a daily amount divided by 3.
max_thresholds_per_meal = dict(
    Calories=2000 / 3,
    FatContent=70 / 3,
    SaturatedFatContent=22 / 3,
    CholesterolContent=300 / 3,
    SodiumContent=2300 / 3,
    CarbohydrateContent=325 / 3,
    FiberContent=25 / 3,
    SugarContent=50 / 3,
    ProteinContent=175 / 3,
)
# Create a copy so data_cleaned_copy is preserved
data_prepared = data_cleaned_copy.copy()

# Initialize a new column for health status
data_prepared['HealthStatus'] = 'Healthy'  # Assume healthy by default

# Mark a recipe 'Unhealthy' as soon as one nutrient exceeds its per-meal maximum.
# A boolean mask per nutrient gives the same labels as the former row-by-row
# apply() without a full Python-level pass over the frame for each nutrient.
for column, max_value in max_thresholds_per_meal.items():
    # Check the frame being labelled (the test used to look at data_cleaned)
    if column in data_prepared.columns:
        data_prepared.loc[data_prepared[column] > max_value, 'HealthStatus'] = 'Unhealthy'

# Display summary statistics and shape of the labelled data
print("\nData Filtered Based on Nutritional Information (per meal):")
print(data_prepared.describe())  # Descriptive statistics for the labelled data
print(data_prepared.shape)  # The shape of the labelled data

In [None]:
# Example of viewing the health status column
# (head() shows every column; HealthStatus is the one added during labelling)
print("\nHealth Status for Each Recipe (per meal):")
data_prepared.head()

In [None]:
# Preview the first rows of data_prepared (same call as the previous cell)
data_prepared.head()

5. Deployment

In [None]:
# Write the prepared recipes to CSV, leaving out the DataFrame index
data_prepared.to_csv(path_or_buf='cleaned_recipes_.csv', index=False)
print("Data Saved for Deployment.")

In [None]:
from fractions import Fraction

# Function to convert a string to a float, handling fractions like "1/4"
# NOTE(review): this redefines convert_to_float from an earlier cell of this
# notebook; keep the two definitions identical or delete one of them.
def convert_to_float(value):
    """Convert a quantity such as ``"1/4"``, ``"2"`` or ``"1.5"`` to a float.

    Parameters
    ----------
    value : str or number
        Text (or number) accepted by ``fractions.Fraction``.

    Returns
    -------
    float
        The numeric value, or ``0.0`` when ``value`` cannot be converted:
        malformed text, ``None``, a zero denominator, ``NaN`` or infinity.
    """
    try:
        # Fraction understands "1/4" as well as plain integers and decimals
        return float(Fraction(value))
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        # ValueError: malformed text or NaN; TypeError: None and other
        # unsupported types; ZeroDivisionError: "1/0"; OverflowError: infinity.
        return 0.0

# Convert every entry of each Quantities list to a float (fractions and decimals)
# NOTE(review): data_sup is not defined in any cell visible in this notebook —
# confirm where it is created.
data_sup['Quantities'] = data_sup['Quantities'].apply(
    lambda quantities: list(map(convert_to_float, quantities))
)


In [None]:
# Preview the first ten converted quantity lists
data_sup['Quantities'].head(10)

In [None]:
# Fonction pour remplir les quantités manquantes
def fill_missing_quantities(row):
    """Pad a recipe's quantity list so it has one entry per ingredient.

    Parameters
    ----------
    row : pandas.Series
        Row exposing ``'Ingredients'`` and ``'Quantities'``.

    Returns
    -------
    pandas.Series
        Indexed by ``['Ingredients', 'Quantities']``. ``Ingredients`` is passed
        through unchanged; ``Quantities`` is a new list, padded with ``1`` when
        it is shorter than the ingredient list. The list stored in ``row`` is
        left untouched.
    """
    ingredients = row['Ingredients']
    raw_quantities = row['Quantities']

    # Work on a copy: extending the original list in place also altered the
    # object stored in the source DataFrame.
    try:
        quantities = list(raw_quantities)
    except TypeError:
        quantities = []  # e.g. NaN where the quantity list is missing

    try:
        ingredient_count = len(ingredients)
    except TypeError:
        ingredient_count = 0  # e.g. NaN where the ingredient list is missing

    # If there are more ingredients than quantities, pad with 1
    num_missing_quantities = ingredient_count - len(quantities)
    if num_missing_quantities > 0:
        quantities.extend([1] * num_missing_quantities)

    return pd.Series([ingredients, quantities], index=['Ingredients', 'Quantities'])

# Apply the function to fill in the missing quantities
# NOTE(review): data_sup is not defined in any cell visible in this notebook —
# confirm where it is created.
data_sup[['Ingredients', 'Quantities']] = data_sup.apply(fill_missing_quantities, axis=1)


In [None]:
# Show the data after the missing quantities have been filled in
data_sup.head(10)

In [None]:
def get_user_details():
    """Prompt on standard input for the user's body metrics and weight goal.

    Returns
    -------
    tuple
        ``(height, weight, age, gender, activity_level, target_weight)`` where
        height, weight and target weight are floats, age is an int, gender is
        stripped and upper-cased, and activity level is stripped and
        lower-cased. A ``ValueError`` from ``float``/``int`` propagates when a
        numeric answer cannot be parsed.
    """
    def ask(prompt, convert):
        # Read one answer and convert it immediately, keeping the prompt order
        return convert(input(prompt))

    height_cm = ask("Enter your height (in cm): ", float)
    weight_kg = ask("Enter your weight (in kg): ", float)
    age_years = ask("Enter your age: ", int)
    gender_code = ask("Enter your gender (M/F): ", lambda answer: answer.strip().upper())
    activity = ask(
        "Enter your activity level (sedentary, lightly_active, moderately_active, very_active): ",
        lambda answer: answer.strip().lower(),
    )
    goal_weight_kg = ask("Enter your target weight (in kg): ", float)

    return (height_cm, weight_kg, age_years, gender_code, activity, goal_weight_kg)


In [None]:
def calculate_bmr(height, weight, age, gender):
    """Estimate the basal metabolic rate from body metrics.

    The coefficients match the revised Harris-Benedict equations.

    Parameters
    ----------
    height : float
        Height (get_user_details asks for centimetres).
    weight : float
        Weight (get_user_details asks for kilograms).
    age : int
        Age — presumably in years; confirm against callers.
    gender : str
        ``"M"`` / ``"male"`` in any letter case and with surrounding
        whitespace selects the male equation; every other value uses the
        female equation, as before.

    Returns
    -------
    float
        The estimated BMR.
    """
    # Normalise first: a lowercase or padded "m" used to fall through to the
    # female equation without any warning.
    normalized_gender = str(gender).strip().upper()
    if normalized_gender in ("M", "MALE"):
        bmr = 88.362 + (13.397 * weight) + (4.799 * height) - (5.677 * age)
    else:
        bmr = 447.593 + (9.247 * weight) + (3.098 * height) - (4.330 * age)
    return bmr

def get_activity_factor(activity_level):
    """Map an activity-level label to its BMR multiplier.

    Parameters
    ----------
    activity_level : str
        One of ``sedentary``, ``lightly_active``, ``moderately_active`` or
        ``very_active``. Letter case, surrounding whitespace and the use of
        spaces or hyphens instead of underscores are tolerated.

    Returns
    -------
    float
        The matching multiplier, or ``1.2`` (sedentary) for any unrecognised
        value.
    """
    activity_factors = {
        "sedentary": 1.2,
        "lightly_active": 1.375,
        "moderately_active": 1.55,
        "very_active": 1.725
    }
    if isinstance(activity_level, str):
        # "Very Active" or "very-active" used to fall back to sedentary silently
        activity_level = activity_level.strip().lower().replace("-", "_").replace(" ", "_")
    return activity_factors.get(activity_level, 1.2)  # Default to sedentary if not recognised

def calculate_tdee(bmr, activity_factor):
    """Scale the basal metabolic rate by the activity factor.

    Returns the product ``bmr * activity_factor``.
    """
    tdee = bmr * activity_factor
    return tdee


In [None]:
def calculate_target_calories(tdee, target_weight, current_weight):
    """Adjust the daily calories according to the direction of the weight goal.

    Returns ``tdee + 500`` when the target weight is above the current weight,
    ``tdee - 500`` when it is below, and ``tdee`` unchanged otherwise.
    """
    if target_weight > current_weight:
        return tdee + 500  # Surplus for weight gain
    if target_weight < current_weight:
        return tdee - 500  # Deficit for weight loss
    return tdee  # Maintenance calories


In [None]:
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer

# One-hot encode the ingredient lists with a MultiLabelBinarizer
mlb = MultiLabelBinarizer()
ingredients_one_hot = mlb.fit_transform(data_sup['RecipeIngredientParts'])

# One numpy array of quantities per recipe
quantities_array = data_sup['Quantities'].apply(lambda values: np.array(values))

# Length of the longest quantity array; every other array is padded up to it
max_length = max(len(quantities) for quantities in quantities_array)

# Right-pad each array so that all rows share the same width
quantities_array_padded = np.array([
    np.pad(quantities, (0, max_length - len(quantities)), mode='constant')
    for quantities in quantities_array
])

# Both blocks must have the same number of rows before they are joined
print(f"ingredients_one_hot shape: {ingredients_one_hot.shape}")
print(f"quantities_array_padded shape: {quantities_array_padded.shape}")

# Feature matrix: one-hot ingredient columns followed by the padded quantities
X = np.hstack((ingredients_one_hot, quantities_array_padded))

# Display the shape of the final feature matrix, then the matrix itself
print(f"Feature matrix shape: {X.shape}")
X

In [None]:

# The metric functions are not imported in any cell visible in this notebook;
# import them here so the cell does not depend on leftover kernel state.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Evaluate the model
# NOTE(review): y_test and y_pred are not defined in any visible cell — the
# train/test split and model-fitting cells appear to be missing; confirm.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error: {mae:.4f}")
print(f"Mean Squared Error: {mse:.4f}")
print(f"R² Score: {r2:.4f}")