In [None]:
import pandas as pd

In [8]:
import numpy as np

In [10]:

# Load dataset
data = pd.read_csv('cleaned_recipes_.csv')
print("Dataset Loaded Successfully")

Dataset Loaded Successfully


In [11]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

scaled_data = data.copy()
# Features for similarity
features = [
    'RecipeCategory', 'RecipeIngredientParts',
    'Calories', 'FatContent', 'SaturatedFatContent',
    'CholesterolContent', 'SodiumContent', 'CarbohydrateContent',
    'FiberContent', 'SugarContent', 'ProteinContent'
]

# Extract only the relevant features
scaled_data = scaled_data[features]

# Preprocessing
# 1. Numerical Features: Standardize
# 2. Categorical Features: One-Hot Encode
numerical_features = [
    'Calories', 'FatContent', 'SaturatedFatContent', 'CholesterolContent',
    'SodiumContent', 'CarbohydrateContent', 'FiberContent', 'SugarContent', 'ProteinContent'
]
categorical_features = ['RecipeCategory', 'RecipeIngredientParts']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),  # Scale numerical features
        ('cat', OneHotEncoder(), categorical_features)  # One-hot encode categorical features
    ]
)

# Apply preprocessing
processed_data = preprocessor.fit_transform(scaled_data)
processed_data

<339121x325315 sparse matrix of type '<class 'numpy.float64'>'
	with 3730331 stored elements in Compressed Sparse Row format>

In [12]:
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split

# Train the k-Nearest Neighbors model
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=10)
model_knn.fit(processed_data)

print("Model Training Completed.")


Model Training Completed.


In [17]:
def find_similar_recipes(recipe_id, data, knn_model, preprocessor, n_neighbors=10):
    """
    Find similar recipes to a given recipe.

    Args:
        recipe_id (int): Index of the recipe to find similar recipes for.
        data (DataFrame): Original dataset.
        knn_model (NearestNeighbors): Trained KNN model.
        preprocessor (ColumnTransformer): Preprocessing pipeline.
        n_neighbors (int): Number of similar recipes to return.

    Returns:
        DataFrame: Similar recipes with their distances.
    """
    # Preprocess the recipe features
    recipe_features = preprocessor.transform(data.iloc[[recipe_id]])
    
    # Find nearest neighbors
    distances, indices = knn_model.kneighbors(recipe_features, n_neighbors=n_neighbors)
    
    # Retrieve similar recipes
    similar_recipes = data.iloc[indices[0]].copy()
    similar_recipes['Distance'] = distances[0]
    
    return similar_recipes

# Example: Find similar recipes to the first recipe in the dataset
recipe_id = 1
similar_recipes = find_similar_recipes(recipe_id, data, model_knn, preprocessor, n_neighbors=10)

print("\nSimilar Recipes:")
similar_recipes



Similar Recipes:


Unnamed: 0,RecipeId,Name,CookTime,Description,RecipeServings,RecipeCategory,RecipeIngredientQuantities,RecipeIngredientParts,AggregatedRating,Calories,...,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeInstructions,HealthStatus,Distance
1,39,Biryani,25 minutes,Make and share this Biryani recipe from Food.com.,6.0,Chicken Breast,[],"saffron, milk, hot green chili peppers, onions...",3.0,185.116667,...,2.766667,62.133333,61.4,14.066667,1.5,3.4,10.566667,['Soak saffron in warm milk for 5 minutes and ...,Healthy,0.0
112398,192482,Chicken and Vegetable Toad in the Hole,35 minutes,"A quick, healthy meal that's so easy to make a...",2.0,Chicken Breast,"['1', '125', '2', '200', '125', '1', '50', '1'...","olive oil, plain flour, eggs, milk, carrot, br...",4.632096,316.8,...,3.0,126.15,130.85,36.8,2.0,1.8,18.35,"['Preheat the oven to 220 degrees C, 425 degre...",Unhealthy,0.353213
146463,245868,New Orleans Paneed Chicken With Shrimp and Fus...,25 minutes,I was in shock to find no recipes for paneed c...,4.0,Chicken Breast,"['2', '2', '1/3', '2', '3/4', '1/2', '4', '4',...","eggs, fresh parsley, fresh parsley, salt, grou...",5.0,262.7,...,4.7,86.9,289.5,22.1,1.35,1.525,15.25,['Place eggs in shallow bowl and beat to blend...,Healthy,0.354201
66186,116352,Stuffed Portabella Asian,15 minutes,I think that Asian foods are probably my favor...,2.0,Chicken Breast,[],"white wine, portabella mushrooms, chicken brea...",5.0,315.15,...,5.15,95.75,649.75,29.35,2.6,10.9,16.7,"['Soak apricots overnite in wine.', 'Wipe outs...",Healthy,0.356926
163260,272172,Chicken Parmesan,30 minutes,This is very easy to make and doesn't take muc...,4.0,Chicken Breast,[],"chicken breasts, flour, eggs, parmesan cheese,...",5.0,232.825,...,4.225,83.275,368.725,19.025,1.4,3.1,15.95,"['Preheat oven to 350 degrees.', 'Pound down t...",Healthy,0.357054
149726,250814,Chicken Scaloppine With Limoncello Sauce,10 minutes,I came up with this recipe in a effort to crea...,2.0,Chicken Breast,[],"boneless skinless chicken breasts, Dijon musta...",4.5,234.25,...,3.35,91.9,505.9,16.1,1.35,1.65,18.1,"['Whisk mustard and egg in a small bowl.', 'Pu...",Healthy,0.357244
236406,385635,Lightly Fried Chicken Breasts With Basil Tomatoes,15 minutes,I made this up because I was the only adult ho...,2.0,Chicken Breast,"['2', '1/4', '1/4', '1', '1/2', '1/2', '1', '2...","boneless skinless chicken breasts, flour, corn...",5.0,210.4,...,1.6,87.1,67.8,14.3,1.55,1.4,17.15,['Rinse and thoroughly dry chicken breasts. P...,Healthy,0.358298
160415,267464,Coconut Curry Chicken Strips,10 minutes,I invented these the other night with random i...,2.0,Chicken Breast,[],"chicken breasts, flour, curry paste, egg, coco...",4.632096,299.9,...,6.05,99.3,185.8,14.2,1.0,4.95,18.8,"['Dust chicken strips with flour.', 'Rub chick...",Healthy,0.359355
103307,177649,Smoky Beer Chicken Balls,10 minutes,Very crisp and tasty main dish or they would a...,2.0,Chicken Breast,[],"chicken breasts, beer, flour, egg, liquid smok...",5.0,276.45,...,2.9,100.9,157.25,19.4,0.65,0.15,19.8,['Heat one tablespoon of oil in a wok or fry p...,Unhealthy,0.359368
215596,353636,Chicken Cordon Bleu II,35 minutes,This is a combination of several recipes using...,2.0,Chicken Breast,"['2', '4', '2', '1', '1', '1/4', '1/2', '1', '1']","boneless skinless chicken breast halves, ham, ...",5.0,276.35,...,5.7,104.95,237.9,16.55,0.8,1.1,20.55,['Place chicken breasts between 2 pieces of pl...,Unhealthy,0.360494


In [18]:
query_index = 0  # Index of the recipe you want to evaluate
distances, indices = model_knn.kneighbors(processed_data[query_index])

average_distance = np.mean(distances)
print(f"Average distance for query recipe {query_index}: {average_distance:.5f}")

Average distance for query recipe 0: 0.39757


In [19]:
threshold = 0.2  # Cosine distance threshold for similarity

# Count how many neighbors are considered similar
similar_neighbors = np.sum(distances < threshold)
print(f"Number of similar neighbors (distance < {threshold}): {similar_neighbors}")

Number of similar neighbors (distance < 0.2): 1


In [20]:
import joblib

joblib.dump(model_knn, 'knn_unsupervised_model_.pkl')

['knn_unsupervised_model_.pkl']