In [None]:
# Core data libraries used throughout the notebook.
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Location of the restaurant CSV (the space before ".csv" is part of the
# path exactly as written in this notebook).
DATASET_PATH = "/content/sample_data/Dataset .csv"

# Read the restaurant records into the notebook-wide DataFrame `df`.
df = pd.read_csv(DATASET_PATH)

# Preview the first rows to check that the columns loaded as expected.
df.head()


Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [None]:
# Replacement value for each column used as a recommendation feature:
# a placeholder label for cuisine, the most frequent price range, and the
# average rating.
fill_values = {
    'Cuisines': 'Unknown',
    'Price range': df['Price range'].mode()[0],
    'Aggregate rating': df['Aggregate rating'].mean(),
}

# Fill the gaps column by column, writing the result back into `df`.
for column_name, replacement in fill_values.items():
    df[column_name] = df[column_name].fillna(replacement)

print(df[['Cuisines', 'Price range', 'Aggregate rating']].head())



                           Cuisines  Price range  Aggregate rating
0        French, Japanese, Desserts            3               4.8
1                          Japanese            3               4.5
2  Seafood, Asian, Filipino, Indian            4               4.4
3                   Japanese, Sushi            4               4.9
4                  Japanese, Korean            4               4.8


In [None]:
# Columns that describe a restaurant for the purpose of recommendation.
feature_columns = ['Cuisines', 'Price range', 'Aggregate rating']

# Keep only those columns; later cells encode and combine them.
features = df[feature_columns]
print(features)

                              Cuisines  Price range  Aggregate rating
0           French, Japanese, Desserts            3               4.8
1                             Japanese            3               4.5
2     Seafood, Asian, Filipino, Indian            4               4.4
3                      Japanese, Sushi            4               4.9
4                     Japanese, Korean            4               4.8
...                                ...          ...               ...
9546                           Turkish            3               4.1
9547   World Cuisine, Patisserie, Cafe            3               4.2
9548            Italian, World Cuisine            4               3.7
9549                   Restaurant Cafe            4               4.0
9550                              Cafe            2               4.0

[9551 rows x 3 columns]


In [None]:


# One-hot encode the cuisine text as a dense array. Every distinct value of
# the 'Cuisines' column gets its own indicator column, so a combined string
# such as "Afghani, Mughlai, Chinese" is one category of its own rather than
# three separate cuisines.
cuisine_column = features[['Cuisines']]
encoder = OneHotEncoder(sparse_output=False)
encoder.fit(cuisine_column)
encoded_features = encoder.transform(cuisine_column)
print(encoded_features)

# Names of the generated indicator columns, prefixed with "Cuisines_".
print(encoder.get_feature_names_out(['Cuisines']))


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
['Cuisines_Afghani' 'Cuisines_Afghani, Mughlai, Chinese'
 'Cuisines_Afghani, North Indian' ... 'Cuisines_World Cuisine'
 'Cuisines_World Cuisine, Mexican, Italian'
 'Cuisines_World Cuisine, Patisserie, Cafe']


In [None]:
# Append the two numeric columns to the right of the one-hot cuisine columns
# so that each restaurant is described by a single numeric feature row.
# NOTE(review): price range and rating are appended unscaled next to 0/1
# indicator columns; this likely explains why the cosine similarities printed
# later are all close to 1 - consider scaling.
numeric_columns = features[['Price range', 'Aggregate rating']].to_numpy()
final_features = np.concatenate((encoded_features, numeric_columns), axis=1)
print(final_features)

print("Final Feature Matrix Shape:", final_features.shape)


[[0.  0.  0.  ... 0.  3.  4.8]
 [0.  0.  0.  ... 0.  3.  4.5]
 [0.  0.  0.  ... 0.  4.  4.4]
 ...
 [0.  0.  0.  ... 0.  4.  3.7]
 [0.  0.  0.  ... 0.  4.  4. ]
 [0.  0.  0.  ... 0.  2.  4. ]]
Final Feature Matrix Shape: (9551, 1828)


In [None]:
# Cosine similarity between every pair of restaurant feature rows. With the
# 9551-row feature matrix built above, the result is a 9551 x 9551 array.
# NOTE(review): recommend_restaurants computes its own similarities against
# final_features and does not read this matrix; no other use is visible in
# this notebook, so this cell may only be illustrative - confirm before
# keeping such a large array in memory.
similarity_matrix = cosine_similarity(final_features)
print(similarity_matrix)

[[1.         0.96791834 0.9555609  ... 0.93457565 0.94488206 0.95668921]
 [0.96791834 1.         0.95885402 ... 0.94029363 0.94951449 0.95222352]
 [0.9555609  0.95885402 1.         ... 0.96632484 0.96999695 0.92644231]
 ...
 [0.93457565 0.94029363 0.96632484 ... 1.         0.96782158 0.89810426]
 [0.94488206 0.94951449 0.96999695 ... 0.96782158 1.         0.91168461]
 [0.95668921 0.95222352 0.92644231 ... 0.89810426 0.91168461 1.        ]]


In [None]:



def recommend_restaurants(cuisine, price_range, top_n=5):
    """Recommend the restaurants most similar to a user's preferences.

    A one-row user profile is built from ``cuisine``, ``price_range`` and the
    dataset's mean ``Aggregate rating``. It is encoded the same way as the
    restaurant rows (one-hot cuisine columns followed by the two numeric
    columns) and compared with every row of ``final_features`` using cosine
    similarity.

    The function relies on the notebook-level objects ``df``, ``encoder`` and
    ``final_features`` created in earlier cells, and prints the intermediate
    result of every step.

    Parameters
    ----------
    cuisine : str
        Cuisine label to look for. It is handed to ``encoder.transform``
        unchanged; the notebook's encoder is created with default
        unknown-category handling, so a value it was not fitted on makes
        ``encoder.transform`` raise.
    price_range : int
        Preferred value of the ``Price range`` column.
    top_n : int, default 5
        Number of restaurants to return. ``0`` yields an empty result and a
        value larger than the dataset returns every restaurant.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``df`` (name, cuisines, price range, rating),
        ordered from most to least similar.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    # A negative count would silently slice the ranking from the wrong end.
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    print("🔹 User Preference Input")
    print("Cuisine:", cuisine)
    print("Price Range:", price_range)
    print("-" * 40)

    # Create user profile.
    # NOTE(review): the profile uses the dataset's mean rating, so the ranking
    # favours restaurants rated close to the average rather than the best
    # rated ones - confirm this is intended.
    user_df = pd.DataFrame({
        'Cuisines': [cuisine],
        'Price range': [price_range],
        'Aggregate rating': [df['Aggregate rating'].mean()]
    })
    print(user_df)

    # Encode user cuisine with the encoder fitted on the restaurant data so
    # the indicator columns line up with those of final_features.
    user_encoded = encoder.transform(user_df[['Cuisines']])
    print(user_encoded)

    # Combine features in the same column order as final_features.
    user_features = np.hstack((
        user_encoded,
        user_df[['Price range', 'Aggregate rating']]
    ))
    print(user_features)

    # Calculate similarity of the single user row against every restaurant.
    similarity_scores = cosine_similarity(user_features, final_features)
    print(similarity_scores)

    # Get top recommendations: reverse the ascending argsort so the best
    # match comes first, then keep the first top_n entries. Slicing after the
    # reversal (instead of the former ``[-top_n:]``) matters for top_n=0,
    # because ``[-0:]`` equals ``[0:]`` and used to return every restaurant.
    top_indices = similarity_scores.argsort()[0][::-1][:top_n]
    print(top_indices)

    print("\n🔸 Recommended Restaurants")
    return df.iloc[top_indices][
        ['Restaurant Name', 'Cuisines', 'Price range', 'Aggregate rating']
    ]


In [None]:
# Example query: the five closest matches for a 'North Indian' restaurant in
# price range 2. The returned DataFrame is the cell's displayed output.
recommend_restaurants(
    cuisine='North Indian',
    price_range=2,
    top_n=5
)


🔹 User Preference Input
Cuisine: North Indian
Price Range: 2
----------------------------------------
       Cuisines  Price range  Aggregate rating
0  North Indian            2           2.66637
[[0. 0. 0. ... 0. 0. 0.]]
[[0.         0.         0.         ... 0.         2.         2.66637001]]
[[0.9398127  0.94040294 0.94036436 ... 0.92673333 0.9337241  0.91965092]]
[4935 8554 1034 6345 6407]

🔸 Recommended Restaurants


Unnamed: 0,Restaurant Name,Cuisines,Price range,Aggregate rating
4935,Ruby Dhaba,North Indian,2,2.7
8554,The Kitchen,North Indian,2,2.7
1034,The Tandoori Times,North Indian,2,2.7
6345,Kake Da Hotel,North Indian,2,2.7
6407,Khalsa,North Indian,2,2.7
