# **Imports**

In [None]:
import gzip
import json
import pandas as pd
import pickle

# **Preprocess Restaurant Rating Data**

In [None]:
file_path = '/content/meta-Massachusetts.json.gz'

# Read the gzip-compressed file as UTF-8 text; every line is parsed as its own JSON document.
data = []
with gzip.open(file_path, mode='rt', encoding='utf-8') as file:
    for line in file:
        try:
            obj = json.loads(line)
        except json.JSONDecodeError as e:
            # A malformed line is reported and skipped; parsing continues with the next line.
            print(f"Error decoding JSON: {e}")
        else:
            data.append(obj)

# Build a DataFrame from the list of parsed records
df = pd.DataFrame(data)

# Preview the first rows
df.head()

Unnamed: 0,name,address,gmap_id,description,latitude,longitude,category,avg_rating,num_of_reviews,price,hours,MISC,state,relative_results,url
0,Grace Ryder Housing Center,"Grace Ryder Housing Center, Marshfield, MA 02050",0x89e4a1f2d39fb621:0xfe62e9b7266e6262,,42.098673,-70.719751,[Senior citizen center],4.8,3,,,{'Accessibility': ['Wheelchair accessible entr...,,"[0x89e4a6ce94008f83:0x48244c8afd7f8d76, 0x89e4...",https://www.google.com/maps/place//data=!4m2!3...
1,Sowing Seeds,"Sowing Seeds, 974 Plain St, Marshfield, MA 02050",0x89e4a103dd188585:0xfadc47938337579f,,42.107184,-70.757536,[Non-profit organization],4.6,8,,,{'Accessibility': ['Wheelchair accessible entr...,,"[0x89e4a6d00319316d:0x5d22d9b17e568648, 0x89e4...",https://www.google.com/maps/place//data=!4m2!3...
2,TLC Kennels,"TLC Kennels, 729 Center St, Pembroke, MA 02359",0x89e49871023b3407:0xd93aabe6f7940a6b,,42.032471,-70.82077,[Kennel],4.0,4,,,,,"[0x89e4a1c4b9d12e9b:0xc13185ea1dbed01a, 0x89e4...",https://www.google.com/maps/place//data=!4m2!3...
3,Pine Hill Sand & Gravel,"Pine Hill Sand & Gravel, 498 Old Westport Rd, ...",0x89e4fcdcfa2fdba5:0xb10b326e00a0fd0,,41.62668,-71.022437,[Sand & gravel supplier],5.0,2,,,,,"[0x89e4fd3555b4674f:0x6d70e26b40185165, 0x89e4...",https://www.google.com/maps/place//data=!4m2!3...
4,Workout Club,"Workout Club, Webster Square, Marshfield, MA 0...",0x89e4a6c554aa0e59:0x45965ec598f57a3,,42.089767,-70.708462,[Gym],4.5,4,,"[[Thursday, 5:30AM–1PM], [Friday, 5:30AM–1PM],...",{'Accessibility': ['Wheelchair accessible entr...,Closes soon ⋅ 1PM ⋅ Reopens 4PM,"[0x89e4a6d001adbf99:0xdeb84714d054a18c, 0x89e4...",https://www.google.com/maps/place//data=!4m2!3...


In [None]:
# Keep only businesses whose category text mentions "restaurant" (case-insensitive).
is_restaurant = df['category'].apply(lambda x: 'restaurant' in str(x).lower() if x is not None else False)
# .copy() makes restaurant_data an independent frame, so the column assignments below
# do not write into a slice of df (the cause of SettingWithCopyWarning).
restaurant_data = df[is_restaurant].copy()
# Drop the ones without address / description / MISC options / hours
restaurant_data = restaurant_data.dropna(subset=['address', 'description', 'MISC', 'hours'])
# Maintain uniform symbol for prices. The result is assigned back to the column:
# calling replace(..., inplace=True) on restaurant_data['price'] acts on an
# intermediate object and is not guaranteed to update the frame.
restaurant_data['price'] = restaurant_data['price'].replace('₩', '$', regex=True)
# Map prices from symbols to numbers (a missing price becomes 0)
price_mapping = {None: 0, '$': 1, '$$': 2, '$$$': 3, '$$$$': 4}
restaurant_data['price'] = restaurant_data['price'].replace(price_mapping)
# Remove permanently closed ones
restaurant_data = restaurant_data[restaurant_data['state'] != 'Permanently closed'].copy()
# Rows with no 'state' value get the placeholder 'Massachussets'.
# NOTE(review): the placeholder spelling is kept exactly as before because the
# value is written into the data; confirm with consumers before correcting it.
restaurant_data['state'] = restaurant_data['state'].fillna('Massachussets')
restaurant_data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  restaurant_data.dropna(subset=['address', 'description', 'MISC', 'hours'], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  restaurant_data['price'].replace('₩', '$',regex=True, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  restaurant_data['price'].replace('₩', '$',r

Unnamed: 0,name,address,gmap_id,description,latitude,longitude,category,avg_rating,num_of_reviews,price,hours,MISC,state,relative_results,url
252,Honey Dew Donuts,"Honey Dew Donuts, MA-118, Rehoboth, MA 02769",0x89e45e492eda50b1:0x666de2dcb5417546,"Informal chain serving up coffee drinks, donut...",41.902888,-71.249229,"[Donut shop, Cafe, Coffee shop, Dessert shop, ...",4.4,18,1,"[[Wednesday, 6AM–8PM], [Thursday, 6AM–8PM], [F...","{'Service options': ['In-store shopping', 'Tak...",Open ⋅ Closes 8PM,"[0x89e45f29b389030b:0xac7a45b7231a16bb, 0x89e4...",https://www.google.com/maps/place//data=!4m2!3...
876,Honey Dew Donuts,"Honey Dew Donuts, MA-118, Rehoboth, MA 02769",0x89e45e492eda50b1:0x666de2dcb5417546,"Informal chain serving up coffee drinks, donut...",41.902888,-71.249229,"[Donut shop, Cafe, Coffee shop, Dessert shop, ...",4.4,18,1,"[[Wednesday, 6AM–8PM], [Thursday, 6AM–8PM], [F...","{'Service options': ['In-store shopping', 'Tak...",Open ⋅ Closes 8PM,"[0x89e45f29b389030b:0xac7a45b7231a16bb, 0x89e4...",https://www.google.com/maps/place//data=!4m2!3...
1614,Dunkin',"Dunkin', In Mobil Gas Station, 185 Park Ave, W...",0x89e407f99a47ba47:0x9387988afe7799c1,Long-running chain serving signature breakfast...,42.272809,-71.813540,"[Coffee shop, Bagel shop, Bakery, Breakfast re...",3.4,14,1,"[[Tuesday, 5AM–8PM], [Wednesday, 5AM–7PM], [Th...","{'Service options': ['Delivery', 'Takeout', 'D...",Closed ⋅ Opens 5AM,"[0x89e40426a7dafbf9:0x6c68b73cd0eca176, 0x89e4...",https://www.google.com/maps/place//data=!4m2!3...
2610,Dunkin',"Dunkin', 1931 Dorchester Ave, Dorchester, MA 0...",0x89e37b86cff22fa5:0x9ec7d8b55a77706b,Long-running chain serving signature breakfast...,42.285003,-71.064857,"[Coffee shop, Bagel shop, Bakery, Breakfast re...",3.9,38,1,"[[Saturday, 5AM–7PM], [Sunday, 6AM–6PM], [Mond...","{'Service options': ['Delivery', 'Takeout', 'D...",Open ⋅ Closes 7PM,"[0x89e37b838314ce8d:0x59a32d5dea9e3899, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...
2645,Dunkin',"Dunkin', Logan Airport Logan Intl Airport, Gat...",0x89e371881e42c5a3:0x27fd1b9af0d87940,Long-running chain serving signature breakfast...,42.365359,-71.014714,"[Coffee shop, Bagel shop, Bakery, Breakfast re...",2.7,16,1,"[[Saturday, 6AM–1PM], [Sunday, 6AM–1PM], [Mond...","{'Service options': ['Takeout', 'Delivery', 'D...",Open ⋅ Closes 1PM,"[0x89e3703cb3fd8465:0xf73f9799696e64a6, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92504,Rocco's Pizzeria,"Rocco's Pizzeria, in the Tisbury Marketplace, ...",0x89e52894d8bbeba1:0x3154f8fc3bc21315,Pizza by the pie or slice along with sandwiche...,41.452166,-70.598973,[Pizza restaurant],4.6,144,2,"[[Friday, 11:30AM–8PM], [Saturday, 4–8PM], [Su...","{'Service options': ['Outdoor seating', 'Takeo...",Massachussets,"[0x89e5289388f24cf1:0x88c91d0daa4e6ec0, 0x89e5...",https://www.google.com/maps/place//data=!4m2!3...
92505,Tasty Burger,"Tasty Burger, 1301 Boylston St, Boston, MA 02215",0x89e379f5e3d9fe1b:0xa265a864b0f05b6a,Late-night Fenway branch of a local retro burg...,42.344762,-71.098375,"[Hamburger restaurant, American restaurant, Fa...",4.5,2713,1,"[[Friday, 11AM–12AM], [Saturday, 11AM–12AM], [...","{'Service options': ['Outdoor seating', 'Curbs...",Massachussets,"[0x89e37a19e56a4565:0xbb2aefc495e8c505, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...
92508,Hong Kong Eatery,"Hong Kong Eatery, 79 Harrison Ave, Boston, MA ...",0x89e37a7828570e3d:0x65a59e5e04b797bf,"Standard Chinese fare, including BBQ'd pork & ...",42.350961,-71.061531,"[Cantonese restaurant, Asian restaurant, Noodl...",4.1,708,1,"[[Friday, 11AM–8:30PM], [Saturday, 11AM–8:30PM...","{'Service options': ['No-contact delivery', 'D...",Massachussets,"[0x89e37a783f12a0b1:0x2f772b596594e285, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...
92511,Clover Food Lab,"Clover Food Lab, 5 Cambridge Center, Cambridge...",0x89e370af398b981b:0xfc69c09d337bc2e0,The menu changes daily at this quick-serve spo...,42.362698,-71.087728,"[Fast food restaurant, Breakfast restaurant, C...",4.4,778,1,"[[Friday, 7AM–8PM], [Saturday, 7AM–8PM], [Sund...","{'Service options': ['Curbside pickup', 'Deliv...",Massachussets,"[0x89e370adc5c60ab7:0x4f154a255d379c99, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...


In [None]:
# (rows, columns) of the filtered restaurant table
restaurant_data.shape

(7824, 15)

In [None]:
# Keep only the first row seen for each gmap_id ...
restaurant_data = restaurant_data.drop_duplicates(subset=["gmap_id"], keep="first")
# ... and renumber the surviving rows from 0
restaurant_data = restaurant_data.reset_index(drop=True)
restaurant_data.shape

(7823, 15)

# **Preprocess Review Data**

In [None]:
file_path = '/content/review-Massachusetts_10.json.gz'
# Ids of the restaurants kept above, as a set for fast membership checks
gmap_values = set(restaurant_data['gmap_id'])
data_review = []

with gzip.open(file_path, mode='rt', encoding='utf-8') as file:
    for line in file:
        try:
            obj_review = json.loads(line)
        except json.JSONDecodeError as e:
            # Report the malformed line and move on to the next one
            print(f"Error decoding JSON: {e}")
            continue
        # Keep a review only when it belongs to one of the selected restaurants
        if obj_review['gmap_id'] in gmap_values:
            data_review.append(obj_review)

df_review = pd.DataFrame(data_review)

# Preview the first rows
df_review.head()

Unnamed: 0,user_id,name,time,rating,text,pics,resp,gmap_id
0,108932598687147151209,Tc Pendarvis,1608699447441,5,Love the Sale Associates But The Machines I do...,,,0x89e45e492eda50b1:0x666de2dcb5417546
1,109115279620765929977,Scotty Wilshire,1609634311474,3,Good for a short stop,,,0x89e45e492eda50b1:0x666de2dcb5417546
2,117010150918910515361,Kaitlynn Clarke,1517844338810,5,My favorite coffee place around. Friendly staf...,,,0x89e45e492eda50b1:0x666de2dcb5417546
3,114980144744098714236,Rusty Da,1577661883869,4,Nice pastry is Ward of the money coffee good,,,0x89e45e492eda50b1:0x666de2dcb5417546
4,104265840298214603897,Joseph Marinaro,1543766300884,3,"Great service , but to much money for 2 coffee...",,,0x89e45e492eda50b1:0x666de2dcb5417546


In [None]:
# Discard reviews whose text is missing or is the literal string 'None'
has_text = ~df_review['text'].isin([None, 'None'])
df_review = df_review.loc[has_text, ['gmap_id', 'text']]
# Collapse to one row per restaurant: all of its review texts joined by a single space
df_review = df_review.groupby('gmap_id')['text'].apply(' '.join).reset_index()
df_review.shape

(7702, 2)

# **Merge Restaurant Data and Reviews**

In [None]:
# Inner join on gmap_id: only restaurants that have aggregated review text are kept
doc_df = restaurant_data.merge(df_review, how='inner', on='gmap_id')

def concat_dict_values(d):
    """Flatten the values of a dict of lists into a single string.

    The items of each value are joined with spaces, and the per-key strings
    are then joined with ', '. Keys themselves are not included.

    Args:
        d: A dict mapping a key to an iterable of items, or a missing value
            (None, NaN, or anything else that is not a dict).

    Returns:
        The flattened string, or '' when ``d`` is not a dict.
    """
    # Missing cells can arrive as None or as float NaN; neither has .items().
    if not isinstance(d, dict):
        return ''
    return ', '.join(
        # str() keeps the join from failing on non-string items.
        ' '.join(str(item) for item in value)
        for value in d.values()
        # A key whose value is None contributes nothing instead of raising.
        if value is not None
    )


# Build the text that is indexed for retrieval:
# name + description + categories + MISC attributes + concatenated reviews.
category_text = doc_df['category'].apply(lambda x: ' '.join(x) if isinstance(x, list) else str(x))
misc_text = doc_df['MISC'].apply(concat_dict_values)
doc_df['doc_information'] = (
    doc_df['name'].fillna(' ') + ' '
    + doc_df['description'].fillna(' ') + ' '
    + category_text + ' '
    # The trailing ' ' separates the MISC text from the reviews; without it the
    # last MISC word and the first review word fuse into one token.
    + misc_text + ' '
    + doc_df['text'].fillna('')
)

doc_df

Unnamed: 0,name,address,gmap_id,description,latitude,longitude,category,avg_rating,num_of_reviews,price,hours,MISC,state,relative_results,url,text,doc_information
0,Honey Dew Donuts,"Honey Dew Donuts, MA-118, Rehoboth, MA 02769",0x89e45e492eda50b1:0x666de2dcb5417546,"Informal chain serving up coffee drinks, donut...",41.902888,-71.249229,"[Donut shop, Cafe, Coffee shop, Dessert shop, ...",4.4,18,1,"[[Wednesday, 6AM–8PM], [Thursday, 6AM–8PM], [F...","{'Service options': ['In-store shopping', 'Tak...",Open ⋅ Closes 8PM,"[0x89e45f29b389030b:0xac7a45b7231a16bb, 0x89e4...",https://www.google.com/maps/place//data=!4m2!3...,Love the Sale Associates But The Machines I do...,Honey Dew Donuts Informal chain serving up cof...
1,Dunkin',"Dunkin', 1931 Dorchester Ave, Dorchester, MA 0...",0x89e37b86cff22fa5:0x9ec7d8b55a77706b,Long-running chain serving signature breakfast...,42.285003,-71.064857,"[Coffee shop, Bagel shop, Bakery, Breakfast re...",3.9,38,1,"[[Saturday, 5AM–7PM], [Sunday, 6AM–6PM], [Mond...","{'Service options': ['Delivery', 'Takeout', 'D...",Open ⋅ Closes 7PM,"[0x89e37b838314ce8d:0x59a32d5dea9e3899, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,Pretty hit or miss Dunkin opposite the red lin...,Dunkin' Long-running chain serving signature b...
2,Dunkin',"Dunkin', General Saints Campus, 1 Hospital Dr,...",0x89e3a43b5cccd095:0xac005005b8232057,Long-running chain serving signature breakfast...,42.645806,-71.301246,"[Coffee shop, Bagel shop, Bakery, Breakfast re...",4.3,33,1,"[[Saturday, 5AM–4PM], [Sunday, 5AM–4PM], [Mond...","{'Service options': ['Delivery', 'Takeout', 'D...",Open ⋅ Closes 4PM,"[0x89e3a467f0c2b439:0xa8bce92b785f6a32, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,"Its good, of course you know what to expect, ...",Dunkin' Long-running chain serving signature b...
3,Subway,"Subway, 330 Main St, Holyoke, MA 01040",0x89e6dc2386a9c681:0xce5704692516ba47,Casual counter-serve chain for build-your-own ...,42.200072,-72.606007,"[Sandwich shop, Caterer, Fast food restaurant,...",3.8,38,1,"[[Saturday, 10AM–8PM], [Sunday, 10AM–8PM], [Mo...","{'Service options': ['Curbside pickup', 'Deliv...",Closed ⋅ Opens 10AM,"[0x89e6dc1f42e4c371:0xf4eebcb15be067f4, 0x89e6...",https://www.google.com/maps/place//data=!4m2!3...,The best way to eat fresh and healthy 😋 I love...,Subway Casual counter-serve chain for build-yo...
4,Great Barbecue,"Great Barbecue, 15 Hudson St, Boston, MA 02111",0x89e37a784834f1b7:0x10b63ece55edd451,Ducks hang in the window of this basement Chin...,42.350741,-71.060224,[Barbecue restaurant],4.3,24,0,"[[Wednesday, 8AM–8PM], [Thursday, 8AM–8PM], [F...","{'Service options': ['Takeout', 'Dine-in', 'De...",Closed ⋅ Opens 8AM Thu,"[0x89e37a785ec445d7:0x6fca53319c6d705a, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,Cheap and great food! Loved the char siu here....,Great Barbecue Ducks hang in the window of thi...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7697,Rocco's Pizzeria,"Rocco's Pizzeria, in the Tisbury Marketplace, ...",0x89e52894d8bbeba1:0x3154f8fc3bc21315,Pizza by the pie or slice along with sandwiche...,41.452166,-70.598973,[Pizza restaurant],4.6,144,2,"[[Friday, 11:30AM–8PM], [Saturday, 4–8PM], [Su...","{'Service options': ['Outdoor seating', 'Takeo...",Massachussets,"[0x89e5289388f24cf1:0x88c91d0daa4e6ec0, 0x89e5...",https://www.google.com/maps/place//data=!4m2!3...,Get you some Rocco's!\n\nTheir pizza/food/cust...,Rocco's Pizzeria Pizza by the pie or slice alo...
7698,Tasty Burger,"Tasty Burger, 1301 Boylston St, Boston, MA 02215",0x89e379f5e3d9fe1b:0xa265a864b0f05b6a,Late-night Fenway branch of a local retro burg...,42.344762,-71.098375,"[Hamburger restaurant, American restaurant, Fa...",4.5,2713,1,"[[Friday, 11AM–12AM], [Saturday, 11AM–12AM], [...","{'Service options': ['Outdoor seating', 'Curbs...",Massachussets,"[0x89e37a19e56a4565:0xbb2aefc495e8c505, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,We stopped here for a quick bit along our trip...,Tasty Burger Late-night Fenway branch of a loc...
7699,Hong Kong Eatery,"Hong Kong Eatery, 79 Harrison Ave, Boston, MA ...",0x89e37a7828570e3d:0x65a59e5e04b797bf,"Standard Chinese fare, including BBQ'd pork & ...",42.350961,-71.061531,"[Cantonese restaurant, Asian restaurant, Noodl...",4.1,708,1,"[[Friday, 11AM–8:30PM], [Saturday, 11AM–8:30PM...","{'Service options': ['No-contact delivery', 'D...",Massachussets,"[0x89e37a783f12a0b1:0x2f772b596594e285, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,"Their roasted ducks, chickens, and porks are v...","Hong Kong Eatery Standard Chinese fare, includ..."
7700,Clover Food Lab,"Clover Food Lab, 5 Cambridge Center, Cambridge...",0x89e370af398b981b:0xfc69c09d337bc2e0,The menu changes daily at this quick-serve spo...,42.362698,-71.087728,"[Fast food restaurant, Breakfast restaurant, C...",4.4,778,1,"[[Friday, 7AM–8PM], [Saturday, 7AM–8PM], [Sund...","{'Service options': ['Curbside pickup', 'Deliv...",Massachussets,"[0x89e370adc5c60ab7:0x4f154a255d379c99, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,"Whenever I visit Cambridge from the UK, I alwa...",Clover Food Lab The menu changes daily at this...


In [None]:
# Report how many missing values each column of doc_df holds
null_counts = doc_df.isna().sum()
for column, null_count in null_counts.items():
    print(f"{column}: {null_count}")

name: 0
address: 0
gmap_id: 0
description: 0
latitude: 0
longitude: 0
category: 0
avg_rating: 0
num_of_reviews: 0
price: 0
hours: 0
MISC: 0
state: 0
relative_results: 63
url: 0
text: 0
doc_information: 0


In [None]:
# Serialize the merged document table to disk; it is read back with pd.read_pickle later on
with open('restaurent_docs_ma.pickle', mode='wb') as pickle_file:
    pickle.dump(doc_df, pickle_file)

In [None]:
# google.colab is the Colab helper package; presumably this cell only works
# inside a Colab runtime — confirm before running elsewhere.
from google.colab import files
# Send the pickle written above to the user's browser as a download
files.download('restaurent_docs_ma.pickle')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# **Retrieval in action**

In [None]:
! pip install rank-bm25

Collecting rank-bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank-bm25
Successfully installed rank-bm25-0.2.2


In [None]:
from rank_bm25 import BM25Okapi
from sklearn.preprocessing import MinMaxScaler

# Corpus: one lowercased token list per restaurant document, split on single spaces
documents = doc_df['doc_information'].to_list()
document_ids = list(range(len(documents)))
tokenized_docs = [doc.lower().split(" ") for doc in documents]

# Index the corpus
bm25 = BM25Okapi(tokenized_docs)

# Score every document against the query (lowercased like the corpus)
query = "Chinese Restaurants".lower()
query_tokens = query.split(" ")
doc_scores = bm25.get_scores(query_tokens)

# Rescale BM25 scores to [0, 1]
scaler = MinMaxScaler()
normalized_doc_scores = scaler.fit_transform(doc_scores.reshape(-1, 1)).flatten()

# Rescale average ratings to [0, 1]; a missing rating counts as 0
avg_ratings = doc_df['avg_rating'].fillna(0).to_numpy()
normalized_avg_ratings = scaler.fit_transform(avg_ratings.reshape(-1, 1)).flatten()

# Final score = weighted sum of text relevance and rating
weight_bm25 = 0.7
weight_rating = 0.3
combined_scores = weight_bm25 * normalized_doc_scores + weight_rating * normalized_avg_ratings

# (document id, raw BM25 score, rating, combined score) per document
doc_scores_with_ids = list(zip(document_ids, doc_scores, avg_ratings, combined_scores))

# Highest combined score first; ties keep their original order
sorted_docs_with_ids = sorted(doc_scores_with_ids, key=lambda entry: entry[3], reverse=True)

# Ids of the ten best documents
ind = [entry[0] for entry in sorted_docs_with_ids[:10]]

# Look the ids up in doc_df
top_docs = doc_df.loc[ind]

# Display results
top_docs


Unnamed: 0,name,address,gmap_id,description,latitude,longitude,category,avg_rating,num_of_reviews,price,hours,MISC,state,relative_results,url,text,doc_information
2619,Min's Wok,"Min's Wok, 582 W Boylston St, Worcester, MA 01606",0x89e407907f2c0ec3:0xade514e9f31bd611,"Small, simple restaurant turning out an extens...",42.31131,-71.79759,[Chinese restaurant],4.6,264,1,"[[Thursday, 11AM–10PM], [Friday, 11AM–11PM], [...","{'Service options': ['Takeout', 'Delivery', 'D...",Open ⋅ Closes 10PM,"[0x89e407e05064db67:0xc65b74e4ebd23164, 0x89e4...",https://www.google.com/maps/place//data=!4m2!3...,Great food very clean n they r really polite I...,"Min's Wok Small, simple restaurant turning out..."
4691,Great Wok Wellesley,"Great Wok Wellesley, 180 Worcester St, Wellesl...",0x89e383d2374a02b5:0x62039de83ba6f097,"Modern Chinese eatery & bar offering Hunan, Si...",42.315372,-71.244962,"[Chinese restaurant, Asian restaurant, Deliver...",4.5,166,2,"[[Friday, 11:30AM–11PM], [Saturday, 11:30AM–11...","{'Service options': ['No-contact delivery', 'D...",Closed ⋅ Opens 11:30AM,"[0x89e38150c442bc85:0x101db8f4e1433fee, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,Excellent service with minimal waiting.\nGreat...,Great Wok Wellesley Modern Chinese eatery & ba...
6962,Mu Lan,"Mu Lan, 228 Broadway, Cambridge, MA 02139",0x89e370ada76b8b6b:0x1f052f56a2e28ec2,A large menu of veggie-friendly Taiwanese dish...,42.366346,-71.094302,"[Taiwanese restaurant, Asian restaurant, Chine...",4.5,646,2,"[[Saturday, 11AM–9PM], [Sunday, 11AM–9PM], [Mo...","{'Service options': ['Delivery', 'Takeout', 'D...",Massachussets,"[0x89e377546fdb064d:0x33da3bdf6ebd5e0d, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,Amazing Taiwanese Cuisine. The chicken was ama...,Mu Lan A large menu of veggie-friendly Taiwane...
5030,Lotus Blossom,"Lotus Blossom, 394 Boston Post Rd, Sudbury, MA...",0x89e38f1a05d016e5:0x59e1e7412694887e,"Chinese & Japanese staples, including sushi, a...",42.361494,-71.418851,"[Chinese restaurant, Restaurant]",4.4,318,2,"[[Wednesday, 10:30AM–9:30PM], [Thursday, 10:30...","{'Service options': ['Delivery', 'Takeout', 'D...",Closed ⋅ Opens 10:30AM,"[0x89e3860c33b612ff:0xb348bb092edc4c2e, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,Such great food and maybe the best takeout set...,"Lotus Blossom Chinese & Japanese staples, incl..."
3323,Lee Chen Chinese Cuisine,"Lee Chen Chinese Cuisine, 230 Winthrop Ave, La...",0x89e307cac6a5bed9:0xc513907d7d69116f,Upscale eatery whose Sichuan menu features spi...,42.684834,-71.139416,[Chinese restaurant],4.4,358,2,"[[Saturday, 11AM–9:30PM], [Sunday, 11AM–9PM], ...","{'Service options': ['Takeout', 'Delivery', 'D...",Open ⋅ Closes 9:30PM,"[0x89e30659baac334d:0x61c941d4bd61b525, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,My party was given good service and served del...,Lee Chen Chinese Cuisine Upscale eatery whose ...
5296,Sichuan Gourmet,"Sichuan Gourmet, 271 Worcester Rd, Framingham,...",0x89e3888ad057c257:0x3341c2f6289b8d0b,Casual Chinese restaurant supplying spiced-to-...,42.29889,-71.405663,"[Sichuan restaurant, Asian restaurant, Chinese...",4.4,1008,2,"[[Monday, 11:30AM–8:30PM], [Tuesday, 11:30AM–8...","{'Service options': ['Takeout', 'Delivery', 'D...",Massachussets,"[0x89e388f60741960b:0xaa95640e78027b47, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,"This place is easily one of the best ,in and a...",Sichuan Gourmet Casual Chinese restaurant supp...
2237,Sichuan Gourmet Sharon,"Sichuan Gourmet Sharon, 388 S Main St, Sharon,...",0x89e47d78ddc7f2f9:0x6558347983c7a85e,Dan dan noodles & other Sichuan specialties at...,42.107472,-71.198817,"[Sichuan restaurant, Chinese restaurant, Resta...",4.3,206,2,"[[Saturday, 11:30AM–9:30PM], [Sunday, 11:30AM–...","{'Service options': ['Takeout'], 'Highlights':...",Closed ⋅ Opens 11:30AM,"[0x89e48768edc272c9:0x77861ebdb6ab466, 0x89e47...",https://www.google.com/maps/place//data=!4m2!3...,I legit LOVE this place you guys. The food is ...,Sichuan Gourmet Sharon Dan dan noodles & other...
3375,Cheng-Du Restaurant,"Cheng-Du Restaurant, 249 N Main St, Mansfield,...",0x89e4625db090ba8d:0xa225b355850ea7ef,Easygoing eatery specializing in Sichuan- & Ma...,42.030005,-71.218208,[Sichuan restaurant],4.3,267,2,"[[Saturday, 12–11PM], [Sunday, 12–10PM], [Mond...","{'Service options': ['Delivery', 'Takeout', 'D...",Closed ⋅ Opens 12PM,"[0x89e4625e1f6a6229:0x378ecf7802440605, 0x89e4...",https://www.google.com/maps/place//data=!4m2!3...,CELIAC FRIENDLY\n\nI haven’t had anything remo...,Cheng-Du Restaurant Easygoing eatery specializ...
4586,Peppercorn House,"Peppercorn House, 318 Main St, Woburn, MA 01801",0x89e375a942fed05d:0xd864b8d957eae274,Family-owned eatery providing familiar Chinese...,42.47848,-71.151708,"[Chinese restaurant, Japanese restaurant, Rest...",4.7,378,2,"[[Friday, 12–10:45PM], [Saturday, 12–10:45PM],...",{'From the business': ['Identifies as women-le...,Closes soon ⋅ 10:45PM ⋅ Opens 12PM Sat,"[0x89e375a97055aae3:0x90e1985f715cb87b, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,Best Asian cuisine I've EVER had! Every thing ...,Peppercorn House Family-owned eatery providing...
1429,CK Shanghai,"CK Shanghai, 15 Washington St, Wellesley, MA 0...",0x89e383c17a985a6d:0x8b713649b1039c20,Popular Chinese restaurant boasting a charming...,42.325323,-71.25995,"[Chinese restaurant, Restaurant]",4.3,145,2,"[[Friday, 11:30AM–9PM], [Saturday, 11:30AM–9PM...","{'Service options': ['Takeout'], 'Health & saf...",Closed ⋅ Opens 11:30AM Sat,"[0x89e38150c442bc85:0x101db8f4e1433fee, 0x89e3...",https://www.google.com/maps/place//data=!4m2!3...,Awesome food selection and ambiance . Original...,CK Shanghai Popular Chinese restaurant boastin...


In [None]:
from rank_bm25 import BM25Okapi
from sklearn.preprocessing import MinMaxScaler

# Restaurant document data
print("Loading Started")
# NOTE: unpickling can execute arbitrary code; only load a pickle this notebook produced.
data = pd.read_pickle('restaurent_docs_ma.pickle')
documents = data['doc_information'].to_list()
document_ids = [i for i in range(len(documents))]
# Corpus tokens are lowercased, so queries must be lowercased as well (see below)
tokenized_docs = [doc.lower().split(" ") for doc in documents]
bm25 = BM25Okapi(tokenized_docs)
print("Loading done")

# BM25 Scores
query = "Chinese"

# Lowercase the query to match the lowercased corpus. Without this a capitalized
# query such as "Chinese" matches no token, so the ranking is driven by
# avg_rating alone (the previously recorded output ranked a Moroccan restaurant
# first for this query).
query_tokens = query.lower().split(" ")
# Get document scores
doc_scores = bm25.get_scores(query_tokens)
data['doc_scores'] = doc_scores

# TODO: location filter, not implemented yet
# latitude, longitude = get_lat_lon(zipcode)
# docs = get_documents_within_25miles(data,latitude,longitude)

# Normalize BM25 Scores
scaler = MinMaxScaler()
normalized_doc_scores = scaler.fit_transform(doc_scores.reshape(-1, 1)).flatten()

# Normalize Avg Ratings (a missing rating counts as 0)
avg_ratings = data['avg_rating'].fillna(0).to_numpy()
normalized_avg_ratings = scaler.fit_transform(avg_ratings.reshape(-1, 1)).flatten()

# Combine Scores (Weighted Sum)
weight_bm25 = 0.7
weight_rating = 0.3
combined_scores = (
    weight_bm25 * normalized_doc_scores + weight_rating * normalized_avg_ratings
)

# Combine Document IDs, Scores, and Ratings
doc_scores_with_ids = list(
    zip(document_ids, doc_scores, avg_ratings, combined_scores)
)

# Sort by Combined Score, best first
sorted_docs_with_ids = sorted(doc_scores_with_ids, key=lambda x: -x[3])

# Extract the top 10 positions
ind = [i for i, _, _, _ in sorted_docs_with_ids[:10]]

# document_ids are row positions, so select with .iloc (label-based .loc is only
# correct while the index happens to be 0..n-1). fillna returns a new frame,
# which avoids the in-place write on a selection of `data`.
top_10_docs = data.iloc[ind].fillna(' ')

# TODO: alternative ranking once the location filter exists
# sorted_docs = docs.sort_values(by=['doc_scores', 'avg_rating'], ascending=[False, False])
# print(sorted_docs)
# top_10_docs = sorted_docs.head(10)
top_10_docs_list = top_10_docs.to_dict(orient='records')
search_info ={}
search_info['items'] = top_10_docs_list
print(search_info)

Loading Started
Loading done
{'items': [{'name': "Mo'Rockin Fusion", 'address': "Mo'Rockin Fusion, 100 Hanover St, Boston, MA 02108", 'gmap_id': '0x89e49b80770784c5:0xd41d713711e42bc1', 'description': 'Casual option in Boston Public Market for create-your-own bowls & pita wraps, plus salads & shakes.', 'latitude': 42.3620534, 'longitude': -71.05704399999999, 'category': ['Moroccan restaurant'], 'avg_rating': 5.0, 'num_of_reviews': 46, 'price': 2, 'hours': [['Tuesday', 'Closed'], ['Wednesday', '11AM–5PM'], ['Thursday', '11AM–5PM'], ['Friday', '11AM–5PM'], ['Saturday', '11AM–5PM'], ['Sunday', 'Closed'], ['Monday', 'Closed']], 'MISC': {'Service options': ['No-contact delivery', 'Delivery', 'Takeout', 'Dine-in'], 'Health & safety': ['Mask required', 'Staff wear masks', 'Staff get temperature checks', 'Staff required to disinfect surfaces between visits'], 'Popular for': ['Lunch', 'Dinner', 'Solo dining'], 'Offerings': ['Comfort food', 'Healthy options', 'Quick bite'], 'Amenities': ['Good f