<a href="https://colab.research.google.com/github/vickyzai/hk-specialty-cofee-shops/blob/main/HK_Coffee_Shops_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd
import time
import re
import geopandas
from shapely.geometry import Point
import folium

#Step 1: The quest for the data

# --- Configuration ---
API_KEY = 'AIzaSyC_OxTIl1FHPWFUnBOnm7nx5jYqJIcyk1o' # <--- REPLACE THIS WITH YOUR ACTUAL GOOGLE PLACES API KEY!
BASE_URL = 'https://maps.googleapis.com/maps/api/place/textsearch/json?'

# List of Hong Kong's 18 Districts
hong_kong_districts = [
    # Hong Kong Island
    "Central and Western District, Hong Kong Island, Hong Kong",
    "Eastern District, Hong Kong Island, Hong Kong",
    "Southern District, Hong Kong Island, Hong Kong",
    "Wan Chai District, Hong Kong Island, Hong Kong",
    # Kowloon
    "Kowloon City District, Kowloon, Hong Kong",
    "Kwun Tong District, Kowloon, Hong Kong",
    "Sham Shui Po District, Kowloon, Hong Kong",
    "Wong Tai Sin District, Kowloon, Hong Kong",
    "Yau Tsim Mong District, Kowloon, Hong Kong",
    # New Territories
    "Islands District, New Territories, Hong Kong",
    "Kwai Tsing District, New Territories, Hong Kong",
    "North District, New Territories, Hong Kong",
    "Sai Kung District, New Territories, Hong Kong",
    "Sha Tin District, New Territories, Hong Kong",
    "Tai Po District, New Territories, Hong Kong",
    "Tsuen Wan District, New Territories, Hong Kong",
    "Tuen Mun District, New Territories, Hong Kong",
    "Yuen Long District, New Territories, Hong Kong"
]

# List of common coffee chain names to filter out (case-insensitive)
CHAIN_NAMES_TO_EXCLUDE = [
    'Starbucks', 'Pacific Coffee', 'McCafé',
    'Pret A Manger', '% Arabica','The Coffee Academïcs',
    'Nespresso', 'Habitu'
]

# Hong Kong Bounding Box Coordinates (approximate)
HK_MIN_LAT = 22.15
HK_MAX_LAT = 22.56
HK_MIN_LON = 113.82
HK_MAX_LON = 114.45

# List to store all collected coffee shop data from all districts
all_coffee_shops_raw = []

# --- Function to fetch data from Google Places API ---
def fetch_places(query, api_key, pagetoken=None):
    """
    Fetches places data from Google Places API using a text search.
    Handles pagination using 'pagetoken'.
    """
    params = {
        'query': query,
        'key': api_key,
        'language': 'en'
    }
    if pagetoken:
        params['pagetoken'] = pagetoken

    response = requests.get(BASE_URL, params=params)
    results = response.json()

    if results.get('status') == 'ZERO_RESULTS':
        return [], None
    elif results.get('status') != 'OK':
        print(f"API Error for query '{query}': {results.get('status')} - {results.get('error_message', 'No error message provided.')}")
        return None, None

    return results.get('results'), results.get('next_page_token')

# --- Main data collection loop for all districts ---
print("Starting comprehensive search for coffee shops across all 18 districts of Hong Kong...")

for district in hong_kong_districts:
    current_search_query = f'coffee shop in {district}'
    next_page_token = None
    page_count = 0

    print(f"\n--- Searching in: {district} ---")

    while True:
        page_count += 1
        print(f"  Fetching page {page_count} for {district}...")

        results, next_page_token = fetch_places(current_search_query, API_KEY, next_page_token)

        if results is None:
            print(f"  Stopped fetching for {district} due to an API error.")
            break
        elif not results:
            print(f"  No more results found for {district} (or reached end of Text Search limit for this query).")
            break

        for place in results:
            name = place.get('name')
            address = place.get('formatted_address')
            latitude = place['geometry']['location']['lat']
            longitude = place['geometry']['location']['lng']
            rating = place.get('rating')
            user_ratings_total = place.get('user_ratings_total')
            place_id = place.get('place_id')

            all_coffee_shops_raw.append({
                'Name': name,
                'Address': address,
                'Latitude': latitude,
                'Longitude': longitude,
                'Rating': rating,
                'Total Reviews': user_ratings_total,
                'Place ID': place_id,
                'District_Queried': district
            })

        if next_page_token:
            time.sleep(2)
        else:
            break

# --- Process and Consolidate Data ---
if all_coffee_shops_raw:
    df_raw = pd.DataFrame(all_coffee_shops_raw)

    # Remove duplicates based on 'Place ID'
    df_unique = df_raw.drop_duplicates(subset=['Place ID'], keep='first').copy()

    # Filter by Hong Kong Bounding Box
    print(f"\nTotal unique shops before bounding box filter: {len(df_unique)}")
    df_hk_only = df_unique[
        (df_unique['Latitude'] >= HK_MIN_LAT) & (df_unique['Latitude'] <= HK_MAX_LAT) &
        (df_unique['Longitude'] >= HK_MIN_LON) & (df_unique['Longitude'] <= HK_MAX_LON)
    ].copy()
    print(f"Total shops after bounding box filter (Hong Kong only): {len(df_hk_only)}")

    # Filter out chain stores
    print(f"Total HK-only shops before chain filter: {len(df_hk_only)}")
    chain_pattern = '|'.join([re.escape(name) for name in CHAIN_NAMES_TO_EXCLUDE])
    df_specialty = df_hk_only[~df_hk_only['Name'].str.contains(chain_pattern, case=False, na=False)].copy()
    print(f"Total specialty coffee shops (after chain filter): {len(df_specialty)}")

    # Load District Boundary Data and Perform Spatial Join
    HK_DISTRICTS_GEOJSON_URL = 'hksar_18_district_boundary.json'
    print(f"\nLoading Hong Kong district boundaries from: {HK_DISTRICTS_GEOJSON_URL}")
    try:
        gdf_districts = geopandas.read_file(HK_DISTRICTS_GEOJSON_URL)
        gdf_districts = gdf_districts[['geometry', 'District']]
        print(f"Successfully loaded {len(gdf_districts)} district polygons.")
    except Exception as e:
        print(f"Error loading district GeoJSON: {e}")
        print("Please ensure 'HAD_18_Districts.json' (or your file name) is uploaded to Colab's Files tab.")
        gdf_districts = None # Set to None if loading fails

    if gdf_districts is not None:
        df_specialty_gdf = geopandas.GeoDataFrame(
            df_specialty,
            geometry=geopandas.points_from_xy(df_specialty.Longitude, df_specialty.Latitude),
            crs="EPSG:4326"
        )

        print("\nPerforming spatial join to assign accurate districts to coffee shops...")
        df_final = geopandas.sjoin(
            df_specialty_gdf,
            gdf_districts,
            how="left",
            predicate="within"
        )

        df_final.rename(columns={'District': 'Actual_District_from_Spatial_Join'}, inplace=True)
        df_final['Actual_District_from_Spatial_Join'] = df_final['Actual_District_from_Spatial_Join'].fillna('Unknown/Outside Boundary')

        df_final.drop(columns=['index_right'], inplace=True)

        print("\nSpatial join complete. Preview of data with accurate district:")
        print(df_final[['Name', 'District_Queried', 'Actual_District_from_Spatial_Join', 'Rating', 'Total Reviews']].head())
    else:
        print("Skipping spatial join due to GeoJSON loading error. Output will not have accurate district assignments.")
        df_final = df_specialty.copy()
        df_final['Actual_District_from_Spatial_Join'] = 'Spatial Join Failed'

#### to delete
    # --- Calculate Bayesian Average (apply to the final DataFrame) ---
    df_final['Rating'] = pd.to_numeric(df_final['Rating'], errors='coerce').fillna(df_final['Rating'].mean())
    df_final['Total Reviews'] = pd.to_numeric(df_final['Total Reviews'], errors='coerce').fillna(0).astype(int)

    C = df_final['Rating'].mean()
    m = 100 # Bayesian 'm' value
    print(f"\nCalculating Bayesian Average with m = {m}")
    df_final['Bayesian_Weighted_Rating'] = df_final.apply(lambda row: (row['Total Reviews'] * row['Rating'] + m * C) / (row['Total Reviews'] + m) if row['Total Reviews'] > 0 else C, axis=1)
### to delete

    output_filename = 'hong_kong_specialty_coffee_shops_final.csv'
    if 'geometry' in df_final.columns:
        df_final.drop(columns=['geometry'], inplace=True)

    df_final.to_csv(output_filename, index=False, encoding='utf-8')

    print(f"\n--- Data Collection and Filtering Complete ---")
    print(f"Final number of specialty coffee shops in Hong Kong: {len(df_final)}")
    print(f"Data saved to {output_filename}")
    print("\nHere's a preview of the final, filtered data:")
    print(df_final.head())



Starting comprehensive search for coffee shops across all 18 districts of Hong Kong...

--- Searching in: Central and Western District, Hong Kong Island, Hong Kong ---
  Fetching page 1 for Central and Western District, Hong Kong Island, Hong Kong...
  Fetching page 2 for Central and Western District, Hong Kong Island, Hong Kong...
  Fetching page 3 for Central and Western District, Hong Kong Island, Hong Kong...

--- Searching in: Eastern District, Hong Kong Island, Hong Kong ---
  Fetching page 1 for Eastern District, Hong Kong Island, Hong Kong...
  Fetching page 2 for Eastern District, Hong Kong Island, Hong Kong...
  Fetching page 3 for Eastern District, Hong Kong Island, Hong Kong...

--- Searching in: Southern District, Hong Kong Island, Hong Kong ---
  Fetching page 1 for Southern District, Hong Kong Island, Hong Kong...
  Fetching page 2 for Southern District, Hong Kong Island, Hong Kong...
  Fetching page 3 for Southern District, Hong Kong Island, Hong Kong...

--- Searching 

NameError: name 'MarkerCluster' is not defined

In [None]:
import pandas as pd

# Step 3: Brewing a Fair Ranking - The Bayesian Average
df_coffeeshops = pd.read_csv('hong_kong_coffee_shops_comprehensive.csv')

# Bayesian Average
# Calculate C (mean rating of all specialty coffee shops)
C = df_coffeeshops['Rating'].mean()
print(f"Overall average rating (C): {C:.2f}")

# Define m (minimum reviews threshold)
m = 100

# Apply the Bayesian Average formula
def bayesian_average(row, C, m):
    v = row['Total Reviews']
    R = row['Rating']
    return (v * R + m * C) / (v + m)

df_coffeeshops['Bayesian_Weighted_Rating'] = df_coffeeshops.apply(bayesian_average, axis=1, C=C, m=m)

print(f"\nTop 10 Coffee Shops by Bayesian Weighted Rating (m={m}):")
print(df_coffeeshops[['Name', 'Rating', 'Total Reviews', 'Bayesian_Weighted_Rating']].sort_values(by='Bayesian_Weighted_Rating', ascending=False).head(10))
print("\n---")

# Optional: Export the updated DataFrame with new scores ---
output_filename_with_scores = 'hong_kong_specialty_coffee_shops_with_scores.csv'
df_coffeeshops.to_csv(output_filename_with_scores, index=False, encoding='utf-8')
print(f"\nData with new scores saved to {output_filename_with_scores}")

Original Data Head (with filled NaNs):
                              Name  Rating  Total Reviews
0    Chart Coffee (Central Market)     4.8            440
1  Hazel & Hershey Coffee Roasters     4.2            474
2                      Nook Coffee     4.9             79
3                   Halfway Coffee     4.5           1082
4       Islet Coffee Lab (Central)     4.8             65

---
Overall average rating (C): 4.15

Top 10 Coffee Shops by Bayesian Weighted Rating (m=100):
                                        Name  Rating  Total Reviews  \
63   Happy Kitchen Cafe & Bar by German Pool     4.9            841   
439  sensory ZERO [ Kwai Chung | The Field ]     5.0            355   
0              Chart Coffee (Central Market)     4.8            440   
684                   Nekos Cube 方塊貓 | 貓cafe     4.9            217   
596                      Quality Life Coffee     4.9            136   
104                      Black Cherry Coffee     4.8            180   
438                 

In [4]:
# Step 4: The Map Unfolds - Visualizing Hong Kong's Coffee Landscape
# Top 10 Coffee Shops and Average Rating by District

import pandas as pd
import folium
from folium.plugins import MarkerCluster
import numpy as np # For handling potential NaN values in rating/reviews
import re # For the chain filtering if you reload comprehensive data

input_filename = 'hong_kong_coffee_shops_final.csv'


# Load Data
try:
    df_coffeeshops = pd.read_csv(input_filename)
    print(f"Successfully loaded {len(df_coffeeshops)} coffee shops from {input_filename}")
except FileNotFoundError:
    print(f"Error: {input_filename} not found. Please make sure the CSV file is in the correct directory.")
    print("If you just ran the data collection code, remember to download the CSV from Colab's Files tab and then upload it, or run the full data collection script first.")
    # Exit or handle gracefully if file not found
    exit()


# Generate Top 10 Coffee Shops
print("\n--- Top 10 Specialty Coffee Shops by Bayesian Weighted Rating) ---")
top_10_shops = df_coffeeshops.sort_values(by='Bayesian_Weighted_Rating', ascending=False).head(10)
# Select only relevant columns for display
top_10_display = top_10_shops[['Name', 'Actual_District_from_Spatial_Join', 'Rating', 'Total Reviews', 'Bayesian_Weighted_Rating']]
top_10_display['Bayesian_Weighted_Rating'] = top_10_display['Bayesian_Weighted_Rating'].round(2)
print(top_10_display.to_string(index=False)) # .to_string() for better console formatting

# Styling the Top 10 Table
styled_top_10_table = top_10_display.style \
    .background_gradient(subset=['Bayesian_Weighted_Rating'], cmap='Blues') \
    .format({
        'Rating': "{:.2f}",
        'Total Reviews': "{:,.0f}", # Format as integer with comma separator
        'Bayesian_Weighted_Rating': "{:.2f}"
    }) \
    .set_caption("Top 10 Specialty Coffee Shops in Hong Kong") \
    .set_properties(**{'font-size': '11pt', 'text-align': 'center'})

# Display the styled table
display(styled_top_10_table)


# Calculate Average Scores by District
print("\n--- Average Bayesian Score and Total Shops by District ---")
# Ensure 'Actual_District_from_Spatial_Join' exists and is used for grouping
if 'Actual_District_from_Spatial_Join' in df_coffeeshops.columns:
    avg_scores_by_district = df_coffeeshops.groupby('Actual_District_from_Spatial_Join').agg(
        Average_Bayesian_Score=('Bayesian_Weighted_Rating', 'mean'), # Calculate mean of Bayesian score
        Average_Rating=('Rating', 'mean'), # Calculate mean of rating
        Number_Of_Coffee_Shops=('Name', 'count'), # Count the number of shops (using 'Name' column as a proxy for rows)
        Average_Total_Reviews=('Total Reviews', lambda x: round(x.mean())) # Calculate mean of Total Reviews for context
    ).sort_values(by='Average_Bayesian_Score', ascending=False) # Sort by the average Bayesian score

    avg_scores_by_district = avg_scores_by_district.round(2) #round to 2 decimal places


# Styling the District Averages Table
    styled_district_table = avg_scores_by_district.style \
        .background_gradient(subset=['Average_Bayesian_Score'], cmap='Blues') \
        .background_gradient(subset=['Average_Rating'], cmap='Blues') \
        .format({
            'Average_Bayesian_Score': "{:.2f}",
            'Average_Rating': "{:.2f}",
            'Number_Of_Coffee_Shops': "{:,.0f}",
            'Average_Total_Reviews': "{:,.0f}"
        }) \
        .set_caption("Average Coffee Shop Performance by District") \
        .set_properties(**{'font-size': '11pt', 'text-align': 'center'})

    # Display the styled table
    display(styled_district_table)


Successfully loaded 563 coffee shops from hong_kong_coffee_shops_final.csv

--- Top 10 Specialty Coffee Shops by Bayesian Weighted Rating) ---
                         Name Actual_District_from_Spatial_Join  Rating  Total Reviews  Bayesian_Weighted_Rating
Chart Coffee (Central Market)                 Central & Western     4.8            440                      4.69
           Chart Coffee (KCC)                        Kwai Tsing     5.0             93                      4.59
          Black Cherry Coffee                          Southern     4.8            180                      4.59
       The Missing Piece Cafe                 Central & Western     4.7            271                      4.57
               Halfway Coffee                 Central & Western     4.6           1086                      4.57
     Island Table Grocer Cafe                           Islands     4.7            263                      4.56
   Blend & Grind Kennedy Town                 Central & Western   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_10_display['Bayesian_Weighted_Rating'] = top_10_display['Bayesian_Weighted_Rating'].round(2)


Unnamed: 0,Name,Actual_District_from_Spatial_Join,Rating,Total Reviews,Bayesian_Weighted_Rating
0,Chart Coffee (Central Market),Central & Western,4.8,440,4.69
1,Chart Coffee (KCC),Kwai Tsing,5.0,93,4.59
2,Black Cherry Coffee,Southern,4.8,180,4.59
3,The Missing Piece Cafe,Central & Western,4.7,271,4.57
4,Halfway Coffee,Central & Western,4.6,1086,4.57
5,Island Table Grocer Cafe,Islands,4.7,263,4.56
6,Blend & Grind Kennedy Town,Central & Western,4.6,685,4.55
7,HIKARI Coffee,Tsuen Wan,4.6,615,4.55
8,Blend & Grind Mid-Levels,Central & Western,4.6,491,4.53
9,Good Day Coffee Co.,Yau Tsim Mong,4.8,122,4.53



--- Average Bayesian Score and Total Shops by District ---


Unnamed: 0_level_0,Average_Bayesian_Score,Average_Rating,Number_Of_Coffee_Shops,Average_Total_Reviews
Actual_District_from_Spatial_Join,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Southern,4.42,4.55,4,200
Central & Western,4.33,4.42,55,266
Eastern,4.32,4.53,15,64
Wan Chai,4.29,4.4,51,172
Yau Tsim Mong,4.25,4.3,54,258
Islands,4.23,4.26,17,78
Sham Shui Po,4.23,4.34,49,132
Kwun Tong,4.23,4.39,50,100
Tsuen Wan,4.23,4.32,35,129
Sai Kung,4.22,4.18,38,146


In [None]:
# Step 4: The Map Unfolds - Visualizing Hong Kong's Coffee Landscape (cont'd)
# Coffee Shop Density by District

# Calculate and Style Coffee Shop Density by District (using new population data)
print("\n--- Calculating Coffee Shop Density by District (by Area and Population) ---")

# Load the population density data
df_pop_density = pd.read_csv('Hong_Kong_Population_Density.csv')

# Get the count of coffee shops per district from our processed data
shop_counts = df_coffeeshops.groupby('Actual_District_from_Spatial_Join').agg(
    Number_Of_Coffee_Shops=('Name', 'count')
).reset_index() # Convert index back to a column for merging
shop_counts.rename(columns={'Actual_District_from_Spatial_Join': 'District'}, inplace=True) # Rename for merging

# Merge shop counts with population and area data
density_data = shop_counts.merge(
    df_pop_density[['District', 'Total_population', 'Area']], # Select relevant columns from pop_density
    on='District',
    how='left'
)

# Handle potential NaNs if a district from coffee shops isn't in population data
density_data[['Total_population', 'Area']] = density_data[['Total_population', 'Area']].fillna(0)


# --- DENSITY CALCULATIONS ---
# Calculate: 1 coffee shop per how many people / per how many sq km

# Handle cases where Number_Of_Coffee_Shops is 0 to avoid division by zero
density_data['People_Per_Coffee_Shop'] = density_data.apply(
    lambda row: row['Total_population'] / row['Number_Of_Coffee_Shops']
    if row['Number_Of_Coffee_Shops'] > 0 else np.nan, axis=1 # Use NaN if no shops
)

density_data['Sq_Km_Per_Coffee_Shop'] = density_data.apply(
    lambda row: row['Area'] / row['Number_Of_Coffee_Shops']
    if row['Number_Of_Coffee_Shops'] > 0 else np.nan, axis=1 # Use NaN if no shops
)


density_results_people = density_data[['District','People_Per_Coffee_Shop','Number_Of_Coffee_Shops','Total_population']]
density_results_people_sorted = density_results_people.sort_values(by='People_Per_Coffee_Shop', ascending=True).copy()

density_results_area = density_data[['District','Sq_Km_Per_Coffee_Shop','Number_Of_Coffee_Shops','Area']]
density_results_area_sorted = density_results_area.sort_values(by='Sq_Km_Per_Coffee_Shop', ascending=True).copy()

# --- Styling the Density Table ---
styled_density_table_people = density_results_people_sorted.style \
    .background_gradient(subset=['People_Per_Coffee_Shop'], cmap='Blues_r') \
    .format({
        'Number_Of_Coffee_Shops': "{:,.0f}",
        'Total_population': "{:,.0f}", # Population as integer with comma
        'People_Per_Coffee_Shop': "{:,.0f}", # People per coffee shop (rounded to whole person)
    }) \
    .set_caption("Coffee Shop Density by District (People per Shop)") \
    .set_properties(**{'font-size': '11pt', 'text-align': 'center'})

styled_density_table_area = density_results_area_sorted.style \
    .background_gradient(subset=['Sq_Km_Per_Coffee_Shop'], cmap='Blues_r') \
    .format({
        'Number_Of_Coffee_Shops': "{:,.0f}",
        'Area': "{:,.2f}", # Area in km2 with 2 decimal places
        'Sq_Km_Per_Coffee_Shop': "{:,.2f}" # Sq Km per coffee shop (2 decimal places)
    }) \
    .set_caption("Coffee Shop Density by District (Area per Shop)") \
    .set_properties(**{'font-size': '11pt', 'text-align': 'center'})

print("\nCoffee Shop Density Table:")
display(styled_density_table_area)
display(styled_density_table_people)


--- Calculating Coffee Shop Density by District (by Area and Population) ---

Coffee Shop Density Table:


Unnamed: 0,District,Sq_Km_Per_Coffee_Shop,Number_Of_Coffee_Shops,Area
16,Yau Tsim Mong,0.13,54,6.99
9,Sham Shui Po,0.19,49,9.36
14,Wan Chai,0.21,51,10.56
5,Kwun Tong,0.23,50,11.28
0,Central & Western,0.23,55,12.55
3,Kowloon City,0.4,25,10.02
15,Wong Tai Sin,1.03,9,9.3
1,Eastern,1.2,15,17.99
4,Kwai Tsing,1.3,18,23.34
12,Tsuen Wan,1.77,35,61.94


Unnamed: 0,District,People_Per_Coffee_Shop,Number_Of_Coffee_Shops,Total_population
14,Wan Chai,3269,51,166695
0,Central & Western,4290,55,235953
16,Yau Tsim Mong,5753,54,310647
9,Sham Shui Po,8798,49,431090
12,Tsuen Wan,9146,35,320094
2,Islands,10899,17,185282
7,Sai Kung,12869,38,489037
5,Kwun Tong,13463,50,673166
11,Tai Po,13760,23,316470
13,Tuen Mun,15840,32,506879


In [5]:
# Step 4: The Map Unfolds - Visualizing Hong Kong's Coffee Landscape
# Interactive map of HK specialty coffee shops

import folium
from folium.plugins import MarkerCluster
import pandas as pd

BAYESIAN_M_VALUE = 100 # Ensure this matches the 'm' used in your calculation

HK_CENTER_LAT = 22.3193
HK_CENTER_LON = 114.1694
INITIAL_MAP_ZOOM = 12

# --- Create the Interactive Map ---
print("\n--- Generating Interactive Map ---")

# Create the base map
m = folium.Map(location=[HK_CENTER_LAT, HK_CENTER_LON], zoom_start=INITIAL_MAP_ZOOM,
               tiles='CartoDB positron')

# Create a MarkerCluster for better visualization of many points
marker_cluster = MarkerCluster().add_to(m)

print("Adding coffee shop markers to the map...")
# Loop through the DataFrame to add markers
for index, row in df_coffeeshops.iterrows():
    lat = row['Latitude']
    lon = row['Longitude']
    name = row['Name']
    rating = row['Rating']
    total_reviews = row['Total Reviews']
    bayesian_score = row['Bayesian_Weighted_Rating']
    # Use the actual district from the cleaned data (using .get() for safety)
    actual_district = row.get('Actual_District_from_Spatial_Join', 'N/A')

    # Create popup text
    popup_text = f"""
    <b>{name}</b><br>
    **Actual District:** {actual_district}<br>
    Rating: {rating:.1f} ({total_reviews} reviews)<br>
    Bayes Score (m={BAYESIAN_M_VALUE}): {bayesian_score:.2f}<br>
    Address: {row['Address']}
    """

    # Customize marker color based on Bayesian Score
    if bayesian_score >= 4.5:
        marker_color = 'green'
    elif bayesian_score >= 4.0:
        marker_color = 'blue'
    elif bayesian_score >= 3.5:
        marker_color = 'orange'
    else:
        marker_color = 'red'

    folium.Marker(
        location=[lat, lon],
        popup=folium.Popup(popup_text, max_width=300),
        tooltip=f"{name} ({rating:.1f} stars in {actual_district})",
        icon=folium.Icon(color=marker_color, icon='coffee', prefix='fa')
    ).add_to(marker_cluster)

# Add a Layer Control to toggle districts on/off (optional, but good for interactive maps)
folium.LayerControl().add_to(m)

print("\nMap created. Displaying map in Colab output.")
# Display the map in Colab
m


--- Generating Interactive Map ---
Adding coffee shop markers to the map...

Map created. Displaying map in Colab output.


In [3]:
# Save the map as an HTML file
map_filename = 'hong_kong_coffee_shops_map.html'
m.save(map_filename)
print(f"Map saved to {map_filename}")

Map saved to hong_kong_coffee_shops_map.html
