In [25]:
import html

import pandas as pd

In [26]:
# Read the processed real-estate listings CSV into a DataFrame (relative path).
listings_csv = '../../../data/processed/realestates_kh_v7.csv'
df = pd.read_csv(listings_csv)

In [29]:
# Keep only rows whose price_per_m2 is at least 100; a missing price compares
# False and is therefore filtered out as well.
df = df.loc[df['price_per_m2'].ge(100)]


In [32]:
import folium
from folium.plugins import MarkerCluster
import matplotlib
import matplotlib.pyplot as plt

# Assign a color to each h_id.
# 'tab20' only holds 20 colors; requesting more entries than that resamples it
# with repeats, so several h_ids would end up sharing a color. When there are
# more than 20 h_ids, fall back to a continuous colormap resampled to one entry
# per h_id so every h_id still gets its own color.
unique_hids = df['h_id'].unique()
n_hids = len(unique_hids)
cmap = plt.get_cmap('tab20' if n_hids <= 20 else 'gist_rainbow', n_hids)
h_id_to_color = {hid: matplotlib.colors.rgb2hex(cmap(i)) for i, hid in enumerate(unique_hids)}


def _popup_html(row):
    """Build the HTML shown in the popup of one listing marker.

    Parameters
    ----------
    row : mapping-like (e.g. a row yielded by ``DataFrame.iterrows``)
        Must provide 'id', 'h_id', 'address_line_2', 'category_name',
        'bedrooms', 'land_area', 'latitude', 'longitude' and 'price_per_m2'.

    Returns
    -------
    str
        HTML fragment. Values that are inserted as plain text are HTML-escaped
        so characters such as ``&`` or ``<`` coming from the CSV cannot break
        or inject markup; numeric fields go through a float format spec.
    """
    def esc(value):
        return html.escape(str(value))

    return f"""
    <b>id:</b> {esc(row['id'])}<br>
    <b>h_id:</b> {esc(row['h_id'])}<br>
    <b>Address:</b> {esc(row['address_line_2'])}<br>
    <b>Property Type:</b> {esc(row['category_name'])}<br>
    <b>Bedrooms:</b> {esc(row['bedrooms'])}<br>
    <b>Area:</b> {row['land_area']:.0f} m²<br>
    <b>location:</b> {esc(row['latitude'])},{esc(row['longitude'])}<br>
    <b>Price/m²:</b> ${row['price_per_m2']:.2f}
    """


# Center the map on the mean coordinates of the listings
m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=12)

# One circle marker per listing, colored by the listing's h_id
for _, row in df.iterrows():
    color = h_id_to_color[row['h_id']]
    popup_html = _popup_html(row)

    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=4,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
        popup=folium.Popup(popup_html, max_width=300)
    ).add_to(m)

# Write the interactive map to a standalone HTML file in the working directory
m.save('realestate_price_map_more_info.html')

In [33]:
# Flag price_per_m2 outliers inside every h_id group using the 1.5 * IQR rule
def _iqr_outliers(members):
    """Return the rows of `members` whose price_per_m2 lies strictly below
    Q1 - 1.5 * IQR or strictly above Q3 + 1.5 * IQR of that same group."""
    prices = members['price_per_m2']
    first_quartile = prices.quantile(0.25)
    third_quartile = prices.quantile(0.75)
    spread = third_quartile - first_quartile
    too_low = prices < first_quartile - 1.5 * spread
    too_high = prices > third_quartile + 1.5 * spread
    return members[too_low | too_high]


# Maps each h_id to a DataFrame holding that group's outlier rows (possibly empty)
outlier_dict = {hid: _iqr_outliers(members) for hid, members in df.groupby('h_id')}

# Report the number of outliers found for each h_id
for h_id, outliers in outlier_dict.items():
    print(f"h_id: {h_id}, outliers: {len(outliers)}")

h_id: 886580d243fffff, outliers: 0
h_id: 886580d28dfffff, outliers: 0
h_id: 886580d2d9fffff, outliers: 0
h_id: 886580d369fffff, outliers: 0
h_id: 8865846001fffff, outliers: 0
h_id: 8865846005fffff, outliers: 0
h_id: 886584600bfffff, outliers: 0
h_id: 886584600dfffff, outliers: 0
h_id: 8865846019fffff, outliers: 0
h_id: 8865846027fffff, outliers: 0
h_id: 8865846029fffff, outliers: 0
h_id: 886584602dfffff, outliers: 0
h_id: 886584603bfffff, outliers: 0
h_id: 8865846067fffff, outliers: 0
h_id: 8865846085fffff, outliers: 0
h_id: 88658460a9fffff, outliers: 0
h_id: 88658460adfffff, outliers: 0
h_id: 88658460c5fffff, outliers: 0
h_id: 88658460e5fffff, outliers: 0
h_id: 8865846101fffff, outliers: 0
h_id: 8865846105fffff, outliers: 0
h_id: 8865846107fffff, outliers: 0
h_id: 886584610bfffff, outliers: 1
h_id: 886584610dfffff, outliers: 0
h_id: 8865846113fffff, outliers: 0
h_id: 886584611bfffff, outliers: 2
h_id: 8865846121fffff, outliers: 0
h_id: 8865846123fffff, outliers: 0
h_id: 8865846125ffff