In [14]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# Read the raw UHI observations into a plain DataFrame.
uhi_df = pd.read_csv("../data/raw/UHI_data.csv")

# Build one point per row from the coordinate columns (x = Longitude,
# y = Latitude), then wrap the table as a GeoDataFrame tagged EPSG:4326.
uhi_points = gpd.points_from_xy(x=uhi_df["Longitude"], y=uhi_df["Latitude"])
uhi_gdf = gpd.GeoDataFrame(uhi_df, geometry=uhi_points, crs="EPSG:4326")

# Read the processed weather table and give its timestamp column
# ("Date / Time") the name used as the join key later on.
weather_df = pd.read_csv("../data/processed/weather_data.csv")
weather_df = weather_df.rename(columns={"Date / Time": "datetime"})

# Strip the " EDT" suffix so the remaining text matches the explicit
# day-first format; values that still fail to parse are coerced to NaT.
weather_timestamps = weather_df["datetime"].str.replace(r" EDT", "", regex=True)
weather_df["datetime"] = pd.to_datetime(
    weather_timestamps,
    format="%d-%m-%Y %H:%M",
    errors="coerce",
    dayfirst=True,
)

# Load building footprints from the processed CSV as a plain table.
# `building_df` is read here so the cell runs on a fresh kernel instead of
# relying on a variable left over from some other notebook cell.
# NOTE(review): assumes this CSV is the intended source of `building_df` and
# that its "geometry" column holds WKT strings — confirm.
building_df = pd.read_csv("../data/processed/building_footprints.csv")

# Convert Building Footprints to GeoDataFrame: parse the WKT text into
# geometries and tag them with the same CRS as the UHI points (EPSG:4326).
building_gdf = gpd.GeoDataFrame(
    building_df,
    geometry=gpd.GeoSeries.from_wkt(building_df["geometry"]),
    crs="EPSG:4326",
)

# Printed only once the building table really is a GeoDataFrame.
print("✅ Converted both datasets to GeoDataFrames.")

# Show the first few parsed geometries as a quick sanity check.
print("✅ Converted Building Footprints to GeoDataFrame:", building_gdf.geometry.head())

# Spatial join: attach to each UHI point the building polygon it lies within.
# how="left" keeps every UHI row, including points inside no footprint.
uhi_gdf = gpd.sjoin(
    uhi_gdf,
    building_gdf,
    how="left",
    predicate="within",
)

# Use the matched building's row index (the "index_right" column produced by
# the join) as the location identifier, stored as text.
# NOTE(review): rows with no matching building have a missing index_right,
# which astype(str) turns into the literal string "nan" — confirm downstream
# code expects that.
uhi_gdf["location_id"] = uhi_gdf["index_right"].astype(str)
print("✅ Spatial join completed. Assigned buildings to UHI locations.")

# Drop the join bookkeeping and coordinate columns; errors="ignore" skips
# any of these that are not present.
columns_to_discard = ["index_right", "geometry", "Longitude", "Latitude"]
uhi_gdf.drop(columns=columns_to_discard, errors="ignore", inplace=True)

# Ensure both join keys are real datetimes. UHI timestamps are parsed with the
# explicit day-first layout; anything that does not match becomes NaT.
uhi_gdf["datetime"] = pd.to_datetime(uhi_gdf["datetime"], format="%d-%m-%Y %H:%M", errors="coerce", dayfirst=True)
weather_df["datetime"] = pd.to_datetime(weather_df["datetime"], errors="coerce")

# pandas matches null join keys against each other, so a weather row whose
# timestamp failed to parse (NaT) would be attached to every UHI row whose
# timestamp also failed to parse. Exclude those weather rows from the join;
# `weather_df` itself is left untouched.
weather_for_merge = weather_df.dropna(subset=["datetime"])

# Merge UHI Data with Weather Data. The left join keeps every UHI row; rows
# without a weather reading at exactly the same timestamp get NaN weather values.
uhi_gdf = uhi_gdf.merge(weather_for_merge, on="datetime", how="left")
print("✅ Merged UHI data with Weather data.")

# Derive calendar features from the parsed timestamp column: one new column
# per datetime component, named after the component itself.
timestamp_parts = uhi_gdf['datetime'].dt
for part_name in ('hour', 'day', 'month', 'weekday'):
    uhi_gdf[part_name] = getattr(timestamp_parts, part_name)
print("✅ Extracted time-based features.")

# Forward-fill gaps in place: each missing cell takes the most recent
# non-missing value above it in the same column.
uhi_gdf.ffill(inplace=True)
print("✅ Missing values handled.")

# Write the engineered feature table (including location_id) to disk without
# the row index.
features_csv_path = "../data/processed/UHI_features.csv"
uhi_gdf.to_csv(features_csv_path, index=False)
print("✅ Feature Engineering Completed. Saved `UHI_features.csv` with location_id.")


✅ Converted both datasets to GeoDataFrames.
✅ Converted Building Footprints to GeoDataFrame: 0    MULTIPOLYGON (((-73.91903 40.8482, -73.91933 4...
1    MULTIPOLYGON (((-73.92195 40.84963, -73.92191 ...
2    MULTIPOLYGON (((-73.9205 40.85011, -73.92045 4...
3    MULTIPOLYGON (((-73.92056 40.8514, -73.92053 4...
4    MULTIPOLYGON (((-73.91234 40.85218, -73.91247 ...
Name: geometry, dtype: geometry
✅ Spatial join completed. Assigned buildings to UHI locations.
✅ Merged UHI data with Weather data.
✅ Extracted time-based features.
✅ Missing values handled.
✅ Feature Engineering Completed. Saved `UHI_features.csv` with location_id.
