# Property Orientation (Offline)

Compute each property's facing orientation using local roads data only.

- Inputs:
  - listings: `data/listings_domain.csv`
  - roads (required): `data/roads.gpkg`
- No API calls are used.

Output: one row per property with address, lat/lon, orientation degrees and 8-way cardinal direction.


In [54]:
# Configuration
from pathlib import Path

# Maximum number of listings to process; set to None to process all of them.
SAMPLE_SIZE = 500

# Input and output locations, all resolved relative to the working directory.
ROOT = Path('.')
LISTINGS_CSV = ROOT.joinpath('data', 'listings_domain.csv')
ROADS_GPKG = ROOT.joinpath('data', 'roads.gpkg')
OUTPUT_CSV = ROOT.joinpath('output', 'property_orientation.csv')

# Listing columns carried through unchanged into the output table.
KEEP_COLS = [
    'listing_id',
    'weekly_rent',
    'bond',
    'bedrooms',
    'bathrooms',
    'car_spaces',
    'property_type_primary',
    'property_type_secondary',
    'address',
    'suburb',
    'state',
    'postcode',
]

In [55]:
import math
from typing import Optional, Tuple, List

import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
import shapely
from shapely.strtree import STRtree


def normalize_bearing_degrees(degrees: float) -> float:
    """Wrap an angle in degrees into the half-open range [0, 360).

    Args:
        degrees: Any finite angle in degrees (may be negative or above 360).

    Returns:
        The equivalent angle ``v`` with ``0 <= v < 360``.
    """
    value = degrees % 360.0
    # With a positive modulus Python's ``%`` never yields a negative result,
    # but float rounding can yield exactly 360.0 for tiny negative inputs
    # (e.g. -1e-20 % 360.0 == 360.0). Fold that case back to 0.
    if value >= 360.0:
        value = 0.0
    return value


def segment_bearing_degrees(a_lon: float, a_lat: float, b_lon: float, b_lat: float) -> float:
    """Return the bearing from point A to point B in degrees.

    The result is measured clockwise from north (0° = North) using the
    spherical forward-azimuth formula, then passed through
    ``normalize_bearing_degrees``.

    Args:
        a_lon, a_lat: Longitude and latitude of the start point, in degrees.
        b_lon, b_lat: Longitude and latitude of the end point, in degrees.
    """
    phi_a = math.radians(a_lat)
    phi_b = math.radians(b_lat)
    delta_lon = math.radians(b_lon - a_lon)

    # Components of the direction vector handed to atan2 (first arg, second arg).
    east = math.sin(delta_lon) * math.cos(phi_b)
    north = math.cos(phi_a) * math.sin(phi_b) - math.sin(phi_a) * math.cos(phi_b) * math.cos(delta_lon)

    return normalize_bearing_degrees(math.degrees(math.atan2(east, north)))


def bearing_to_cardinal_8(degrees: float) -> str:
    """Map a bearing in degrees to one of eight compass labels.

    Each label covers a 45-degree sector centred on its direction, so the
    input is shifted by half a sector (22.5 degrees) before bucketing.

    Args:
        degrees: Bearing in degrees, clockwise from north.

    Returns:
        One of "N", "NE", "E", "SE", "S", "SW", "W", "NW".
    """
    compass = ("N", "NE", "E", "SE", "S", "SW", "W", "NW")
    sector = int((degrees + 22.5) // 45) % 8
    return compass[sector]


def explode_to_lines(geom: shapely.Geometry) -> List[LineString]:
    """Flatten a geometry into a list of its LineString parts.

    Args:
        geom: A geometry object, or None.

    Returns:
        ``[geom]`` for a LineString, the LineString members of a
        MultiLineString, and an empty list for None or any other geometry type.
    """
    if geom is None:
        return []
    if isinstance(geom, LineString):
        return [geom]
    if geom.geom_type != "MultiLineString":
        # Points, polygons, collections, ... contribute no road segments.
        return []
    return [part for part in geom.geoms if isinstance(part, LineString)]


def _bounds_in_melbourne(b: Tuple[float, float, float, float]) -> bool:
    """Check whether a bounding box lies inside a fixed box around Melbourne.

    Args:
        b: Bounds as ``(lon_min, lat_min, lon_max, lat_max)``.

    Returns:
        True when latitudes are within [-38.8, -37.0] and longitudes within
        [144.0, 146.2]; False otherwise.
    """
    lon_min, lat_min, lon_max, lat_max = b
    lat_ok = lat_min >= -38.8 and lat_max <= -37.0
    lon_ok = lon_min >= 144.0 and lon_max <= 146.2
    return lat_ok and lon_ok


def _infer_to_wgs84(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Return ``gdf`` expressed in EPSG:4326.

    A frame that declares a CRS is returned as-is when that CRS resolves to
    EPSG 4326 and is reprojected otherwise. A frame without a CRS is simply
    labelled as EPSG:4326; its coordinates are not transformed.
    """
    crs = gdf.crs
    if not crs:
        # NOTE(review): this assumes the raw coordinates are already lon/lat
        # degrees; no check is made here — confirm for the roads file in use.
        return gdf.set_crs(4326, allow_override=True)
    if crs.to_epsg() == 4326:
        return gdf
    return gdf.to_crs(4326)


def load_local_roads(roads_path: Path) -> Tuple[List[LineString], Optional[STRtree]]:
    """Read the roads file and build a spatial index over its line segments.

    Args:
        roads_path: Path to the roads GeoPackage.

    Returns:
        A tuple ``(line_geoms, tree)``: the flattened LineStrings (after the
        frame has been passed through ``_infer_to_wgs84``) and an STRtree
        built from that same list.

    Raises:
        FileNotFoundError: If ``roads_path`` does not exist.
        RuntimeError: If the file has no rows or contains no line geometries.
    """
    if not roads_path.exists():
        raise FileNotFoundError(f"Roads gpkg not found: {roads_path}")

    roads_gdf = gpd.read_file(roads_path)
    if roads_gdf.empty:
        raise RuntimeError("Roads gpkg is empty")

    roads_wgs84 = _infer_to_wgs84(roads_gdf)
    # Multi-part geometries are split so every indexed item is a single line.
    line_geoms: List[LineString] = [
        segment
        for geom in roads_wgs84.geometry
        for segment in explode_to_lines(geom)
    ]
    if not line_geoms:
        raise RuntimeError("No LineString geometries found in roads")

    return line_geoms, STRtree(line_geoms)


def nearest_line_with_strtree(point: Point, lines: List[LineString], tree: Optional[STRtree]) -> Optional[LineString]:
    """Return the road line closest to ``point``.

    The spatial index is used when available; otherwise (or when the index
    returns nothing usable) every line in ``lines`` is scanned.

    Args:
        point: Query location.
        lines: Candidate lines, used for the brute-force fallback.
        tree: Optional STRtree built over the candidate lines.

    Returns:
        The nearest LineString, or None when there is nothing to search.
    """
    if tree is not None:
        best = tree.nearest(point)
        if isinstance(best, LineString):
            # Shapely 1.x: nearest() hands back the geometry itself.
            return best
        if isinstance(best, (int, np.integer)):
            # Shapely 2.x: nearest() hands back an index into the tree's
            # geometries. Without this branch the index was never used and
            # every query fell through to the linear scan below.
            return tree.geometries[int(best)]
    if not lines:
        # min() on an empty sequence would raise; callers expect None.
        return None
    return min(lines, key=point.distance)


def compute_orientation_from_local_roads(lat: float, lon: float, lines: List[LineString], tree: Optional[STRtree]) -> Tuple[Optional[float], Optional[str]]:
    """Estimate a property's orientation from its nearest road.

    The property location is projected onto the nearest road line, and the
    bearing is taken from that road point to the property.

    Args:
        lat: Property latitude in degrees.
        lon: Property longitude in degrees.
        lines: Road lines to search.
        tree: Optional STRtree over ``lines``.

    Returns:
        ``(bearing_degrees, cardinal_8)``, or ``(None, None)`` when no road
        is available or the property sits exactly on the road, in which case
        the direction is undefined.
    """
    pt = Point(lon, lat)
    line = nearest_line_with_strtree(pt, lines, tree)
    if line is None:
        return None, None

    # Closest location on the road to the property.
    nearest_on_road = line.interpolate(line.project(pt))

    # A zero-length road-to-property vector has no direction; without this
    # guard atan2(0, 0) would report a spurious 0 degrees / "N".
    if nearest_on_road.x == pt.x and nearest_on_road.y == pt.y:
        return None, None

    # NOTE(review): the bearing runs road -> property. If "facing" is meant
    # to be the direction the property looks toward the road, this is the
    # opposite bearing — confirm the intended convention.
    facing = segment_bearing_degrees(nearest_on_road.x, nearest_on_road.y, pt.x, pt.y)
    return facing, bearing_to_cardinal_8(facing)


def filter_melbourne_bbox(df: pd.DataFrame, lat_col: str, lon_col: str) -> pd.DataFrame:
    """Keep only the rows whose coordinates fall inside a Melbourne bounding box.

    Args:
        df: Input table.
        lat_col: Name of the latitude column.
        lon_col: Name of the longitude column.

    Returns:
        A copy of the rows with latitude in [-38.3, -37.4] and longitude in
        [144.4, 145.6] (bounds inclusive).
    """
    lat_bounds = (-38.3, -37.4)
    lon_bounds = (144.4, 145.6)

    latitudes = df[lat_col].astype(float)
    longitudes = df[lon_col].astype(float)
    inside = latitudes.between(*lat_bounds) & longitudes.between(*lon_bounds)

    return df.loc[inside].copy()


In [56]:
# Load roads
# `lines` and `tree` are reused by the orientation cell further down.
lines, tree = load_local_roads(ROADS_GPKG)
# Display how many road line segments were loaded.
len(lines)


173

In [57]:
# Helper to detect lat/lon columns

def find_lat_lon_columns(df: pd.DataFrame) -> Tuple[str, str]:
    """Detect the latitude and longitude columns of ``df`` by name.

    Column labels are compared case-insensitively against a fixed list of
    candidates; earlier candidates take priority.

    Args:
        df: Table whose columns are inspected.

    Returns:
        ``(lat_col, lon_col)`` using the original column labels.

    Raises:
        ValueError: If either column cannot be found.
    """
    # str() keeps non-string labels (e.g. integer headers) from crashing
    # on .lower(); they simply never match a candidate.
    cols = {str(c).lower(): c for c in df.columns}
    lat_candidates = ['lat', 'latitude', 'y']
    lon_candidates = ['lon', 'longitude', 'x']
    lat_col = next((cols[c] for c in lat_candidates if c in cols), None)
    lon_col = next((cols[c] for c in lon_candidates if c in cols), None)
    if lat_col is None or lon_col is None:
        raise ValueError(f"Could not detect lat/lon columns. Columns present: {list(df.columns)}")
    return lat_col, lon_col



In [58]:
# Load source points (listings only)
df = pd.read_csv(LISTINGS_CSV)
address_col = 'address'

# Detect which columns hold the coordinates (raises ValueError if none match).
lat_col, lon_col = find_lat_lon_columns(df)

# Filter Melbourne and drop NaNs
# Rows without coordinates are dropped first; then only rows inside the
# Melbourne bounding box are kept.
pts = df.dropna(subset=[lat_col, lon_col]).copy()
pts = filter_melbourne_bbox(pts, lat_col, lon_col)
# A falsy SAMPLE_SIZE (None or 0) means every remaining row is processed.
if SAMPLE_SIZE:
    pts = pts.head(SAMPLE_SIZE).copy()

print('listings', len(pts))
# Preview the address column when the listings file has one.
pts[[address_col] if address_col in pts.columns else []].head(3)


listings 500


Unnamed: 0,address
0,802/6 Leicester Street St
1,5/30 Park Drive
2,10/1526 High Street


In [59]:
# Compute orientations
# Only the KEEP_COLS that actually exist in the listings are carried over.
carry_cols = [name for name in KEEP_COLS if name in pts.columns]

results = []
for _, listing in pts.iterrows():
    latitude = float(listing[lat_col])
    longitude = float(listing[lon_col])
    bearing, cardinal = compute_orientation_from_local_roads(latitude, longitude, lines, tree)

    # Carried listing fields come first, followed by the computed fields.
    record = {name: listing[name] for name in carry_cols}
    record.update({
        'lat': latitude,
        'lon': longitude,
        'orientation_degrees': bearing,
        'orientation_cardinal': cardinal,
    })
    results.append(record)

out = pd.DataFrame(results)
out.head()


Unnamed: 0,listing_id,weekly_rent,bond,bedrooms,bathrooms,car_spaces,property_type_primary,property_type_secondary,address,suburb,state,postcode,lat,lon,orientation_degrees,orientation_cardinal
0,10014450,550.0,,2.0,1.0,0.0,Apartment,Apartment / Unit / Flat,802/6 Leicester Street St,Carlton,VIC,3053.0,-37.806138,144.960287,229.911711,SW
1,10052357,420.0,1825.0,1.0,1.0,1.0,Apartment,Apartment / Unit / Flat,5/30 Park Drive,Parkville,VIC,3052.0,-37.796129,144.954425,230.007856,SW
2,10066970,440.0,1912.0,1.0,1.0,1.0,Apartment,Apartment / Unit / Flat,10/1526 High Street,Glen Iris,VIC,3146.0,-37.85926,145.048375,229.161081,SW
3,10105510,515.0,2238.0,2.0,1.0,1.0,Apartment,Apartment / Unit / Flat,3/38 Creswick Street,Hawthorn,VIC,3122.0,-37.818144,145.01598,229.591889,SW
4,10117843,400.0,1738.0,1.0,1.0,1.0,Apartment,Apartment / Unit / Flat,3/36 Clarke Street,Prahran,VIC,3181.0,-37.849243,145.000557,229.435169,SW


In [60]:
# Save + quick QA
# Create the output directory if it does not exist yet.
OUTPUT_CSV.parent.mkdir(parents=True, exist_ok=True)
out.to_csv(OUTPUT_CSV, index=False)

print(f"Wrote {len(out)} rows -> {OUTPUT_CSV}")
# Distribution of cardinal directions, including rows with no orientation.
# NOTE(review): a result concentrated in a single direction may mean the
# roads layer does not cover the listings — verify against the roads file.
print(out['orientation_cardinal'].value_counts(dropna=False).sort_index())


Wrote 500 rows -> output/property_orientation.csv
orientation_cardinal
SW    500
Name: count, dtype: int64
