# Demo: Calculating $q_K^{null}$ for comparison with the empirical patterns

This notebook demonstrates how the null model is generated using the **Density-EPR (d-EPR)** framework, and how the resulting null local alignment **$q_K^{null}$** is computed.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import os
from pathlib import Path

# Setup project root path
def resolve_project_root(start: Path) -> Path:
    """Return the project root that corresponds to a working directory.

    When ``start`` is itself the ``notebooks`` directory, its parent is
    returned; otherwise ``start`` is returned unchanged.

    Only the final path component is compared. A substring test on the whole
    path would also fire for an unrelated ancestor or a folder whose name
    merely contains the text (e.g. ``/home/me/notebooks_archive/project``)
    and would then point one level above the real project root.
    """
    return start.parent if start.name == 'notebooks' else start


project_root = resolve_project_root(Path.cwd())

# Add src to path (only once, so re-running the cell does not stack duplicates)
src_path = str(project_root / 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Now import modules
import mobility_utils as mu
from k_visitation import calculate_both_k_places, calculate_qk_alignment

# Configuration
min_locations = 10
random_seed = 33

# Apply the seed so that a fresh "Restart & Run All" draws the same random
# numbers from NumPy's global generator on every run.
# NOTE(review): this only makes the demo reproducible if mobility_utils samples
# through np.random's global state -- confirm against its implementation.
np.random.seed(random_seed)

## Generating synthesised visitation based on d-EPR model

### Mock data generation
For privacy reasons, we are not able to share the raw mobile phone stays used for this part of the study. 
Therefore, we replaced the real data with a fully randomised dataset that mimics its structure. This dataset is generated in the same format as the real mobility data, but all of its attributes are randomly generated; it neither contains real location data nor follows the empirical mobility patterns. 

We only use it to demonstrate the process of the d-EPR model.

In [None]:
# Build the mock inputs: city grid, user home lookup, stays table and the
# list of amenity column names (unpacked in the order the helper returns them)
(
    mock_city_grid,
    mock_user_home,
    mock_stays,
    amenity_cols,
) = mu.generate_mock_data()

# Report how many distinct users and grid locations were produced
print(f"Generated data for {mock_stays['user_id'].nunique()} users and {len(mock_city_grid)} locations.")

# Preview the first stays
mock_stays.head()

### Calibrating parameters for the d-EPR model

We use the visitation data to calibrate the exploration parameters for the d-EPR model. $\rho$ is user-specific: each user is calibrated individually.

In [None]:
# Every distinct user id found in the stays table
unique_users = mock_stays['user_id'].unique()

# One calibration call per user
rho_by_user = {
    user: mu.calculate_user_specific_rho(mock_stays, user)
    for user in unique_users
}

# Keep only users whose calibration produced a usable (non-NaN) value
user_rho_dict = {
    user: rho
    for user, rho in rho_by_user.items()
    if not np.isnan(rho)
}

# Flat list of the retained rho values
rhos = list(user_rho_dict.values())

### Simulation: d-EPR

This process generates mobility trajectories for each individual, and we aggregate their visitations in the same format as the empirical data.

In [None]:
# Restrict the demo to the first five distinct home grids
sample_grids = mock_user_home['home_gid9'].unique()[:5]

# Run the d-EPR simulation once per selected home grid and collect the results
all_synthetic_data = [
    mu.simulate_depr_for_grid(
        home_gid9=home_grid,
        df_visitation=mock_stays,
        user_home_lookup=mock_user_home,
        city_grid=mock_city_grid,
        user_rho_dict=user_rho_dict,
        max_steps=200,  # Short simulation for demo
    )
    for home_grid in sample_grids
]

# Stack the per-grid results into one frame with a fresh 0..n-1 index
df_synthetic = pd.concat(all_synthetic_data, ignore_index=True)
df_synthetic.head()

In [None]:
# NOTE: this cell reassigns `df_synthetic`; run it once after the simulation
# cell (a second run would merge the amenity columns onto the frame again).

# Home coordinates per user: look up each user's home grid cell in the city grid
grid_coords = mock_city_grid[['h3_index', 'lat', 'lng']]
home_coords = (
    mock_user_home
    .merge(grid_coords, left_on='home_gid9', right_on='h3_index')
    .rename(columns={'lat': 'home_lat', 'lng': 'home_lng'})
    [['user_id', 'home_lat', 'home_lng']]
)

# Coordinates and amenity counts of every grid cell, used for the stay location
stay_details = mock_city_grid[['h3_index', 'lat', 'lng'] + amenity_cols]

# Attach the home coordinates (default inner join on user_id), then the stay
# location details (left join, so every synthetic visit row is kept)
df_synthetic = (
    df_synthetic
    .merge(home_coords, on='user_id')
    .merge(stay_details, left_on='stay_gid10', right_on='h3_index', how='left')
    .rename(columns={'lat': 'stay_lat', 'lng': 'stay_lng'})
)

# Distance between home and stay location for each synthetic visit (vectorized)
df_synthetic['home_dist'] = mu.haversine_distance_vectorized(
    df_synthetic['home_lat'].values,
    df_synthetic['home_lng'].values,
    df_synthetic['stay_lat'].values,
    df_synthetic['stay_lng'].values,
)

# Remove the helper columns (stay_gid10 and the amenity columns stay) and
# give the visit-frequency column the name `visit_freq`
helper_columns = ['home_lat', 'home_lng', 'stay_lat', 'stay_lng', 'h3_index']
df_synthetic = (
    df_synthetic
    .drop(columns=helper_columns)
    .rename(columns={'visit_freq_synth': 'visit_freq'})
)

df_synthetic.head()

## K-visitation and $q_K$ on null model

In [None]:
# Amenity categories passed to the K-visitation calculation
amenity_list = [
    'CIVIC_RELIGION', 'CULTURE', 'DINING', 'EDUCATION', 'FITNESS',
    'GROCERIES', 'HEALTHCARE', 'RETAIL', 'SERVICE', 'TRANSPORT'
]

# One integer entry per amenity category, all set to 1.
# NOTE(review): presumably the minimum count required per category -- confirm
# against calculate_both_k_places.
smallest_values = np.ones(len(amenity_list), dtype=int)

# Calculate K-freq and K-dist for all users
places_k_null = calculate_both_k_places(df_synthetic, amenity_list, smallest_values)

# Calculate qK alignment for each user from the K-freq and K-dist columns
user_qk = calculate_qk_alignment(
    places_df=places_k_null,
    user_col='user_id',
    k_freq_col='k_freq',
    k_dist_col='k_dist'
)

# End on a bare expression so the preview uses the notebook's rich display,
# consistent with the other preview cells in this notebook
user_qk.head()