<a href="https://colab.research.google.com/github/yadavrishikesh/BayesNF/blob/main/EcoCounter_DailyBike_DataAnalysis_2021-2022.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install necessary packages
!pip install -q bayesnf cartopy contextily geopandas

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.0/56.0 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.7/11.7 MB[0m [31m102.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.6/323.6 kB[0m [31m24.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.9/23.9 MB[0m [31m77.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.2/9.2 MB[0m [31m114.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.5/46.5 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m77.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.4/125.4 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Silence every Python warning for the rest of the session. This is installed
# before the imports below so that warnings raised while importing are hidden too.
import warnings
warnings.simplefilter('ignore')

import contextily as ctx
import geopandas as gpd
import jax
import numpy as np
import pandas as pd
import os
import datetime
from urllib.parse import urlparse
from bayesnf.spatiotemporal import BayesianNeuralFieldMAP

In [3]:
def _dataset_stem(url):
    """Return the file name found in ``url``, cut at its first dot."""
    return os.path.basename(urlparse(url).path).split('.')[0]


def run_model(train_url, test_url, ensemble_size=32, num_epochs=2500, seed=0):
    """
    Train a Bayesian Neural Field MAP model on one CSV and evaluate it on another.

    The response is modelled on the ``log1p`` scale: the training response is
    transformed before fitting, and every value in the returned frame (true
    values and predictive quantiles) is on that same scale.

    Parameters:
    - train_url (str): URL (or local path) of the training dataset CSV file.
    - test_url (str): URL (or local path) of the test dataset CSV file.
    - ensemble_size (int): Number of ensemble members passed to ``model.fit``.
    - num_epochs (int): Number of training epochs passed to ``model.fit``.
    - seed (int): Integer used to build the JAX PRNG key for training.

    Both CSV files are read with their first column as the row index and must
    contain a ``datetime`` column, a ``response`` column and the feature columns
    listed in the model definition below.

    Returns:
    - predictions_df (DataFrame): One row per test row with a non-missing
      response, indexed by the test file's row labels, with columns
      ``datetime`` (the row's date), ``True_Values`` (log1p of the observed
      response), ``Predicted_Mean`` (the 0.5 predictive quantile, i.e. the
      median; the name is kept for compatibility), ``Quantile_0.025`` and
      ``Quantile_0.975``.

    Side effects:
    - Writes ``predictions_output.csv`` into a new directory
      ``outputs/<train name>_<test name>_<timestamp>`` under the current
      working directory.
    """
    # A timestamped directory per call keeps repeated runs from overwriting each other.
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    run_label = f'{_dataset_stem(train_url)}_{_dataset_stem(test_url)}_{timestamp}'
    output_dir = os.path.join(os.getcwd(), 'outputs', run_label)
    os.makedirs(output_dir, exist_ok=True)

    # Let pandas read the URLs directly: a failed download raises immediately,
    # nothing is interpolated into a shell command, and no fixed-name scratch
    # files are left behind in the working directory.
    df_train = pd.read_csv(train_url, index_col=0, parse_dates=['datetime'])
    df_test = pd.read_csv(test_url, index_col=0, parse_dates=['datetime'])

    # Log-transform the response on a copy so the loaded training frame stays untouched.
    df_train_log = df_train.copy()
    df_train_log['response'] = np.log1p(df_train_log['response'])

    # Define and train the Bayesian Neural Field model
    model = BayesianNeuralFieldMAP(
        width=512,
        depth=2,
        freq='D',
        seasonality_periods=['W'],
        num_seasonal_harmonics=[2],
        feature_cols=['datetime', 'lat', 'lon', 'elev', 'walkscore', 'num_ppo', 'temp', 'precp.dummy', 'weeknd.dummy', 'year.dummy-2022',
                      'visbl', 'wsp'],
        target_col='response',
        observation_model='NORMAL',
        timetype='index'
    )

    model = model.fit(df_train_log,
                      seed=jax.random.PRNGKey(seed),
                      ensemble_size=ensemble_size,
                      num_epochs=num_epochs)

    # Only rows with an observed response can be compared against a prediction.
    df_test_cleaned = df_test.dropna(subset=['response'])

    # The first return value is not needed here; the quantiles come back in the
    # order they were requested.
    _, yhat_quantiles = model.predict(df_test_cleaned, quantiles=(0.025, 0.5, 0.975))
    q_low, q_median, q_high = (np.asarray(q) for q in yhat_quantiles)

    # Build every column from plain arrays and set the index explicitly.
    # 'datetime' comes from the datetime column: the frame index is only the
    # row label of the CSV (its first column), not a date.
    predictions_df = pd.DataFrame(
        {
            'datetime': df_test_cleaned['datetime'].to_numpy(),
            'True_Values': np.log1p(df_test_cleaned['response']).to_numpy(),
            'Predicted_Mean': q_median,
            'Quantile_0.025': q_low,
            'Quantile_0.975': q_high,
        },
        index=df_test_cleaned.index,
    )

    # Save output in the unique outputs directory
    output_path = os.path.join(output_dir, 'predictions_output.csv')
    predictions_df.to_csv(output_path, index=False)

    return predictions_df


In [4]:
# Spatial interpolation scenario with 50% missing (the `miss_prop_50` train/test files).
train_data_url, test_data_url = (
    'https://raw.githubusercontent.com/yadavrishikesh/BayesNF/main/data/train_data_Eco_2021-2022_pred-type_spatIntpl_miss_prop_50.csv',
    'https://raw.githubusercontent.com/yadavrishikesh/BayesNF/main/data/test_data_Eco_2021-2022_pred-type_spatIntpl_miss_prop_50.csv',
)
predictions_df = run_model(train_url=train_data_url, test_url=test_data_url)
# Show the first rows of this scenario's predictions as the cell output.
predictions_df.head()

Unnamed: 0,datetime,True_Values,Predicted_Mean,Quantile_0.025,Quantile_0.975
1,1,5.762051,6.000031,4.737728,7.256806
2,2,5.978886,6.018563,4.667389,7.183027
3,3,7.295735,7.530823,6.318292,8.421696
4,4,7.662468,7.791656,6.519702,8.653493
5,5,7.427144,7.66347,6.628036,8.44629


In [5]:
# Spatial interpolation scenario with 60% missing (the `miss_prop_60` train/test files).
train_data_url, test_data_url = (
    'https://raw.githubusercontent.com/yadavrishikesh/BayesNF/main/data/train_data_Eco_2021-2022_pred-type_spatIntpl_miss_prop_60.csv',
    'https://raw.githubusercontent.com/yadavrishikesh/BayesNF/main/data/test_data_Eco_2021-2022_pred-type_spatIntpl_miss_prop_60.csv',
)
predictions_df = run_model(train_url=train_data_url, test_url=test_data_url)
# Show the first rows of this scenario's predictions as the cell output.
predictions_df.head()

Unnamed: 0,datetime,True_Values,Predicted_Mean,Quantile_0.025,Quantile_0.975
1,1,5.762051,6.143446,4.372943,7.2054
2,2,5.978886,6.111694,4.415033,7.161429
3,3,7.295735,7.598288,6.37319,8.48627
4,4,7.662468,7.835866,6.79361,8.741181
5,5,7.427144,7.701569,6.627741,8.590141


In [6]:
# Spatial interpolation scenario with 80% missing (the `miss_prop_80` train/test files).
train_data_url, test_data_url = (
    'https://raw.githubusercontent.com/yadavrishikesh/BayesNF/main/data/train_data_Eco_2021-2022_pred-type_spatIntpl_miss_prop_80.csv',
    'https://raw.githubusercontent.com/yadavrishikesh/BayesNF/main/data/test_data_Eco_2021-2022_pred-type_spatIntpl_miss_prop_80.csv',
)
predictions_df = run_model(train_url=train_data_url, test_url=test_data_url)
# Show the first rows of this scenario's predictions as the cell output.
predictions_df.head()

Unnamed: 0,datetime,True_Values,Predicted_Mean,Quantile_0.025,Quantile_0.975
1,1,5.762051,6.054533,4.487356,7.195521
2,2,5.978886,6.071364,4.446388,7.135068
3,3,7.295735,7.519632,6.123805,8.741311
4,4,7.662468,7.655693,6.526039,8.989957
5,5,7.427144,7.583431,6.395247,8.7461


In [10]:
# Mount Google Drive to access it from Colab
#from google.colab import drive
#drive.mount('/content/drive')

# Copy the 'outputs' directory to Google Drive
!cp -r /content/outputs /content/drive/MyDrive/BayesNF/outputs_Eco_daily