
*Technical University of Munich<br>
Professorship of Environmental Sensing and Modeling<br><br>*
**Author:**  Daniel Kühbacher<br>
**Date:**  15.11.2023

--- 

# Exploring the counting data

<!--Notebook description and usage information-->


In [1]:
import sys

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import seaborn as sns

from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
output_notebook()

# import custom modules
sys.path.append('../utils')
from excel_calendar import Calendar
import data_paths


# Import and prepare datasets

In [2]:
# import counting data
mst_file_path = data_paths.MST_COUNTING_PATH + 'preprocessed_lhm_counting_data.parquet'
bast_file_path = data_paths.BAST_COUNTING_PATH + 'preprocessed_bast_counting_data.parquet'

counting_data = pd.concat([pd.read_parquet(mst_file_path),
                           pd.read_parquet(bast_file_path)], axis=0)

# import visum_links data 
visum_file_path = data_paths.VISUM_FOLDER_PATH + 'visum_links.gpkg'
visum = gpd.read_file(visum_file_path)

# import calender
cal_obj = Calendar()

### Aggregate single detector counts and divide in "volume" and "speed" dataset

In [11]:
# aggregate and reduce to volume dataset
volume  = counting_data.groupby(['metric','road_link_id', 
                               'vehicle_class','date']).sum(numeric_only = True).loc['volume']
speed = counting_data.groupby(['metric','road_link_id', 
                               'vehicle_class','date']).mean(numeric_only = True).loc['speed']
volume = volume.drop(['detector_id'], axis =1)
volume = volume.reset_index()
speed = speed.reset_index()

### Append road type information

In [4]:
# insert road type
road_types = visum.set_index('road_link_id')['road_type'].to_dict()
volume.insert(4,'road_type' , volume['road_link_id'].map(road_types))

### Append day type information

In [9]:
dates = volume['date'].unique()
day_types = {date:cal_obj.get_day_type(date) for date in dates}
volume.insert(5, 'day_type', volume['date'].map(day_types))

# Cleaning of the counting data

In [179]:
# only take data into account if daily value >1 and < 40000

# sum of all hour values of the day needs to be consistent with the sum of the day -> error_bound e=2%
e = 0.02
volume_processed = volume[volume.iloc[:,2:].sum(axis=1).between(
    volume['daily_value']*(1-e),volume['daily_value']*(1+e))]

# how to remove the outliers?

In [196]:
road_link = 1.28950000e+04
#road_link = 5.62782535e+08
#road_link = 9.02000000e+02
#road_link = 1.23500000e+04
#road_link = 2723.0
#road_link = 1.82410000e+04

p = figure(x_axis_type="datetime", width=800, height=350)
source = ColumnDataSource(volume_processed.loc[road_link,'PC',:])
p.line('date', 'daily_value', source=source)
show(p)

In [184]:
volume_processed.reset_index()['road_link_id'].unique()

array([0.00000000e+00, 3.80000000e+01, 6.00000000e+01, 7.20000000e+01,
       4.19000000e+02, 7.92000000e+02, 8.10000000e+02, 9.02000000e+02,
       1.06800000e+03, 2.72300000e+03, 9.65500000e+03, 1.15550000e+04,
       1.21380000e+04, 1.22420000e+04, 1.23080000e+04, 1.23500000e+04,
       1.23700000e+04, 1.24180000e+04, 1.24960000e+04, 1.25040000e+04,
       1.26810000e+04, 1.28890000e+04, 1.28950000e+04, 1.29860000e+04,
       1.31490000e+04, 1.31760000e+04, 1.35870000e+04, 1.36820000e+04,
       1.55710000e+04, 1.60250000e+04, 1.61440000e+04, 1.61840000e+04,
       1.82410000e+04, 1.82620000e+04, 1.82840000e+04, 3.08840000e+04,
       3.57480000e+04, 4.67350000e+04, 4.68330000e+04, 4.71580000e+04,
       4.73910000e+04, 4.79850000e+04, 4.80420000e+04, 4.81160000e+04,
       4.81910000e+04, 4.81990000e+04, 4.82220000e+04, 8.02360000e+04,
       8.02390000e+04, 8.06090000e+04, 8.06450000e+04, 8.11020000e+04,
       5.27997340e+07, 5.28043160e+07, 5.28043390e+07, 5.28043410e+07,
      