In [1]:
import os

from database import data_utils

import contextily as cx
import geopandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import shapely.geometry

In [2]:
# Combine everything collected in the validation folder and turn it into traces.
data = data_utils.combine_all_folder_data("../data/kcm_validation")
# Columns handed to calculate_trace_df as its last argument; the displayed result
# carries a matching "<column>_diff" column for each of them.
diff_columns = ['orientation','scheduledeviation','tripdistance','locationtime']
traces = data_utils.calculate_trace_df(data, 'file', 'tripid', 'locationtime', 'lat', 'lon', diff_columns)
# Speed = change in trip distance divided by change in location time
# (presumably metres per second, going by the column name - TODO confirm units).
traces['speed_m_s'] = traces['tripdistance_diff'] / traces['locationtime_diff']
# Keep only rows whose speed is strictly between 0 and 35; NaN and infinite
# speeds (e.g. from a zero time difference) fail the comparison and are dropped.
plausible_speed = (traces['speed_m_s'] > 0) & (traces['speed_m_s'] < 35)
traces = traces.loc[plausible_speed]
traces

Unnamed: 0,tripid,vehicleid,lat,lon,orientation,scheduledeviation,totaltripdistance,tripdistance,closeststop,nextstop,locationtime,collectedtime,file,orientation_diff,scheduledeviation_diff,tripdistance_diff,locationtime_diff,speed_m_s
1,347458161.0,7421.0,47.755872,-122.155458,294.0,0.0,20172.267567,512.141257,78000.0,78000.0,1.662559e+09,1.662559e+09,2022_09_07.pkl,24.0,0.0,503.751811,60.0,8.395864
2,347458161.0,7421.0,47.754482,-122.157012,180.0,0.0,20172.267567,804.312167,76000.0,76000.0,1.662559e+09,1.662559e+09,2022_09_07.pkl,-114.0,0.0,292.170910,35.0,8.347740
5,347458161.0,7421.0,47.754986,-122.155205,270.0,121.0,20172.267567,613.170059,78000.0,78000.0,1.662559e+09,1.662559e+09,2022_09_07.pkl,90.0,0.0,453.770592,54.0,8.403159
6,347458161.0,7421.0,47.754482,-122.155345,180.0,121.0,20172.267567,679.654270,78000.0,76000.0,1.662559e+09,1.662559e+09,2022_09_07.pkl,-90.0,0.0,66.484212,8.0,8.310526
7,347458161.0,7421.0,47.754482,-122.158791,180.0,110.0,20172.267567,937.280590,76000.0,76000.0,1.662559e+09,1.662559e+09,2022_09_07.pkl,0.0,-11.0,257.626320,37.0,6.962874
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273479,589919432.0,7234.0,47.784676,-122.298828,180.0,18.0,14144.174147,13038.784477,1695.0,1696.0,1.668272e+09,1.668272e+09,2022_11_12.pkl,0.0,10.0,89.681765,22.0,4.076444
273480,589919432.0,7234.0,47.784722,-122.303917,180.0,-10.0,14144.174147,13419.028095,1697.0,1698.0,1.668272e+09,1.668272e+09,2022_11_12.pkl,0.0,-28.0,380.243618,21.0,18.106839
273481,589919432.0,7234.0,47.784784,-122.308344,179.0,-25.0,14144.174147,13749.825384,1699.0,1699.0,1.668272e+09,1.668272e+09,2022_11_12.pkl,-1.0,-15.0,330.797289,32.0,10.337415
273482,589919432.0,7234.0,47.784813,-122.310631,179.0,-13.0,14144.174147,13920.729743,1699.0,85250.0,1.668272e+09,1.668272e+09,2022_11_12.pkl,0.0,12.0,170.904358,35.0,4.882982


In [3]:
# Load the pre-processed KCM network segments (displayed below with their
# segment_id and geometry). This cell only loads them; mapping traces onto
# segments is not done here.
# NOTE: pickle.load can execute arbitrary code - only load files this project produced.
segments_path = '../data/processed_networks/kcm_segments.pkl'
with open(segments_path, 'rb') as segments_file:
    kcm_segments = pickle.load(segments_file)
kcm_segments

Unnamed: 0,segment_id,geometry
0,47.1879807_-122.007378_47.1879807_-122.007454,"LINESTRING (-122.00738 47.18798, -122.00745 47..."
1,47.1879807_-122.007454_47.1879921_-122.007538,"LINESTRING (-122.00754 47.18799, -122.00745 47..."
2,47.1879921_-122.005493_47.1879807_-122.007378,"LINESTRING (-122.00549 47.18799, -122.00738 47..."
3,47.1879921_-122.007538_47.1880188_-122.007607,"LINESTRING (-122.00761 47.18802, -122.00754 47..."
4,47.1880188_-122.007607_47.1880722_-122.007774,"LINESTRING (-122.00777 47.18807, -122.00761 47..."
...,...,...
74289,47.8718987_-122.008453_47.8720436_-122.008316,"LINESTRING (-122.00845 47.87190, -122.00832 47..."
74290,47.8720436_-122.008316_47.8721886_-122.008179,"LINESTRING (-122.00818 47.87219, -122.00832 47..."
74291,47.8721886_-122.008179_47.8724899_-122.007881,"LINESTRING (-122.00788 47.87249, -122.00818 47..."
74292,47.8722839_-122.007362_47.8708878_-122.003891,"LINESTRING (-122.00389 47.87089, -122.00736 47..."
