In [1]:
import numpy as np
import pandas as pd
import h5py
import os
import gc
import time
from tqdm import tqdm_notebook as tqdm

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

pd.set_option('display.float_format', lambda x: '%.3f' % x)

  from ._conv import register_converters as _register_converters


In [2]:
SWIFT_folder = r'C:\Projects\SWIFT\SWIFT_Workspace\Scenarios'
SCEN = 'Scenario_2045_S12_Beta'
flat_skim_file = os.path.join(SWIFT_folder, SCEN, r'STM\STM_A\01_DynusT\03_Model\Vehicle_Skim.csv')

In [3]:
def read_trajectory_skim_file(flat_skim_file, chunksize=5_000_000):
    
    """
    
    Read the csv file generated from vehicle trajectory itf file
    
    """
    
    df_list = []
    
    dtypes = {
        'vid': np.int32, 
        'tag': np.int8,
        'origz': np.int16,
        'destz': np.int16,
        'class': np.uint8,
        'stime': np.float16,
        'travel_time': np.float16,
        'nodes': np.int32,
        'vehtype': np.uint8,
        'vot': np.float16,
        'toll': np.float16,
    }
    
    usecols = tuple(dtypes.keys())
    
    #  
    for df_chunk in tqdm(pd.read_csv(flat_skim_file, chunksize=chunksize, usecols=usecols, dtype=dtypes)):
    
#         expand_data(df_chunk)
        df_list.append(df_chunk) 
    
    df = pd.concat(df_list)
    del df_list
    return df

In [18]:
def read_vehicle_skim_file(flat_skim_file, chunksize=5_000_000):
    """
    
    Read the csv file generated from b03 trajectories
    
    """
    df_list = []
    
    dtypes = {
        'vid': np.int32, 
        'tag': np.int8,
        'uclass': np.int8,
        'origz': np.int16,
        'destz': np.int16,
        'start': np.float16,
        'end': np.float16,
        'delay': np.float16,
        'dist': np.float16,
        'nodes': np.int32,
        'node0': np.int32,
        'node1': np.int32,
        'vtyp': np.uint8,
        'totveh': np.int64,
        'purpose': np.uint8,
    }
    
    usecols = tuple(dtypes.keys())
    
    for df_chunk in tqdm(pd.read_csv(flat_skim_file, chunksize=chunksize, usecols=usecols, dtype=dtypes)):
        df_list.append(df_chunk) 
    
    df = pd.concat(df_list)
    del df_list
    return df

In [19]:
df = read_vehicle_skim_file(flat_skim_file)

A Jupyter Widget




In [7]:
df.columns

Index(['vid', 'tag', 'uclass', 'vtyp', 'origz', 'destz', 'nodes', 'node0',
       'node1', 'start', 'end', 'delay', 'dist', 'totveh', 'purpose'],
      dtype='object')

In [8]:
df.shape

(30804331, 15)

In [21]:
df.tail()

Unnamed: 0,vid,tag,uclass,vtyp,origz,destz,nodes,node0,node1,start,end,delay,dist,totveh,purpose
30804326,30804166,1,3,5,3662,3668,1,390038,364069,1438.0,1620.0,0.6,0.357,3568392492,20
30804327,30804184,1,3,6,3668,3662,2,390038,369558,1438.0,1620.0,0.7,5.742,3568392496,2
30804328,30804205,1,3,1,3932,4072,27,396120,362414,1438.0,1620.0,0.525,20.734,3568392504,20
30804329,30804240,1,3,1,4216,1104,7,430382,426224,1438.0,1620.0,0.0,2.906,3568392612,10
30804330,30804278,1,3,1,3669,3693,1,390038,364069,1439.0,1620.0,0.0,0.396,3568392640,20


In [23]:
df['travel_time'] = df.end - df.start

In [16]:
# which completed (tag == 2) trip has the longest travel time
df.loc[df.loc[df.tag == 2].travel_time.idxmax()]

vid              9654552.000
tag                    2.000
uclass                 3.000
vtyp                   1.000
origz               3668.000
destz               3662.000
nodes                 14.000
node0             390038.000
node1             364069.000
start                661.000
end                 1591.000
delay                661.500
dist                  27.578
totveh        -729470076.000
purpose               10.000
travel_time          930.000
Name: 30685567, dtype: float64

In [24]:
# for each vtyp, which trip has the longest travel time; Split-apply-combine paradigm
def longest_travel_time(g):
    return g.loc[g.loc[g.tag == 2].travel_time.idxmax()]

df.groupby('vtyp').apply(longest_travel_time)


Unnamed: 0_level_0,vid,tag,uclass,vtyp,origz,destz,nodes,node0,node1,start,end,delay,dist,totveh,purpose,travel_time
vtyp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,9654552.0,2.0,3.0,1.0,3668.0,3662.0,14.0,390038.0,364069.0,661.0,1591.0,661.5,27.578,3565497220.0,10.0,930.0
2,10876293.0,2.0,3.0,2.0,3668.0,5252.0,39.0,390038.0,364069.0,702.0,1620.0,23.125,69.125,3566151592.0,3.0,918.0
3,10978697.0,2.0,3.0,3.0,3669.0,3534.0,53.0,390038.0,364069.0,705.5,1601.0,23.953,37.438,3565734012.0,50.0,895.5
4,12282648.0,2.0,3.0,4.0,3871.0,5261.0,40.0,398165.0,397516.0,750.5,1573.0,796.0,17.281,3565040400.0,6.0,822.5
5,9701882.0,2.0,3.0,5.0,3662.0,3662.0,14.0,390038.0,364069.0,662.5,1591.0,662.5,27.047,3565502424.0,20.0,928.5
6,8973484.0,2.0,3.0,6.0,3668.0,3672.0,11.0,390038.0,364069.0,637.0,1564.0,656.5,23.016,3564747060.0,10.0,927.0
9,11168666.0,2.0,3.0,9.0,3669.0,3668.0,10.0,390038.0,364069.0,712.0,1589.0,19.5,22.672,3565451572.0,40.0,877.0


In [22]:
np.sum(df.travel_time.values, dtype=np.float64) / 60.0

18749044.308287453

In [17]:
df.loc[df.vid == 24476]

Unnamed: 0,vid,tag,origz,destz,class,stime,travel_time,nodes,vehtype,vot,toll
18150,24476,2,909,915,3,35.094,4.648,11,2,30.797,0.0


In [18]:
df.dtypes

vid              int32
tag               int8
origz            int16
destz            int16
class            uint8
stime          float16
travel_time    float16
nodes            int32
vehtype          uint8
vot            float16
toll           float16
dtype: object

In [25]:
transims_skim_file = os.path.join(SWIFT_folder, SCEN, r'STM\STM_A\02_TrafficPredictor\03_Demand\FINAL_Skims.csv')

In [27]:
df_transims_skim = pd.read_csv(transims_skim_file)

KeyboardInterrupt: 

In [None]:
df_transims_skim.columns

In [None]:
df_transims_skim.columns.loc[df_transims_skim.columns.HHOLD == 24476]

In [None]:
df_transims_tt = df_transims_skim.loc[:, ['HHOLD', 'TIME']]

In [None]:
df_join = pd.merge(df, df_transims_tt, left_on='vid', right_on='HHOLD')

In [None]:
df_join['TIME'] = df_join['TIME'] / 60.0

In [None]:
df_join['diff'] = df_join.travel-time = df_join.TIME

In [None]:
df_join.diff.describe()