# Estimation and Analysis of the ground-truth OD matrix

#### adding required packages

In [104]:
import pandas as pd
import numpy as np
import pickle
import time
# Install a conda package in the current Jupyter kernel
import sys
# !conda install --yes --prefix {sys.prefix} pyproj
# import sys
# !{sys.executable} -m pip install geopandas

# !pip uninstall shapely
# !conda install -c conda-forge shapely
# !pip uninstall pyproj
# !conda install -c conda-forge pyproj
from pyproj import Proj, transform
from shapely.geometry import Point
import geopandas as gpd

#### specifying the saving location 

In [105]:
# Folder (with trailing slash) used as the path prefix for every input file
# read and every output file name built in this notebook.
savingLoc = "Y:/ZahraEftekhar/phase4/"

#### functions for proximity analysis

Here, we create classes and functions to automate the proximity analysis. This analysis consists of changing the coordinate system and mapping the location coordinates to their associated TAZ (traffic analysis zone).

In [106]:
# This class changes the coordinate system of a location and maps the location to a zone.
class LongLat:
    """A single location that can be re-projected and matched against zone polygons.

    Typical use: ``set_location`` -> ``changeCoordSys`` -> ``zoneMapping`` once
    per candidate polygon, then read ``TAZ``.
    """

    # Building a pyproj Transformer is expensive, so one is kept per
    # (initial, final) CRS pair and shared by all instances.
    _transformers = {}

    def __init__(self, *args):
        # Positional arguments are accepted but ignored.
        # TAZ stays 0 until zoneMapping finds a polygon containing the location.
        self.TAZ = 0

    def set_location(self, x, y):
        """Store the location as a shapely ``Point(x, y)`` in ``self.location``."""
        from shapely.geometry import Point
        self.location = Point(x, y)

    def changeCoordSys(self, initial: str = 'epsg:23031', final: str = 'epsg:28992'):
        """Re-project ``self.location`` from CRS *initial* to CRS *final* in place.

        ``set_location`` must have been called first. ``always_xy=True`` keeps
        the (x, y) = (easting/longitude, northing/latitude) argument order that
        the deprecated ``Proj(init=...)`` + ``transform`` API used.
        """
        from pyproj import Transformer
        from shapely.geometry import Point
        key = (initial, final)
        transformer = LongLat._transformers.get(key)
        if transformer is None:
            transformer = Transformer.from_crs(initial, final, always_xy=True)
            LongLat._transformers[key] = transformer
        new_x, new_y = transformer.transform(self.location.x, self.location.y)
        self.location = Point(new_x, new_y)

    def zoneMapping(self, onepolygon, polygonName):
        """Set ``self.TAZ`` to *polygonName* if *onepolygon* contains the location.

        ``TAZ`` is left untouched when the polygon does not contain the location,
        so calling this for several polygons keeps the last match.
        """
        if onepolygon.contains(self.location):
            self.TAZ = polygonName
# Thin holder for a zone map loaded from a shapefile (SHP).
class TAZmap:
    """Container whose ``map`` attribute is filled by ``set_map``."""

    def __init__(self):
        """Create an empty holder; ``map`` does not exist until ``set_map`` runs."""

    def set_map(self, value):
        """Read the file at path *value* with geopandas and keep it in ``self.map``."""
        from geopandas import read_file
        self.map = read_file(value)

#### building the zero OD Matrix based on the number of zones in the SHP

In [120]:
# Build the all-zero OD matrix: one row/column per zone id listed in the CSV,
# plus an extra zone 0 (the value LongLat.TAZ keeps when no polygon contains a point).
map_mzr = TAZmap()
map_mzr.set_map("{a}amsterdamMezuroZones.shp".format(a=savingLoc))
# Zone polygons from the shapefile.
# NOTE(review): the estimation loop pairs inputs[k] with tazNames[k], which assumes the
# shapefile features and the CSV rows are in the same order -- confirm.
inputs = map_mzr.map.geometry
amsterdamMezuroZones = pd.read_csv("{a}amsterdamMezuroZones.CSV".format(a=savingLoc), usecols=['mzr_id'])
tazNames = amsterdamMezuroZones['mzr_id']
zoneZero = pd.Series(0)
# Series.append was removed in pandas 2.0; pd.concat produces the same label sequence (0, then the zone ids).
matrixRowColNames = tuple(pd.concat([zoneZero, tazNames]))
odsize = len(matrixRowColNames)
# Only the geometry column is needed from here on.
del map_mzr
ODMatrix_df = pd.DataFrame(np.zeros((odsize, odsize), dtype=np.int32), columns=matrixRowColNames,
                           index=matrixRowColNames)  # creating empty OD matrix

## Specifying the OD matrix estimation interval
=========================================================

In [108]:
# Clock times (H:MM:SS) that bound the OD estimation window.
ODstart, ODend = "6:30:00", "9:30:00"
# The same two bounds as Timedelta values, so they can be compared with activity times.
startTime_OD, endTime_OD = (pd.to_timedelta(clock) for clock in (ODstart, ODend))

#### Reading the travel diaries to estimate the OD matrix

In [119]:
# Load the pickled travel diaries. The estimation loop below iterates this object
# with .keys() and reads "start", "end", "x" and "y" from each value.
# NOTE: unpickling can execute arbitrary code -- only load pickle files from a trusted source.
diaryPath = '{a}1.trueLocExperienced.pickle'.format(a=savingLoc)
with open(diaryPath, 'rb') as handle:
    itemlistExperienced = pickle.load(handle)

In [121]:
# Hard-coded list of ID strings; the cell only displays its length.
# It is not referenced by any other code in the visible part of this notebook.
# NOTE(review): presumably a debugging subset of the keys of itemlistExperienced -- confirm or remove.
kk = ['10074', '100754', '100790', '100806', '10081', '100818', '10082', '100821', '100824', '100839', '100843', '100844', '10087', '100871', '100880', '100881', '100882', '100890', '100895', '100897', '100915', '100927', '10094', '100946', '10095', '100953', '10096', '10097', '100970', '100972', '100979', '10099', '100996', '100999', '10100', '101000', '101018', '10102', '101020', '101022', '10103', '101032', '101055', '101057', '101082', '101116', '101121', '101125', '101133', '101142', '101153', '101154', '101155', '101158', '10118', '10119', '101192', '101194', '1012', '10120', '101202', '101208', '101215', '101234', '101243', '10125', '101261', '101266', '101267', '101268', '101270', '101282', '101283', '101298', '10130', '101304', '101306', '101317', '101339', '101342', '101348', '10135', '101356', '101365', '10138', '101385', '101387', '101390', '101402', '101427', '101430', '101445', '10145', '101488', '101492', '10150', '101511', '101512', '101513', '10152', '10154', '101540', '101557', '101566', '101569', '101574', '101583', '101587', '101590', '101597', '101606', '101609', '101614', '101619', '101622', '101634', '10164', '101680', '101681', '101687', '101693', '101702', '101723', '101733', '10174', '101749', '10175', '101765', '10179', '101790', '101815', '101819', '101821', '101827', '101830', '101831', '101832', '101864', '101865', '101874', '10188', '1019', '101913', '10192', '101930', '101939', '101950', '101951', '10196', '101960', '101981', '101983', '101993', '102017', '102028', '102033', '102042', '102050', '102064', '102072', '102074', '102080', '102088', '1021', '10211', '102121', '102130', '102134', '102155', '10216', '102161', '102162', '102189', '102190', '102196', '102199', '102201', '102203', '102208', '102216', '102219', '10222', '102224', '102234', '102254', '102256', '102268', '10227', '102291', '102315', '102319', '102332', '10234', '102342', '102348', '102353', '102354', '102356', '102366', '102396', '102397', '102422', '10244', 
'102447', '102448', '102462', '102463', '102475', '102499', '102507', '102508', '102533', '102535', '102540', '102544', '102545', '10256', '102569', '102570', '10258', '102606', '102624', '102628']
len(kk)

223

======================================================================================================

#### estimating the ground-truth OD matrix

In [122]:
# Estimate the ground-truth OD matrix from the travel diaries; the whole run is timed.
startTime = time.time()

oneDay = pd.to_timedelta('24:00:00')
lastClockOfDay = pd.to_timedelta('23:59:59')
# Un-shifted estimation window, parsed once.
windowStart = pd.to_timedelta(ODstart)
windowEnd = pd.to_timedelta(ODend)


def _zone_of(x, y):
    """Return the zone id whose polygon contains (x, y) after re-projection, or 0 if none does."""
    point = LongLat()
    point.set_location(x=float(x), y=float(y))
    point.changeCoordSys()
    # Every polygon is tested (no early exit), so the last containing polygon wins.
    for polygon, zoneName in zip(inputs, tazNames):
        point.zoneMapping(polygon, zoneName)
    return point.TAZ


def _shifted_bound(bound, lastActivityStart):
    """Return *bound* unchanged if it lies between *lastActivityStart* and 23:59:59, else *bound* + 24h."""
    if lastClockOfDay >= bound >= lastActivityStart:
        return bound
    return bound + oneDay


def _clock_parts(clock):
    """Return the zero-padded (hours, minutes) strings of a clock time such as "6:30:00"."""
    parts = pd.to_timedelta(clock).components
    return "{:02d}".format(parts.hours), "{:02d}".format(parts.minutes)


for ID, activityListExperienced in itemlistExperienced.items():
    if activityListExperienced.empty:
        # A diary without activities cannot contribute a trip.
        continue
    nActivities = len(activityListExperienced)
    lastIdx = nActivities - 1

    # Move the end of the last activity to the next day. A single positional
    # assignment is used because chained assignment (.loc[...].iloc[...] = ...)
    # may write to a temporary copy.
    # NOTE: this changes the diary stored in itemlistExperienced, so re-running
    # this cell without reloading the pickle shifts the value again.
    endPos = activityListExperienced.columns.get_loc("end")
    activityListExperienced.iloc[-1, endPos] = (
        pd.to_timedelta(activityListExperienced.iloc[-1, endPos]) + oneDay)

    starts = activityListExperienced.loc[:, "start"]
    ends = activityListExperienced.loc[:, "end"]
    xs = activityListExperienced.loc[:, "x"]
    ys = activityListExperienced.loc[:, "y"]

    # Activity 0 is not examined on its own; it only serves as the follow-up
    # of the last activity.
    j = 1
    while j < nActivities:
        start_time1 = pd.to_timedelta(starts.iloc[j])
        endActivity = pd.to_timedelta(ends.iloc[j])
        if j == lastIdx:
            # The last activity is followed by the first one on the next day.
            startNewActivity = pd.to_timedelta(starts.iloc[0]) + oneDay
            startTime_OD = _shifted_bound(windowStart, start_time1)
            endTime_OD = _shifted_bound(windowEnd, start_time1)
        else:
            startNewActivity = pd.to_timedelta(starts.iloc[j + 1])
            # Reset to the plain window on every pass: otherwise a +24h shift made
            # for a previous person's last activity would still be in effect here.
            startTime_OD = windowStart
            endTime_OD = windowEnd

        if start_time1 <= startTime_OD < startNewActivity:
            # The window opens during activity j (or the trip after it): count one
            # trip per activity that ends before the window closes.
            while endTime_OD > endActivity:
                origin = _zone_of(xs.iloc[j], ys.iloc[j])  # an earlier version used j-1 here
                nextIdx = 0 if j == lastIdx else j + 1
                destination = _zone_of(xs.iloc[nextIdx], ys.iloc[nextIdx])  # an earlier version used j here
                # Same cell as ODMatrix_df[origin][destination]: column = origin, row = destination.
                ODMatrix_df.loc[destination, origin] += 1
                j += 1
                if j > lastIdx:
                    break
                if j == lastIdx:
                    # NOTE(review): the wrapped end is taken from the FIRST activity's end here,
                    # while the outer branch uses the last activity's own (shifted) end, and the
                    # window is not re-shifted for the last activity -- confirm this is intended.
                    endActivity = pd.to_timedelta(ends.iloc[0]) + oneDay
                else:
                    # when using inconsistent timings: endActivity = min(end_time1, start_time2)
                    endActivity = pd.to_timedelta(ends.iloc[j])
            # The activity spanning the window start has been handled; this person is done.
            break

        j += 1

# File name for the estimated matrix, e.g. "...OD(06-30_09-30).pickle".
# Hours and minutes come from the parsed times, because slicing the raw string breaks
# for single-digit hours ("6:30:00"[0:2] is "6:").
# The matrix itself is not written to disk in this cell.
start1, start2 = _clock_parts(ODstart)
end1, end2 = _clock_parts(ODend)
TXTFileName = "{a}OD({start1}-{start2}_{end1}-{end2}).pickle".format(a=savingLoc, start1=start1,
                                                                     start2=start2,
                                                                     end1=end1, end2=end2)
print((time.time() - startTime)//60,'minutes')

96.0 seconds


In [103]:
# Total number of trips counted in the OD matrix: sum each column, then sum the column totals.
columnTotals = ODMatrix_df.sum(axis=0)
print(columnTotals.sum())

36
