# Estimation and Analysis of the ground-truth OD matrix

#### adding required packages

In [2]:
import pandas as pd
import numpy as np
import pickle
import time
import sys
# !conda install --yes --prefix {sys.prefix} pyproj
# !{sys.executable} -m pip install geopandas
from pyproj import Proj, transform
from shapely.geometry import Point
import geopandas as gpd

#### specifying the saving location 

In [3]:
savingLoc = "Y:/ZahraEftekhar/phase4/"

#### functions for proximity analysis

Here, we create classes and functions to automate the proximity analysis. These analyses include changing the coordinate system, mapping the location coordinates to their associated TAZ (traffic analysis zone).

In [4]:
#this class change coordinate system and map location coordinates to zones:
class LongLat:
    def __init__(self, *args):
        self.TAZ = 0
    def set_location(self, x, y):
        from shapely.geometry import Point
        self.location = Point(x, y)

    def changeCoordSys(self, initial: str = 'epsg:23031', final: str = 'epsg:28992'):
        from pyproj import Proj, transform
        from shapely.geometry import Point
        self.location = Point(transform(Proj(init=initial), Proj(init=final), self.location.x, self.location.y))

    def zoneMapping(self, onepolygon, polygonName):
        if (onepolygon.contains(self.location)):
            self.TAZ = polygonName
# this class only reads SHP:
class TAZmap:
    def __init__(self): pass

    def set_map(self, value):
        import geopandas as gpd
        self.map = gpd.read_file(value)

#### building the zero OD Matrix based on the number of zones in the SHP

In [5]:
# building the zero OD Matrix based on the number of zones in the SHP
map_mzr = TAZmap()
map_mzr.set_map("{a}amsterdamMezuroZones.shp".format(a=savingLoc))
inputs = map_mzr.map.geometry
amsterdamMezuroZones = pd.read_csv("{a}amsterdamMezuroZones.CSV".format(a=savingLoc), usecols=['mzr_id'])
tazNames = amsterdamMezuroZones['mzr_id']
zoneZero = pd.Series(0)
matrixRowColNames = tuple(zoneZero.append(tazNames))
odsize=len(matrixRowColNames)
del map_mzr
ODMatrix_df = pd.DataFrame(np.zeros((odsize, odsize), dtype=np.int32), columns=matrixRowColNames,
                           index=matrixRowColNames)  # creating empty OD matrix

## Specifying the OD matrix estimation interval
=========================================================

In [138]:
ODstart = "06:30:00"
ODend = "09:30:00"
startTime_OD = pd.to_timedelta(ODstart)
endTime_OD = pd.to_timedelta(ODend)

#### Reading the travel diaries to estimate the OD matrix

In [64]:
with open('{a}1.trueLocExperienced.pickle'.format(a=savingLoc), 'rb') as handle:
    itemlistExperienced = pickle.load(handle)

======================================================================================================

#### estimating the ground-truth OD matrix

In [None]:
#we time the process
startTime = time.time()
for ID in itemlistExperienced.keys():#itemlistExperienced.keys() or ['100158'] or ['100048']
    activityListExperienced = itemlistExperienced[ID]
    activityListExperienced.loc[:,"end"].iloc[-1] = pd.to_timedelta(activityListExperienced.loc[:,"end"].iloc[-1]
                                                                  )+ pd.to_timedelta('24:00:00')
    j=1
    while j < len(np.arange(len(activityListExperienced))):
        if j==len(np.arange(len(activityListExperienced)))-1:
            start_time1 = pd.to_timedelta((activityListExperienced.loc[:,"start"]).iloc[j])
            end_time1 = pd.to_timedelta((activityListExperienced.loc[:,"end"]).iloc[j])
            start_time2 = pd.to_timedelta((activityListExperienced.loc[:,"start"]).iloc[0])+ pd.to_timedelta('24:00:00')
            endActivity = end_time1
            startNewActivity = start_time2
            if pd.to_timedelta('23:59:59')>=pd.to_timedelta(ODstart)>=pd.to_timedelta(start_time1):
                startTime_OD = pd.to_timedelta(ODstart)
            else:
                startTime_OD = pd.to_timedelta(ODstart) + pd.to_timedelta('24:00:00')
            if pd.to_timedelta('23:59:59')>= pd.to_timedelta(ODend) >=pd.to_timedelta(start_time1):
                endTime_OD =pd.to_timedelta(ODend)
            else:
                endTime_OD = pd.to_timedelta(ODend)+ pd.to_timedelta('24:00:00')
        else:
            start_time1 = pd.to_timedelta(activityListExperienced.loc[:,"start"].iloc[j])
            end_time1 = pd.to_timedelta(activityListExperienced.loc[:,"end"].iloc[j])
            start_time2 = pd.to_timedelta(activityListExperienced.loc[:,"start"].iloc[j+1])
            endActivity = end_time1 
            startNewActivity = start_time2 
        if pd.to_timedelta(start_time1) <= pd.to_timedelta(startTime_OD) < pd.to_timedelta(startNewActivity):
            if endTime_OD <= endActivity:
                break
            else:
                while pd.to_timedelta(endTime_OD) > pd.to_timedelta(endActivity):
                    point1 = LongLat()
                    point1.set_location(x=float(activityListExperienced.loc[:,"x"].iloc[j]),
                                        y=float(activityListExperienced.loc[:,"y"].iloc[j]))#*********ghablan:j-1
                    point1.changeCoordSys()
                    for k in range(len(tazNames)):
                        point1.zoneMapping(inputs[k], tazNames[k])
                    origin = point1.TAZ
                    point2 = LongLat()
                    if j == len(activityListExperienced) - 1:
                        point2.set_location(x=float(activityListExperienced.loc[:,"x"].iloc[0]),
                                            y=float(activityListExperienced.loc[:,"y"].iloc[0]))
                    else:
                        point2.set_location(x=float(activityListExperienced.loc[:,"x"].iloc[j+1]),
                                        y=float(activityListExperienced.loc[:,"y"].iloc[j+1])) #*****ghablan: j
                    point2.changeCoordSys()
                    for k in range(len(tazNames)):
                        point2.zoneMapping(inputs[k], tazNames[k])
                    destination = point2.TAZ
                    ODMatrix_df[origin][destination] = ODMatrix_df[origin][destination] + 1
                    j += 1
                    if j > len(np.arange(len(activityListExperienced))) - 1: break
                    if j== len(np.arange(len(activityListExperienced)))-1:
                        end_time1 = pd.to_timedelta((activityListExperienced.loc[:,"end"]).iloc[0])+ pd.to_timedelta('24:00:00')
                    else:
                        end_time1 = pd.to_timedelta((activityListExperienced.loc[:,"end"]).iloc[j])
                    endActivity = end_time1
                break
        j += 1
TXTFileName = "{a}OD({start1}-{start2}_{end1}-{end2}).pickle".format(a=savingLoc,start1 = ODstart[0:2],
                                                                                     start2 = ODstart[3:5],
                                                                     end1 = ODend[0:2], end2 = ODend[3:5])
with open(TXTFileName, 'wb') as handle:
    pickle.dump(ODMatrix_df, handle, protocol=pickle.HIGHEST_PROTOCOL)
print((time.time() - startTime)//60,'minutes')

In [137]:
print(np.sum(np.sum(ODMatrix_df, axis=0)))

11948


================================================================================
### creating a `dict` file of all the trips that each user has in its travel diaries:

In [65]:
itemlistExperienced["1"]

Unnamed: 0,VEHICLE,activityType,x,y,start,end
0,1,work,629393.6676188618,5803949.115843206,06:52:04,16:49:20
0,1,home,632315.3322837545,5817000.086435355,17:10:47,06:29:59


In [66]:
#we time the process
startTime = time.time()

trips = {}
for ID in itemlistExperienced.keys():
    activities= itemlistExperienced[ID]
    activities.columns = ["VEHICLE","activityType","x","y","A_start","A_end"]
    activities["start"] = 0
    activities["duration"] = 0
    
    for j in np.arange(0,len(activities)):
            activities.loc[:,"start"].iloc[j] = pd.to_timedelta(activities.loc[:,"A_end"].iloc[j-1])
            activities.loc[:,"duration"].iloc[j]=pd.to_timedelta(activities.loc[:,"A_start"].iloc[j])-pd.to_timedelta(activities.loc[:,"A_end"].iloc[j-1])
    activities.drop(["activityType","x","y","A_start","A_end"],axis=1,inplace=True)
    trips[ID] = activities
TXTFileName = "{a}trips.pickle".format(a=savingLoc)
with open(TXTFileName, 'wb') as handle:
    pickle.dump(trips, handle, protocol=pickle.HIGHEST_PROTOCOL)

print((time.time() - startTime)//60,'minutes')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


1.0 minutes


In [69]:
trips["100007"]

Unnamed: 0,VEHICLE,start,duration
0,100007,0 days 06:16:14,0 days 00:16:30
0,100007,0 days 15:59:53,0 days 00:17:44
