In [1]:
import requests
import pandas as pd
import multiprocessing as mp
import datetime as dt

In [2]:
# Load the trip table, then keep only the columns the routing worker reads,
# in the positional order it expects: source columns 0-4, then 9, then 8.
data = pd.read_csv('SperoniOSRMdata.csv')
data = data[[data.columns[pos] for pos in (0, 1, 2, 3, 4, 9, 8)]]
# Column labels of the reduced frame; the worker iterates over these.
keys = data.keys()

In [4]:
# Split the row range [0, rows) into one contiguous (start, stop) chunk per core.
cores = mp.cpu_count()
# rows = data.shape[0]
rows = 100  # small sample for a trial run; switch to the line above for the full table

# Floor division alone would silently drop the last `rows % cores` rows whenever
# the row count is not a multiple of the core count, so the remainder is spread
# one extra row at a time over the first `extra` chunks.
group, extra = divmod(rows, cores)
bounds = [group * i + min(i, extra) for i in range(cores + 1)]
mpCount = list(zip(bounds[:-1], bounds[1:]))

print(mpCount)

[(0, 25), (25, 50), (50, 75), (75, 100)]


In [5]:
def mp_transitTime(index):
    """Query the local OTP server for a range of trips and collect the results.

    Reads rows from the module-level ``data`` frame (columns listed in the
    module-level ``keys``), requests a TRANSIT,WALK arrive-by plan for each
    row, and records the first itinerary returned.

    A failure on one row (network error, timeout, non-JSON reply, missing
    itineraries, malformed error payload) is recorded in the ``Message``
    column for that row instead of raising, so a single bad request cannot
    abort the whole ``Pool.map`` call and discard the other chunks.

    Parameters
    ----------
    index : tuple of (int, int)
        Half-open ``(start, stop)`` range of row labels in ``data``.

    Returns
    -------
    dict
        Maps each output column name to a list holding one entry per
        processed row; the numeric columns hold ``None`` for failed rows.
    """
    cols = ['Trip ID','Duration (min)','Walking Time (min)','Transit Time (min)',
            'Walking Distance (m)','Transfers','Message']
    responses = {col: [] for col in cols}
    wid = mp.current_process().name

    plan_url = 'http://127.0.0.1:8080/otp/routers/default/plan'
    # Seconds to wait for the server before giving up on a row; without a
    # timeout a stalled request would block this worker indefinitely.
    timeout = 60

    # One session per chunk so the HTTP connection to the server is reused.
    with requests.Session() as session:
        for i in range(index[0], index[1]):
            # collecting values, in the column order given by `keys`
            vals = [data[key][i] for key in keys]
            # Drops the last five characters of the date field -- presumably a
            # trailing suffix the server does not accept; confirm against the CSV.
            vals[6] = vals[6][0:-5]

            # Passing the query as `params` lets requests URL-encode the values.
            params = {
                'fromPlace': '{0},{1}'.format(vals[1], vals[2]),
                'toPlace': '{0},{1}'.format(vals[3], vals[4]),
                'time': '{0}'.format(vals[5]),  # 1:02pm
                'date': '{0}'.format(vals[6]),  # 11-13-2017
                'mode': 'TRANSIT,WALK',
                'maxWalkDistance': '10000',
                'arriveBy': 'true',
            }

            metrics = [None] * 5
            try:
                response = session.get(plan_url, params=params, timeout=timeout).json()
            except (requests.exceptions.RequestException, ValueError) as exc:
                # ValueError covers a reply body that is not valid JSON.
                message = 'Request failed: {0}'.format(type(exc).__name__)
            else:
                if not isinstance(response, dict):
                    message = 'Unexpected response'
                elif 'plan' in response:
                    itineraries = (response['plan'] or {}).get('itineraries') or []
                    if itineraries:
                        best = itineraries[0]
                        try:
                            metrics = [best['duration']/60,
                                       best['walkTime']/60,
                                       best['transitTime']/60,
                                       best['walkDistance'],
                                       best['transfers']]
                            message = 'Successful Run'
                        except (KeyError, TypeError):
                            metrics = [None] * 5
                            message = 'Malformed itinerary'
                    else:
                        message = 'No itineraries'
                else:
                    error = response.get('error')
                    msg = error.get('msg') if isinstance(error, dict) else None
                    # Error text is truncated to 14 characters, as before.
                    message = str(msg)[0:14] if msg is not None else 'Unknown error'

            responses[cols[0]].append(vals[0])
            for col, value in zip(cols[1:6], metrics):
                responses[col].append(value)
            responses[cols[6]].append(message)

    now = dt.datetime.now().strftime("%H%M")
    print('index',index,'done processing', now, wid)
    return responses

In [6]:
now = dt.datetime.now().strftime("%H%M%S")
print('Starting processing at', now)

# The context manager shuts the worker processes down even when a worker
# raises, so a failed run does not leave orphaned processes in the kernel.
with mp.Pool(cores) as pool:
    # One call per (start, stop) chunk; results come back in chunk order.
    results = pool.map(mp_transitTime,mpCount)

now = dt.datetime.now().strftime("%H%M%S")
print('Finished processing at', now)

# Each worker returns a dict of equal-length lists; stack them into one frame.
df = pd.concat([pd.DataFrame(d) for d in results],ignore_index=True)

print(df.shape)
df.head()

Starting processing at 102743
index (75, 100) done processing 1028 ForkPoolWorker-3
index (0, 25) done processing 1028 ForkPoolWorker-1
index (25, 50) done processing 1028 ForkPoolWorker-2
index (50, 75) done processing 1028 ForkPoolWorker-4
Finished processing at 102824
(25, 7)


Unnamed: 0,Trip ID,Duration (min),Walking Time (min),Transit Time (min),Walking Distance (m),Transfers,Message
0,1394827,17.533333,17.533333,0.0,1347.135308,0.0,Successful Run
1,1364007,169.933333,148.9,21.0,11579.655218,0.0,Successful Run
2,1022291,56.766667,52.966667,3.766667,4119.655692,0.0,Successful Run
3,1507106,65.916667,65.916667,0.0,5100.425188,0.0,Successful Run
4,947540,68.816667,68.816667,0.0,5336.424481,0.0,Successful Run


In [None]:
# Write the collected results to a CSV whose name starts with a
# date-and-minute timestamp taken at the moment of export.
now = dt.datetime.now().strftime("%Y%m%d-%H%M")
df.to_csv('{0}transitTimes.csv'.format(now))