In [1]:
import requests
import pandas as pd
import multiprocessing as mp
import datetime as dt

In [2]:
# Load the trip table, then narrow it to the seven columns the routing step
# reads, addressed by position in the source file: 0-4 first, then 9, then 8.
data = pd.read_csv('SperoniOSRMdata.csv')
data = data.reindex(columns=[data.columns[pos] for pos in (0, 1, 2, 3, 4, 9, 8)])

# `keys` is read by the worker function to walk the columns in this order.
keys = data.columns

In [3]:
# Split the row range [0, rows) into one contiguous half-open chunk per core.
cores = mp.cpu_count()
rows = data.shape[0]
group = rows // cores  # nominal chunk size; kept for reference

# Proportional boundaries guarantee the final chunk ends at `rows`. Using
# `group * (i + 1)` for the last bound would silently drop the trailing
# `rows % cores` rows whenever the row count is not a multiple of the core
# count, and would produce only empty chunks when rows < cores.
bounds = [rows * i // cores for i in range(cores + 1)]
mpCount = list(zip(bounds[:-1], bounds[1:]))

print(mpCount)

[(0, 8199), (8199, 16398), (16398, 24597), (24597, 32796)]


In [4]:
def mp_transitTime(index, max_walk_m=805, timeout=120):
    """Request a transit itinerary from the local OTP server for a range of rows.

    Reads the module-level ``data`` frame and its column index ``keys``.
    For each row, the columns are used by position: 0 is the trip id,
    1-2 form ``fromPlace``, 3-4 form ``toPlace``, 5 is sent as ``time`` and
    6 (with its last five characters removed) is sent as ``date``.

    Parameters
    ----------
    index : tuple
        ``(start, stop)`` half-open range of row labels to process.
    max_walk_m : int, optional
        Value sent as ``maxWalkDistance``. Defaults to 805; the other
        values previously tried were 3220 and 8047.
    timeout : float, optional
        Seconds to wait for the server before giving up on one request.

    Returns
    -------
    dict
        Maps each output column name to a list with one entry per row.
        Rows without a usable itinerary get ``None`` in the numeric
        columns and a short (at most 14 characters) reason in 'Message'.
    """
    cols = ['Trip ID','Duration (min)','Walking Time (min)','Transit Time (min)',
            'Walking Distance (Mi)','Transfers','Message']
    responses = {col: [] for col in cols}
    wid = mp.current_process().name

    # endpoint of the locally running server
    url = 'http://127.0.0.1:8080/otp/routers/default/plan'

    def record_failure(message):
        # Keep every column the same length: blanks for the metrics,
        # plus the reason the row has no itinerary.
        for col in cols[1:6]:
            responses[col].append(None)
        responses[cols[6]].append(message)

    for i in range(index[0],index[1]):
        # collecting values
        vals = [data[key][i] for key in keys]
        vals[6] = vals[6][0:-5]

        # Let requests build and escape the query string.
        params = {
            'fromPlace': '{0},{1}'.format(vals[1],vals[2]),
            'toPlace': '{0},{1}'.format(vals[3],vals[4]),
            'time': '{0}'.format(vals[5]),
            'date': '{0}'.format(vals[6]),
            'mode': 'TRANSIT,WALK',
            'maxWalkDistance': '{0}'.format(max_walk_m),
            'arriveBy': 'true',
            'optimize': 'QUICK',
        }

        responses[cols[0]].append(vals[0])

        # A single failed request must not abort the whole chunk: an exception
        # raised here would propagate through pool.map and discard all results.
        try:
            response = requests.get(url, params=params, timeout=timeout).json()
        except (requests.RequestException, ValueError):
            # ValueError covers a body that is not valid JSON.
            record_failure('Request failed')
            continue

        itineraries = response['plan'].get('itineraries') if 'plan' in response else None
        if itineraries:
            best = itineraries[0]
            responses[cols[1]].append(best['duration']/60)
            responses[cols[2]].append(best['walkTime']/60)
            responses[cols[3]].append(best['transitTime']/60)
            responses[cols[4]].append(best['walkDistance']/1609.34)
            responses[cols[5]].append(best['transfers'])
            responses[cols[6]].append('Successful Run')
        elif 'plan' in response:
            # A plan came back but contains no itinerary to read.
            record_failure('No itineraries')
        else:
            # Tolerate a payload that carries neither 'plan' nor 'error'.
            error = response.get('error') or {}
            message = error.get('msg') or 'Unknown error'
            record_failure(str(message)[0:14])

    now = dt.datetime.now().strftime("%H%M")
    print('index',index,'done processing', now, wid)
    return responses

In [5]:
now = dt.datetime.now().strftime("%H%M%S")
print('Starting processing at', now)

# The context manager shuts the worker processes down even if a worker
# raises, so a failed run does not leave orphaned processes behind.
# pool.map blocks until every chunk is done, so no work is cut short on exit.
with mp.Pool(cores) as pool:
    results = pool.map(mp_transitTime,mpCount)

now = dt.datetime.now().strftime("%H%M%S")
print('Finished processing at', now)

# Each worker returns a dict of column lists; stack them into one frame in
# chunk order with a fresh 0..n-1 index.
df = pd.concat([pd.DataFrame(d) for d in results],ignore_index=True)

print(df.shape)
df.head()

Starting processing at 154007
index (0, 8199) done processing 1607 ForkPoolWorker-1
index (16398, 24597) done processing 1608 ForkPoolWorker-3
index (24597, 32796) done processing 1608 ForkPoolWorker-4
index (8199, 16398) done processing 1611 ForkPoolWorker-2
Finished processing at 161127
(32796, 7)


Unnamed: 0,Trip ID,Duration (min),Walking Time (min),Transit Time (min),Walking Distance (m),Transfers,Message
0,1248167,16.966667,16.966667,0.0,1315.121301,0.0,Successful Run
1,1108571,22.15,18.5,3.616667,1400.756331,0.0,Successful Run
2,1229641,,,,,,No trip found.
3,1022430,,,,,,No trip found.
4,1327145,,,,,,No trip found.


In [6]:
# Prefix the output file with the current date and time (minute resolution).
now = dt.datetime.now().strftime("%Y%m%d-%H%M")
output_name = now + 'transitTimesTwoMiles.csv'
# NOTE(review): the name says "TwoMiles" but the routing request uses a walk
# limit of 805 — confirm the file name matches the run that produced `df`.
df.to_csv(output_name)

In [13]:
# Tally how many trips ended with each status message.
message_counts = df['Message'].value_counts()
message_counts

No trip found.    25344
Successful Run     7417
Origin is with       35
Name: Message, dtype: int64