# Imports

In [14]:
import json
import requests

from datetime import datetime, timedelta, timezone, time, date
from itertools import product
from functools import reduce
from os import path

import pandas as pd
import numpy as np

In [15]:
# Execute the two helper scripts with IPython's %run magic.
# NOTE(review): both paths are relative, so they resolve against the kernel's
# current working directory -- confirm the notebook is started from the expected folder.
# NOTE(review): no name defined by these scripts is visibly used in the cells below -- verify they are still needed.
%run desktop/opentransit-metrics/mykelu/eclipses.py
%run desktop/opentransit-metrics/mykelu/get_stops.py

In [16]:
# Load the recorded stop events. The outputs further down show the columns
# DATE, DID, ROUTE, SID, TIME, VID and timestamp.
# NOTE(review): the path is relative, so it resolves against the kernel's working directory.
stops = pd.read_csv(
    filepath_or_buffer='desktop/opentransit-metrics/mykelu/stops.csv',
)

# Get Possible Routes

In [17]:
# Keep only the stop events recorded for ROUTE 14 with DID '14___O_F00'.
# A vectorized boolean mask selects the same rows as the previous row-wise
# apply, without calling a Python lambda once per row.
test_stops = stops[
    (stops['ROUTE'] == 14)
    & (stops['DID'] == '14___O_F00')
]

In [18]:
# Self-join on vehicle ID (VID): each stop event is paired with every stop
# event recorded for the same vehicle. Columns coming from the left-hand frame
# get the suffix 'start', columns from the right-hand frame the suffix 'end'.
joined_stops = test_stops.join(
    test_stops.set_index('VID'),
    on='VID',
    how='inner',
    lsuffix='start',
    rsuffix='end',
)

In [19]:
# A pairing is a valid trip only when the 'start' event happens strictly before
# the 'end' event and both fall on the same DATE.
# Vectorized column comparisons replace the row-wise apply; the selected rows
# are the same, but no Python lambda is invoked per row.
valid_trips = joined_stops[
    (joined_stops['TIMEstart'] < joined_stops['TIMEend'])
    & (joined_stops['DATEstart'] == joined_stops['DATEend'])
]

# Find Time Between Stops

In [20]:
# Restrict the valid trips to one stop pair: departures from stop 5528 paired
# with arrivals at stop 5545.
# Vectorized comparison instead of a row-wise apply; same rows, far fewer Python calls.
sample_trips = valid_trips[
    (valid_trips['SIDstart'] == 5528)
    & (valid_trips['SIDend'] == 5545)
]

In [21]:
# Preview the first few start/end pairings for the 5528 -> 5545 stop pair.
sample_trips.head()

Unnamed: 0,DATEstart,DIDstart,ROUTEstart,SIDstart,TIMEstart,VID,timestampstart,DATEend,DIDend,ROUTEend,SIDend,TIMEend,timestampend
97,2018-11-12,14___O_F00,14,5528,1542010000.0,7225,2018-11-12 08:03:43-08:00,2018-11-12,14___O_F00,14,5545,1542011000.0,2018-11-12 08:17:44-08:00
97,2018-11-12,14___O_F00,14,5528,1542010000.0,7225,2018-11-12 08:03:43-08:00,2018-11-12,14___O_F00,14,5545,1542019000.0,2018-11-12 10:44:36-08:00
2246,2018-11-12,14___O_F00,14,5528,1542019000.0,7225,2018-11-12 10:29:51-08:00,2018-11-12,14___O_F00,14,5545,1542019000.0,2018-11-12 10:44:36-08:00
4775,2018-11-13,14___O_F00,14,5528,1542105000.0,7286,2018-11-13 10:22:29-08:00,2018-11-13,14___O_F00,14,5545,1542105000.0,2018-11-13 10:33:59-08:00
7111,2018-11-14,14___O_F00,14,5528,1542190000.0,7286,2018-11-14 10:04:52-08:00,2018-11-14,14___O_F00,14,5545,1542191000.0,2018-11-14 10:22:53-08:00


In [22]:
# Trip length = TIMEend - TIMEstart (presumably seconds, given the epoch-style
# TIME values -- TODO confirm).
# `assign` returns a new DataFrame instead of writing a column into a frame that
# was produced by filtering another one, which is what raised the
# SettingWithCopyWarning. Re-running this cell yields the same result.
sample_trips = sample_trips.assign(
    triplength=sample_trips['TIMEend'] - sample_trips['TIMEstart']
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [23]:
# Shortest trip length for each (vehicle, departure timestamp) pair. The
# self-join pairs a departure with every later arrival of the same vehicle on
# that day, so taking the minimum keeps only the nearest arrival.
# The string 'min' selects the same aggregation as np.min; recent pandas
# versions emit a FutureWarning when the NumPy callable is passed instead.
sample_trips.pivot_table(
    values=['triplength'],
    index=['VID', 'timestampstart'],
    aggfunc='min',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,triplength
VID,timestampstart,Unnamed: 2_level_1
7203,2018-11-14 08:03:31-08:00,540.0
7203,2018-11-14 10:36:53-08:00,961.0
7204,2018-11-12 09:46:33-08:00,961.0
7205,2018-11-13 08:53:40-08:00,721.0
7206,2018-11-14 08:35:47-08:00,706.0
7208,2018-11-13 08:41:39-08:00,601.0
7209,2018-11-13 09:51:13-08:00,1876.0
7211,2018-11-14 09:27:35-08:00,1021.0
7215,2018-11-12 10:02:34-08:00,1021.0
7216,2018-11-13 09:11:41-08:00,901.0


In [28]:
# Positional look-up of two hand-picked rows: in the output below they share
# the same vehicle (7220) and the same timestamp but report different stop IDs
# (5528 and 5529).
# NOTE(review): the positions 2777 and 2801 are tied to the current contents of stops.csv.
stops.iloc[[2777, 2801]]

Unnamed: 0,DATE,DID,ROUTE,SID,TIME,VID,timestamp
2777,2018-11-12,14___O_F00,14,5528,1542020000.0,7220,2018-11-12 11:00:37-08:00
2801,2018-11-12,14___O_F00,14,5529,1542020000.0,7220,2018-11-12 11:00:37-08:00


In [25]:
# All events of vehicle 7220 whose stop ID lies in the inclusive range
# 5528..5545, ordered by the `timestamp` column.
# Vectorized comparisons replace the row-wise apply and select the same rows.
stops[
    (stops['VID'] == 7220)
    & (stops['SID'] >= 5528)
    & (stops['SID'] <= 5545)
].sort_values('timestamp')

Unnamed: 0,DATE,DID,ROUTE,SID,TIME,VID,timestamp
87,2018-11-12,14___I_F00,14,5544,1.542010e+09,7220,2018-11-12 08:02:43-08:00
100,2018-11-12,14___I_F00,14,5542,1.542010e+09,7220,2018-11-12 08:03:59-08:00
111,2018-11-12,14___I_F00,14,5541,1.542010e+09,7220,2018-11-12 08:04:44-08:00
134,2018-11-12,14___I_F00,14,5537,1.542010e+09,7220,2018-11-12 08:06:44-08:00
152,2018-11-12,14___I_F00,14,5535,1.542010e+09,7220,2018-11-12 08:07:44-08:00
175,2018-11-12,14___I_F00,14,5533,1.542010e+09,7220,2018-11-12 08:09:44-08:00
211,2018-11-12,14___I_F00,14,5531,1.542010e+09,7220,2018-11-12 08:12:44-08:00
232,2018-11-12,14___I_F00,14,5530,1.542010e+09,7220,2018-11-12 08:14:44-08:00
543,2018-11-12,14___O_F00,14,5528,1.542012e+09,7220,2018-11-12 08:37:45-08:00
552,2018-11-12,14___O_F00,14,5529,1.542012e+09,7220,2018-11-12 08:38:45-08:00


In [26]:
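# TODO: decide how to handle buses that switch routes (or directions) during the day.
# Example: in the output above, vehicle 7220 moves from DID 14___I_F00 to 14___O_F00 on the same morning.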

# Find Duplicate Stops

In [30]:
# Self-join on the `timestamp` column: each stop event is paired with every
# event that carries the identical timestamp. Left-hand columns get the suffix
# 'start', right-hand columns the suffix 'end'.
duplicate_stops = stops.join(
    stops.set_index('timestamp'),
    on='timestamp',
    how='inner',
    lsuffix='start',
    rsuffix='end',
)

In [31]:
# Keep only pairings where the same vehicle is reported at two different stops
# with the same timestamp.
# Vectorized comparisons replace the row-wise apply and select the same rows.
duplicate_stops = duplicate_stops[
    (duplicate_stops['VIDstart'] == duplicate_stops['VIDend'])
    & (duplicate_stops['SIDstart'] != duplicate_stops['SIDend'])
]

In [32]:
# Show the flagged pairings: each row is one vehicle reported at two different
# stop IDs with an identical timestamp.
duplicate_stops

Unnamed: 0,DATEstart,DIDstart,ROUTEstart,SIDstart,TIMEstart,VIDstart,timestamp,DATEend,DIDend,ROUTEend,SIDend,TIMEend,VIDend
0,2018-11-12,12___O_F00,12,7941,1.542010e+09,8776,2018-11-12 08:00:13-08:00,2018-11-12,12___O_F00,12,5859,1.542010e+09,8776
0,2018-11-12,12___O_F00,12,7941,1.542010e+09,8776,2018-11-12 08:00:13-08:00,2018-11-12,12___O_F00,12,5851,1.542010e+09,8776
1,2018-11-12,12___O_F00,12,6328,1.542010e+09,8728,2018-11-12 08:00:13-08:00,2018-11-12,12___O_F00,12,7863,1.542010e+09,8728
1,2018-11-12,12___O_F00,12,6328,1.542010e+09,8728,2018-11-12 08:00:13-08:00,2018-11-12,12___O_F00,12,6327,1.542010e+09,8728
2,2018-11-12,14___I_F00,14,5610,1.542010e+09,7283,2018-11-12 08:00:13-08:00,2018-11-12,14___I_F00,14,5582,1.542010e+09,7283
2,2018-11-12,14___I_F00,14,5610,1.542010e+09,7283,2018-11-12 08:00:13-08:00,2018-11-12,14___I_F00,14,5586,1.542010e+09,7283
2,2018-11-12,14___I_F00,14,5610,1.542010e+09,7283,2018-11-12 08:00:13-08:00,2018-11-12,14___I_F00,14,5615,1.542010e+09,7283
3,2018-11-12,14___I_F00,14,5582,1.542010e+09,7283,2018-11-12 08:00:13-08:00,2018-11-12,14___I_F00,14,5610,1.542010e+09,7283
3,2018-11-12,14___I_F00,14,5582,1.542010e+09,7283,2018-11-12 08:00:13-08:00,2018-11-12,14___I_F00,14,5586,1.542010e+09,7283
3,2018-11-12,14___I_F00,14,5582,1.542010e+09,7283,2018-11-12 08:00:13-08:00,2018-11-12,14___I_F00,14,5615,1.542010e+09,7283
