Will Geary

wcg2111@columbia.edu

In [1]:
import pandas as pd
import numpy as np

# Example Output Data

Here is an example of what we want the data to look like in order to animate it with Processing:

In [2]:
# Load the sample of the target format and discard its 'Unnamed: 0' column.
example = pd.read_csv("data/example.csv").drop('Unnamed: 0', axis=1)
example.head()

Unnamed: 0,type_id,starttime,stoptime,tripduration,start_lat,start_lon,end_lat,end_lon
0,1,2017-04-14 03:21:00,2017-04-14 03:23:00,120,45.516502,-122.461119,45.520723,-122.471093
1,1,2017-04-14 03:23:00,2017-04-14 03:25:00,120,45.520723,-122.471093,45.522065,-122.478074
2,1,2017-04-14 03:24:00,2017-04-14 03:26:00,120,45.505058,-122.841872,45.500249,-122.832785
3,1,2017-04-14 03:25:00,2017-04-14 03:27:00,120,45.522065,-122.478074,45.52214,-122.48714
4,1,2017-04-14 03:26:00,2017-04-14 03:28:00,120,45.500249,-122.832785,45.495102,-122.821238


# Raw Data

Here is the raw data we have:

In [3]:
raw_data = pd.read_csv('data/05012013.csv')
raw_data.head()

Unnamed: 0,Device,Trip,Latitude,Longitude,Speed,Heading,LocalTime
0,1026,1864523,42.25979,-83.73022,4.8,267.2375,2013-05-01 09:16:38.000
1,1026,1864523,42.25979,-83.73023,4.96,268.85,2013-05-01 09:16:38.000
2,1026,1864523,42.25979,-83.73024,4.92,267.0875,2013-05-01 09:16:38.000
3,1026,1864523,42.25979,-83.73032,5.46,268.1375,2013-05-01 09:30:05.000
4,1026,1864523,42.25979,-83.73032,5.16,266.7375,2013-05-01 09:30:05.000


# Map the Data

In [4]:
from geojson import Feature, Point, LineString, FeatureCollection
import folium
%matplotlib inline

In [5]:
df = raw_data.copy()
device_ids = list(np.unique(df['Device']))

In [6]:
# Drop rows where lat = 90.0
# NOTE(review): 90.0 is presumably a placeholder for a missing GPS fix - confirm.
# Take an explicit copy so that later column assignments on `df` operate on an
# independent frame instead of a filtered view (avoids SettingWithCopyWarning).
df = df[df['Latitude'] != 90.0].copy()

In [49]:
# Convert date times
df['LocalTime'] = pd.to_datetime(df['LocalTime'])

In [7]:
def to_map_line(input_df, device_id, zoom=12):
    """Draw every trip recorded by one device as its own polyline on a folium map.

    Parameters
    ----------
    input_df : DataFrame with 'Device', 'Trip', 'Latitude' and 'Longitude' columns.
    device_id : value matched against the 'Device' column.
    zoom : initial zoom level passed to ``folium.Map`` (default 12).

    Returns
    -------
    A ``folium.Map`` centred on the mean latitude/longitude of the device's
    rows, with one ``folium.PolyLine`` child per distinct 'Trip' value.
    """
    # Boolean filtering already yields a new frame, so the input is never mutated.
    data = input_df[input_df['Device'] == device_id]

    my_map = folium.Map(location=[np.mean(data['Latitude']), np.mean(data['Longitude'])],
                   tiles='Stamen Toner', zoom_start=zoom)

    for trip_id in np.unique(data['Trip']):
        trip = data[data['Trip'] == trip_id]
        # Use the rows of *this* trip only; the coordinates were previously read
        # from `data`, which drew the whole device track once per trip.
        # list(...) materialises the pairs because zip() is a one-shot iterator
        # on Python 3.
        coordinates = list(zip(trip['Latitude'], trip['Longitude']))
        my_PolyLine = folium.PolyLine(locations=coordinates, weight=4)
        my_map.add_child(my_PolyLine)

    return my_map

# User 1

In [8]:
to_map_line(df, device_ids[0])

# Example GeoJSON Data

In [10]:
import json

In [9]:
# Path of the example GeoJSON file that the following cells open and inspect.
# NOTE(review): this is an absolute path on the author's machine, so the cells
# that read it will fail elsewhere - consider a path relative to the notebook.
geojsonfile = "/Users/Will/Dropbox/Portfolio/united_airlines/sketch_airport/data/gmaps_routes.geojson"

In [14]:
with open(geojsonfile) as json_data:
    d = json.load(json_data)

In [16]:
d.keys()

[u'type', u'features']

In [18]:
d['type']

u'FeatureCollection'

In [20]:
len(d['features'])

3

In [22]:
d['features'][0].keys()

[u'geometry', u'type', u'properties']

In [21]:
d['features'][0]

{u'geometry': {u'coordinates': [[-73.97419, 40.69206],
   [-73.97415, 40.69248],
   [-73.97419, 40.69274],
   [-73.97429, 40.69287],
   [-73.97435, 40.6929],
   [-73.97453, 40.69297],
   [-73.97471, 40.69302],
   [-73.9749, 40.693],
   [-73.97518, 40.69299],
   [-73.97542, 40.69303],
   [-73.97557, 40.69307],
   [-73.97577, 40.69316],
   [-73.97584, 40.69325],
   [-73.9764, 40.69614],
   [-73.97658, 40.69707],
   [-73.9768, 40.69813],
   [-73.98004, 40.69826],
   [-73.98055, 40.69827],
   [-73.98051, 40.69895],
   [-73.98046, 40.69964],
   [-73.98046, 40.69975],
   [-73.9811, 40.69977],
   [-73.98276, 40.69983],
   [-73.98308, 40.69984],
   [-73.98309, 40.69979],
   [-73.98312, 40.69979],
   [-73.98438, 40.69981],
   [-73.98521, 40.69991],
   [-73.98558, 40.69995],
   [-73.98596, 40.69997],
   [-73.9862, 40.69999],
   [-73.98637, 40.7],
   [-73.9864, 40.69999],
   [-73.98638, 40.69994],
   [-73.98634, 40.69985],
   [-73.98618, 40.69985],
   [-73.98599, 40.69984],
   [-73.98567, 40.6997

# User 1 to GeoJSON

In [50]:
np.unique(df[df.Device == 1026].Trip)

array([1864523, 1864524, 1864525, 1864526, 1864527, 1864528])

In [51]:
# Rows of trip 1864524 for device 1026. A single combined mask replaces the
# chained df[mask_a][mask_b] form, which applies a mask built on the full frame
# to an already-filtered frame and emits a "Boolean Series key will be
# reindexed" warning.
df[(df.Device == 1026) & (df.Trip == 1864524)]

  if __name__ == '__main__':


Unnamed: 0,Device,Trip,Latitude,Longitude,Speed,Heading,LocalTime
11170,1026,1864524,42.26004,-83.73719,4.80,124.4500,2013-05-01 09:41:44
11171,1026,1864524,42.26004,-83.73718,4.72,123.6375,2013-05-01 09:41:44
11172,1026,1864524,42.26004,-83.73717,4.74,122.8125,2013-05-01 09:41:44
11173,1026,1864524,42.26002,-83.73711,4.52,104.5375,2013-05-01 09:55:11
11174,1026,1864524,42.26002,-83.73711,4.60,103.4500,2013-05-01 09:55:11
11175,1026,1864524,42.26002,-83.73711,4.62,101.0375,2013-05-01 09:55:11
11176,1026,1864524,42.26002,-83.73710,4.80,101.4875,2013-05-01 09:55:11
11177,1026,1864524,42.26002,-83.73709,5.06,102.2875,2013-05-01 09:55:11
11178,1026,1864524,42.26002,-83.73708,5.06,100.3000,2013-05-01 09:55:11
11179,1026,1864524,42.26002,-83.73708,4.90,97.6250,2013-05-01 09:55:11


In [62]:
# Select the trip once with a combined mask (the chained df[a][b] form emits a
# reindexing warning and was evaluated twice), then pair the columns as
# [lon, lat] lists.
trip_1864524 = df[(df.Device == 1026) & (df.Trip == 1864524)]
coords_ = [list(pair) for pair in zip(trip_1864524['Longitude'].values,
                                      trip_1864524['Latitude'].values)]

  if __name__ == '__main__':


In [57]:
type_ = 'Feature'

In [58]:
# Timestamps of the first and last rows of trip 1864524 for device 1026.
# One combined mask avoids the reindexing warning of chained df[a][b], and
# .iloc avoids copying the whole column into a list twice.
# NOTE(review): these are the first/last rows, which equal the min/max times
# only if the rows are in chronological order - confirm.
trip_times = df.loc[(df.Device == 1026) & (df.Trip == 1864524), 'LocalTime']
mintime = trip_times.iloc[0]
maxtime = trip_times.iloc[-1]

  if __name__ == '__main__':
  from ipykernel import kernelapp as app


In [59]:
(maxtime - mintime) / np.timedelta64(1, 's')

911.0

In [61]:
# Metadata for the trip: elapsed seconds between the two timestamps, the
# timestamps themselves, and a fixed travel mode.
properties_ = dict(
    duration=(maxtime - mintime) / np.timedelta64(1, 's'),
    start=mintime,
    end=maxtime,
    mode='driving',
)

In [63]:
# Build the GeoJSON Feature for the trip. The geometry uses `coords_` (the
# [lon, lat] list built above); the previous reference to `coords` is not
# defined anywhere in this notebook and raises NameError on a fresh kernel.
# Timestamps are serialised as strings trimmed to millisecond precision.
feature = Feature(
    geometry=LineString(coords_),
    properties={
        'mode': 'driving',
        'duration': (maxtime - mintime) / np.timedelta64(1, 's'),
        'start': mintime.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3],
        'end': maxtime.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3],
    },
)

In [66]:
# Collect the features to export; only the single trip feature so far.
results = [feature]

In [67]:
# Wrap the collected features in a FeatureCollection and write it out as JSON.
featureCollection = FeatureCollection(results)

with open('data/output.geojson', 'w') as geojson_out:
    json.dump(featureCollection, geojson_out)

# User 2

In [None]:
to_map_line(df, device_ids[1])

# User 3

In [None]:
to_map_line(df, device_ids[2])

# User 4

In [None]:
to_map_line(df, device_ids[3])

# User 5

In [None]:
to_map_line(df, device_ids[4], zoom=11)

# User 6

In [None]:
to_map_line(df, device_ids[5])

# User 7

In [None]:
to_map_line(df, device_ids[6], zoom=10)

# User 8

In [None]:
to_map_line(df, device_ids[7])

# Animate User 1

# User 9

In [None]:
raw_data[raw_data['Device'] == device_ids[1]]

In [None]:
to_map_line(raw_data, device_ids[1])

In [None]:
#feature = Feature(geometry=LineString([...]), properties={'device': device, 'duration': tripduration,'start': starttime.strftime('%Y-%m-%d %H:%M:%S'), 'end': stoptime.strftime('%Y-%m-%d %H:%M:%S')})

In [None]:
df = raw_data.copy()

In [None]:
device_ids = list(np.unique(df['Device']))

In [None]:
x = df[df['Device'] == device_ids[0]]

In [None]:
lats = [i for i in x['Latitude']]

In [None]:
lons = [i for i in x['Longitude']]

In [None]:
# Materialise the (lat, lon) pairs: on Python 3 zip() returns a one-shot
# iterator, which is empty after its first traversal.
coordinates = list(zip(lats, lons))

In [None]:
# Use folium to plot the geojson on a map
my_map = folium.Map(location=[42.25979, -83.73022],
                   tiles='Stamen Toner', zoom_start=12)
my_PolyLine=folium.PolyLine(locations=coordinates, weight=4)
my_map.add_child(my_PolyLine)

my_map

In [None]:
# One list of (lon, lat) tuples per (Device, Trip) pair.
# groupby visits the groups in sorted (Device, Trip) order - the same order as
# the previous nested np.unique loops - but splits the frame in a single pass
# instead of re-scanning it with a boolean mask for every device and trip.
results = [
    list(zip(trip['Longitude'], trip['Latitude']))
    for _, trip in df.groupby(['Device', 'Trip'], sort=True)
]

In [None]:
#featureCollection = FeatureCollection(results[0:2])

#features = featureCollection['features']

def features_to_LineString(features):
    """Convert route geometries into lists of (lat, lon) tuples.

    Parameters
    ----------
    features : sequence of mappings, each with a 'coordinates' entry holding
        positions whose element 0 is the longitude and element 1 the latitude.

    Returns
    -------
    A list with exactly one entry per input feature; each entry is the list of
    (lat, lon) tuples for that feature, in the original point order.
    """
    routesLineString = []
    for feature in features:
        # Swap each [lon, lat] position to (lat, lon). The route is appended
        # once per feature; a redundant inner loop previously appended the same
        # route once per coordinate, duplicating every line string.
        coords = [(position[1], position[0]) for position in feature['coordinates']]
        routesLineString.append(coords)
    return routesLineString
            
#lineString = features_to_LineString(features)

In [None]:
results[0]

In [None]:
len(results)

In [None]:
# Use folium to plot every trip on a single map.
my_map = folium.Map(location=[42.25979, -83.73022],
                   tiles='Stamen Toner', zoom_start=12)
# `results` stores each trip as (lon, lat) pairs, while folium expects
# (lat, lon); swap the order so the lines are not drawn at transposed coordinates.
trip_paths = [[(lat, lon) for lon, lat in trip] for trip in results]
my_PolyLine=folium.PolyLine(locations=trip_paths, weight=4)
my_map.add_child(my_PolyLine)

my_map

# Transform the Input Data

In [None]:
df = pd.DataFrame()

Create `type_id` and `trip_id` columns.

In [None]:
df['type_id'] = raw_data['Device']
df['trip_id'] = raw_data['Trip']

Create `starttime` column.

In [None]:
# Each sample's own timestamp, parsed to datetime, is the segment start.
df['starttime'] = pd.to_datetime(raw_data['LocalTime'])

Create `stoptime` column.

In [None]:
# The next sample's timestamp is the segment stop; the final row has no
# successor and is forward-filled. .ffill() replaces fillna(method='ffill'),
# which is deprecated in recent pandas.
# NOTE(review): shift(-1) is applied to the whole frame without grouping, so
# the last sample of one trip is paired with the first sample of whatever row
# follows it - confirm this is intended.
df['stoptime'] = raw_data['LocalTime'].shift(-1).ffill()
df['stoptime'] = pd.to_datetime(df['stoptime'])

Create `tripduration` column.

In [None]:
# Segment length in seconds, as a float.
df['tripduration'] = (df['stoptime'] - df['starttime']) / np.timedelta64(1, 's')

Create `start_lat` column.

In [None]:
df['start_lat'] = raw_data['Latitude']

Create `start_lon` column.

In [None]:
df['start_lon'] = raw_data['Longitude']

Create `end_lat` column.

In [None]:
# The next row's latitude is the segment end; the final row is forward-filled.
# .ffill() replaces the deprecated fillna(method='ffill').
df['end_lat'] = df['start_lat'].shift(-1).ffill()

Create `end_lon` column.

In [None]:
# The next row's longitude is the segment end; the final row is forward-filled.
# .ffill() replaces the deprecated fillna(method='ffill').
df['end_lon'] = df['start_lon'].shift(-1).ffill()

# Clean the Transformed Data

In [None]:
df.head()

Drop duplicate rows.

In [None]:
df = df.drop_duplicates(['type_id', 'trip_id', 'starttime', 'stoptime'])

Drop rows with durations of 10 seconds or more (currently disabled: the filter in the cell below is commented out).

In [None]:
#df = df[df['tripduration'] < 10.0]

Save to csv.

In [None]:
df.to_csv('data/trips.csv')

# Explore the Transformed Data

In [None]:
df.head()

In [None]:
df.shape

In [None]:
np.unique(df['type_id'])