**Задание**

* Загрузите агрегированные данные о поездках в мае 2016. Просуммируйте общее количество поездок такси из каждой географической зоны и посчитайте количество ячеек, из которых в мае не было совершено ни одной поездки.
* Нарисуйте статическую карту Нью-Йорка. Поставьте на карте точку там, где находится Эмпайр-Стейт-Билдинг.
* Поверх статической карты Нью-Йорка визуализируйте данные о поездках из каждой ячейки так, чтобы цветовая шкала, в которую вы окрашиваете каждую ячейку, показывала суммарное количество поездок такси из неё.
* Вставьте интерактивную карту Нью-Йорка — такую, которую можно прокручивать и увеличивать. Поставьте метку там, где находится статуя свободы.
* Нарисуйте на интерактивной карте Нью-Йорка ячейки так, чтобы их цвет показывал среднее за месяц количество поездок такси в час из этой зоны.
* Чтобы не выбирать из всех 2500 ячеек вручную, отфильтруйте ячейки, из которых в мае совершается в среднем меньше 5 поездок в час. Посчитайте количество оставшихся. Проверьте на карте, что среди этих ячеек нет таких, из которых поездки на самом деле невозможны.
* Сохраните ноутбук в формате html, запакуйте ipynb и html в один архив и загрузите его в форму (html облегчит задачу проверки вашим рецензентам — в ipynb динамические карты часто не сохраняются, а для выполнения кода рецензентам придётся устанавливать все библиотеки).



In [0]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from IPython.display import clear_output

from google.colab import drive
drive.mount('/content/drive')
# Wipe whatever output the call above left in the cell.
clear_output()

In [0]:
# Install the geojson package through a shell escape, then wipe the installer
# output from the cell.
!pip install geojson
clear_output()

In [0]:
import folium
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
from geojson import Polygon, Feature, FeatureCollection, dump





**Loading regions data**

In [0]:
# Load the grid description: one row per region with its west/east/south/north
# bounds.
regions_path = '/content/drive/My Drive/Colab Notebooks/data/regions.csv'
# The file is semicolon-separated. The separator is passed by keyword because
# newer pandas releases accept only the path as a positional argument.
regions = pd.read_csv(regions_path, sep=";")
regions.head()

Unnamed: 0,region,west,east,south,north
0,1,-74.25559,-74.244478,40.49612,40.504508
1,2,-74.25559,-74.244478,40.504508,40.512896
2,3,-74.25559,-74.244478,40.512896,40.521285
3,4,-74.25559,-74.244478,40.521285,40.529673
4,5,-74.25559,-74.244478,40.529673,40.538061


**Loading aggregated data**

In [0]:
# Load the hourly trip counts: one row per timestamp, one column per region.
# The path is absolute (same Drive mount point as the regions file) so that the
# cell does not depend on the notebook's current working directory.
data_path = '/content/drive/My Drive/Colab Notebooks/data/data_agregated.csv'

# The first CSV column holds the timestamps; read it straight into the index.
data = pd.read_csv(data_path, index_col=0)
data = data.rename_axis("Regions", axis="columns")
data = data.rename_axis("Date&time")

data.head()

Regions,Region_1,Region_2,Region_3,Region_4,Region_5,Region_6,Region_7,Region_8,Region_9,Region_10,Region_11,Region_12,Region_13,Region_14,Region_15,Region_16,Region_17,Region_18,Region_19,Region_20,Region_21,Region_22,Region_23,Region_24,Region_25,Region_26,Region_27,Region_28,Region_29,Region_30,Region_31,Region_32,Region_33,Region_34,Region_35,Region_36,Region_37,Region_38,Region_39,Region_40,...,Region_2461,Region_2462,Region_2463,Region_2464,Region_2465,Region_2466,Region_2467,Region_2468,Region_2469,Region_2470,Region_2471,Region_2472,Region_2473,Region_2474,Region_2475,Region_2476,Region_2477,Region_2478,Region_2479,Region_2480,Region_2481,Region_2482,Region_2483,Region_2484,Region_2485,Region_2486,Region_2487,Region_2488,Region_2489,Region_2490,Region_2491,Region_2492,Region_2493,Region_2494,Region_2495,Region_2496,Region_2497,Region_2498,Region_2499,Region_2500
Date&time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
2016-05-01 00:00:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2016-05-01 01:00:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2016-05-01 02:00:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2016-05-01 03:00:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2016-05-01 04:00:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


**Counting regions with no trips**

In [0]:
# Sum each region's column over all rows and count the regions whose total is
# zero, i.e. regions from which no trip was recorded.
zero_trips_regions = (data.sum() == 0).sum()
# A single pre-formatted argument prints the same text under Python 2 and 3
# (the old print statement is a syntax error on Python 3).
print("Amount of regions with no trips: {}".format(zero_trips_regions))

Amount of regions with no trips: 1283


**Plotting NY map with Folium, placing the Empire State Building marker on the map:**

In [0]:
# [latitude, longitude] of the Empire State Building; used as the map centre.
ESB = [40.748644, -73.985664]

# Named ny_map rather than map so that the built-in map() is not shadowed for
# the rest of the notebook.
ny_map = folium.Map(location=ESB, zoom_start=10)
folium.Marker(location=ESB, popup="Empire State Building").add_to(ny_map)
ny_map

In [0]:
def get_region_points(df, region):
    """Return GeoJSON polygon coordinates for one rectangular region.

    Args:
        df: table exposing ``west``, ``east``, ``south`` and ``north``
            columns with the bounds of every region.
        region: row label in ``df`` of the region to describe.

    Returns:
        A list containing a single linear ring of ``[x, y]`` positions
        (``west``/``east`` as x, ``north``/``south`` as y), each rounded to
        7 decimals. The ring starts at the north-west corner, visits the
        north-east, south-east and south-west corners, and ends by repeating
        the first position.
    """
    west = round(df.west[region], 7)
    east = round(df.east[region], 7)
    south = round(df.south[region], 7)
    north = round(df.north[region], 7)
    return [[
        [west, north],
        [east, north],
        [east, south],
        [west, south],
        # GeoJSON linear rings must be closed: the last position repeats the
        # first one.
        [west, north],
    ]]

The Folium choropleth layer needs a GeoJSON file that describes the polygon of every zone. Let's build it:

In [0]:
# One GeoJSON feature per row of `regions`: the geometry is the rectangle
# returned by get_region_points and the feature id is the row label plus one.
zone_features = [
    Feature(id=reg_id + 1, geometry=Polygon(get_region_points(regions, reg_id)))
    for reg_id in regions.index
]

geojson = FeatureCollection(zone_features)

# Write the collection to disk; the choropleth layers load it by file name.
with open('./zones.geojson', 'w') as out_file:
    dump(geojson, out_file)

Create a small dataframe that links each GeoJSON feature id to the region's total number of trips:

In [0]:
# Table linking every GeoJSON feature id to the region's total trip count.
# Column labels of `data` look like "Region_<id>", so dropping the prefix
# leaves the numeric id. Building the frame straight from the columns avoids a
# hard-coded row count and a detour of the ids through a float array.
trips_amount_df = pd.DataFrame(
    {
        'reg_id': [int(column[len('Region_'):]) for column in data.columns],
        'trips_amount': data.sum().values,
    },
    columns=['reg_id', 'trips_amount'],
)
# Cast column by column; assigning a whole converted frame to `reg_id` (as the
# previous version did) is rejected by current pandas.
trips_amount_df = trips_amount_df.astype({'reg_id': 'int32', 'trips_amount': 'int64'})
trips_amount_df.head()

Unnamed: 0,reg_id,trips_amount
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0


**Plotting choropleth map with trips total amount:**

In [0]:
# Base map centred on ESB.
choropleth_total_amount = folium.Map(zoom_start=10, location=ESB)

# Colour every zone from zones.geojson by the `trips_amount` value of the row
# whose `reg_id` equals the feature id.
choropleth_total_amount.add_child(
    folium.Choropleth(
        geo_data='./zones.geojson',
        data=trips_amount_df,
        columns=['reg_id', 'trips_amount'],
        key_on='feature.id',
        name='choropleth',
        legend_name='Trips amount',
        fill_color='BuGn',
        fill_opacity=0.6,
        line_opacity=0.1,
    )
)
# Layer switcher for toggling the choropleth overlay.
choropleth_total_amount.add_child(folium.LayerControl())

choropleth_total_amount

The same preparations for plotting mean trips amount choropleth:

In [0]:
# Table linking every GeoJSON feature id to the region's mean trips per row of
# `data` (one row per hour). Column labels look like "Region_<id>"; the frame
# is built straight from them, so no row count has to be hard-coded.
mean_trips_amount_df = pd.DataFrame(
    {
        'reg_id': [int(column[len('Region_'):]) for column in data.columns],
        'mean_trips_amount': data.mean().values,
    },
    columns=['reg_id', 'mean_trips_amount'],
)
mean_trips_amount_df = mean_trips_amount_df.astype(
    {'reg_id': 'int16', 'mean_trips_amount': 'float64'}
)
mean_trips_amount_df.head()

Unnamed: 0,reg_id,mean_trips_amount
0,1,0.0
1,2,0.0
2,3,0.0
3,4,0.0
4,5,0.0


**Plotting choropleth with mean trips per hour amount:**

In [0]:
# Base map centred on ESB.
choropleth_mean = folium.Map(zoom_start=10, location=ESB)

# Colour every zone from zones.geojson by the `mean_trips_amount` value of the
# row whose `reg_id` equals the feature id.
choropleth_mean.add_child(
    folium.Choropleth(
        geo_data='./zones.geojson',
        data=mean_trips_amount_df,
        columns=['reg_id', 'mean_trips_amount'],
        key_on='feature.id',
        name='choropleth',
        legend_name='Mean trips amount',
        fill_color='BuGn',
        fill_opacity=0.6,
        line_opacity=0.1,
    )
)
# Layer switcher for toggling the choropleth overlay.
choropleth_mean.add_child(folium.LayerControl())

choropleth_mean

**Filter regions with mean trips per hour amount < 5:**

In [0]:
# Row labels of the regions whose mean is below 5 trips.
few_trips_regions = mean_trips_amount_df.index[
    (mean_trips_amount_df['mean_trips_amount'] < 5.0).values
]
# Remove those rows in place; only the busier regions stay in the table.
mean_trips_amount_df.drop(labels=few_trips_regions, axis=0, inplace=True)

In [0]:
# A single pre-formatted argument prints the same text under Python 2 and 3
# (the old print statement is a syntax error on Python 3).
print("Regions amount after dropping few trips regions: {}".format(mean_trips_amount_df.shape[0]))

Regions amount after dropping few trips regions: 102


**Plotting only high load regions with choropleth:**

In [0]:
# One GeoJSON feature per region currently listed in mean_trips_amount_df.
# The feature id is reg_id itself; the matching row of `regions` is looked up
# at reg_id - 1.
zone_features = [
    Feature(id=reg_id, geometry=Polygon(get_region_points(regions, reg_id - 1)))
    for reg_id in mean_trips_amount_df.reg_id
]

geojson = FeatureCollection(zone_features)

# Write the reduced collection to its own file for the next map.
with open('./high_load_zones.geojson', 'w') as out_file:
    dump(geojson, out_file)

In [0]:
# Base map centred on ESB.
choropleth_high_load_zones = folium.Map(zoom_start=10, location=ESB)

# Colour every zone from high_load_zones.geojson by the `mean_trips_amount`
# value of the row whose `reg_id` equals the feature id.
choropleth_high_load_zones.add_child(
    folium.Choropleth(
        geo_data='./high_load_zones.geojson',
        data=mean_trips_amount_df,
        columns=['reg_id', 'mean_trips_amount'],
        key_on='feature.id',
        name='choropleth',
        legend_name='Mean trips per hour amount in high load zones',
        fill_color='BuGn',
        fill_opacity=0.6,
        line_opacity=0.1,
    )
)
# Layer switcher for toggling the choropleth overlay.
choropleth_high_load_zones.add_child(folium.LayerControl())

choropleth_high_load_zones

That's all folks!