In [1]:
import folium
import pandas as pd
import numpy as np
from ipywidgets import interact
import matplotlib.pyplot as plt

In [2]:
def date_parser(x):
    """Parse the timestamp encoded in an underscore-separated identifier.

    Parameters
    ----------
    x : str
        Identifier whose second ``_``-separated field is a date string and
        whose third field is an integer hour (assumed layout, e.g.
        ``'1075_2016-06-01_5_1'`` -- confirm against the input file).

    Returns
    -------
    pandas.Timestamp
        The parsed date shifted forward by the given number of hours.

    Raises
    ------
    IndexError
        If ``x`` has fewer than three ``_``-separated fields.
    ValueError
        If the hour field is not an integer.
    """
    # Split once; only the date (field 1) and the hour (field 2) are needed.
    fields = x.split('_')
    # Adding the hour as a Timedelta avoids hand-built zero padding, so both
    # '5' and '05' yield 05:00 (the string-concatenation approach turned an
    # already padded '05' into '005').
    return pd.to_datetime(fields[1]) + pd.Timedelta(hours=int(fields[2]))


def interact_map(day, hour, predictions, real_data):
    """Build a folium figure with one choropleth layer per prediction horizon.

    Parameters
    ----------
    day : str or datetime-like
        Value compared against ``predictions.date`` to select rows.
    hour : int
        Value compared against ``predictions.hour`` to select rows.
    predictions : pandas.DataFrame
        Must provide the columns ``date``, ``hour``, ``predict_time``,
        ``region`` and ``rides``.
    real_data : pandas.DataFrame
        Not used when building the map; kept so existing callers that pass
        four arguments keep working.

    Returns
    -------
    folium.Figure
        Figure containing the map, one hidden-by-default choropleth layer for
        every distinct ``predict_time`` and a layer control.

    Notes
    -----
    Reads the notebook-level names ``esb_lat``, ``esb_lng`` (initial map
    centre) and ``regs_json`` (region geometries), which must be defined
    before this function is called.
    """
    f = folium.Figure(width=750, height=750)

    m = folium.Map(location=[esb_lat, esb_lng], zoom_start=11, min_zoom=9).add_to(f)

    base_map = folium.FeatureGroup(name='Basemap', overlay=True, control=False)
    folium.TileLayer(tiles='OpenStreetMap').add_to(base_map)
    base_map.add_to(m)

    # The day/hour selection does not change inside the loop, so filter the
    # (potentially large) frame once instead of rebuilding the masks per layer.
    selected = predictions[(predictions.date == day) & (predictions.hour == hour)]

    for i in predictions.predict_time.unique():
        heat_map = selected[selected.predict_time == i]
        folium.features.Choropleth(geo_data=regs_json,
                                   data=heat_map,
                                   columns=['region', 'rides'],
                                   # Matches the top-level 'region' member that
                                   # regs_json stores on every feature.
                                   key_on="feature.region",
                                   line_weight=0,
                                   legend_name=f'{i} hour prediction',
                                   fill_opacity=1,
                                   fill_color='YlOrBr',
                                   nan_fill_opacity=0,
                                   name=f'{i} hour prediction',
                                   show=False,
                                   overlay=False,
                                   bins=6, highlight=True).add_to(m)

    folium.LayerControl().add_to(m)
    return f


def interact_graph(region, pred_hour, predictions, june_real_data):
    """Plot predicted rides for one region and horizon against the real series.

    Parameters
    ----------
    region : int
        Value matched against ``predictions.region``; its string form is used
        as the column label in ``june_real_data``.
    pred_hour : int
        Value matched against ``predictions.predict_time``.
    predictions : pandas.DataFrame
        Must provide ``region``, ``predict_time``, ``date_hour`` and ``rides``.
    june_real_data : pandas.DataFrame
        Frame with one column per region, labelled by ``str(region)``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    is_selected = (predictions.region == region) & (predictions.predict_time == pred_hour)
    selected = predictions[is_selected]

    # Each prediction is drawn at date_hour shifted by (pred_hour - 1) hours.
    times = selected.date_hour + pd.Timedelta(hours=pred_hour-1)
    rides = selected.rides

    plt.figure(figsize=(20, 5))
    plt.plot(times, rides)
    plt.plot(june_real_data.loc[:, str(region)])

    labels = [f'{region} predictions at {pred_hour}-hour model',
              f'{region} real june rides']
    plt.legend(labels, prop={'size': 16})
    plt.show()

Загрузка данных

In [3]:
# First CSV column is used as the index.
data = pd.read_csv('may2016_agg.csv', index_col=0)

# First CSV column is used as the index and parsed as dates.
june_real_data = pd.read_csv('june_clear.csv', parse_dates=True, index_col=0)

# This file is ';'-separated.
regions = pd.read_csv('regions.csv', sep=';')

# Raw predictions: the first column holds the identifiers and 'y' the values
# that the processing cell below turns into the `predictions` frame.
june_raw_data = pd.read_csv('answer.csv')

Обрабатываем данные

In [4]:
%%time
predictions = pd.DataFrame()
predictions['date'] = june_raw_data.iloc[:, 0].apply(lambda x: pd.to_datetime(x.split('_')[1]))
predictions['hour'] = june_raw_data.iloc[:, 0].apply(lambda x: int(x.split('_')[2]))
predictions['region'] = june_raw_data.iloc[:, 0].apply(lambda x: int(x.split('_')[0]))
predictions['predict_time'] = june_raw_data.iloc[:, 0].apply(lambda x: int(x.split('_')[-1]))
predictions['rides'] = june_raw_data['y']
predictions['date_hour'] = june_raw_data.iloc[:, 0].apply(date_parser)

Wall time: 1min 8s


Координаты необходимых мест

In [5]:
# Longitude / latitude extent of the area of interest.
start_lng, end_lng = -74.25559, -73.70001
start_lat, end_lat = 40.49612, 40.91553

# Midpoint of that extent.
ny_lat = (start_lat + end_lat) / 2
ny_lng = (start_lng + end_lng) / 2


# Point used as the initial map centre in interact_map
# (presumably the Empire State Building -- confirm).
esb_lat, esb_lng = 40.748817, -73.985428

# Second reference point (presumably the Statue of Liberty -- confirm).
sof_lat, sof_lng = 40.689167, -74.044583

Представление регионов в json формате для отрисовки на карте

In [6]:
# GeoJSON FeatureCollection with one rectangular polygon per row of `regions`.
# Rows are read positionally: elem[0] is the region id; elem[1]/elem[2] are
# used as x coordinates and elem[3]/elem[4] as y coordinates of the rectangle
# (presumably west/east and south/north -- confirm against regions.csv).
regs_json = {
    'type': "FeatureCollection",
    'features': [
        {
            'type': 'Feature',
            # Top-level copy of the id: interact_map joins on "feature.region".
            'region': elem[0],
            'geometry': {
                'type': 'Polygon',
                "coordinates": [[
                    [elem[2], elem[3]],
                    [elem[1], elem[3]],
                    [elem[1], elem[4]],
                    [elem[2], elem[4]],
                    # GeoJSON linear rings must end on their first position.
                    [elem[2], elem[3]],
                ]],
            },
            'properties': {'region': elem[0]},
        }
        for elem in regions.itertuples(index=False)
    ],
}

Карта с суммарным количеством поездок. Работает медленно, но для choropleth быстрее не получается

In [7]:
# Dropdown options: every day of June 2016 as 'YYYY-MM-DD' strings and the
# sorted distinct hours present in the predictions.
day_options = pd.date_range(start='2016-06-01', end='2016-06-30').strftime('%Y-%m-%d')
hour_options = np.sort(predictions.hour.unique())

# The lambda binds the data frames so the widget only exposes day and hour.
interact(lambda day, hour: interact_map(day, hour, predictions, june_real_data),
         day=day_options,
         hour=hour_options)

interactive(children=(Dropdown(description='day', options=('2016-06-01', '2016-06-02', '2016-06-03', '2016-06-…

<function __main__.<lambda>(day, hour)>

Графики для сравнения предсказаний с реальными значениями.

In [8]:
# Dropdown options: the distinct regions and prediction horizons, in the order
# they first appear in the predictions frame.
region_options = predictions.region.unique()
pred_hour_options = predictions.predict_time.unique()

# The lambda binds the data frames so the widget only exposes the two choices.
interact(lambda region, pred_hour: interact_graph(region, pred_hour, predictions, june_real_data),
         region=region_options,
         pred_hour=pred_hour_options)

interactive(children=(Dropdown(description='region', options=(1075, 1076, 1077, 1125, 1126, 1127, 1128, 1129, …

<function __main__.<lambda>(region, pred_hour)>