Импортируем всё, что нужно

In [None]:
import numpy as np
import pandas as pd

import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

import ipywidgets as ipyw
import ipyleaflet as ipyl
from IPython.display import display

from shapely.geometry import Polygon, mapping
import json

# Загрузим данные

In [None]:
# Region table from a semicolon-separated file, indexed by its 'region' column.
regions = pd.read_csv('./regions.csv', sep=';').set_index('region')

# The three June tables are read the same way: a (region, index) MultiIndex
# with the 'index' column parsed as datetimes, sorted by that index right
# after loading.
june_predictions = pd.read_csv(
    './june_predictions.csv',
    index_col=['region', 'index'],
    parse_dates=['index'],
).sort_index()

june_y = pd.read_csv(
    './y_true_values.csv',
    index_col=['region', 'index'],
    parse_dates=['index'],
).sort_index()

june_errors = pd.read_csv(
    './june_errors.csv',
    index_col=['region', 'index'],
    parse_dates=['index'],
).sort_index()

# Посмотрим на ошибки в каждом из регионов

Упорядочим регионы в порядке убывания средней ошибки

In [None]:
# Row-wise mean over all error columns, averaged again within each value of
# index level 0, then ordered from the largest mean error to the smallest.
regions_ordered = (
    june_errors
    .mean(axis=1)
    .groupby(level=0)
    .mean()
    .sort_values(ascending=False)
)

In [None]:
# Slider over the regions, in the order given by regions_ordered.
region_selector = ipyw.SelectionSlider(
    description=u'Выберите регион:',
    options=list(regions_ordered.index),
    orientation='horizontal',
    continuous_update=False,
    readout=True,
    disabled=False,
)
display(region_selector)

# One toggle button per forecast horizon: the button label is the number of
# hours (1..6) and the widget value is the matching 'err_y<N>' column name.
horizon_selector = ipyw.ToggleButtons(
    description=u'Прогноз (часов):',
    options=[(h, 'err_y{}'.format(h)) for h in range(1, 7)],
    value='err_y1',
    disabled=False,
)
display(horizon_selector)

# HTML widget that receives the rendered table of the largest errors.
tbl_top_errors = ipyw.HTML()
tbl_top_errors.add_class('rendered_html')
display(tbl_top_errors)


def wire_up_dependencies(region, field):
    """Render the ten largest ``field`` values of ``region`` into the table.

    Args:
        region: label used to select rows from ``june_errors``.
        field: name of the error column to sort by and to display.

    The rows are sorted by ``field`` in descending order, the first ten are
    kept, and only the ``field`` column is written (as HTML) into
    ``tbl_top_errors``.
    """
    worst_rows = (
        june_errors.loc[region]
        .sort_values(field, ascending=False)
        .head(10)
    )
    tbl_top_errors.value = worst_rows[[field]].to_html()


def on_region_change(change):
    """Observer for the region slider: refresh the table for the new region.

    Args:
        change: change notification; its ``'new'`` entry is used as the
            region, combined with the currently selected horizon.
    """
    wire_up_dependencies(change['new'], horizon_selector.value)


def on_horizon_change(change):
    """Observer for the horizon buttons: refresh the table for the new column.

    Args:
        change: change notification; its ``'new'`` entry is used as the
            error column, combined with the currently selected region.
    """
    selected_column = change['new']
    wire_up_dependencies(region_selector.value, selected_column)


# Subscribe each selector's handler to changes of its 'value' trait.
for _widget, _handler in (
        (region_selector, on_region_change),
        (horizon_selector, on_horizon_change)):
    _widget.observe(_handler, names='value')

# Fill the table once for the initial selection.
wire_up_dependencies(region_selector.value, horizon_selector.value)

# Построим график прогнозов

Инициализируем plotly для работы в ноутбуке

In [None]:
# Switch plotly's offline mode on for this notebook; this runs before any of
# the iplot() calls made by the cells below.
init_notebook_mode()

In [None]:
from pandas.tseries.offsets import Hour

# Hourly grid whose first and last timestamps bound every plotting window.
# The frequency is given as an offset object rather than the 'H' string
# alias, which newer pandas releases deprecate.
index = pd.date_range('2016-06-01', '2016-06-30 17:00', freq=Hour())


def get_period(date):
    """Return a label slice for the plotting window centred on ``date``.

    The window normally runs from 84 hours before ``date`` to 83 hours after
    it. If that would start before the first timestamp of ``index`` or end
    after its last one, the window is moved to begin at the first timestamp,
    or to end at the last one.

    Args:
        date: anything ``pd.to_datetime`` accepts (e.g. ``'2016-06-15'``).

    Returns:
        ``slice(start, stop)`` whose bounds are the window limits converted
        with ``str``.
    """
    def as_slice(period_start, period_end):
        return slice(str(period_start), str(period_end))

    first, last = index.min(), index.max()

    period_start = pd.to_datetime(date) - Hour(84)
    if period_start < first:
        # NOTE(review): clamped windows span 168 hours while the regular
        # window spans 167 (84 + 83); kept as is to preserve behaviour.
        return as_slice(first, first + Hour(168))

    period_end = pd.to_datetime(date) + Hour(83)
    if period_end > last:
        period_end = last
        period_start = period_end - Hour(168)

    return as_slice(period_start, period_end)


def compare_forecast_to_actual(region, date, field):
    """Redraw the forecast-versus-actual chart inside ``plot_output``.

    Args:
        region: label used to select rows from ``june_predictions`` and
            ``june_y``.
        date: date passed to ``get_period`` to obtain the time window.
        field: column plotted from both tables.

    The output widget is cleared first. Two 'lines+markers' traces (actual
    values and forecast) are then drawn for the selected window.
    """
    plot_output.clear_output()

    with plot_output:
        window = get_period(date)
        forecast = june_predictions.loc[region].loc[window]
        true_y = june_y.loc[region].loc[window]

        actual_trace = go.Scatter(
            x=true_y.index,
            y=true_y[field],
            mode='lines+markers',
            name='{} - Actual'.format(field),
        )
        forecast_trace = go.Scatter(
            x=forecast.index,
            y=forecast[field],
            mode='lines+markers',
            name='{} - Forecast'.format(field),
        )

        layout = go.Layout(title='A Simple Plot', width=800, height=640)
        fig = go.Figure(data=[actual_trace, forecast_trace], layout=layout)

        # Plot the figure rather than the bare trace list; otherwise the
        # layout built above (title and size) is never applied.
        iplot(fig, show_link=False)

In [None]:
# Slider over the regions, in the order given by regions_ordered.
plot_region_selector = ipyw.SelectionSlider(
    description=u'Выберите регион:',
    options=list(regions_ordered.index),
    orientation='horizontal',
    continuous_update=False,
    readout=True,
    disabled=False,
)
display(plot_region_selector)

# Slider over the calendar days 2016-06-01 .. 2016-06-30, as strings.
plot_date_selector = ipyw.SelectionSlider(
    description=u'Выберите дату:',
    options=list(pd.date_range('2016-06-01', '2016-06-30').astype(str)),
    orientation='horizontal',
    continuous_update=False,
    readout=True,
    disabled=False,
)
display(plot_date_selector)

# One toggle button per forecast horizon: the button label is the number of
# hours (1..6) and the widget value is the matching 'y<N>' column name.
plot_horizon_selector = ipyw.ToggleButtons(
    description=u'Прогноз (часов):',
    options=[(h, 'y{}'.format(h)) for h in range(1, 7)],
    value='y1',
    disabled=False,
)
display(plot_horizon_selector)

# Output area that the chart is drawn into.
plot_output = ipyw.Output()
display(plot_output)


def plot_on_region_change(change):
    """Observer for the region slider: redraw the chart for the new region.

    Args:
        change: change notification; its ``'new'`` entry is used as the
            region, with the currently selected date and horizon.
    """
    compare_forecast_to_actual(
        change['new'],
        plot_date_selector.value,
        plot_horizon_selector.value,
    )


def plot_on_date_change(change):
    """Observer for the date slider: redraw the chart for the new date.

    Args:
        change: change notification; its ``'new'`` entry is used as the
            date, with the currently selected region and horizon.
    """
    selected_date = change['new']
    compare_forecast_to_actual(
        plot_region_selector.value,
        selected_date,
        plot_horizon_selector.value,
    )


def plot_on_horizon_change(change):
    """Observer for the horizon buttons: redraw the chart for the new column.

    Args:
        change: change notification; its ``'new'`` entry is used as the
            plotted column, with the currently selected region and date.
    """
    selected_column = change['new']
    compare_forecast_to_actual(
        plot_region_selector.value,
        plot_date_selector.value,
        selected_column,
    )


# Subscribe each selector's handler to changes of its 'value' trait.
for _widget, _handler in (
        (plot_region_selector, plot_on_region_change),
        (plot_date_selector, plot_on_date_change),
        (plot_horizon_selector, plot_on_horizon_change)):
    _widget.observe(_handler, names='value')

# Draw the chart once for the initial selection.
compare_forecast_to_actual(
    plot_region_selector.value,
    plot_date_selector.value,
    plot_horizon_selector.value,
)

# Интерактивная карта

Средняя ошибка по региону (по всем y)

In [None]:
# Row-wise mean over all error columns, re-indexed as (index, region) so the
# timestamp becomes the outer level, and sorted by that index.
mean_errors = (
    june_errors
    .mean(axis=1)
    .reset_index()
    .set_index(['index', 'region'])
    .sort_index()
)
# The single value column is left unnamed by the steps above; name it.
mean_errors.columns = ['mean_error']

Будем использовать метод для генерации GeoJSON

In [None]:
def get_geo_data(timestamp):
    """Build the GeoJSON FeatureCollection for one timestamp.

    Args:
        timestamp: label used to select rows from ``mean_errors``.

    Returns:
        ``{'type': 'FeatureCollection', 'features': [...]}`` with one feature
        per selected row, produced by ``region_to_geofeature``.
    """
    frame = mean_errors.loc[timestamp].join(regions)
    # Drop the outer index level so that only the inner one labels the rows.
    frame.index = frame.index.droplevel(0)
    # Split the mean errors into 5 bins; labels=False stores the bin number.
    frame.loc[:, 'error_group'] = pd.cut(frame['mean_error'], 5, labels=False)

    features = frame.apply(region_to_geofeature, axis=1).values
    return {'type': 'FeatureCollection', 'features': list(features)}

Функция для конвертации региона в его GeoJSON представление

In [None]:
def region_to_geofeature(row):
    """Convert one region row into a styled GeoJSON ``Feature`` dict.

    Args:
        row: a row providing ``north``, ``south``, ``west``, ``east`` and
            ``error_group`` entries; ``row.name`` becomes the feature id.

    Returns:
        A dict with ``type``, ``id``, ``geometry`` (the rectangle built from
        the four bounds) and ``properties.style`` (grey outline and a fill
        colour chosen by ``error_group``).

    Raises:
        KeyError: if ``error_group`` is not one of 0..4.
    """
    # Fill colour for each error group.
    colormap = {
        0: '#ffffb2',
        1: '#fecc5c',
        2: '#fd8d3c',
        3: '#f03b20',
        4: '#bd0026',
    }

    # Rectangle corners in the order NW, SW, SE, NE.
    lats = row[['north', 'south', 'south', 'north']].values
    lons = row[['west', 'west', 'east', 'east']].values

    # Materialise the (lon, lat) pairs: on Python 3 zip() is a one-shot
    # iterator, and a concrete sequence is the safe input for Polygon.
    poly = Polygon(list(zip(lons, lats)))

    return {
        'type': 'Feature',
        'id': row.name,
        'geometry': mapping(poly),
        'properties': {
            'style': {
                'color': 'grey',
                'weight': 1,
                'fillColor': colormap[row['error_group']],
                'fillOpacity': 0.6,
            },
        },
    }

Статистика для региона

In [None]:
def region_stats(region, timestamp):
    """Tabulate forecast, true value and error for one region and timestamp.

    Args:
        region: label used to select rows from the three June tables.
        timestamp: label used to select the entry within that region.

    Returns:
        A DataFrame indexed by ``june_y.columns`` with the columns
        ``Forecast``, ``True value`` and ``Error``, in that order.
    """
    predicted = june_predictions.loc[region].loc[timestamp]
    actual = june_y.loc[region].loc[timestamp]
    errors = june_errors.loc[region].loc[timestamp]
    # Give the errors the same labels as the true values so that all three
    # line up when combined into one frame.
    errors.index = actual.index

    column_order = ['Forecast', 'True value', 'Error']
    table = pd.DataFrame(
        dict(zip(column_order, (predicted, actual, errors))),
        index=june_y.columns)
    return table[column_order]

Вся интерактивная часть карты собрана здесь

In [None]:
# Coordinates the map is centred on.
# NOTE(review): "esb" presumably stands for the Empire State Building - confirm.
esb_lat = 40.7484
esb_lon = -73.9857

# Hourly timestamps offered by the date slider, as strings. The frequency is
# given as an offset object rather than the 'H' string alias, which newer
# pandas releases deprecate.
hours = pd.date_range(
    '2016-05-31 23:00', '2016-06-30 17:00', freq=pd.offsets.Hour())
hours_list = list(hours.astype(str))
map_date_selector = ipyw.SelectionSlider(
    options=hours_list,
    description=u'Выберите дату:',
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    layout=ipyw.Layout(width='100%'))


# Label that shows the id of the hovered region.
map_region_label = ipyw.Label(description=u'Регион:')

# HTML widget that receives the statistics table of the hovered region.
tbl_region_info = ipyw.HTML()
tbl_region_info.add_class('rendered_html')

# NOTE: this name shadows the builtin map(); it is kept because other cells
# of the notebook refer to the widget by this name.
map = ipyl.Map(
    center=[esb_lat, esb_lon],
    zoom=11,
    min_zoom=10,
    layout=ipyw.Layout(
        height='500px', align_self='stretch'), )


def hover_handler(event=None, id=None, properties=None):
    """Hover callback: show the hovered region's id and statistics.

    Args:
        event: unused.
        id: identifier of the hovered feature; shown in the label and passed
            to ``region_stats`` together with the selected timestamp.
        properties: unused.
    """
    map_region_label.value = str(id)
    stats = region_stats(id, map_date_selector.value)
    tbl_region_info.value = stats.to_html()


def draw_regions(timestamp):
    """Replace the map's GeoJSON layer with the regions for ``timestamp``.

    Args:
        timestamp: passed to ``get_geo_data`` to build the new layer.

    The first existing ``ipyl.GeoJSON`` layer, if any, is removed. A new
    layer with ``hover_handler`` attached is then added to the map.
    """
    # Only the first GeoJSON layer is removed; this function adds exactly one
    # per call, right after removing the previous one.
    stale_layer = next(
        (lyr for lyr in map.layers if isinstance(lyr, ipyl.GeoJSON)), None)
    if stale_layer is not None:
        map.remove_layer(stale_layer)

    geo_data = get_geo_data(timestamp)
    layer = ipyl.GeoJSON(data=geo_data, hover_style={'fillColor': 'blue'})
    layer.on_hover(hover_handler)
    map.add_layer(layer)


def map_on_date_change(change):
    """Observer for the map date slider: redraw regions for the new value.

    Args:
        change: change notification; its ``'new'`` entry is passed to
            ``draw_regions``.
    """
    draw_regions(change['new'])


# Redraw the regions whenever the slider's 'value' trait changes.
map_date_selector.observe(map_on_date_change, names='value')

Покажем на карте регионы с высокой средней ошибкой (темным цветом). Статистика по выделенному региону приведена ниже.

In [None]:
# Show the date slider followed by the map itself.
display(map_date_selector, map)

# Draw the regions once for the slider's initial value.
draw_regions(map_date_selector.value)

In [None]:
# Show the hovered-region label followed by its statistics table.
display(map_region_label, tbl_region_info)