## iPyLeaflet
[ipyleaflet](https://github.com/ellisonbg/ipyleaflet) is a bridge between jupyter notebooks and the [leaflet](http://leafletjs.com/)  javascript library for drawing maps.

ipyleaflet comes with a few examples notebooks (this notebook was derived from one) but very little documentation,
for more documentation read the [Leaflet IPA](http://leafletjs.com/reference.html)

For installation directions, see the README on [ipyleaflet](https://github.com/ellisonbg/ipyleaflet)

In [1]:
from ipyleaflet import (
    Map,
    Marker,
    TileLayer, ImageOverlay,
    Polyline, Polygon, Rectangle, Circle, CircleMarker,
    GeoJSON,
    DrawControl
)

## Visualizing the distribution of the observations

## Load the required libraries

In [2]:
import pandas as pd
import numpy as np
import sklearn as sk
import urllib
import math

In [3]:
import findspark
findspark.init()

from pyspark import SparkContext
#sc.stop()
sc = SparkContext(master="local[3]",pyFiles=['lib/numpy_pack.py','lib/computeStats.py'])

from pyspark import SparkContext
from pyspark.sql import *
sqlContext = SQLContext(sc)

In [4]:
import sys
sys.path.append('./lib')

import numpy as np
from numpy_pack import packArray,unpackArray
from computeStats import computeOverAllDist, STAT_Descriptions

In [310]:
### Read the data frame from pickle file

data_dir='../../Data/Weather'
file_index='SBBBBBSB'#BBBSBBBB'
meas='TMIN'# 'SNWD'

from pickle import load

#read statistics
filename=data_dir+'/STAT_%s.pickle'%file_index
STAT,STAT_Descriptions = load(open(filename,'rb'))
print('keys from STAT=',STAT.keys())

#!ls -ld $data_dir/*.parquet

#read data
filename=data_dir+'/decon_%s_%s.parquet'%(file_index,meas)

df=sqlContext.read.parquet(filename)
print(df.count())
df.show(2)

('keys from STAT=', ['TMIN', 'TOBS', 'TMAX', 'SNOW', 'SNWD', 'PRCP'])
2706
+-------------------+------------------+-------------------+---------+--------+--------+---------+-----------+------------------+------------------+------------------+------------------+-----------+---------+------+--------------------+------+
|            coeff_1|           coeff_2|            coeff_3|elevation|   label|latitude|longitude|measurement|             res_1|             res_2|             res_3|          res_mean|    station|total_var|undefs|              vector|  year|
+-------------------+------------------+-------------------+---------+--------+--------+---------+-----------+------------------+------------------+------------------+------------------+-----------+---------+------+--------------------+------+
|-29.431414655516456|191.89314151406467|-57.479782112416274|    460.2|SBBBBBSB| 48.2675|-100.8439|       TMIN|0.9996872038169426|0.9892792103503947|0.9885681573478439|0.8448468256009848|USC0032

In [311]:
sqlContext.registerDataFrameAsTable(df,'weather')

In [312]:
# Plot the high and low snow density areas

Query="SELECT elevation, vector, station, latitude, longitude FROM weather\n\t"
print Query
df1 = sqlContext.sql(Query)
print df1.count(),'rows'
df1.show(5)
rows=df1.rdd.map(lambda row:[unpackArray(row['vector'],np.float16), row['elevation'],row['station'],row['latitude'],row['longitude']]).collect()
print (len(rows), len(rows[0]))

SELECT elevation, vector, station, latitude, longitude FROM weather
	
2706 rows
+---------+--------------------+-----------+--------+---------+
|elevation|              vector|    station|latitude|longitude|
+---------+--------------------+-----------+--------+---------+
|    460.2|[F0 DA A0 DB E8 D...|USC00323686| 48.2675|-100.8439|
|    403.0|[50 D5 18 DA 60 D...|CA005012545|   49.45|   -99.02|
|    445.0|[D8 D8 38 D8 00 C...|CA005012960|   49.83|  -100.95|
|    551.7|[30 D5 80 D4 40 C...|USC00324646| 48.6692|-102.0975|
|    507.5|[E0 D5 30 D4 00 0...|USW00024013| 48.2553|-101.2733|
+---------+--------------------+-----------+--------+---------+
only showing top 5 rows

(2706, 5)


In [313]:
pd_df = pd.DataFrame(rows)
pd_df.columns = ['vector','elevation','station','latitude','longitude']
pd_df.head()

Unnamed: 0,vector,elevation,station,latitude,longitude
0,"[-222.0, -244.0, -189.0, -161.0, -111.0, -183....",460.2,USC00323686,48.2675,-100.8439
1,"[-85.0, -195.0, -140.0, -60.0, -50.0, -35.0, -...",403.0,CA005012545,49.45,-99.02
2,"[-155.0, -135.0, -10.0, -120.0, -30.0, -40.0, ...",445.0,CA005012960,49.83,-100.95
3,"[-83.0, -72.0, -17.0, -56.0, -89.0, 39.0, 17.0...",551.7,USC00324646,48.6692,-102.0975
4,"[-94.0, -67.0, 0.0, -11.0, -78.0, -67.0, 33.0,...",507.5,USW00024013,48.2553,-101.2733


In [314]:
new_col = [np.nanmean(x) for x in pd_df['vector']]
len(new_col)

2706

In [315]:
pd_df['vecSum'] = new_col

In [316]:
pd_df.head()

Unnamed: 0,vector,elevation,station,latitude,longitude,vecSum
0,"[-222.0, -244.0, -189.0, -161.0, -111.0, -183....",460.2,USC00323686,48.2675,-100.8439,-6.527344
1,"[-85.0, -195.0, -140.0, -60.0, -50.0, -35.0, -...",403.0,CA005012545,49.45,-99.02,6.582031
2,"[-155.0, -135.0, -10.0, -120.0, -30.0, -40.0, ...",445.0,CA005012960,49.83,-100.95,-3.671875
3,"[-83.0, -72.0, -17.0, -56.0, -89.0, 39.0, 17.0...",551.7,USC00324646,48.6692,-102.0975,-0.660645
4,"[-94.0, -67.0, 0.0, -11.0, -78.0, -67.0, 33.0,...",507.5,USW00024013,48.2553,-101.2733,3.431641


In [317]:
pd_df = pd_df.replace([np.inf, -np.inf], np.NaN)

In [318]:
station_df = pd_df.groupby(['station', 'elevation', 'latitude', 'longitude'])['vecSum'].mean().reset_index()

In [319]:
station_df

Unnamed: 0,station,elevation,latitude,longitude,vecSum
0,CA004010080,580.0,49.2500,-102.2800,-26.505208
1,CA004011090,629.0,49.7200,-102.0300,-29.518750
2,CA004011160,631.0,49.6300,-102.2700,-18.808153
3,CA004012485,511.0,49.3300,-101.4500,-9.820352
4,CA004014913,633.0,49.6200,-102.1000,-13.054688
5,CA004015045,576.0,49.8300,-101.5200,-9.127960
6,CA004015800,582.0,49.2200,-102.1700,-11.440785
7,CA004016520,594.0,49.5800,-101.7000,-17.365932
8,CA004016521,596.0,49.6000,-101.7200,-9.726271
9,CA004018678,643.0,49.9300,-101.9700,-8.649940


In [320]:
station_df

Unnamed: 0,station,elevation,latitude,longitude,vecSum
0,CA004010080,580.0,49.2500,-102.2800,-26.505208
1,CA004011090,629.0,49.7200,-102.0300,-29.518750
2,CA004011160,631.0,49.6300,-102.2700,-18.808153
3,CA004012485,511.0,49.3300,-101.4500,-9.820352
4,CA004014913,633.0,49.6200,-102.1000,-13.054688
5,CA004015045,576.0,49.8300,-101.5200,-9.127960
6,CA004015800,582.0,49.2200,-102.1700,-11.440785
7,CA004016520,594.0,49.5800,-101.7000,-17.365932
8,CA004016521,596.0,49.6000,-101.7200,-9.726271
9,CA004018678,643.0,49.9300,-101.9700,-8.649940


## Map

In [321]:
pd_df['station'].unique()

array([u'USC00323686', u'CA005012545', u'CA005012960', u'USC00324646',
       u'USW00024013', u'CA004011160', u'USC00322304', u'USC00323963',
       u'CA005010QFQ', u'USC00328792', u'USC00322472', u'USR0000NJCL',
       u'CA005010485', u'CA005010480', u'USR0000NLOS', u'USC00328990',
       u'CA005012400', u'CA005010640', u'USC00329445', u'CA005010140',
       u'CA005012080', u'USC00323217', u'CA00502055E', u'CA005022946',
       u'USC00327664', u'USC00320941', u'USC00326025', u'USC00327201',
       u'USC00325078', u'USC00327704', u'CA005011720', u'CA005012720',
       u'CA004015800', u'CA005012672', u'USC00321288', u'USC00320626',
       u'CA005010547', u'CA0050220M0', u'CA004015045', u'CA005011293',
       u'USC00329333', u'CA005022041', u'CA005011051', u'USW00094011',
       u'USC00325993', u'CA005022065', u'CA005020320', u'CA004016521',
       u'CA005010538', u'USW00094084', u'USC00328913', u'USC00321498',
       u'CA005012941', u'CA005010180', u'CA005022040', u'USC00320961',
      

In [322]:
#define a mapping from the range of the value to hex colors.
from matplotlib.colors import rgb2hex
_avg='vecSum'
_min=pd_df[_avg].min()
_max=pd_df[_avg].max()
_min,_max

import pylab as plt
cmap=plt.get_cmap('jet')
def get_color(val):
    x=(val-_min)/(_max-_min)
    return(rgb2hex(cmap(x)[:3]))

get_color(1000.)

u'#800000'

In [323]:
min_lat,max_lat,min_long,max_long = box = (np.min(pd_df['latitude']), np.max(pd_df['latitude']), np.min(pd_df['longitude']), np.max(pd_df['longitude']))

In [324]:
pd_df = pd_df.replace([np.inf, -np.inf], np.NaN)

In [325]:
center = [(min_lat+max_lat)/2, (min_long+max_long)/2]
zoom = 9

m = Map(default_tiles=TileLayer(opacity=1.0), center=center, zoom=zoom)

r = Rectangle(bounds=[[min_lat-0.1,min_long-0.1],[max_lat+0.1,max_long+0.1]], weight=5, fill_opacity=0.0)
m += r

lat_margin=(max_lat-min_lat)/4
long_margin=(max_long-min_long)/4
circles = []

station_df.sort_values(by=['vecSum'], inplace=True,ascending=False)
count = 0
for index, row in station_df.iterrows():#pdf
    _lat=row['latitude']
    _long=row['longitude']
#     _lat=station_df.index.levels[2][i]
#     _long=station_df.index.levels[3][i]
    _count=row['vecSum']*10
    print _lat, _long, _count
    if (_count > 0): #vary
        _coef=_count #row[_avg]
        # taking sqrt of count so that the  area of the circle corresponds to the count
        c = Circle(location=(_lat,_long), radius=int(300*np.sqrt(_count+0.0)), weight=1,
            color='#F00', opacity=0.8, fill_opacity=0.4,
            fill_color=get_color(_coef))
        circles.append(c)
        m.add_layer(c)
        count += 1
        if (count >= 15):
            break
m    

48.2553 -101.2733 38.925401031
48.0781 -99.2656 16.4075349507
48.37 -100.43 8.72192382812
49.45 -99.02 8.34296875
48.0475 -100.31 -2.45621066623
48.3542 -99.9925 -3.26001183461
49.13 -100.12 -12.7015904018
48.4167 -101.35 -22.1147460938
48.0797 -100.875 -23.8560820279
49.42 -99.65 -28.333062066
49.18 -99.65 -36.3245738636
49.47 -100.47 -37.4263509115
49.18 -100.5 -39.6762319712
48.1803 -101.2964 -46.4661574591
48.2675 -100.8439 -47.7133554955
49.57 -99.33 -51.5420532227
49.55 -99.08 -54.2410681383
48.6692 -102.0975 -54.7923093854
49.13 -100.6 -55.6411508413
48.2881 -99.4317 -59.4633871822
48.4583 -101.5697 -59.7983976723
49.283 -100.983 -60.5392456055
49.18 -100.33 -61.4453125
49.3 -100.32 -64.334044023
48.6333 -102.4 -67.6651000977
49.783 -99.633 -68.822265625
49.05 -99.42 -72.7232776989
48.9097 -101.0192 -75.2957393346
49.17 -100.4 -76.0571289062
49.23 -100.05 -79.5849609375
48.9333 -99.6833 -81.7614746094
49.4 -99.63 -83.1425382653
49.22 -100.08 -83.6726888021
49.1 -99.35 -84.011718

Unnamed: 0,vector,elevation,station,latitude,longitude,vecSum
226,"[711.0, 711.0, 711.0, 711.0, 864.0, 762.0, 762...",469.4,USC00323963,48.9989,-99.3464,inf
9,"[660.0, 635.0, 635.0, 660.0, 889.0, 889.0, 864...",458.1,USC00322472,48.6500,-101.0167,inf
148,"[660.0, 660.0, 711.0, 686.0, 686.0, 686.0, 686...",466.3,USC00322304,48.0475,-100.3100,inf
123,"[530.0, 530.0, 530.0, 540.0, 540.0, 580.0, 580...",482.0,CA005010191,49.3000,-99.4500,inf
240,"[102.0, 102.0, 102.0, 102.0, 102.0, 102.0, 178...",460.2,USC00323686,48.2675,-100.8439,inf
75,"[610.0, 610.0, 610.0, 610.0, 610.0, 610.0, 610...",457.8,USC00329333,48.9097,-101.0192,inf
8,"[910.0, 910.0, 910.0, 910.0, 910.0, 930.0, 950...",482.0,CA005010191,49.3000,-99.4500,inf
224,"[457.0, 457.0, 457.0, 483.0, 660.0, 660.0, 660...",460.2,USC00323686,48.2675,-100.8439,inf
2,"[nan, 610.0, nan, nan, nan, nan, nan, nan, nan...",482.0,CA005010191,49.3000,-99.4500,inf
225,"[102.0, 102.0, 102.0, 102.0, 178.0, 178.0, 178...",484.9,USC00320796,48.6167,-99.3667,6.540800e+04


In [48]:
pd_df['vector'].loc[226]

array([  711.,   711.,   711.,   711.,   864.,   762.,   762.,   762.,
         762.,   762.,   762.,   762.,   737.,   737.,   864.,   864.,
         864.,   864.,   864.,   864.,   864.,   864.,    nan,   940.,
         940.,   940.,   940.,   940.,   940.,   940.,   940.,   940.,
         940.,   940.,   940.,   940.,   940.,   940.,   940.,   940.,
         940.,   940.,   965.,   965.,   965.,   965.,   965.,   965.,
         965.,   965.,   965.,   965.,   965.,   965.,   965.,   965.,
        1016.,  1016.,  1016.,   965.,   965.,   940.,   838.,   838.,
         838.,   762.,   762.,   762.,   762.,   762.,   762.,   762.,
         762.,   762.,   762.,   762.,   762.,   762.,   762.,   762.,
         686.,   762.,   813.,   813.,   711.,   711.,   711.,   686.,
         660.,   610.,   584.,   508.,   381.,   203.,   102.,    25.,
           0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
           0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
      

### excercises:
* Add a legend that relates the colors to values.
* Leaflet supports a variety of maps. See if you can get a topographical map as the background.

In [17]:
%matplotlib inline
pdf.plot.scatter(x='elevation',y='avg(coeff_1)');

NameError: name 'pdf' is not defined