## iPyLeaflet
[ipyleaflet](https://github.com/ellisonbg/ipyleaflet) is a bridge between jupyter notebooks and the [leaflet](http://leafletjs.com/)  javascript library for drawing maps.

ipyleaflet comes with a few examples notebooks (this notebook was derived from one) but very little documentation,
for more documentation read the [Leaflet IPA](http://leafletjs.com/reference.html)

For installation directions, see the README on [ipyleaflet](https://github.com/ellisonbg/ipyleaflet)

In [1]:
from ipyleaflet import (
    Map,
    Marker,
    TileLayer, ImageOverlay,
    Polyline, Polygon, Rectangle, Circle, CircleMarker,
    GeoJSON,
    DrawControl
)

## Visualizing the distribution of the observations

## Load the required libraries

In [2]:
import pandas as pd
import numpy as np
import sklearn as sk
import urllib
import math

In [3]:
import findspark
findspark.init()

from pyspark import SparkContext
#sc.stop()
sc = SparkContext(master="local[3]",pyFiles=['lib/numpy_pack.py','lib/computeStats.py'])

from pyspark import SparkContext
from pyspark.sql import *
sqlContext = SQLContext(sc)

In [4]:
import sys
sys.path.append('./lib')

import numpy as np
from numpy_pack import packArray,unpackArray
from computeStats import computeOverAllDist, STAT_Descriptions

In [5]:
### Read the data frame from pickle file

data_dir='../../Data/Weather'
file_index='SBBSSBSB' # my file
#file_index='BBBSBBBB'
meas='SNWD'

from pickle import load

#read statistics
filename=data_dir+'/STAT_%s.pickle'%file_index
STAT,STAT_Descriptions = load(open(filename,'rb'))
print('keys from STAT=',STAT.keys())

#!ls -ld $data_dir/*.parquet

#read data
filename=data_dir+'/decon_%s_%s.parquet'%(file_index,meas)

df=sqlContext.read.parquet(filename)
print(df.count())
df.show(2)

('keys from STAT=', ['TMIN', 'TOBS', 'TMAX', 'SNOW', 'SNWD', 'PRCP'])
497
+-----------------+------------------+-------------------+---------+--------+--------+---------+-----------+-------------------+-------------------+-------------------+------------------+-----------+--------------------+------+--------------------+------+
|          coeff_1|           coeff_2|            coeff_3|elevation|   label|latitude|longitude|measurement|              res_1|              res_2|              res_3|          res_mean|    station|           total_var|undefs|              vector|  year|
+-----------------+------------------+-------------------+---------+--------+--------+---------+-----------+-------------------+-------------------+-------------------+------------------+-----------+--------------------+------+--------------------+------+
|7472.087375891099| 865.3917184837485| 387.52827328538126|   2332.9|SBBSSBSB| 43.1667|-109.9833|       SNWD|0.06262446246961834| 0.0500509909664027|0.04752962

In [6]:
#extract longitude and latitude for each station
features='coeff_1,coeff_2,coeff_3'
sqlContext.registerDataFrameAsTable(df,'weather')
Query="SELECT station, latitude,longitude,elevation,%s FROM weather"%features
print(Query)
df1 = sqlContext.sql(Query)
df1.show(4)

SELECT station, latitude,longitude,elevation,coeff_1,coeff_2,coeff_3 FROM weather
+-----------+--------+---------+---------+-----------------+------------------+-------------------+
|    station|latitude|longitude|elevation|          coeff_1|           coeff_2|            coeff_3|
+-----------+--------+---------+---------+-----------------+------------------+-------------------+
|USC00485115| 43.1667|-109.9833|   2332.9|7472.087375891099| 865.3917184837485| 387.52827328538126|
|USC00486440| 43.8567|-110.5889|   2072.0|8097.220485661097|63.365174721610465|-1061.8621361754058|
|USC00486440| 43.8567|-110.5889|   2072.0|7379.178510720495|1030.6913505602406| -143.6801725884934|
|USC00486428| 43.6536|-110.7169|   1964.1|6183.460872576893| 581.2391619943382| -62.86351840091008|
+-----------+--------+---------+---------+-----------------+------------------+-------------------+
only showing top 4 rows



In [7]:
pdf = df.toPandas()
pdf['latitude'].min()

42.866700000000002

In [8]:
# find min and max lon and lat
df.groupby('measurement').agg({'latitude':'min'}).show()
df.groupby('measurement').agg({'latitude':'max'}).show()
df.groupby('measurement').agg({'longitude':'min'}).show()
df.groupby('measurement').agg({'longitude':'max'}).show()

+-----------+-------------+
|measurement|min(latitude)|
+-----------+-------------+
|       SNWD|      42.8667|
+-----------+-------------+

+-----------+-------------+
|measurement|max(latitude)|
+-----------+-------------+
|       SNWD|      43.8667|
+-----------+-------------+

+-----------+--------------+
|measurement|min(longitude)|
+-----------+--------------+
|       SNWD|     -111.0339|
+-----------+--------------+

+-----------+--------------+
|measurement|max(longitude)|
+-----------+--------------+
|       SNWD|     -104.9614|
+-----------+--------------+



In [9]:
feature='coeff_1'
df2=df1.groupby(['station','latitude','longitude','elevation']).agg({"station": "count", feature: "mean"})
pdf=df2.toPandas()
#pdf.loc['region']='west'
#pdf.loc[pdf['longitude']>=-109,'region']='east'
pdf.sort_values(by=['station'],inplace=True)
pdf.head(2)

Unnamed: 0,station,latitude,longitude,elevation,count(station),avg(coeff_1)
0,US1WYTT0009,43.6215,-110.6253,2030.9,1,1965.369388
21,USC00480140,43.7728,-111.0339,1962.0,59,3236.162026


In [10]:
pos_c1_df = pdf.loc[pdf['avg('+feature+')']>0,:]
pos_c1_df.head()

Unnamed: 0,station,latitude,longitude,elevation,count(station),avg(coeff_1)
0,US1WYTT0009,43.6215,-110.6253,2030.9,1,1965.369388
21,USC00480140,43.7728,-111.0339,1962.0,59,3236.162026
27,USC00480603,42.8733,-110.9075,1958.3,20,4036.568003
8,USC00480605,42.8667,-110.9,1930.9,12,4302.603964
14,USC00480865,43.2278,-110.4358,1991.9,57,4850.762338


In [11]:
neg_c1_df = pdf.loc[pdf['avg('+feature+')']<0,:]
neg_c1_df.head()

Unnamed: 0,station,latitude,longitude,elevation,count(station),avg(coeff_1)
23,USC00480727,43.3667,-105.2,1382.0,1,-1254.029634
4,USC00480778,43.6561,-107.7375,1717.5,6,-1099.583096
33,USC00481000,43.4053,-108.1633,1479.5,1,-1089.182938
7,USC00482595,43.2281,-108.9489,1699.3,1,-1223.090364
30,USC00482725,43.4117,-104.9614,1345.7,2,-985.249696


In [12]:
#define a mapping from the range of the value to hex colors.
from matplotlib.colors import rgb2hex
_avg='avg(%s)'%feature
_min=pdf[_avg].min()
_max=pdf[_avg].max()
_min,_max

import pylab as plt

def get_color(val,_min,_max):
    x=(val-_min)/(_max-_min)
    if(_min*_max > 0):
        c = 'gist_yarg'
    else:
        c = 'gist_gray'
    cmap=plt.get_cmap(c)#'PiYG')
    return(rgb2hex(cmap(x)[:3]))

get_color(1000.,0,1000)

u'#ffffff'

In [13]:
print _avg
print _min
print _max

avg(coeff_1)
-1254.02963438
9854.19835614


## Map

In [14]:
min_lat,max_lat,min_long,max_long = box = (42.8, 43.9, -111.1, -104.85)


#### Nearly all stations having positive coefficients for eigenvector 1 for SNWD are west of -109 degrees lon

In [17]:
center = [(min_lat+max_lat)/2, (min_long+max_long)/2]
zoom = 7

m = Map(default_tiles=TileLayer(opacity=1.0), center=center, zoom=zoom)
#m = Map(default_tiles=TileLayer(opacity=0.5,url='http://{s}.tile.opentopomap.org/{z}/{x}/{y}.png'), center=center, zoom=zoom)

#m = Map(default_tiles=TileLayer(opacity=1.0), center=center, zoom=zoom)

topo = TileLayer(opacity=0.5, url='http://{s}.tile.opentopomap.org/{z}/{x}/{y}.png')
#topo = TileLayer(opacity=1.0, url='http://server.arcgisonline.com/ArcGIS/rest/services/USGSTopo/MapServer/tile/{z}/{y}/{x}')
world = TileLayer(opacity=0.25, url='http://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}')
#m.add_layer(topo)
m.add_layer(world)
#m.remove_layer(m.default_tiles)

r = Rectangle(bounds=[[min_lat,min_long],[max_lat,max_long]], weight=2, fill_opacity=0.0)

m += r

l = Polyline(locations=[[min_lat,-109],[max_lat,-109]], color='red', weight=5, fill_opacity=0.0)

#m += l

m.interact(zoom=(5,10,1))

lat_margin=(max_lat-min_lat)/4
long_margin=(max_long-min_long)/4
circles = []

temp_df = pos_c1_df
_min=temp_df[_avg].min()
_max=temp_df[_avg].max()
print _min, _max

temp_df = pdf
for index,row in temp_df.iterrows():#pdf.iterrows():#neg_c1_df.iterrows():#pos_c1_df.iterrows():
    _lat=row['latitude']
    _long=row['longitude']
    _count=row['count(station)']
    _coef=row[_avg]
    
    if _coef > 0:
        fc = 'blue'
    else:
        fc = 'orange'
    #print _coef
    # taking sqrt of count so that the  area of the circle corresponds to the count
    #c = Circle(location=(_lat,_long), radius=int(300*np.sqrt(_count+0.0)), weight=1,
    #        color='#F00', opacity=0.8, fill_opacity=0.4,
    #        fill_color=get_color(_coef))
    # 
    #print _coef
    c = Circle(location=(_lat,_long), radius=int(np.abs(_coef)+1000), weight=1,
            color='#F00', opacity=0.8, fill_opacity=0.5,
            fill_color=fc)#get_color(_coef,_min,_max))
    circles.append(c)
    m.add_layer(c)
m    

163.88236108 9854.19835614


Widget Javascript not detected.  It may not be installed or enabled properly.


### excercises:
* Add a legend that relates the colors to values.
* Leaflet supports a variety of maps. See if you can get a topographical map as the background.

In [19]:
df_all=df1.groupby(['station','latitude','longitude','elevation']).agg({"station": "count", 'coeff_1': "mean",'coeff_2': "mean",'coeff_3': "mean"})
all_pdf=df_all.toPandas()
#pdf.loc['region']='west'
#pdf.loc[pdf['longitude']>=-109,'region']='east'
all_pdf.sort_values(by=['station'],inplace=True)
pdf.head(10)

Unnamed: 0,station,latitude,longitude,elevation,count(station),avg(coeff_1)
0,US1WYTT0009,43.6215,-110.6253,2030.9,1,1965.369388
21,USC00480140,43.7728,-111.0339,1962.0,59,3236.162026
27,USC00480603,42.8733,-110.9075,1958.3,20,4036.568003
8,USC00480605,42.8667,-110.9,1930.9,12,4302.603964
23,USC00480727,43.3667,-105.2,1382.0,1,-1254.029634
4,USC00480778,43.6561,-107.7375,1717.5,6,-1099.583096
14,USC00480865,43.2278,-110.4358,1991.9,57,4850.762338
33,USC00481000,43.4053,-108.1633,1479.5,1,-1089.182938
10,USC00482054,42.9589,-109.9958,2239.4,13,956.75632
26,USC00482242,42.9281,-110.1272,2235.7,22,693.842543
