## iPyLeaflet
[ipyleaflet](https://github.com/ellisonbg/ipyleaflet) is a bridge between jupyter notebooks and the [leaflet](http://leafletjs.com/)  javascript library for drawing maps.

ipyleaflet comes with a few examples notebooks (this notebook was derived from one) but very little documentation,
for more documentation read the [Leaflet IPA](http://leafletjs.com/reference.html)

For installation directions, see the README on [ipyleaflet](https://github.com/ellisonbg/ipyleaflet)

In [16]:
from ipyleaflet import (
    Map,
    Marker,
    TileLayer, ImageOverlay,
    Polyline, Polygon, Rectangle, Circle, CircleMarker,
    GeoJSON,
    DrawControl
)

## Visualizing the distribution of the observations

## Load the required libraries

In [7]:
import pandas as pd
import numpy as np
import sklearn as sk
import urllib
import math

In [1]:
import findspark
findspark.init()

from pyspark import SparkContext
#sc.stop()
sc = SparkContext(master="local[3]",pyFiles=['lib/numpy_pack.py','lib/computeStats.py'])

from pyspark import SparkContext
from pyspark.sql import *
sqlContext = SQLContext(sc)

In [2]:
import sys
sys.path.append('./lib')

import numpy as np
from numpy_pack import packArray,unpackArray
from computeStats import computeOverAllDist, STAT_Descriptions

In [75]:
### Read the data frame from pickle file

data_dir='../../Data/Weather'
file_index='SSSBBBBB'#BBBSBBBB'
meas='PRCP'# 'SNWD'

from pickle import load

#read statistics
filename=data_dir+'/STAT_%s.pickle'%file_index
STAT,STAT_Descriptions = load(open(filename,'rb'))
print('keys from STAT=',STAT.keys())

#!ls -ld $data_dir/*.parquet

#read data
filename=data_dir+'/decon_%s_%s.parquet'%(file_index,meas)

df=sqlContext.read.parquet(filename)
print(df.count())
df.show(2)

('keys from STAT=', ['TMIN', 'TOBS', 'TMAX', 'SNOW', 'SNWD', 'PRCP'])
2574
+-------------------+------------------+------------------+---------+--------+--------+---------+-----------+------------------+------------------+------------------+------------------+-----------+---------+------+--------------------+------+
|            coeff_1|           coeff_2|           coeff_3|elevation|   label|latitude|longitude|measurement|             res_1|             res_2|             res_3|          res_mean|    station|total_var|undefs|              vector|  year|
+-------------------+------------------+------------------+---------+--------+--------+---------+-----------+------------------+------------------+------------------+------------------+-----------+---------+------+--------------------+------+
| -59.12169445219956|-80.55032955545542|130.65039781920893|   2691.4|SSSBBBBB| 39.6505|-106.0904|       PRCP| 0.985360323869068|  0.95910094853714|0.9093062143222587|0.7875935516024978|US1COSU0034

In [76]:
sqlContext.registerDataFrameAsTable(df,'weather')

In [77]:
# Plot the high and low snow density areas

Query="SELECT elevation, vector, station, latitude, longitude FROM weather\n\t"
print Query
df1 = sqlContext.sql(Query)
print df1.count(),'rows'
df1.show(5)
rows=df1.rdd.map(lambda row:[unpackArray(row['vector'],np.float16), row['elevation'],row['station'],row['latitude'],row['longitude']]).collect()
print (len(rows), len(rows[0]))

SELECT elevation, vector, station, latitude, longitude FROM weather
	
2574 rows
+---------+--------------------+-----------+--------+---------+
|elevation|              vector|    station|latitude|longitude|
+---------+--------------------+-----------+--------+---------+
|   2691.4|[00 00 00 00 00 0...|US1COSU0034| 39.6505|-106.0904|
|   2490.8|[00 00 00 00 00 0...|US1COEG0021|  39.645|-106.3953|
|   3442.4|[00 48 00 42 00 4...|USC00051660| 39.3672|-106.1897|
|   1775.2|[00 00 00 00 00 4...|US1COGF0001| 39.5413|-107.3269|
|   3383.3|[00 00 00 00 60 5...|USS0005K09S|   39.65|-105.8667|
+---------+--------------------+-----------+--------+---------+
only showing top 5 rows

(2574, 5)


In [78]:
pd_df = pd.DataFrame(rows)
pd_df.columns = ['vector','elevation','station','latitude','longitude']
pd_df.head()

Unnamed: 0,vector,elevation,station,latitude,longitude
0,"[0.0, 0.0, 0.0, 0.0, 185.0, 51.0, 15.0, 0.0, 0...",2691.4,US1COSU0034,39.6505,-106.0904
1,"[0.0, 0.0, 0.0, 41.0, 0.0, 15.0, 51.0, 46.0, 3...",2490.8,US1COEG0021,39.645,-106.3953
2,"[8.0, 3.0, 20.0, 3.0, 5.0, 8.0, 0.0, 0.0, 23.0...",3442.4,USC00051660,39.3672,-106.1897
3,"[0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 76.0...",1775.2,US1COGF0001,39.5413,-107.3269
4,"[0.0, 0.0, 51.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",3383.3,USS0005K09S,39.65,-105.8667


In [79]:
new_col = [np.nansum(x) for x in pd_df['vector']]
len(new_col)

2574

In [80]:
pd_df['vecSum'] = new_col

In [81]:
pd_df.head()

Unnamed: 0,vector,elevation,station,latitude,longitude,vecSum
0,"[0.0, 0.0, 0.0, 0.0, 185.0, 51.0, 15.0, 0.0, 0...",2691.4,US1COSU0034,39.6505,-106.0904,4884.0
1,"[0.0, 0.0, 0.0, 41.0, 0.0, 15.0, 51.0, 46.0, 3...",2490.8,US1COEG0021,39.645,-106.3953,6304.0
2,"[8.0, 3.0, 20.0, 3.0, 5.0, 8.0, 0.0, 0.0, 23.0...",3442.4,USC00051660,39.3672,-106.1897,6824.0
3,"[0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 76.0...",1775.2,US1COGF0001,39.5413,-107.3269,5668.0
4,"[0.0, 0.0, 51.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",3383.3,USS0005K09S,39.65,-105.8667,9432.0


In [82]:
pd_df = pd_df.replace([np.inf, -np.inf], np.nan)
pd_df = pd_df.replace([np.inf, -np.inf], np.nan).dropna(subset=['vecSum'], how="all")
pd_df.head()

Unnamed: 0,vector,elevation,station,latitude,longitude,vecSum
0,"[0.0, 0.0, 0.0, 0.0, 185.0, 51.0, 15.0, 0.0, 0...",2691.4,US1COSU0034,39.6505,-106.0904,4884.0
1,"[0.0, 0.0, 0.0, 41.0, 0.0, 15.0, 51.0, 46.0, 3...",2490.8,US1COEG0021,39.645,-106.3953,6304.0
2,"[8.0, 3.0, 20.0, 3.0, 5.0, 8.0, 0.0, 0.0, 23.0...",3442.4,USC00051660,39.3672,-106.1897,6824.0
3,"[0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 76.0...",1775.2,US1COGF0001,39.5413,-107.3269,5668.0
4,"[0.0, 0.0, 51.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",3383.3,USS0005K09S,39.65,-105.8667,9432.0


## Map

In [83]:
# pd_df['vecSum'].unique()

In [84]:
#define a mapping from the range of the value to hex colors.
from matplotlib.colors import rgb2hex
_avg='vecSum'
_min=pd_df[_avg].min()
_max=pd_df[_avg].max()
_min,_max

import pylab as plt
cmap=plt.get_cmap('jet')
def get_color(val):
    x=(val-_min)/(_max-_min)
    return(rgb2hex(cmap(x)[:3]))

get_color(1000.)

u'#000080'

In [85]:
min_lat,max_lat,min_long,max_long = box = (np.min(pd_df['latitude']), np.max(pd_df['latitude']), np.min(pd_df['longitude']), np.max(pd_df['longitude']))

In [86]:
center = [(min_lat+max_lat)/2, (min_long+max_long)/2]
zoom = 9

m = Map(default_tiles=TileLayer(opacity=1.0), center=center, zoom=zoom)

r = Rectangle(bounds=[[min_lat,min_long],[max_lat,max_long]], weight=5, fill_opacity=0.0)
m += r

lat_margin=(max_lat-min_lat)/4
long_margin=(max_long-min_long)/4
circles = []

pd_df.sort_values(by='vecSum', inplace=True,ascending=False)
count = 0
for index,row in pd_df.iterrows():#pdf
    _lat=row['latitude']
    _long=row['longitude']
    _count=(row['vecSum']/100) #row['count(station)']
    if (_count > 0): #vary
        _coef=5 #row[_avg]
        # taking sqrt of count so that the  area of the circle corresponds to the count
        c = Circle(location=(_lat,_long), radius=int(300*np.sqrt(_count+0.0)), weight=1,
            color='#F00', opacity=0.3, fill_opacity=0.4,
            fill_color=get_color(_coef))
        circles.append(c)
        m.add_layer(c)
        count += 1
        if (count > 50):
            break
m    

### excercises:
* Add a legend that relates the colors to values.
* Leaflet supports a variety of maps. See if you can get a topographical map as the background.

In [None]:
%matplotlib inline
pdf.plot.scatter(x='elevation',y='avg(coeff_1)');

In [43]:
sc.stop()