## iPyLeaflet
[ipyleaflet](https://github.com/ellisonbg/ipyleaflet) is a bridge between jupyter notebooks and the [leaflet](http://leafletjs.com/)  javascript library for drawing maps.

ipyleaflet comes with a few example notebooks (this notebook was derived from one) but very little documentation.
For more documentation, read the [Leaflet API reference](http://leafletjs.com/reference.html).

For installation directions, see the README on [ipyleaflet](https://github.com/ellisonbg/ipyleaflet)

In [1]:
from ipyleaflet import (
    Map,
    Marker,
    TileLayer, ImageOverlay,
    Polyline, Polygon, Rectangle, Circle, CircleMarker,
    GeoJSON,
    DrawControl
)

## Visualizing the distribution of the observations

## Load the required libraries

In [2]:
import pandas as pd
import numpy as np
import sklearn as sk
import urllib
import math

In [3]:
# findspark.init() has to run before the first pyspark import.
import findspark
findspark.init()

from pyspark import SparkContext
from pyspark.sql import *

# Local Spark master "local[3]"; the two helper modules are passed via pyFiles.
# To re-run this cell in a live kernel, stop the existing context first (sc.stop()).
sc = SparkContext(
    master="local[3]",
    pyFiles=['lib/numpy_pack.py', 'lib/computeStats.py'],
)
sqlContext = SQLContext(sc)

In [4]:
import sys
# Make the helper modules under ./lib importable from this notebook.
sys.path.append('./lib')

import numpy as np
from numpy_pack import packArray,unpackArray
# NOTE: the name STAT_Descriptions is bound again later, when the statistics pickle is loaded.
from computeStats import computeOverAllDist, STAT_Descriptions

In [42]:
### Read the statistics (pickle) and the measurement data frame (parquet)

data_dir='../../Data/Weather'
file_index='BSBSSSBB'  # alternative: 'BBBSBBBB'
meas='TOBS'  # alternative: 'SNWD'

from pickle import load

# Read the statistics.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load pickles that come from a trusted source.
filename=data_dir+'/STAT_%s.pickle'%file_index
with open(filename,'rb') as stat_file:  # the handle is closed as soon as loading finishes
    STAT,STAT_Descriptions = load(stat_file)
print('keys from STAT=',STAT.keys())

#!ls -ld $data_dir/*.parquet

# Read the measurement data; the parquet name is built from file_index and meas.
filename=data_dir+'/decon_%s_%s.parquet'%(file_index,meas)

df=sqlContext.read.parquet(filename)
print(df.count())
df.show(2)

('keys from STAT=', ['TMIN', 'TOBS', 'TMAX', 'SNOW', 'SNWD', 'PRCP'])


AnalysisException: u'Path does not exist: file:/CSE255-DSE230/Data/Weather/decon_BSBSSSBB_TOBS.parquet;'

In [30]:
# Register df under the table name 'weather' so SQL queries can refer to it.
sqlContext.registerDataFrameAsTable(df,'weather')

In [31]:
# Pull elevation, vector, station id and coordinates for every row of the
# 'weather' table and collect them into a local Python list.

Query="SELECT elevation, vector, station, latitude, longitude FROM weather\n\t"
# print(...) with a single argument behaves the same on Python 2 and 3.
print(Query)
df1 = sqlContext.sql(Query)
print('%d rows' % df1.count())
df1.show(5)
# Each collected entry is [vector, elevation, station, latitude, longitude].
# unpackArray(..., np.float16) presumably decodes the packed 'vector' bytes
# into a float16 array -- see lib/numpy_pack.py to confirm.
rows=df1.rdd.map(lambda row:[unpackArray(row['vector'],np.float16),
                             row['elevation'],row['station'],
                             row['latitude'],row['longitude']]).collect()
# (number of rows, fields per row); guarded so an empty result does not raise.
print((len(rows), len(rows[0]) if rows else 0))

SELECT elevation, vector, station, latitude, longitude FROM weather
	
109 rows
+---------+--------------------+-----------+--------+---------+
|elevation|              vector|    station|latitude|longitude|
+---------+--------------------+-----------+--------+---------+
|     44.2|[00 00 00 00 00 0...|USC00099186| 31.2514| -82.3128|
|     72.5|[00 00 00 7E 00 0...|USC00096237| 31.1333| -83.2167|
|     80.8|[00 00 00 00 00 0...|USC00098974| 30.8056| -83.2736|
|     44.8|[00 00 00 00 00 0...|USC00084394| 30.5228| -82.9447|
|     79.2|[00 00 00 00 00 0...|USC00098351| 32.4414| -82.2181|
+---------+--------------------+-----------+--------+---------+
only showing top 5 rows

(109, 5)


In [32]:
# Build the pandas frame with its column names in one step. Passing columns=
# to the constructor also yields a correctly labelled (empty) frame when
# `rows` is empty, where assigning .columns afterwards would raise.
pd_df = pd.DataFrame(rows, columns=['vector','elevation','station','latitude','longitude'])
pd_df.head()

Unnamed: 0,vector,elevation,station,latitude,longitude
0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",44.2,USC00099186,31.2514,-82.3128
1,"[0.0, nan, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",72.5,USC00096237,31.1333,-83.2167
2,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",80.8,USC00098974,30.8056,-83.2736
3,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",44.8,USC00084394,30.5228,-82.9447
4,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",79.2,USC00098351,32.4414,-82.2181


In [33]:
# Reduce every entry of the 'vector' column to a single number: the sum of
# its elements, with NaNs ignored (np.nansum).
new_col = list(map(np.nansum, pd_df['vector']))
len(new_col)

109

In [38]:
# Store the per-row sums as a new 'vecSum' column.
pd_df['vecSum'] = new_col

In [39]:
# Preview the frame, now including the vecSum column.
# NOTE(review): the saved output for this cell has a non-sequential index,
# which suggests it was captured after pd_df had been reordered -- re-run to refresh.
pd_df.head()

Unnamed: 0,vector,elevation,station,latitude,longitude,vecSum
61,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",70.1,USC00092839,32.5575,-82.9036,51.0
46,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",97.5,USC00098496,32.5806,-82.3822,51.0
28,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",53.3,USC00095858,32.8089,-82.2372,51.0
20,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",32.0,USC00096838,31.3781,-82.1292,51.0
75,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",61.0,USC00098476,31.7078,-82.1969,51.0


## Map

In [40]:
# List the distinct vecSum values.
pd_df['vecSum'].unique()

array([  51.,   25.,   64.,   13.,   50.,   38.,   89.,   92.,   21.,
        152.,  110.,   35.,    8.,   18.,   56.,   81.,  179.,  102.,
         69.,   33.,   63.,   99.,   40.,   87.,  191.,   75.,   41.,
         30.,   82.,   46.,   79.,   76.,  101.,  355.,   26.,   84.,
         53.,  127.,  114.,  119.,   58.,   23.,    5.,    3.,  112.,
         20.,   45.,   71.])

In [18]:
# Inputs for the value -> hex colour mapping: the column to colour by, the
# smallest and largest value in that column, and the 'jet' colour map.
from matplotlib.colors import rgb2hex
import pylab as plt

_avg = 'vecSum'
_min, _max = pd_df[_avg].min(), pd_df[_avg].max()
cmap = plt.get_cmap('jet')
def get_color(val):
    """Return the hex colour string that `cmap` assigns to `val`.

    `val` is rescaled linearly so that the module-level `_min` maps to 0 and
    `_max` maps to 1, clamped to [0, 1], and looked up in `cmap`; the RGB part
    of the result is converted with `rgb2hex`.

    If `_min` equals `_max` there is no range to scale over, so the midpoint
    of the colour map is used instead of dividing by zero.
    """
    span = _max - _min
    if span == 0:
        x = 0.5
    else:
        # Clamp so that values outside [_min, _max] take the end colours.
        x = min(max((val - _min) / span, 0.0), 1.0)
    return rgb2hex(cmap(x)[:3])

# Spot-check the mapping with a single value.
get_color(1000.)

u'#000080'

In [16]:
# Bounding box of all stations, as (min_lat, max_lat, min_long, max_long).
box = (
    np.min(pd_df['latitude']),
    np.max(pd_df['latitude']),
    np.min(pd_df['longitude']),
    np.max(pd_df['longitude']),
)
min_lat, max_lat, min_long, max_long = box

In [41]:
# Draw the stations on a Leaflet map: one circle per station, with radius
# derived from the station's vecSum and fill colour from get_color(vecSum).
center = [(min_lat+max_lat)/2, (min_long+max_long)/2]
zoom = 9

m = Map(default_tiles=TileLayer(opacity=1.0), center=center, zoom=zoom)

# Outline the bounding box of all stations.
r = Rectangle(bounds=[[min_lat,min_long],[max_lat,max_long]], weight=5, fill_opacity=0.0)
m += r

lat_margin=(max_lat-min_lat)/4
long_margin=(max_long-min_long)/4
circles = []

# Largest values first. The sort goes into a new frame so pd_df keeps its
# original row order and this cell can be re-run without side effects.
by_value = pd_df.sort_values(by='vecSum', ascending=False)
count = 0
for index,row in by_value.iterrows():
    _lat=row['latitude']
    _long=row['longitude']
    _value=row['vecSum']
    _count=_value/100
    if (_count > 0):
        # taking sqrt of count so that the area of the circle corresponds to the count
        # The fill colour follows the station's own value; a constant here
        # would give every circle the same colour.
        c = Circle(location=(_lat,_long), radius=int(300*np.sqrt(_count+0.0)), weight=1,
            color='#F00', opacity=0.8, fill_opacity=0.4,
            fill_color=get_color(_value))
        circles.append(c)
        m.add_layer(c)
        count += 1
        # The loop ends once count exceeds 50, i.e. after at most 51 circles.
        if (count > 50):
            break
m

### Exercises:
* Add a legend that relates the colors to values.
* Leaflet supports a variety of maps. See if you can get a topographical map as the background.

In [None]:
%matplotlib inline
pdf.plot.scatter(x='elevation',y='avg(coeff_1)');

In [43]:
# Shut down the SparkContext created earlier in the notebook.
sc.stop()