## iPyLeaflet
[ipyleaflet](https://github.com/ellisonbg/ipyleaflet) is a bridge between jupyter notebooks and the [leaflet](http://leafletjs.com/)  javascript library for drawing maps.

ipyleaflet comes with a few examples notebooks (this notebook was derived from one) but very little documentation,
for more documentation read the [Leaflet IPA](http://leafletjs.com/reference.html)

For installation directions, see the README on [ipyleaflet](https://github.com/ellisonbg/ipyleaflet)

In [11]:
from ipyleaflet import (
    Map,
    Marker,
    TileLayer, ImageOverlay,
    Polyline, Polygon, Rectangle, Circle, CircleMarker,
    GeoJSON,
    DrawControl
)

## Visualizing the distribution of the observations

## Load the required libraries

In [12]:
import pandas as pd
import numpy as np
import sklearn as sk
import urllib
import math

In [13]:
import findspark
findspark.init()

from pyspark import SparkContext
#sc.stop()
sc = SparkContext(master="local[3]",pyFiles=['lib/numpy_pack.py','lib/computeStats.py'])

from pyspark import SparkContext
from pyspark.sql import *
sqlContext = SQLContext(sc)

ValueError: Cannot run multiple SparkContexts at once; existing SparkContext(app=pyspark-shell, master=local[3]) created by __init__ at <ipython-input-3-9a9c5a5dabc7>:6 

In [14]:
import sys
sys.path.append('./lib')

import numpy as np
from numpy_pack import packArray,unpackArray
from computeStats import computeOverAllDist, STAT_Descriptions

In [5]:
### Read the data frame from pickle file

data_dir='../../Data/Weather'
file_index='BSBSSSBS'
meas='TMIN'

from pickle import load

#read statistics
filename=data_dir+'/STAT_%s.pickle'%file_index
STAT,STAT_Descriptions = load(open(filename,'rb'))
print('keys from STAT=',STAT.keys())

#!ls -ld $data_dir/*.parquet

#read data
filename=data_dir+'/decon_%s_%s.parquet'%(file_index,meas)

df=sqlContext.read.parquet(filename)
print(df.count())
df.show(2)

('keys from STAT=', ['TMIN', 'TOBS', 'TMAX', 'SNOW', 'SNWD', 'PRCP'])
2146
+-------------------+------------------+-------------------+---------+--------+--------+---------+-----------+-------------------+-------------------+-------------------+-------------------+-----------+-----------+------+--------------------+------+
|            coeff_1|           coeff_2|            coeff_3|elevation|   label|latitude|longitude|measurement|              res_1|              res_2|              res_3|           res_mean|    station|  total_var|undefs|              vector|  year|
+-------------------+------------------+-------------------+---------+--------+--------+---------+-----------+-------------------+-------------------+-------------------+-------------------+-----------+-----------+------+--------------------+------+
|-1279.6068065826728| 95.88417930683788|-37.159380121393674|      3.0|BSBSSSBS| 24.6278| -82.8736|       TMIN|0.07562723612869544|0.07069061709076589|0.06887640687012085|0.099

In [6]:
#extract longitude and latitude for each station
feature='coeff_1'
sqlContext.registerDataFrameAsTable(df,'weather')
Query="SELECT station, latitude,longitude,elevation,%s FROM weather"%feature
print(Query)
df1 = sqlContext.sql(Query)
df1.show(4)

SELECT station, latitude,longitude,elevation,coeff_1 FROM weather
+-----------+--------+---------+---------+-------------------+
|    station|latitude|longitude|elevation|            coeff_1|
+-----------+--------+---------+---------+-------------------+
|USC00082418| 24.6278| -82.8736|      3.0|-1279.6068065826728|
|USC00082418| 24.6278| -82.8736|      3.0|-1243.6190900424936|
|USC00082418| 24.6278| -82.8736|      3.0|-1220.3240905813605|
|USC00082418| 24.6278| -82.8736|      3.0|-1153.6304822642571|
+-----------+--------+---------+---------+-------------------+
only showing top 4 rows



In [7]:
df2=df1.groupby(['station','latitude','longitude','elevation']).agg({"station": "count", feature: "mean"})
pdf=df2.toPandas()
pdf.sort_values(by=['station'],inplace=True)
pdf.head(2)

Unnamed: 0,station,latitude,longitude,elevation,count(station),avg(coeff_1)
49,USC00080535,28.55,-82.6333,14.9,8,141.200022
21,USC00080598,29.7956,-82.9178,9.1,11,665.053243


In [8]:
#define a mapping from the range of the value to hex colors.
from matplotlib.colors import rgb2hex
_avg='avg(%s)'%feature
_min=pdf[_avg].min()
_max=pdf[_avg].max()
_min,_max

import pylab as plt
cmap=plt.get_cmap('jet')
def get_color(val):
    x=(val-_min)/(_max-_min)
    return(rgb2hex(cmap(x)[:3]))

get_color(1000.)

u'#800000'

## Map

In [None]:
#compute 
print pdf.min()
print pdf.max()
print "hey"

In [None]:
min_lat,max_lat,min_long,max_long = box = (42.1103, 42.6167, -72.6, -70.8)
print "hey"

In [10]:
center = [(min_lat+max_lat)/2, (min_long+max_long)/2]
zoom = 9

m = Map(default_tiles=TileLayer(opacity=1.0), center=center, zoom=zoom)

r = Rectangle(bounds=[[min_lat,min_long],[max_lat,max_long]], weight=5, fill_opacity=0.0)
m += r

lat_margin=(max_lat-min_lat)/4
long_margin=(max_long-min_long)/4
circles = []
for index,row in pdf.iterrows():
    _lat=row['latitude']
    _long=row['longitude']
    _count=row['count(station)']
    _coef=row[_avg]
    # taking sqrt of count so that the  area of the circle corresponds to the count
    c = Circle(location=(_lat,_long), radius=int(300*np.sqrt(_count+0.0)), weight=1,
            color='#F00', opacity=0.8, fill_opacity=0.4,
            fill_color=get_color(_coef))
    circles.append(c)
    m.add_layer(c)
m    

In [27]:
stations = [u'USC00081163', u'USC00081310', u'USC00081432', u'USC00081632', u'USC00081635',u'USW00012833', u'USW00012842', u'USW00012871', u'USW00012873', u'USW00092802', u'USW00092806']
tuples = []
for sta in stations:
    temp_row =  pdf.loc[pdf['station'] == sta]
    temp_row = temp_row.values[0]
    #print temp_row
    lat = temp_row[1]
    lon = temp_row[2]
    cou = temp_row[4]
    tuples.append((lat,lon,cou))
circles_my = []
for (lat,lon,cou) in tuples:
    _lat=lat
    _long=lon
    _count=cou
    _coef=row[_avg]
    # taking sqrt of count so that the  area of the circle corresponds to the count
    c = Circle(location=(_lat,_long), radius=int(300*np.sqrt(_count+0.0)), weight=1,
            color='#0C0', opacity=0.8, fill_opacity=0.4,
            fill_color=get_color(_coef))
    circles_my.append(c)
    m.add_layer(c)
print tuples
m
    

[(28.6664, -82.0894, 55), (26.5333, -82.1833, 26), (29.1333, -83.05, 63), (27.9667, -82.7667, 38), (27.9833, -82.8333, 5), (29.6333, -83.1053, 20), (27.9619, -82.5403, 74), (27.4014, -82.5586, 13), (27.9106, -82.6875, 14), (28.1878, -82.6258, 2), (27.7628, -82.6261, 100)]


### excercises:
* Add a legend that relates the colors to values.
* Leaflet supports a variety of maps. See if you can get a topographical map as the background.

In [None]:
%matplotlib inline
pdf.plot.scatter(x='elevation',y='avg(coeff_1)');

In [None]:
RDD1=sc.parallelize(["spark  basics", "spark big  data analysis", "spring"]) 
RDD2=sc.parallelize(["spark using pyspark", "big data"])
 
RDD1.subtract(RDD2).collect()