## Visualizing the distribution of the observations
This notebook visualizes, on a map, the locations of the samples with the minimum/maximum eigenvector coefficients (1-3) for SNWD.

In [3]:
from ipyleaflet import (
    Map,
    Marker,
    TileLayer, ImageOverlay,
    Polyline, Polygon, Rectangle, Circle, CircleMarker,
    GeoJSON,
    DrawControl
)

In [4]:
import pandas as pd
import numpy as np
import sklearn as sk
import urllib
import math
import pylab as plt

import sys
sys.path.append('./lib')

from leaflet import *

In [5]:
import os
from pickle import load

data_dir = "../../Data/Weather/"


def _load_pickle(filename):
    """Unpickle and return the object stored in `filename` inside `data_dir`."""
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # from a trusted source.
    # os.path.join avoids the doubled '/' that plain concatenation produced,
    # and the handle is not called `file`, so the Python 2 builtin of that
    # name is no longer shadowed for the rest of the session.
    with open(os.path.join(data_dir, filename), 'rb') as pickle_file:
        return load(pickle_file)


# Samples with the minimum / maximum SNWD coefficients, as stored in the pickles.
rows_min_coeff = _load_pickle('SNWD_min_coeffs.pickle')
rows_max_coeff = _load_pickle('SNWD_max_coeffs.pickle')

In [6]:
# One DataFrame per entry of the min-coefficient rows, followed by one per
# entry of the max-coefficient rows; tables_all stacks them into one frame.
tables = []
for coeff_rows in (rows_min_coeff, rows_max_coeff):
    tables.extend(pd.DataFrame(sample_rows) for sample_rows in coeff_rows)
tables_all = pd.concat(tables)

In [7]:
import pylab as plt

# Matplotlib's default color cycle; the alternative palette derived from it is
# left commented out below in favour of the fixed palette.
prop_cycle = plt.rcParams['axes.prop_cycle']
#colors = prop_cycle.by_key()['color']

# Fixed palette, indexed by table position when the samples are drawn.
colors = [
    '#ff0000',  # red
    '#00ff00',  # green
    '#0000ff',  # blue
    '#ffff00',  # yellow
    '#ff00ff',  # magenta
    '#00ffff',  # cyan
]

In [9]:
# Preview the first five rows of the combined table.
tables_all.head(n=5)

Unnamed: 0,coeff_1,coeff_2,coeff_3,coeff_4,elevation,label,latitude,longitude,measurement,res_1,res_2,res_3,res_4,res_mean,station,total_var,undefs,vector,year
0,-11887.116725,1502.126642,697.165599,1580.674783,1931.5,SSSBSBBB,39.4317,-120.2406,SNWD,0.072693,0.069382,0.070016,0.053894,0.767752,USC00047641,229185980.0,15,"[39, 102, 90, 102, 90, 102, 64, 102, 39, 102, ...",1971.0
1,-11749.309543,3316.269866,876.399808,595.44361,1774.9,SSSBSBBB,39.3331,-120.173,SNWD,0.104933,0.035429,0.03151,0.03054,0.752206,USC00049043,208145907.0,1,"[36, 99, 36, 99, 36, 99, 36, 99, 36, 99, 242, ...",1983.0
2,-11293.442597,821.563071,467.251791,-277.475643,1641.3,SSSBSBBB,39.4539,-120.6556,SNWD,0.065328,0.074726,0.072879,0.076441,0.750994,USC00041018,204956666.0,18,"[0, 126, 0, 126, 0, 126, 191, 102, 166, 102, 9...",1971.0
3,-11169.757828,1156.928952,1354.83235,-1192.776627,1931.5,SSSBSBBB,39.4317,-120.2406,SNWD,0.065948,0.058526,0.049723,0.040403,0.736591,USC00047641,190225408.0,5,"[36, 95, 36, 95, 246, 96, 246, 96, 246, 96, 24...",1973.0
0,-7000.172104,-3954.403368,-303.063949,-17.304986,1902.9,SSSBSBBB,39.2,-120.2333,SNWD,0.370864,0.170099,0.16892,0.168916,0.692259,USC00048474,112513482.0,0,"[119, 100, 94, 100, 43, 100, 43, 100, 17, 100,...",1972.0


In [43]:
# Show all 24 samples (2 extremes (min, max) x 3 coefficients x 4 samples) on the map.
import numpy as np

# Bounding box of every sample location; it centers the map and is drawn as a frame.
# NOTE(review): min()/max() are taken over every column of tables_all although
# only latitude and longitude are read here.
scope_max=tables_all.max()
scope_min=tables_all.min()
min_lat,max_lat,min_long,max_long = box = (scope_min['latitude'], scope_max['latitude'], scope_min['longitude'], scope_max['longitude'])
center = [(min_lat+max_lat)/2, (min_long+max_long)/2]
zoom = 10
lat_margin=(max_lat-min_lat)/4
long_margin=(max_long-min_long)/4
m = Map(default_tiles=TileLayer(opacity=0.6), center=center, zoom=zoom)
r = Rectangle(bounds=[[min_lat,min_long],[max_lat,max_long]], weight=5, fill_opacity=0.0)
m += r

# Dedicated, seeded generator so the marker jitter (and therefore the rendered
# map) is identical on every run and the global numpy random state is untouched.
jitter_rng = np.random.RandomState(0)

for i,table in enumerate(tables):
    # One color per table, so all samples of a table share a color.
    color=colors[i]
    for j,row in table.iterrows():
        _lat=row['latitude']
        _long=row['longitude']
        res = row['res_3']
        # Small random shift (below 0.01 in each coordinate); presumably so that
        # samples recorded at the same station do not cover each other exactly.
        offset = jitter_rng.random_sample(2) * 0.01
        # The circle radius shrinks as res_3 grows.
        c = Circle(location=(_lat+offset[0],_long+offset[1]), radius=int(1000*(1.0-res)), weight=1,
            color=color, opacity=0.3, fill_opacity=0.6, fill_color=color)
        m += c
m

In [44]:
# Order the 24 samples by year to check whether any year contributes more than one sample.
(
    tables_all
    .loc[:, ['station', 'elevation', 'year']]
    .sort_values('year')
    .set_index('year')
)

Unnamed: 0_level_0,station,elevation
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1958.0,USC00047641,1931.5
1963.0,USC00048332,2098.5
1966.0,USW00023225,1608.1
1967.0,USC00048758,1898.9
1968.0,USC00048474,1902.9
1969.0,USC00040931,1699.3
1970.0,USC00048474,1902.9
1970.0,USC00047641,1931.5
1970.0,USC00042467,1809.6
1971.0,USC00049043,1774.9


In [49]:
# The listing sorted by year shows that 1973 and 1975 each have 3 samples.
# Look at the three samples from 1975 first.
tables_all.loc[tables_all['year'] == 1975, ['station', 'elevation', 'year']]

Unnamed: 0,station,elevation,year
0,USC00049043,1774.9,1975.0
2,USC00042467,1809.6,1975.0
3,USW00023225,1608.1,1975.0


In [46]:
# Plot on the map the 3 samples of year 1975, which show the same SNWD pattern (same color).
# Relies on center, zoom and the min/max latitude/longitude computed in the cell
# that maps all samples.
m = Map(default_tiles=TileLayer(opacity=0.6), center=center, zoom=zoom)
r = Rectangle(bounds=[[min_lat,min_long],[max_lat,max_long]], weight=5, fill_opacity=0.0)
m += r

# Dedicated, seeded generator so the marker jitter is identical on every run
# and the global numpy random state is untouched.
jitter_rng = np.random.RandomState(0)

for i,table in enumerate(tables):
    # One color per table, so samples from the same table share a color.
    color=colors[i]
    for j,row in table.iterrows():
        if int(row['year']) != 1975:
            continue
        # Single pre-formatted argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s %s' % (row['latitude'], row['longitude'], row['elevation']))
        _lat=row['latitude']
        _long=row['longitude']
        res = row['res_3']
        # Small random shift (below 0.01 in each coordinate) applied to the marker position.
        offset = jitter_rng.random_sample(2) * 0.01
        # The circle radius shrinks as res_3 grows.
        c = Circle(location=(_lat+offset[0],_long+offset[1]), radius=int(1000*(1.0-res)), weight=1,
            color=color, opacity=0.3, fill_opacity=0.6, fill_color=color)
        m += c
m

39.3331 -120.173 1774.9
39.3239 -120.2331 1809.6
39.2775 -120.7103 1608.1


In [48]:
# Show the 3 samples from 1973.
tables_all.loc[tables_all['year'] == 1973, ['station', 'elevation', 'year']]

Unnamed: 0,station,elevation,year
3,USC00047641,1931.5,1973.0
1,USC00042338,1357.9,1973.0
3,USC00048758,1898.9,1973.0


In [47]:
# Plot on the map the 3 samples of year 1973, which show different patterns (different colors).
# Relies on center, zoom and the min/max latitude/longitude computed in the cell
# that maps all samples.
m = Map(default_tiles=TileLayer(opacity=0.6), center=center, zoom=zoom)
r = Rectangle(bounds=[[min_lat,min_long],[max_lat,max_long]], weight=5, fill_opacity=0.0)
m += r

# Dedicated, seeded generator so the marker jitter is identical on every run
# and the global numpy random state is untouched.
jitter_rng = np.random.RandomState(0)

for i,table in enumerate(tables):
    # One color per table, so samples from different tables get different colors.
    color=colors[i]
    for j,row in table.iterrows():
        if int(row['year']) != 1973:
            continue
        # Single pre-formatted argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s %s' % (row['latitude'], row['longitude'], row['elevation']))
        _lat=row['latitude']
        _long=row['longitude']
        res = row['res_3']
        # Small random shift (below 0.01 in each coordinate) applied to the marker position.
        offset = jitter_rng.random_sample(2) * 0.01
        # The circle radius shrinks as res_3 grows.
        c = Circle(location=(_lat+offset[0],_long+offset[1]), radius=int(1000*(1.0-res)), weight=1,
            color=color, opacity=0.3, fill_opacity=0.6, fill_color=color)
        m += c
m

39.4317 -120.2406 1931.5
39.3 -120.8333 1357.9
39.1678 -120.1428 1898.9
