In [1]:
import pandas as pd
import numpy as np
import bqplot
import yt
import us
import math

In [None]:
from bqplot import DateScale, LinearScale, Axis, Lines, Scatter, Bars, Hist, Figure, LogScale
from bqplot.interacts import FastIntervalSelector

from ipywidgets import VBox, HTML, Dropdown, HBox

### 1. Read Data, set year as index, and aggregate data by year

#### 1)Read ufo dataset, state area dataset and store as DataFrame types

In [None]:
# Column labels for the sightings CSV. They are supplied through `names`,
# so pandas assigns them positionally instead of reading a header row.
names = [
    "date",
    "city",
    "state",
    "country",
    "shape",
    "duration_seconds",
    "duration_reported",
    "rdescription",
    "report_date",
    "latitude",
    "longitude",
]
fn = "/srv/nbgrader/data/ufo-scrubbed-geocoded-time-standardized.csv"
# Both timestamp columns are parsed into datetimes while loading.
ufo = pd.read_csv(fn, names=names, parse_dates=["date", "report_date"])

In [None]:
# Per-state reference table; later cells read its GEOID, Area, INTPTLAT and INTPTLON columns.
state_info = pd.read_csv(filepath_or_buffer="state_info.txt")

#### 2) Add a 'fips' column to the ufo dataset and join the state area data onto it by matching fips with GEOID

* We join the state area and FIPS code into the ufo dataset so that the normalization process is effective and consistent.

In [None]:
# Lookup from state abbreviation to FIPS code, as provided by the `us` package.
abbr_to_fits = us.states.mapping('abbr', 'fips')

# Numeric FIPS code per sighting. -1 marks rows whose abbreviation is absent from
# the mapping *or* present with a falsy FIPS value (the map-selection callback
# guards against such entries as well); `or -1` keeps int() from receiving None.
ufo["fips"] = ufo["state"].apply(
    lambda abbr: int(abbr_to_fits.get(str(abbr).upper()) or -1)
)

# Per-FIPS number of non-null durations and summed duration (not normalized).
fips_count = ufo.groupby("fips")["duration_seconds"].count()
total_time_all = ufo.groupby("fips")["duration_seconds"].sum()

# Integer join key used to match rows against state_info's GEOID column.
ufo['GEOID'] = ufo['fips'].astype('int64')

In [None]:
# Attach the state_info columns (e.g. Area) to every sighting by matching
# ufo['GEOID'] against state_info's GEOID. DataFrame.join defaults to a left
# join, so sightings without a matching GEOID are kept with NaN in those columns.
# The former bare `ufo.set_index('GEOID')` was removed: set_index returns a new
# frame, so the discarded call had no effect on `ufo`.
ufo = ufo.join(state_info.set_index('GEOID'), on='GEOID', lsuffix='org', rsuffix='new')

#### 3) Set year as the index of the ufo dataset and normalize the total sightings as well as the total duration per year by state area

In [None]:
# Calendar year of each sighting, taken from the parsed `date` column.
ufo['year'] = ufo['date'].dt.year
# Preview of the year-indexed frame. set_index returns a new DataFrame, so `ufo`
# itself keeps 'year' as an ordinary column, which the groupby cells below rely on.
# .head() keeps the cell output short instead of rendering the whole frame.
ufo.set_index('year').head()

In [None]:
# Each sighting contributes 1/Area, so summing 'count' yields sightings per unit
# of state area. Rows with no matched Area (NaN) stay NaN.
ufo['count'] = 1 / ufo['Area']
# Duration of each sighting scaled by the same state area.
ufo['duration_normal'] = ufo['duration_seconds'] / ufo['Area']

In [None]:
# Area-normalized totals per (state, year): summed 1/Area weights and summed
# area-scaled durations.
total_sightings, total_time = (
    ufo.groupby(['state', 'year'])[column].sum()
    for column in ('count', 'duration_normal')
)

In [None]:
# Raw (not area-normalized) totals per (state, year).
state_year_groups = ufo.groupby(by=['state', 'year'])
# .count() tallies the non-null entries of the 'count' column in each group.
total_sightings_unnormal = state_year_groups['count'].count()
total_time_unnormal = state_year_groups['duration_seconds'].sum()

In [None]:
# Totals per year across all states: number of non-null 'count' entries and
# summed raw duration in seconds.
total_sightings_year = ufo.groupby('year')['count'].count()
total_duration_year = ufo.groupby('year')['duration_seconds'].sum()

In [None]:
# Area-normalized totals per FIPS code over all years.
fips_groups = ufo.groupby(by='fips')
total_sightings_state1 = fips_groups['count'].sum()
total_duration_state = fips_groups['duration_normal'].sum()

In [None]:
# FIPS code -> log2 of the normalized sightings total; filled in by the next cell.
total_sightings_state = dict()

In [None]:
# Store log2 of each FIPS code's normalized sightings total for the map colouring.
# math.log2 raises ValueError for 0 and is meaningless for NaN; a FIPS group with
# no usable Area (e.g. the -1 placeholder) sums to 0 or NaN, so only strictly
# positive totals are stored. `nan > 0` is False, so NaN is skipped as well.
for fips_code, density in total_sightings_state1.items():
    if density > 0:
        total_sightings_state[fips_code] = math.log2(density)

### 2. Create US map and plots of total sightings and total duration as a function of year

In [None]:
import bqplot.pyplot as bqp

In [None]:
import us
# Lower-case abbreviation of the state currently picked on the map; empty when none is picked.
state_selected = ""

In [None]:
# Scales: one linear x scale for the year axis, plus linear and logarithmic y scales.
dt_x_fast = LinearScale()
lin_y = LinearScale()
log_y = LogScale()

# Axes bound to those scales.
x_ax = Axis(scale=dt_x_fast, label='Year')
y_ay_S = Axis(orientation='vertical', scale=lin_y)
y_ay_D = Axis(orientation='vertical', scale=log_y)



#### Get US map data and add id, coordinate, area properties to the dictionary

In [None]:
geom_data = bqplot.topo_load('map_data/USStatesMap.json')

# GEOID -> state_info row, built once so each geometry needs a single lookup
# instead of a full scan of state_info.
state_rows = {row.GEOID: row for row in state_info.itertuples()}

# Annotate every geometry whose id matches a GEOID with that state's
# coordinate pair and area; geometries without a match are left unchanged.
for geometry in geom_data['objects']['subunits']['geometries']:
    row = state_rows.get(geometry['id'])
    if row is not None:
        geometry['coordinate'] = [row.INTPTLAT, row.INTPTLON]
        geometry['area'] = row.Area

#### Observe the map's selection and connect the selected state to the plots

In [None]:
# Projection and colour scale for the map, plus the per-FIPS values that drive the colouring.
map_styles = {
    'scales': {
        'projection': bqplot.AlbersUSA(),
        'color': bqplot.ColorScale(colors=["#c7e9b4", "#253494"]),
    },
    'color': total_sightings_state,
}
map_tt = bqplot.Tooltip(fields=['name', 'id'], labels=['State', 'ID'])

# Clicking selects a state, hovering shows the tooltip.
# `unselected_style` is the bqplot Mark trait for styling non-selected items; the
# previous spelling `unslected_styles` matched no trait, so the dimming was never applied.
# NOTE(review): confirm the Map mark renders `unselected_style` in the bqplot version in use.
states_map = bqplot.Map(map_data=geom_data,
                        interactions={'click': 'select', 'hover': 'tooltip'},
                        unselected_style={'opacity': 0.4},
                        tooltip=map_tt,
                        display_legend=False,
                        labels=['UFO Sightings'],
                        **map_styles)
fig_map = bqplot.Figure(marks=[states_map],
                        fig_margin={'top': 20, 'bottom': 30, 'left': -105, 'right': 0})

In [None]:
def observe_selected(change):
    """Redraw the yearly line plot, title and interval summary after a map selection change.

    With a state selected (the last entry of ``states_map.selected``), the plot shows
    that state's area-normalized yearly series for the active caption and the module
    level ``state_selected`` is set to its lower-case abbreviation. With nothing
    selected, ``state_selected`` is reset to ``''`` and the nationwide yearly series
    is shown. If an interval is selected on the plot, ``db_fast`` is refreshed with
    the raw (not normalized) total inside it.

    ``change`` is the traitlets notification; it is not inspected because the
    current widget state is read directly.
    """
    # Declared up front: both the state branch and the "nothing selected" branch rebind it.
    global state_selected

    caption = cap_select.value
    ind = intsel_fast.selected
    has_interval = ind is not None and any(ind)

    # Series for the active caption: per-state normalized, per-state raw, nationwide.
    if caption == 'Total Sightings':
        normalized, raw, national = total_sightings, total_sightings_unnormal, total_sightings_year
        quantity, summary = sighting_text, 'Total sightings'
    elif caption == 'Total Duration Time':
        normalized, raw, national = total_time, total_time_unnormal, total_duration_year
        quantity, summary = time_text, 'Total duration seconds'
    else:
        normalized = raw = national = quantity = summary = None

    if states_map.selected:
        picked_fips = states_map.selected[-1]
        for abbr, fips in abbr_to_fits.items():
            # Some abbreviations carry no FIPS code; skip them before int().
            if fips and int(fips) == picked_fips:
                state_selected = abbr.lower()
                break
        else:
            # No abbreviation maps to the picked id: leave the widgets untouched.
            return
        if national is None:
            return
        plotted = normalized[state_selected]
        interval_source = raw[state_selected] if has_interval else None
        title_text = '<center><font size="+2"><b>Normalized Total %s of UFO Sightings in %s by Year</b></font></center>' % (
            quantity, state_selected.upper())
    else:
        state_selected = ''
        if national is None:
            return
        plotted = interval_source = national
        title_text = '<center><font size="+2"><b>Total %s of UFO Sightings in US by Year</b></font></center>' % (quantity)

    # Send x and y in one sync message: state and nationwide series differ in length,
    # so separate updates would briefly pair mismatched arrays on the front end.
    with plot_all.hold_sync():
        plot_all.x = plotted.index.values
        plot_all.y = plotted[:]
    title.value = title_text

    if has_interval:
        tot = interval_source.loc[ind[0]:ind[1]].sum()
        db_fast.value = '<center>The selected time period: <strong>%s - %s</strong>. %s: <strong>%s</strong></center>' % (
            int(ind[0]), int(ind[1]), summary, str(tot))
        
# Re-run observe_selected whenever the map's `selected` trait changes.
states_map.observe(observe_selected, names='selected')

#### Create plots with Time interval by year, and use callback function to return the selected interval

In [None]:
# Line mark that initially shows the nationwide yearly sightings series.
plot_all = Lines(
    x=total_sightings_year.index.values,
    y=total_sightings_year[:],
    scales={'x': dt_x_fast, 'y': lin_y},
    colors=['orange'],
)

# Interval selector on the shared x scale, attached to the line mark.
intsel_fast = FastIntervalSelector(marks=[plot_all], scale=dt_x_fast)

def fast_interval_change_callback(change):
    """Update ``db_fast`` with the raw total inside the newly selected year interval.

    ``change.new`` holds the selector's new interval. The total comes from the
    nationwide yearly series when no state is selected on the map, otherwise from
    the raw per-state series of ``state_selected``. Nothing is written when the
    interval is missing (``None``) or has no truthy bound, or when the caption is
    not one of the two known options.
    """
    ind = change.new
    # The selection can be None; guard before any(), as the other callbacks do.
    if ind is None or not any(ind):
        return

    caption = cap_select.value
    if caption == 'Total Sightings':
        national, per_state, summary = total_sightings_year, total_sightings_unnormal, 'Total sightings'
    elif caption == 'Total Duration Time':
        national, per_state, summary = total_duration_year, total_time_unnormal, 'Total duration seconds'
    else:
        return

    series = per_state[state_selected] if states_map.selected else national
    # Label-based slice: both interval bounds are inclusive.
    tot = series.loc[ind[0]:ind[1]].sum()
    db_fast.value = '<center>The selected time period: <strong>%s - %s</strong>. %s: <strong>%s</strong></center>' % (
        int(ind[0]), int(ind[1]), summary, str(tot))
# Summary line shown with the plot; starts out with the selector's current interval.
db_fast = HTML(
    value='<center>The selected time period is <strong>%s</strong></center>' % (str(intsel_fast.selected))
)
# Refresh the summary whenever the selected interval changes.
intsel_fast.observe(fast_interval_change_callback, names=['selected'])

fig_all = Figure(
    marks=[plot_all],
    axes=[x_ax, y_ay_S],
    interaction=intsel_fast,
    max_aspect_ratio=1.2,
)

In [None]:
# Words substituted into the figure title for each caption.
sighting_text = 'Number'
time_text = 'Duration Time'

# Title widget; starts with the nationwide sightings heading.
title = HTML(
    value='<center><font size="+2"><b>Total %s of UFO Sightings in US by Year</b></font></center>' % (sighting_text)
)

### 3. Add dropdown widget and connect US map and plots by using callback function

In [None]:
# Dropdown that switches the map and plot between sightings and duration.
cap_select = Dropdown(
    description='Caption:',
    options=['Total Sightings', 'Total Duration Time'],
)
def onCaptionSelected(change):
    """Switch the map colouring, line plot, title and interval summary to the chosen caption.

    ``change['new']`` is the dropdown's new value. The map colours come from the
    per-FIPS sightings dict or the per-FIPS normalized duration totals. The plot
    shows the nationwide yearly series when no state is selected on the map,
    otherwise the area-normalized series of ``state_selected``. If an interval is
    selected on the plot, ``db_fast`` is refreshed with the raw total inside it.
    Values other than the two known captions are ignored.
    """
    cap = change['new']
    ind = intsel_fast.selected
    has_interval = ind is not None and any(ind)

    # Per caption: map colours, then (per-state normalized, per-state raw, nationwide) series.
    if cap == 'Total Sightings':
        states_map.color = total_sightings_state
        normalized, raw, national = total_sightings, total_sightings_unnormal, total_sightings_year
        quantity, summary = sighting_text, 'Total sightings'
    elif cap == 'Total Duration Time':
        states_map.color = total_duration_state.to_dict()
        normalized, raw, national = total_time, total_time_unnormal, total_duration_year
        quantity, summary = time_text, 'Total duration seconds'
    else:
        return

    if states_map.selected:
        plotted = normalized[state_selected]
        interval_source = raw[state_selected] if has_interval else None
        title_text = '<center><font size="+2"><b>Normalized Total %s of UFO Sightings in %s by Year</b></font></center>' % (
            quantity, state_selected.upper())
    else:
        plotted = interval_source = national
        title_text = '<center><font size="+2"><b>Total %s of UFO Sightings in US by Year</b></font></center>' % (quantity)

    # Send x and y in one sync message so the front end never pairs arrays of
    # different lengths while switching series.
    with plot_all.hold_sync():
        plot_all.x = plotted.index.values
        plot_all.y = plotted[:]
    title.value = title_text

    if has_interval:
        tot = interval_source.loc[ind[0]:ind[1]].sum()
        db_fast.value = '<center>The selected time period: <strong>%s - %s</strong>. %s: <strong>%s</strong></center>' % (
            int(ind[0]), int(ind[1]), summary, str(tot))
        
# Re-run onCaptionSelected whenever the dropdown's value changes.
cap_select.observe(onCaptionSelected, names='value')

### 4. Embed all the widgets and figures into an H-box

In [None]:
# Left column: caption dropdown above the map. Right column: interval summary above the line plot.
left_panel = VBox([cap_select, fig_map], layout={'width': '80%', 'height': '95%'})
right_panel = VBox([db_fast, fig_all], layout={'width': '100%', 'height': '95%'})

# Title on top, the two columns side by side underneath; last expression so the cell displays it.
VBox([title, HBox([left_panel, right_panel])])