In [1]:
%matplotlib notebook
import pandas as pd
import bqplot 
import us
from bqplot import (
    LogScale, LinearScale, OrdinalColorScale, ColorAxis,
    Axis,DateScale, Scatter, Lines, CATEGORY10, Label, Figure, Tooltip
)
from bqplot import pyplot as plt
from bqplot.interacts import (
    FastIntervalSelector, IndexSelector, BrushIntervalSelector,
    BrushSelector, MultiSelector, LassoSelector, PanZoom, HandDraw
)
from ipywidgets import interact, HBox, VBox, Dropdown, HTML, Checkbox

from IPython.display import display
# Column names applied to the UFO sightings CSV. Because `names` is passed to
# read_csv, the first line of the file is read as data, not as a header.
names = [
    "date", "city", "state", "country", "shape",
    "duration_seconds", "duration_reported", "description",
    "report_date", "latitude", "longitude",
]

# Previously used path, kept for reference:
#fn = "/srv/nbgrader/data/ufo-scrubbed-geocoded-time-standardized.csv"
fn = "/ufo-scrubbed-geocoded-time-standardized.csv"
ufo = pd.read_csv(
    fn,
    names=names,
    parse_dates=["date", "report_date"],
)

# Land-area table, keyed by FIPS code, used later to normalize the results.
area_dataset = "/area_dataset.csv"
columns = ['fips', 'STATE', 'land_area']
land_area = pd.read_csv(area_dataset, names=columns)

In [2]:
# Lookup table from state abbreviation to FIPS code.
abbr_to_fits = us.states.mapping('abbr', 'fips')


def _state_to_fips(abbr):
    """Return the integer FIPS code for `abbr`, or -1 when it is not in the table."""
    key = str(abbr).upper()
    return int(abbr_to_fits.get(key, -1))


ufo["fips"] = ufo["state"].apply(_state_to_fips)

In [3]:
# Attach each state's land area to its sightings. The inner join keeps only
# rows whose FIPS code appears in both tables.
ufo = ufo.merge(land_area, on='fips', how='inner')

# Data cleaning - more additions needed.
# DataFrame.dropna returns a new frame, so its result must be assigned (the
# bare `ufo.dropna()` call had no effect). Only the columns the derived
# `year` and `duration_hours` values are computed from are required, so
# sightings that merely lack e.g. a description are kept.
ufo = ufo.dropna(subset=['date', 'duration_seconds'])
# Remove sightings whose state could not be mapped (FIPS -1) and
# Washington DC (FIPS 11).
ufo = ufo[~ufo['fips'].isin([-1, 11])]

# Derived columns: the sighting year and the duration expressed in hours.
# `assign` builds a new frame instead of writing into a filtered slice.
ufo = ufo.assign(
    year=lambda df: df['date'].dt.year,
    duration_hours=lambda df: df['duration_seconds'] / 3600,
)

In [4]:
# Normalize the land area column by dividing it by its minimum value,
# so the smallest area becomes 1.
smallest_area = ufo['land_area'].min()
ufo['land_area'] = ufo['land_area'].div(smallest_area)

In [5]:
# Dropdown selecting the displayed metric (value 1 or 2).
new_dropdown = Dropdown(
    value=1,
    options={'Total sightings': 1, 'Total time': 2},
)
# Checkbox toggling normalization.
normalize = Checkbox(
    value=False,
    description='Normalize',
    disabled=False,
)

In [6]:
# Definitions related to the map.
# One colour scale is shared by the map and the colour axis, so the legend
# (and the min/max set on `color_scale`) describes the colours actually drawn.
color_scale = bqplot.ColorScale(colors=["AliceBlue", "BlueViolet"])
map_styles = {'scales': {'projection': bqplot.AlbersUSA(),
                         'color': color_scale}}

tt = Tooltip(fields=['id', 'name', 'color'],
             labels=['FIPS code', 'State', 'Total number of sightings'])

# The stray `colors:{'default'}` fragment was a syntax error and duplicated the
# `colors` keyword; only the valid `colors=` argument is kept.
states_map = bqplot.Map(
    map_data=bqplot.topo_load('map_data/USStatesMap.json'),
    tooltip=tt,
    colors={'default_color': '#eef3fe'},
    selected_styles={'selected_fill': 'Black', 'selected_stroke': 'White', 'selected_stroke_width': 4.0},
    unselected_styles={'unselected_fill': None},
    hovered_styles={'hovered_fill': 'White', 'hovered_stroke': 'Black', 'hovered_stroke_width': 4.0},
    **map_styles
)

axis = ColorAxis(scale=color_scale,
                 tick_style={'font-size': '25px', 'font-weight': 'bold', 'font-family': 'serif'})
map_fig = bqplot.Figure(marks=[states_map],
                        title_style={'font-size': '20px', 'font-weight': 'bold', 'font-family': 'serif'},
                        title='UFO sightings in the United States',
                        axes=[axis],
                        background_style={'fill': 'White'})
map_fig.fig_margin = {'top': 0, 'bottom': 0, 'left': 0, 'right': 0}

# Interactions for the map: clicking selects a state, hovering shows the tooltip.
states_map.interactions = {'click': "select", 'hover': 'tooltip'}

In [7]:
# Line chart (with circle markers) and its two linear scales.
x_sc = LinearScale()
y_sc = LinearScale()
scatter_chart = Lines(
    scales={'x': x_sc, 'y': y_sc},
    colors=['#984ea3'],
    labels_visibility='label',
    marker='circle',
)

# Horizontal (year) and vertical axes share the same look.
ax_x = Axis(
    scale=x_sc,
    label='Year',
    label_color='#939092',
    label_offset='35px',
    grid_color='White',
    tick_style={'font-size': '15px', 'font-weight': 'bold', 'font-family': 'serif'},
)
ax_y = Axis(
    scale=y_sc,
    orientation='vertical',
    label_color='#939092',
    label_offset='35px',
    grid_color='White',
    tick_style={'font-size': '15px', 'font-weight': 'bold', 'font-family': 'serif'},
)

# Interval selector on the x scale, attached to the figure as its interaction.
intsel_fast = FastIntervalSelector(scale=x_sc, color="black")

scatter_figure = Figure(
    axes=[ax_x, ax_y],
    marks=[scatter_chart],
    interaction=intsel_fast,
    title_style={'font-size': '15px', 'font-weight': 'bold', 'font-family': 'serif'},
    label_location='middle',
    background_style={'fill': '#f1ecce'},
)  # alternative fill tried earlier: b7d8b6

In [8]:
# HTML heading that displays the selected range of years.
db_fast = HTML(
    '<h1 style="color:black;font-family:serif;font-size:160%;font-weight:bold;">'
    'Years : 1910 to 2015</h1>'
)
db_fast.layout = {
    'width': '100%',
    'margin': '1px 3px 0px 0px',
    'height': '10%',
    'align_items': 'stretch',
}

In [9]:
# Function to update the plot every time a new state is selected. It is also
# reused for plotting normalized values.
def update_plot(fips, normalize):
    """Redraw the line chart and its title for one state.

    Parameters
    ----------
    fips : int
        FIPS code of the state to plot (compared against ``ufo['fips']``).
    normalize : bool
        When true, the yearly values are divided by the state's ``land_area``.

    The metric follows the dropdown: value 1 plots the yearly count of
    ``duration_hours`` entries, any other value plots their yearly sum.
    If ``ufo`` holds no rows for `fips` the chart is left untouched.
    """
    state_rows = ufo[ufo['fips'] == fips]
    if state_rows.empty:
        # No rows for this code (e.g. FIPS 11 is filtered out of `ufo`);
        # indexing the first row would raise IndexError.
        return

    area = state_rows['land_area'].iloc[0]

    per_year = state_rows.groupby("year", as_index=False)['duration_hours']
    if new_dropdown.get_interact_value() == 1:
        yearly = per_year.count()
    else:
        yearly = per_year.sum()

    values = yearly['duration_hours']
    if normalize:
        values = values / area

    # Send x and y to the front end together so the mark is never drawn with
    # arrays of different lengths.
    with scatter_chart.hold_sync():
        scatter_chart.x = yearly['year']
        scatter_chart.y = values

    # Set the title of the plot every time a different state is selected.
    state = us.states.lookup(state_rows.iloc[0]['state'].upper())
    scatter_figure.set_trait('title', "UFO sightings in {} from 1910-2010".format(state.name))

In [10]:
def get_fips_count(change, ufo, normalize):
    """Recolour the map from `ufo` and refresh the line chart if a state is selected.

    `change` is the metric selector handed to perform_filtering, `ufo` the
    (possibly year-filtered) sightings frame and `normalize` the flag handed
    to check_normalize and update_plot.
    """
    aggregated = perform_filtering(ufo, change)
    colour_values = check_normalize(
        normalize,
        aggregated['duration_hours'],
        aggregated['fips'],
        aggregated['land_area'],
    )
    update_mapstyles(colour_values)

    # Update the plot for the first selected state, when there is one.
    selection = states_map.selected
    if selection:
        update_plot(selection[0], normalize)


def perform_filtering(ufo, change):
    """Aggregate `ufo` per state and relabel the widgets for the chosen metric.

    Parameters
    ----------
    ufo : DataFrame
        Sightings with at least ``fips``, ``land_area`` and ``duration_hours``.
    change : int
        1 counts the ``duration_hours`` entries per state; any other value
        sums them (truncated to whole hours).

    Returns
    -------
    DataFrame
        One row per (fips, land_area) with the aggregated ``duration_hours``.
    """
    # Aggregate only the column that is used afterwards. Aggregating and
    # casting the whole frame also truncated the normalized `land_area` to an
    # integer and dragged unrelated text/date columns through sum()/astype().
    per_state = ufo.groupby(['fips', 'land_area'], as_index=False)[['duration_hours']]

    if change == 1:
        group = per_state.count()
        map_fig.set_trait('title', 'Total UFO sightings in the United States')
        # Axis label of the line plot for the selected criterion
        ax_y.set_trait('label', 'Total Number of sightings')
        # Tooltip label for the selected criterion
        tt.set_trait('labels', ['FIPS code', 'State', 'Total number of sightings'])
    else:
        group = per_state.sum()
        # Keep whole hours, as before, but leave `land_area` untouched.
        group['duration_hours'] = group['duration_hours'].astype(int)
        map_fig.set_trait('title', 'Total duration of UFO sightings in the United States')
        # Axis label of the line plot for the selected criterion
        ax_y.set_trait('label', 'Total Duration of Sightings in Hours')
        # Tooltip label for the selected criterion
        tt.set_trait('labels', ['FIPS code', 'State', 'Total duration in hours'])
    return group
        
# Check if the data has to be normalized
def check_normalize(normalize, duration, fips, land_area):
    """Pair each FIPS code with its integer value and set the colour-scale range.

    When `normalize` equals True the values are `duration` divided by
    `land_area` (via normalize_dataset); otherwise `duration` is used as is.
    The colour scale is set from the values before the integer cast.
    Returns a zip iterator of (fips, value) pairs.
    """
    if normalize == True:
        values = normalize_dataset(duration, land_area)
    else:
        values = duration
    fips_count = zip(fips, values.astype(int))
    set_color_scale(values)
    return fips_count

# Set the range of the color scale
def set_color_scale(duration_data):
    """Set `color_scale`'s max and min to the extremes of `duration_data`."""
    upper = float(duration_data.max())
    color_scale.set_trait('max', upper)
    lower = float(duration_data.min())
    color_scale.set_trait('min', lower)

# Normalize the dataset by dividing it by land area
def normalize_dataset(duration, land_area):
    """Return `duration` divided element-wise by `land_area`, both cast to float."""
    numerator = duration.astype(float)
    denominator = land_area.astype(float)
    return numerator / denominator

# Callback invoked when the dropdown value changes
def watch_dropdown(change):
    """Reset the selections and heading, then redraw the chart (FIPS 17) and the map."""
    # Clear the year interval and the selected state.
    intsel_fast.selected = None
    states_map.selected = None
    db_fast.value = (
        '<h1 style="color:black;font-family:serif;font-size:160%;font-weight:bold;">'
        'Years:1910 to 2015</h1>'
    )
    use_normalized = normalize.get_interact_value()
    update_plot(17, use_normalized)
    return get_fips_count(change.new, ufo, normalize.get_interact_value())

# Update the map by passing the color property
def update_mapstyles(fips_count):
    """Set the map's `color` trait to a dict built from the (fips, value) pairs."""
    colour_by_fips = dict(fips_count)
    states_map.set_trait('color', colour_by_fips)

# Call watch_dropdown whenever the dropdown's value changes.
new_dropdown.observe(watch_dropdown, names=['value'])

# Initial rendering with metric 1, using the checkbox's current state.
get_fips_count(
    change=1,
    ufo=ufo,
    normalize=normalize.get_interact_value(),
)


In [11]:
# Callback invoked when the map's selection changes
def selected_index_changed(change):
    """Keep at most one state selected on the map and plot it.

    `change.new` is the new value of ``states_map.selected``: ``None``, an
    empty sequence, or a sequence of FIPS codes in which the most recently
    clicked state comes last.
    """
    scatter_figure.visible = True

    selection = change.new
    if selection is None or len(selection) == 0:
        # Nothing is selected, so there is nothing to plot.
        return

    latest = selection[-1]
    if len(selection) > 1:
        # Make sure the previous state is deselected. Assigning the trait
        # re-enters this callback with the one-element list, which draws the
        # plot, so no second update is needed here.
        states_map.set_trait("selected", [latest])
        return

    # update_plot expects a single FIPS code, not the selection list.
    update_plot(latest, normalize.get_interact_value())

# Call selected_index_changed whenever the map's selection changes.
states_map.observe(selected_index_changed, names=['selected'])
# By default show the sightings for Illinois (FIPS 17), not normalized.
update_plot(17, False)

In [12]:
#HTML for showing the selected years
# NOTE(review): this re-creates the `db_fast` widget with the same value and
# layout as the earlier cell that first defines it. The callbacks look up
# `db_fast` by name when they run and the final layout cell is executed after
# this one, so they all use this later instance; one of the two definitions
# looks redundant - confirm and remove.
db_fast = HTML('<h1 style="color:black;font-family:serif;font-size:160%;font-weight:bold;">Years : 1910 to 2015</h1>')
db_fast.layout={'width':'100%','margin':'1px 3px 0px 0px','height':'10%','align_items':'stretch'}

In [13]:
# Callback of the fast interval selector
def fast_interval_change_callback(change):
    """Re-aggregate the map for the selected year interval and update the heading.

    Does nothing unless `change.new` holds exactly two bounds.
    """
    bounds = change.new
    if bounds is None or len(bounds) != 2:
        return

    states_map.selected = None
    first_year = int(bounds[0])
    last_year = int(bounds[1])
    df_selected_years = update_ufo_dataset(first_year, last_year)
    get_fips_count(
        new_dropdown.get_interact_value(),
        df_selected_years,
        normalize.get_interact_value(),
    )
    db_fast.value = (
        '<h1 style="color:black;font-family:serif;font-size:160%;font-weight:bold;">'
        'Years: {} to {}</h1>'
    ).format(first_year, last_year)
    
def update_ufo_dataset(limit_a,limit_b):
    """Return the rows of `ufo` whose year lies between the two limits, inclusive."""
    lower = int(limit_a)
    upper = int(limit_b)
    years = ufo["year"]
    in_range = years.ge(lower) & years.le(upper)
    return ufo.loc[in_range]
# Observe the interval selector's `selected` trait so that
# fast_interval_change_callback runs whenever the selected interval changes.
intsel_fast.observe(fast_interval_change_callback, names=['selected'])


In [14]:
# Callback invoked when the Normalize checkbox is toggled
def normalize_data(change):
    """Re-aggregate the map with the new normalization setting.

    `change.new` is the checkbox's new value. When a two-element year interval
    is currently selected, only the sightings in that interval are used;
    otherwise the full `ufo` frame is used.

    The state selection is cleared first, so get_fips_count's
    ``if states_map.selected`` branch does not redraw the line chart here.
    """
    states_map.selected = None

    selected_years = intsel_fast.selected
    # The former `selected_years.any() is not None` test was always true and
    # raised AttributeError for a selection without `.any()`; the length
    # check alone decides whether a usable interval is present.
    if selected_years is not None and len(selected_years) == 2:
        updated_dataset = update_ufo_dataset(int(selected_years[0]), int(selected_years[1]))
    else:
        updated_dataset = ufo

    get_fips_count(new_dropdown.get_interact_value(), updated_dataset, change.new)
            
# Call normalize_data whenever the checkbox's value changes.
normalize.observe(normalize_data, names=['value'])


In [15]:
# Dashboard layout: controls on top, map with the year heading beside it,
# and the line chart at the bottom.
VBox([
    HBox([new_dropdown, HBox([normalize])]),
    HBox([map_fig, HBox([db_fast])], layout={'width': '100%'}),
    scatter_figure,
])

A Jupyter Widget