# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Aim" data-toc-modified-id="Aim-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Aim</a></div><div class="lev1 toc-item"><a href="#Create-the-network-&quot;edges&quot;-----pairs-of-aiports-forming-the-trips" data-toc-modified-id="Create-the-network-&quot;edges&quot;-----pairs-of-aiports-forming-the-trips-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Create the network "edges" --- pairs of aiports forming the trips</a></div><div class="lev2 toc-item"><a href="#Data-tidying" data-toc-modified-id="Data-tidying-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Data tidying</a></div><div class="lev2 toc-item"><a href="#Most-frequent-&quot;trips&quot;-in-US-during-the-past-year" data-toc-modified-id="Most-frequent-&quot;trips&quot;-in-US-during-the-past-year-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Most frequent "trips" in US during the past year</a></div><div class="lev1 toc-item"><a href="#Create-undirected-graph----edges-ignoring-directionality" data-toc-modified-id="Create-undirected-graph----edges-ignoring-directionality-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Create undirected graph -- edges ignoring directionality</a></div><div class="lev2 toc-item"><a href="#Data-tidying" data-toc-modified-id="Data-tidying-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Data tidying</a></div><div class="lev2 toc-item"><a href="#Most-frequent-&quot;routes&quot;-in-US-during-the-past-year" data-toc-modified-id="Most-frequent-&quot;routes&quot;-in-US-during-the-past-year-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Most frequent "routes" in US during the past year</a></div><div class="lev1 toc-item"><a href="#Airport-centrality-analysis-with-complex-network-theory-tools" data-toc-modified-id="Airport-centrality-analysis-with-complex-network-theory-tools-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Airport centrality analysis with complex network theory tools</a></div>

# Aim


In [1]:
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn.apionly as sns
import plotly
import plotly.plotly as py
import calendar

# libraries for network analysis
import networkx as nx
import bct

from datetime import datetime
from pprint import pprint
from IPython.display import display

import cufflinks as cf
cf.set_config_file(theme='ggplot')

import util

# limit output to avoid cluttering screen
pd.options.display.max_rows = 20

In [3]:
period = '11/1/2015 to 10/31/2016'
outfile = 'network_analysis'

In [4]:
def get_main_data():
    """Load the flight records and spell out the day of the week.

    The frame is obtained from ``util.load_airport_data()``.  Its
    ``DAY_OF_WEEK`` column holds the codes 1..7, which are replaced by the
    labels 'Mon'..'Sun'.  Any other code raises ``KeyError``.

    Returns
    -------
    The loaded frame with ``DAY_OF_WEEK`` relabelled.
    """
    day_names = dict(zip(range(1, 8),
                         ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']))

    def to_day_name(code):
        # plain indexing (not .get) so that an unexpected code fails loudly
        return day_names[code]

    flights = util.load_airport_data()
    flights['DAY_OF_WEEK'] = flights['DAY_OF_WEEK'].map(to_day_name)
    return flights

df_data = get_main_data()

 ... load dataframe from 2015-11.zip 
 ... load dataframe from 2015-12.zip 
 ... load dataframe from 2016-01.zip 
 ... load dataframe from 2016-02.zip 
 ... load dataframe from 2016-03.zip 
 ... load dataframe from 2016-04.zip 
 ... load dataframe from 2016-05.zip 
 ... load dataframe from 2016-06.zip 
 ... load dataframe from 2016-07.zip 
 ... load dataframe from 2016-08.zip 
 ... load dataframe from 2016-09.zip 
 ... load dataframe from 2016-10.zip 


In [5]:
# lookup table for the AIRPORT_ID above (one row per airport 'Code')
df_lookup = pd.read_csv('df_lookup.csv')

display(df_lookup.head())

# create hash-tables for later convenience (with map/apply functions)
# (maps Airport "ID_Code" to other quantities of interest)
# The table is indexed by 'Code' once and each dictionary is read from it.
lookup_by_code = df_lookup.set_index('Code')
hash_airport   = lookup_by_code['Airport'].to_dict()
hash_citystate = lookup_by_code['City_State'].to_dict()
hash_lat       = lookup_by_code['lat'].to_dict()
hash_lon       = lookup_by_code['lon'].to_dict()

Unnamed: 0,Code,Description,Airport,City,State,Region,lat,lon,City_State
0,10135,"Allentown/Bethlehem/Easton, PA: Lehigh Valley ...",Lehigh Valley International,Allentown/Bethlehem/Easton,PA,Northeast,40.65165,-75.434746,Allentown/Bethlehem/Easton (PA)
1,10136,"Abilene, TX: Abilene Regional",Abilene Regional,Abilene,TX,South,32.448736,-99.733144,Abilene (TX)
2,10140,"Albuquerque, NM: Albuquerque International Sun...",Albuquerque International Sunport,Albuquerque,NM,West,35.043333,-106.612909,Albuquerque (NM)
3,10141,"Aberdeen, SD: Aberdeen Regional",Aberdeen Regional,Aberdeen,SD,Midwest,45.453458,-98.417726,Aberdeen (SD)
4,10146,"Albany, GA: Southwest Georgia Regional",Southwest Georgia Regional,Albany,GA,South,31.535671,-84.193905,Albany (GA)


# Create the network "edges" --- pairs of aiports forming the trips

- this is a directed network in the language of graph theory

## Data tidying

In [6]:
# create a new column containing the *origin* and the *destination* airport
# (these will form the network "edges" in our graph, with airport being the nodes)
# Each row gets one (origin_id, destination_id) tuple, so the order of the two
# codes encodes the direction of the flight.
df_data['Trips'] = tuple(zip(df_data['ORIGIN_AIRPORT_ID'], df_data['DEST_AIRPORT_ID']))
df_data.head()

Unnamed: 0,YEAR,QUARTER,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,ORIGIN_AIRPORT_ID,DEST_AIRPORT_ID,Trips
0,2015,4,11,4,Wed,14570,13930,"(14570, 13930)"
1,2015,4,11,5,Thu,13930,14057,"(13930, 14057)"
2,2015,4,11,6,Fri,13930,14057,"(13930, 14057)"
3,2015,4,11,7,Sat,13930,14057,"(13930, 14057)"
4,2015,4,11,8,Sun,13930,14057,"(13930, 14057)"


In [7]:
# create table of "trip_counts" (sorted by most frequent trips)
# One row per distinct (origin, destination) tuple; the 'counts' column is the
# number of rows of df_data carrying that tuple.
trip_counts = df_data['Trips'].value_counts().to_frame('counts')
trip_counts.head()

Unnamed: 0,counts
"(14771, 12892)",17757
"(12892, 14771)",17409
"(12892, 12478)",12463
"(12478, 12892)",12461
"(12892, 12889)",11317


In [8]:
# create two columns for the pair of nodes forming the edge
# (the index still holds the (origin, destination) tuples at this point:
#  code1 is the origin airport, code2 the destination airport)
trip_counts['code1'] = trip_counts.index.map(lambda x: x[0])
trip_counts['code2'] = trip_counts.index.map(lambda x: x[1])

# the tuples are now redundant, so replace the index by a plain 0-based
# position; later cells use "index + 1" as the rank of a trip
trip_counts.reset_index(drop=True,inplace=True)
trip_counts.head()

Unnamed: 0,counts,code1,code2
0,17757,14771,12892
1,17409,12892,14771
2,12463,12892,12478
3,12461,12478,12892
4,11317,12892,12889


In [9]:
# create columns with corresponding airport information
columns = [u'Airport', u'City', u'State']

# index the lookup table by airport code once; it is the same for every column
lookup_by_code = df_lookup.set_index('Code')

info_cols = []
for col in columns:
    # create hash-table for ID lookup
    hash_table = lookup_by_code[col].to_dict()

    # index the dict explicitly so that an unknown airport code raises
    # KeyError instead of silently producing a missing value
    trip_counts[col + '1'] = trip_counts['code1'].map(lambda code: hash_table[code])
    trip_counts[col + '2'] = trip_counts['code2'].map(lambda code: hash_table[code])
    info_cols += [col + '1', col + '2']

# reorder columns (just personal preference): counts, airport info, codes.
# The order is spelled out by name rather than by position, so re-running this
# cell yields the same layout; any other column already present is kept, in
# its current order, after the named ones.
cols = ['counts'] + info_cols + ['code1', 'code2']
cols += [other for other in trip_counts.columns if other not in cols]
trip_counts = trip_counts[cols]
trip_counts.head()

Unnamed: 0,counts,Airport1,Airport2,City1,City2,State1,State2,code1,code2
0,17757,San Francisco International,Los Angeles International,San Francisco,Los Angeles,CA,CA,14771,12892
1,17409,Los Angeles International,San Francisco International,Los Angeles,San Francisco,CA,CA,12892,14771
2,12463,Los Angeles International,John F. Kennedy International,Los Angeles,New York,CA,NY,12892,12478
3,12461,John F. Kennedy International,Los Angeles International,New York,Los Angeles,NY,CA,12478,12892
4,11317,Los Angeles International,McCarran International,Los Angeles,Las Vegas,CA,NV,12892,12889


- add the distance associated with each trip (i.e., the distance between airports in kilometers)
- to do this, we convert the pairs of lat/lon into a distance using [Vincenty's formulae](https://en.wikipedia.org/wiki/Vincenty's_formulae)

In [10]:
# add the distance associated with each trip (ie, distance between airports),
# in kilometers, from the (lat, lon) of the two airports
# see https://en.wikipedia.org/wiki/Vincenty's_formulae
# NOTE(review): newer geopy releases are reported to drop `vincenty` in favour
# of `geodesic` -- confirm against the installed geopy version.
from geopy.distance import vincenty

def _airport_coord(code):
    """Return the (lat, lon) pair stored in the lookup hashes for `code`."""
    return hash_lat[code], hash_lon[code]

dist_ = [vincenty(_airport_coord(origin), _airport_coord(dest)).kilometers
         for origin, dest in zip(trip_counts['code1'], trip_counts['code2'])]

trip_counts['distance'] = dist_
trip_counts.head()

Unnamed: 0,counts,Airport1,Airport2,City1,City2,State1,State2,code1,code2,distance
0,17757,San Francisco International,Los Angeles International,San Francisco,Los Angeles,CA,CA,14771,12892,543.531637
1,17409,Los Angeles International,San Francisco International,Los Angeles,San Francisco,CA,CA,12892,14771,543.531637
2,12463,Los Angeles International,John F. Kennedy International,Los Angeles,New York,CA,NY,12892,12478,3983.0794
3,12461,John F. Kennedy International,Los Angeles International,New York,Los Angeles,NY,CA,12478,12892,3983.0794
4,11317,Los Angeles International,McCarran International,Los Angeles,Las Vegas,CA,NV,12892,12889,380.413047


## Most frequent "trips" in US during the past year

- now that we have an appropriate table, let's start exploring which trip (pair of airports) took place the most during Nov-1-2015 to Oct-31-2016

- let's first see the top 10 trips

In [11]:
# Report the number of distinct directed trips, print a banner naming the
# period, then show the ten most frequent trips.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print("{} unique trips made".format(trip_counts.shape[0]))
print(' the top 10 flights during {} '.format(period).center(80, '='))
trip_counts.head(n=10)

4637 unique trips made


Unnamed: 0,counts,Airport1,Airport2,City1,City2,State1,State2,code1,code2,distance
0,17757,San Francisco International,Los Angeles International,San Francisco,Los Angeles,CA,CA,14771,12892,543.531637
1,17409,Los Angeles International,San Francisco International,Los Angeles,San Francisco,CA,CA,12892,14771,543.531637
2,12463,Los Angeles International,John F. Kennedy International,Los Angeles,New York,CA,NY,12892,12478,3983.0794
3,12461,John F. Kennedy International,Los Angeles International,New York,Los Angeles,NY,CA,12478,12892,3983.0794
4,11317,Los Angeles International,McCarran International,Los Angeles,Las Vegas,CA,NV,12892,12889,380.413047
5,11298,McCarran International,Los Angeles International,Las Vegas,Los Angeles,NV,CA,12889,12892,380.413047
6,10245,Seattle/Tacoma International,Los Angeles International,Seattle,Los Angeles,WA,CA,14747,12892,1535.3794
7,10224,Los Angeles International,Seattle/Tacoma International,Los Angeles,Seattle,CA,WA,12892,14747,1535.3794
8,10057,LaGuardia,Chicago O'Hare International,New York,Chicago,NY,IL,12953,13930,1180.12932
9,9954,Chicago O'Hare International,LaGuardia,Chicago,New York,IL,NY,13930,12953,1180.12932


- the top trips come in pairs, which makes sense, as most flights are "round-trips"

- For instance, **SF to LA** (17757 flights) and **LA to SF** (17409 flights) were the most frequently made trips.

  - As these values are very close, it's reasonable to say most of the flights were round trip
  
  - (the small difference in flight counts can be due to missed flights, permanent relocation, etc.)
  
  
Let's next plot the top 500 trips.

In [12]:
# create hover-text object for plotly

def string_rank(ranking):
    """Format a 1-based rank as hover text, e.g. 1 -> 'Ranking: 1st'.

    The English ordinal suffix follows the last digit (1 -> 'st', 2 -> 'nd',
    3 -> 'rd', anything else -> 'th'), except that ranks whose last two digits
    are 11, 12 or 13 take 'th' (11th, 112th, ...).

    Parameters
    ----------
    ranking : int
        Position in the ranking, starting at 1.

    Returns
    -------
    str
        'Ranking: ' followed by the rank and its ordinal suffix.
    """
    headstr = 'Ranking: '
    if 11 <= ranking % 100 <= 13:
        # the "teens" are irregular: 11th, 12th, 13th -- not 11st, 12nd, 13rd
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(ranking % 10, 'th')
    return headstr + str(ranking) + suffix
    
# Hover text for every trip: airports, cities (with state) and flight count,
# separated by HTML line breaks.
trip_text = (trip_counts['Airport1']
             + ' to ' + trip_counts['Airport2']
             + '<br>' + trip_counts['City1'] + ' (' + trip_counts['State1'] + ')'
             + ' to ' + trip_counts['City2'] + ' (' + trip_counts['State2'] + ')'
             + '<br>Number of flight: ' + trip_counts['counts'].astype(str))

# The frame is sorted by count with a 0-based index, so index + 1 is the rank.
# Build a real list: adding a lazy `map` object to a Series only works where
# `map` returns a list (Python 2).
rank_labels = [string_rank(position) for position in trip_counts.index + 1]

trip_counts['text'] = trip_text + '<br>' + rank_labels
trip_counts['text'][:5].tolist()

['San Francisco International to Los Angeles International<br>San Francisco (CA) to Los Angeles (CA)<br>Number of flight: 17757<br>Ranking: 1st',
 'Los Angeles International to San Francisco International<br>Los Angeles (CA) to San Francisco (CA)<br>Number of flight: 17409<br>Ranking: 2nd',
 'Los Angeles International to John F. Kennedy International<br>Los Angeles (CA) to New York (NY)<br>Number of flight: 12463<br>Ranking: 3rd',
 'John F. Kennedy International to Los Angeles International<br>New York (NY) to Los Angeles (CA)<br>Number of flight: 12461<br>Ranking: 4th',
 'Los Angeles International to McCarran International<br>Los Angeles (CA) to Las Vegas (NV)<br>Number of flight: 11317<br>Ranking: 5th']

In [13]:
trip_counts.iplot(kind='bar',columns=['counts'],text='text',filename='test')

In [14]:
# plot top_k
# (bar chart of the `top_k` most frequent trips; the 'text' column is passed
#  as the per-bar text)
# NOTE(review): every iplot call in this notebook passes filename='test', so
# the plots presumably overwrite one another on the plotly side, while the
# `outfile` constant defined near the top is never used -- confirm the intent.
top_k = 500
trip_counts[:top_k].iplot(kind='bar',columns=['counts'],text='text',filename='test')

# Create undirected graph -- edges ignoring directionality

- In our next analysis, we'll drop **directionality** in our analysis

- That is, for any given trip (edge), we'll ignore which airport was used for **take-off** and which for **landing**

- So the airport pair (SF,LA) will form an **undirected edge** with a value of 17757+17409 = 35166

- So to create an undirected graph, we do the following:

  - For any airport pair ``A,B``, we identify the directed edges ``(A -> B)`` and ``(A <- B)``

  - The resulting undirected edge ``(A <-> B)`` will have the value ``(A -> B) + (A <- B)``

## Data tidying

In [15]:
trip_counts.head()

Unnamed: 0,counts,Airport1,Airport2,City1,City2,State1,State2,code1,code2,distance,text
0,17757,San Francisco International,Los Angeles International,San Francisco,Los Angeles,CA,CA,14771,12892,543.531637,San Francisco International to Los Angeles Int...
1,17409,Los Angeles International,San Francisco International,Los Angeles,San Francisco,CA,CA,12892,14771,543.531637,Los Angeles International to San Francisco Int...
2,12463,Los Angeles International,John F. Kennedy International,Los Angeles,New York,CA,NY,12892,12478,3983.0794,Los Angeles International to John F. Kennedy I...
3,12461,John F. Kennedy International,Los Angeles International,New York,Los Angeles,NY,CA,12478,12892,3983.0794,John F. Kennedy International to Los Angeles I...
4,11317,Los Angeles International,McCarran International,Los Angeles,Las Vegas,CA,NV,12892,12889,380.413047,Los Angeles International to McCarran Internat...


In [16]:
# Direction-free key for every trip: the two airport codes in ascending order,
# so that A->B and B->A share the same key.
# (a list comprehension instead of `map`, so the Series is built from a real
#  list on Python 3 as well)
tmp = pd.Series([(min(pair), max(pair))
                 for pair in zip(trip_counts['code1'], trip_counts['code2'])])

print(tmp[:6])

# detect flights A->B and A<-B (flights sharing same pair of airport)
# `duplicated` flags every repeat of a key except the occurrence named by `keep`
mask_AB = tmp.duplicated(keep='first')  # edges A -> B (later row of a shared pair)
mask_BA = tmp.duplicated(keep='last')   # edges B -> A (earlier row of a shared pair)
mask_   = ~(mask_AB | mask_BA)          # some trips only have one direction

# sanity checks: both directions are flagged equally often, and the three
# masks account for every row exactly once
assert mask_AB.sum() == mask_BA.sum(), \
    "A->B and B->A masks flag a different number of trips"
assert trip_counts.shape[0] == (mask_AB.sum() + mask_BA.sum() + mask_.sum()), \
    "the direction masks do not partition the trips"

trips_AB = trip_counts[mask_AB]
trips_BA = trip_counts[mask_BA]
trip_neither = trip_counts[mask_]

display(trips_AB.head())
display(trips_BA.head())
display(trip_neither.head())

0    (12892, 14771)
1    (12892, 14771)
2    (12478, 12892)
3    (12478, 12892)
4    (12889, 12892)
5    (12889, 12892)
dtype: object


Unnamed: 0,counts,Airport1,Airport2,City1,City2,State1,State2,code1,code2,distance,text
1,17409,Los Angeles International,San Francisco International,Los Angeles,San Francisco,CA,CA,12892,14771,543.531637,Los Angeles International to San Francisco Int...
3,12461,John F. Kennedy International,Los Angeles International,New York,Los Angeles,NY,CA,12478,12892,3983.0794,John F. Kennedy International to Los Angeles I...
5,11298,McCarran International,Los Angeles International,Las Vegas,Los Angeles,NV,CA,12889,12892,380.413047,McCarran International to Los Angeles Internat...
7,10224,Los Angeles International,Seattle/Tacoma International,Los Angeles,Seattle,CA,WA,12892,14747,1535.3794,Los Angeles International to Seattle/Tacoma In...
9,9954,Chicago O'Hare International,LaGuardia,Chicago,New York,IL,NY,13930,12953,1180.12932,Chicago O'Hare International to LaGuardia<br>C...


Unnamed: 0,counts,Airport1,Airport2,City1,City2,State1,State2,code1,code2,distance,text
0,17757,San Francisco International,Los Angeles International,San Francisco,Los Angeles,CA,CA,14771,12892,543.531637,San Francisco International to Los Angeles Int...
2,12463,Los Angeles International,John F. Kennedy International,Los Angeles,New York,CA,NY,12892,12478,3983.0794,Los Angeles International to John F. Kennedy I...
4,11317,Los Angeles International,McCarran International,Los Angeles,Las Vegas,CA,NV,12892,12889,380.413047,Los Angeles International to McCarran Internat...
6,10245,Seattle/Tacoma International,Los Angeles International,Seattle,Los Angeles,WA,CA,14747,12892,1535.3794,Seattle/Tacoma International to Los Angeles In...
8,10057,LaGuardia,Chicago O'Hare International,New York,Chicago,NY,IL,12953,13930,1180.12932,LaGuardia to Chicago O'Hare International<br>N...


Unnamed: 0,counts,Airport1,Airport2,City1,City2,State1,State2,code1,code2,distance,text
3241,366,Wiley Post/Will Rogers Memorial,Fairbanks International,Barrow,Fairbanks,AK,AK,10754,11630,809.595183,Wiley Post/Will Rogers Memorial to Fairbanks I...
3598,263,Devils Lake Regional,Denver International,Devils Lake,Denver,ND,CO,11447,11292,1028.249825,Devils Lake Regional to Denver International<b...
3607,261,Hattiesburg-Laurel Regional,Dallas/Fort Worth International,Hattiesburg/Laurel,Dallas/Fort Worth,MS,TX,14109,11298,751.719146,Hattiesburg-Laurel Regional to Dallas/Fort Wor...
4344,23,Washington Dulles International,San Antonio International,Washington,San Antonio,DC,TX,12264,14683,2192.125251,Washington Dulles International to San Antonio...
4365,16,Joslin Field - Magic Valley Regional,San Francisco International,Twin Falls,San Francisco,ID,CA,15389,14771,862.579453,Joslin Field - Magic Valley Regional to San Fr...


In [17]:
# Re-derive the three subsets from the masks so that this cell can be re-run
# on its own (trips_BA is overwritten with a reshaped frame below).
trips_AB = trip_counts[mask_AB]
trips_BA = trip_counts[mask_BA]
trip_neither = trip_counts[~(mask_AB | mask_BA)]

# to identify matching rows, swap code1,code2
# (only the count of the opposite direction is needed from this side)
trips_BA = trips_BA.rename(columns={'code1':'code2','code2':'code1'})[['counts','code1','code2']]

# now we can use the code pairs as merge-keys; 'counts' keeps the A->B count
# and 'counts_' receives the matching B->A count.
# this will serve as the basis of our final undirected graph
trip_counts_und = trips_AB.merge(trips_BA, on=['code1','code2'], suffixes=('', '_'))

# the merge is an inner join: make sure no pair was dropped or duplicated
assert trip_counts_und.shape[0] == trips_AB.shape[0], \
    "some A->B trips did not find exactly one matching B->A trip"

trip_counts_und.head()

Unnamed: 0,counts,Airport1,Airport2,City1,City2,State1,State2,code1,code2,distance,text,counts_
0,17409,Los Angeles International,San Francisco International,Los Angeles,San Francisco,CA,CA,12892,14771,543.531637,Los Angeles International to San Francisco Int...,17757
1,12461,John F. Kennedy International,Los Angeles International,New York,Los Angeles,NY,CA,12478,12892,3983.0794,John F. Kennedy International to Los Angeles I...,12463
2,11298,McCarran International,Los Angeles International,Las Vegas,Los Angeles,NV,CA,12889,12892,380.413047,McCarran International to Los Angeles Internat...,11317
3,10224,Los Angeles International,Seattle/Tacoma International,Los Angeles,Seattle,CA,WA,12892,14747,1535.3794,Los Angeles International to Seattle/Tacoma In...,10245
4,9954,Chicago O'Hare International,LaGuardia,Chicago,New York,IL,NY,13930,12953,1180.12932,Chicago O'Hare International to LaGuardia<br>C...,10057


In [18]:
# now we can sum both directions of the edge to create our undirected graph :)
# ('counts' holds one direction, 'counts_' the opposite one)
both_directions = trip_counts_und['counts'] + trip_counts_und['counts_']
trip_counts_und = (trip_counts_und
                   .assign(counts=both_directions)
                   .drop('counts_', axis=1))

# to complete, append the trips that only had one-way direction, and re-sort!
# (pd.concat rather than DataFrame.append, which no longer exists in recent
#  pandas releases; both frames have the same columns)
trip_counts_und = (pd.concat([trip_counts_und, trip_neither])
                   .sort_values('counts', ascending=False)
                   .reset_index(drop=True))

# final undirected graph!
trip_counts_und.head(10)

Unnamed: 0,counts,Airport1,Airport2,City1,City2,State1,State2,code1,code2,distance,text
0,35166,Los Angeles International,San Francisco International,Los Angeles,San Francisco,CA,CA,12892,14771,543.531637,Los Angeles International to San Francisco Int...
1,24924,John F. Kennedy International,Los Angeles International,New York,Los Angeles,NY,CA,12478,12892,3983.0794,John F. Kennedy International to Los Angeles I...
2,22615,McCarran International,Los Angeles International,Las Vegas,Los Angeles,NV,CA,12889,12892,380.413047,McCarran International to Los Angeles Internat...
3,20469,Los Angeles International,Seattle/Tacoma International,Los Angeles,Seattle,CA,WA,12892,14747,1535.3794,Los Angeles International to Seattle/Tacoma In...
4,20011,Chicago O'Hare International,LaGuardia,Chicago,New York,IL,NY,13930,12953,1180.12932,Chicago O'Hare International to LaGuardia<br>C...
5,18254,Honolulu International,Kahului Airport,Honolulu,Kahului,HI,HI,12173,13830,162.094231,Honolulu International to Kahului Airport<br>H...
6,18244,San Francisco International,McCarran International,San Francisco,Las Vegas,CA,NV,14771,12889,666.370587,San Francisco International to McCarran Intern...
7,18141,Chicago O'Hare International,Los Angeles International,Chicago,Los Angeles,IL,CA,13930,12892,2807.429621,Chicago O'Hare International to Los Angeles In...
8,18093,Hartsfield-Jackson Atlanta International,Orlando International,Atlanta,Orlando,GA,FL,10397,13204,649.748804,Hartsfield-Jackson Atlanta International to Or...
9,17042,Ronald Reagan Washington National,Logan International,Washington,Boston,DC,MA,11278,10721,642.205372,Ronald Reagan Washington National to Logan Int...


## Most frequent "routes" in US during the past year

- To distinguish undirected edges from directed ones, I'll call the edges in the undirected graph **"routes"**, with the line of thinking that trips A->B and B->A share the same *route*

- (I'll continue to call the directed edges **trips**)

In [19]:
route_counts = trip_counts_und

Let's analyze the most frequent **routes** during the period Nov-1-2015 to Oct-31-2016

In [20]:
# Report the number of distinct undirected routes, print a banner naming the
# period, then show the ten busiest routes.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print("{} unique routes".format(route_counts.shape[0]))
print(' the top 10 flight-routes during {} '.format(period).center(80, '='))
route_counts.head(n=10)

2365 unique routes


Unnamed: 0,counts,Airport1,Airport2,City1,City2,State1,State2,code1,code2,distance,text
0,35166,Los Angeles International,San Francisco International,Los Angeles,San Francisco,CA,CA,12892,14771,543.531637,Los Angeles International to San Francisco Int...
1,24924,John F. Kennedy International,Los Angeles International,New York,Los Angeles,NY,CA,12478,12892,3983.0794,John F. Kennedy International to Los Angeles I...
2,22615,McCarran International,Los Angeles International,Las Vegas,Los Angeles,NV,CA,12889,12892,380.413047,McCarran International to Los Angeles Internat...
3,20469,Los Angeles International,Seattle/Tacoma International,Los Angeles,Seattle,CA,WA,12892,14747,1535.3794,Los Angeles International to Seattle/Tacoma In...
4,20011,Chicago O'Hare International,LaGuardia,Chicago,New York,IL,NY,13930,12953,1180.12932,Chicago O'Hare International to LaGuardia<br>C...
5,18254,Honolulu International,Kahului Airport,Honolulu,Kahului,HI,HI,12173,13830,162.094231,Honolulu International to Kahului Airport<br>H...
6,18244,San Francisco International,McCarran International,San Francisco,Las Vegas,CA,NV,14771,12889,666.370587,San Francisco International to McCarran Intern...
7,18141,Chicago O'Hare International,Los Angeles International,Chicago,Los Angeles,IL,CA,13930,12892,2807.429621,Chicago O'Hare International to Los Angeles In...
8,18093,Hartsfield-Jackson Atlanta International,Orlando International,Atlanta,Orlando,GA,FL,10397,13204,649.748804,Hartsfield-Jackson Atlanta International to Or...
9,17042,Ronald Reagan Washington National,Logan International,Washington,Boston,DC,MA,11278,10721,642.205372,Ronald Reagan Washington National to Logan Int...


In [21]:
# Hover text for every route: airports, cities (with state) and the flight
# count summed over both directions, separated by HTML line breaks.
route_text = (route_counts['Airport1']
              + ' <-> ' + route_counts['Airport2']
              + '<br>'  + route_counts['City1'] + ' (' + route_counts['State1'] + ')'
              + ' <-> ' + route_counts['City2'] + ' (' + route_counts['State2'] + ')'
              + '<br>Number of flights: ' + route_counts['counts'].astype(str))

# The frame is sorted by count with a 0-based index, so index + 1 is the rank.
# Build a real list: adding a lazy `map` object to a Series only works where
# `map` returns a list (Python 2).
rank_labels = [string_rank(position) for position in route_counts.index + 1]

route_counts['text'] = route_text + '<br>' + rank_labels
route_counts['text'][:5].tolist()

['Los Angeles International <-> San Francisco International<br>Los Angeles (CA) <-> San Francisco (CA)<br>Number of flights: 35166<br>Ranking: 1st',
 'John F. Kennedy International <-> Los Angeles International<br>New York (NY) <-> Los Angeles (CA)<br>Number of flights: 24924<br>Ranking: 2nd',
 'McCarran International <-> Los Angeles International<br>Las Vegas (NV) <-> Los Angeles (CA)<br>Number of flights: 22615<br>Ranking: 3rd',
 'Los Angeles International <-> Seattle/Tacoma International<br>Los Angeles (CA) <-> Seattle (WA)<br>Number of flights: 20469<br>Ranking: 4th',
 "Chicago O'Hare International <-> LaGuardia<br>Chicago (IL) <-> New York (NY)<br>Number of flights: 20011<br>Ranking: 5th"]

In [22]:
route_counts.iplot(kind='bar',columns=['counts'],text='text',filename='test',color='cyan')

In [23]:
# plot top_k
# (bar chart of the `top_k` busiest routes; the 'text' column is passed as the
#  per-bar text)
# NOTE(review): filename='test' is the same name used by the other iplot
# calls in this notebook, so the plots presumably overwrite one another on the
# plotly side -- confirm the intent.
top_k = 250
route_counts[:top_k].iplot(kind='bar',columns=['counts'],text='text',filename='test',color='cyan')

# Airport centrality analysis with complex network theory tools




In [None]:
nx.DiGraph