In [1]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

In [2]:
# Only the first N_ROWS itineraries are analysed. Passing nrows lets pandas stop
# parsing once it has them, instead of loading the whole CSV and slicing afterwards.
N_ROWS = 50000
df = pd.read_csv('Data/2019_1.csv', nrows=N_ROWS)

In [3]:
# Bare expression: the notebook renders the loaded frame as this cell's output.
df

Unnamed: 0,ITIN_ID,YEAR,QUARTER,ORIGIN_STATE_NM,DEST_STATE_NM,TICKET_CARRIER,PASSENGERS,MARKET_FARE,NONSTOP_MILES
0,201912,2019,1,Pennsylvania,Georgia,DL,1.0,36.0,692.0
1,201913,2019,1,Pennsylvania,Georgia,DL,1.0,143.0,692.0
2,201914,2019,1,Pennsylvania,Georgia,DL,1.0,203.0,692.0
3,201915,2019,1,Pennsylvania,Georgia,DL,1.0,211.0,692.0
4,201916,2019,1,Pennsylvania,Georgia,DL,1.0,241.0,692.0
...,...,...,...,...,...,...,...,...,...
49995,2019134049,2019,1,Pennsylvania,Kentucky,DL,1.0,191.0,507.0
49996,2019134050,2019,1,Kentucky,Pennsylvania,DL,1.0,192.0,507.0
49997,2019134050,2019,1,Pennsylvania,Kentucky,DL,1.0,192.0,507.0
49998,2019134051,2019,1,Kentucky,Pennsylvania,DL,1.0,194.5,507.0


In [4]:
# Lookup table from full state/territory name to its two-letter code.
# The plotly maps below locate regions by two-letter code, so this table is used to
# derive a code column for both the origin-state and the destination-state columns.
us_state_to_code = {
    # --- the 50 states, alphabetical ---
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT", "Delaware": "DE",
    "Florida": "FL", "Georgia": "GA", "Hawaii": "HI", "Idaho": "ID",
    "Illinois": "IL", "Indiana": "IN", "Iowa": "IA", "Kansas": "KS",
    "Kentucky": "KY", "Louisiana": "LA", "Maine": "ME", "Maryland": "MD",
    "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN", "Mississippi": "MS",
    "Missouri": "MO", "Montana": "MT", "Nebraska": "NE", "Nevada": "NV",
    "New Hampshire": "NH", "New Jersey": "NJ", "New Mexico": "NM", "New York": "NY",
    "North Carolina": "NC", "North Dakota": "ND", "Ohio": "OH", "Oklahoma": "OK",
    "Oregon": "OR", "Pennsylvania": "PA", "Rhode Island": "RI", "South Carolina": "SC",
    "South Dakota": "SD", "Tennessee": "TN", "Texas": "TX", "Utah": "UT",
    "Vermont": "VT", "Virginia": "VA", "Washington": "WA", "West Virginia": "WV",
    "Wisconsin": "WI", "Wyoming": "WY",
    # --- federal district and territories ---
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
    # Shares the "UM" code with the Minor Outlying Islands entry above.
    "U.S. Pacific Trust Territories and Possessions": "UM",
}

In [5]:
# Translate every origin state name to its two-letter code in one vectorised pass
# (Series.map with a dict) instead of appending to a list row by row.
origin_codes = df['ORIGIN_STATE_NM'].map(us_state_to_code)

# Series.map yields NaN for names missing from the dictionary, whereas the direct
# dict lookup raised KeyError. Keep failing loudly, and report every offending
# name at once rather than only the first one encountered.
unmapped_origins = df.loc[origin_codes.isna(), 'ORIGIN_STATE_NM'].unique()
if len(unmapped_origins) > 0:
    raise KeyError(f"No state code for origin state name(s): {list(unmapped_origins)}")

# Kept as a plain list under its original name in case later cells reference it.
origin_code_list = origin_codes.tolist()

df['ORIGIN_STATE_CODE'] = origin_codes

In [6]:
# Translate every destination state name to its two-letter code in one vectorised
# pass (Series.map with a dict) instead of appending to a list row by row.
dest_codes = df['DEST_STATE_NM'].map(us_state_to_code)

# Series.map yields NaN for names missing from the dictionary, whereas the direct
# dict lookup raised KeyError. Keep failing loudly, and report every offending
# name at once rather than only the first one encountered.
unmapped_dests = df.loc[dest_codes.isna(), 'DEST_STATE_NM'].unique()
if len(unmapped_dests) > 0:
    raise KeyError(f"No state code for destination state name(s): {list(unmapped_dests)}")

# Kept as a plain list under its original name in case later cells reference it.
dest_code_list = dest_codes.tolist()

df['DEST_STATE_CODE'] = dest_codes

In [7]:
# Re-display the frame to check that ORIGIN_STATE_CODE and DEST_STATE_CODE were added.
df

Unnamed: 0,ITIN_ID,YEAR,QUARTER,ORIGIN_STATE_NM,DEST_STATE_NM,TICKET_CARRIER,PASSENGERS,MARKET_FARE,NONSTOP_MILES,ORIGIN_STATE_CODE,DEST_STATE_CODE
0,201912,2019,1,Pennsylvania,Georgia,DL,1.0,36.0,692.0,PA,GA
1,201913,2019,1,Pennsylvania,Georgia,DL,1.0,143.0,692.0,PA,GA
2,201914,2019,1,Pennsylvania,Georgia,DL,1.0,203.0,692.0,PA,GA
3,201915,2019,1,Pennsylvania,Georgia,DL,1.0,211.0,692.0,PA,GA
4,201916,2019,1,Pennsylvania,Georgia,DL,1.0,241.0,692.0,PA,GA
...,...,...,...,...,...,...,...,...,...,...,...
49995,2019134049,2019,1,Pennsylvania,Kentucky,DL,1.0,191.0,507.0,PA,KY
49996,2019134050,2019,1,Kentucky,Pennsylvania,DL,1.0,192.0,507.0,KY,PA
49997,2019134050,2019,1,Pennsylvania,Kentucky,DL,1.0,192.0,507.0,PA,KY
49998,2019134051,2019,1,Kentucky,Pennsylvania,DL,1.0,194.5,507.0,KY,PA


In [21]:
#Additional Resources:
#https://towardsdatascience.com/simplest-way-of-creating-a-choropleth-map-by-u-s-states-in-python-f359ada7735e
#https://plotly.github.io/plotly.py-docs/generated/plotly.express.choropleth.html

# px.choropleth plots the rows it is given and does not aggregate them, so passing the
# raw itineraries would not show a per-state average. Compute the mean nonstop
# distance per origin state first, giving exactly one row per state code.
origin_miles_avg = (
    df.groupby('ORIGIN_STATE_CODE', as_index=False)['NONSTOP_MILES']
      .mean()
)

fig = px.choropleth(origin_miles_avg,
                    locations='ORIGIN_STATE_CODE', #the locations parameter needs the 2-letter codes
                    locationmode="USA-states",
                    scope="usa",
                    color='NONSTOP_MILES', #mean nonstop miles per origin state (computed above)
                    color_continuous_scale='oryel',
                    title='Average Miles of a Flight When Departing by State'
                    )
fig.show()

In [22]:
# px.choropleth plots the rows it is given and does not aggregate them, so passing the
# raw itineraries would not show a per-state average. Compute the mean nonstop
# distance per destination state first, giving exactly one row per state code.
dest_miles_avg = (
    df.groupby('DEST_STATE_CODE', as_index=False)['NONSTOP_MILES']
      .mean()
)

fig = px.choropleth(dest_miles_avg,
                    locations='DEST_STATE_CODE', #2-letter destination state codes
                    locationmode="USA-states",
                    scope="usa",
                    color='NONSTOP_MILES', #mean nonstop miles per destination state (computed above)
                    color_continuous_scale='oryel',
                    title='Average Miles of a Flight When Arriving by State'
                    )
fig.show()

In [23]:
# px.choropleth plots the rows it is given and does not aggregate them, so passing the
# raw itineraries would not show a per-state average. Compute the mean market fare
# per origin state first, giving exactly one row per state code.
origin_fare_avg = (
    df.groupby('ORIGIN_STATE_CODE', as_index=False)['MARKET_FARE']
      .mean()
)

fig = px.choropleth(origin_fare_avg,
                    locations='ORIGIN_STATE_CODE', #2-letter origin state codes
                    locationmode="USA-states",
                    scope="usa",
                    color='MARKET_FARE', #mean market fare per origin state (computed above)
                    color_continuous_scale='speed',
                    title='Average Fare Ticket by State'
                    )
fig.show()