In [43]:
import pandas as pd
import numpy as np

In [44]:
# print all the outputs in a cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [45]:
vaccine_data= "https://raw.githubusercontent.com/umakanetkar/plotly_dash_visualization/main/us_state_vaccinations1.csv"

In [46]:
df = pd.read_csv(vaccine_data)

In [47]:
df.columns

Index(['date', 'State', 'daily_vaccinations', 'day', 'month', 'year'], dtype='object')

In [48]:
df.shape

(49700, 6)

In [49]:
df.isna().sum()

date                   0
State                  0
daily_vaccinations    65
day                    0
month                  0
year                   0
dtype: int64

In [50]:
df = df.fillna(method='bfill')

In [51]:
df.isna().sum()

date                  0
State                 0
daily_vaccinations    0
day                   0
month                 0
year                  0
dtype: int64

In [52]:
population_data = "https://raw.githubusercontent.com/umakanetkar/plotly_dash_visualization/main/US_POP.xlsx"

In [53]:
pop=  pd.read_excel(population_data)
#pop=  pd.read_excel("US_POP.xlsx")

In [54]:
pop.head()

Unnamed: 0,State,2021,2022
0,.Alabama,5049846.0,5074296.0
1,.Alaska,734182.0,733583.0
2,.Arizona,7264877.0,7359197.0
3,.Arkansas,3028122.0,3045637.0
4,.California,39142991.0,39029342.0


In [55]:
pop.head(2)
df.head(2)

Unnamed: 0,State,2021,2022
0,.Alabama,5049846.0,5074296.0
1,.Alaska,734182.0,733583.0


Unnamed: 0,date,State,daily_vaccinations,day,month,year
0,12-01-2021,Alabama,5906.0,12,1,2021
1,13-01-2021,Alabama,5906.0,13,1,2021


In [56]:
df.State.unique()

array(['Alabama', 'Alaska', 'American Samoa', 'Arizona', 'Arkansas',
       'Bureau of Prisons', 'California', 'Colorado', 'Connecticut',
       'Delaware', 'Dept of Defense', 'District of Columbia',
       'Federated States of Micronesia', 'Florida', 'Georgia', 'Guam',
       'Hawaii', 'Idaho', 'Illinois', 'Indian Health Svc', 'Indiana',
       'Iowa', 'Kansas', 'Kentucky', 'Long Term Care', 'Louisiana',
       'Maine', 'Marshall Islands', 'Maryland', 'Massachusetts',
       'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana',
       'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
       'New York State', 'North Carolina', 'North Dakota',
       'Northern Mariana Islands', 'Ohio', 'Oklahoma', 'Oregon',
       'Pennsylvania', 'Puerto Rico', 'Republic of Palau', 'Rhode Island',
       'South Carolina', 'South Dakota', 'Tennessee', 'Texas',
       'United States', 'Utah', 'Vermont', 'Veterans Health',
       'Virgin Islands', 'Virginia', 'Washington', 'West V

In [57]:
pop.State.unique()

array(['.Alabama', '.Alaska', '.Arizona', '.Arkansas', '.California',
       '.Colorado', '.Connecticut', '.Delaware', '.District of Columbia',
       '.Florida', '.Georgia', '.Hawaii', '.Idaho', '.Illinois',
       '.Indiana', '.Iowa', '.Kansas', '.Kentucky', '.Louisiana',
       '.Maine', '.Maryland', '.Massachusetts', '.Michigan', '.Minnesota',
       '.Mississippi', '.Missouri', '.Montana', '.Nebraska', '.Nevada',
       '.New Hampshire', '.New Jersey', '.New Mexico', '.New York',
       '.North Carolina', '.North Dakota', '.Ohio', '.Oklahoma',
       '.Oregon', '.Pennsylvania', '.Rhode Island', '.South Carolina',
       '.South Dakota', '.Tennessee', '.Texas', '.Utah', '.Vermont',
       '.Virginia', '.Washington', '.West Virginia', '.Wisconsin',
       '.Wyoming', nan, '.Puerto Rico'], dtype=object)

In [58]:
pop['State'] = pop['State'].str.replace('.', '')


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.



In [59]:
merged_df = pd.merge(df, pop, on='State')

In [60]:
df.shape
pop.shape
merged_df.shape

(49700, 6)

(53, 3)

(39372, 8)

In [61]:
merged_df.head(2)

Unnamed: 0,date,State,daily_vaccinations,day,month,year,2021,2022
0,12-01-2021,Alabama,5906.0,12,1,2021,5049846.0,5074296.0
1,13-01-2021,Alabama,5906.0,13,1,2021,5049846.0,5074296.0


In [62]:
merged_df['population'] = merged_df.apply(lambda row: row[2021] if row['year'] == 2021 else row[2022], axis=1)

In [63]:
merged_df.head(2)

Unnamed: 0,date,State,daily_vaccinations,day,month,year,2021,2022,population
0,12-01-2021,Alabama,5906.0,12,1,2021,5049846.0,5074296.0,5049846.0
1,13-01-2021,Alabama,5906.0,13,1,2021,5049846.0,5074296.0,5049846.0


In [64]:
merged_df.isna().any()

date                  False
State                 False
daily_vaccinations    False
day                   False
month                 False
year                  False
2021                  False
2022                  False
population            False
dtype: bool

In [65]:
df=merged_df[['date','State','daily_vaccinations','day','month','year','population' ]]

In [66]:
df['cumulative_vaccinations'] = df.groupby(['State'])['daily_vaccinations'].cumsum()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [67]:
df.head()

Unnamed: 0,date,State,daily_vaccinations,day,month,year,population,cumulative_vaccinations
0,12-01-2021,Alabama,5906.0,12,1,2021,5049846.0,5906.0
1,13-01-2021,Alabama,5906.0,13,1,2021,5049846.0,11812.0
2,14-01-2021,Alabama,7083.0,14,1,2021,5049846.0,18895.0
3,15-01-2021,Alabama,7478.0,15,1,2021,5049846.0,26373.0
4,16-01-2021,Alabama,7498.0,16,1,2021,5049846.0,33871.0


In [68]:
#to get % of population vaccinated
df['percent'] = df.apply(lambda row: (row['cumulative_vaccinations'] / row['population']) * 100, axis=1)

In [69]:
df = df.groupby(['State','year','month']).agg({'daily_vaccinations': 'sum','percent': 'max'}).reset_index()

In [70]:
df.head()

Unnamed: 0,State,year,month,daily_vaccinations,percent
0,Alabama,2021,1,241218.0,4.777
1,Alabama,2021,2,576041.0,16.184
2,Alabama,2021,3,786827.0,31.765
3,Alabama,2021,4,857332.0,48.742
4,Alabama,2021,5,499604.0,58.636


In [71]:
#since we dont have population data of 2023
df = df[df['year']!=2023]

In [72]:
#converting state names to state abbreviations
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

In [73]:
df['state_abbr'] = df['State'].map(us_state_abbrev)

In [74]:
df.head(2)

Unnamed: 0,State,year,month,daily_vaccinations,percent,state_abbr
0,Alabama,2021,1,241218.0,4.777,AL
1,Alabama,2021,2,576041.0,16.184,AL


### Building a Dashboard

In [75]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px  # (version 4.7.0 or higher)
import plotly.graph_objects as go
from dash import Dash, Input, Output  # pip install dash (version 2.0.0 or higher)
#from dash import  dcc, html

In [76]:
# Define the app
app = dash.Dash(__name__)

server=app.server

# Layout
app.layout = html.Div([
    html.H1("Choropleth Map with Plotly Dash"),
    dcc.Dropdown(
        id="year-dropdown",
        options=[{"label": year, "value": year} for year in df["year"].unique()],
        value=df["year"].min()
    ),
    dcc.Dropdown(
        id="month-dropdown",
        options=[{"label": month, "value": month} for month in df["month"].unique()],
        value=df["month"].min()
    ),
    dcc.Graph(id="choropleth-map")
])

# Callback
@app.callback(
    Output("choropleth-map", "figure"),
    [Input("year-dropdown", "value"), Input("month-dropdown", "value")]
)
def update_choropleth_map(year, month):
    filtered_df = df[(df["year"] == year) & (df["month"] == month)]
    fig = px.choropleth(
        filtered_df, 
        locations="state_abbr", 
        locationmode="USA-states", 
        color="percent", 
        scope="usa",
        color_continuous_scale=px.colors.sequential.YlOrRd,
        #labels={'percent': '% of Vaccinated Population'}
        #labels={'percent': '% of population vaccinated', 'state_abbr': 'State'}
        labels={'percent': '% of Population Vaccinated', 'State': 'State'}
    )
    fig.update_layout(
        margin={"r":0,"t":0,"l":0,"b":0},
        template="plotly_dark"
    )
    return fig

# Running the app
if __name__ == "__main__":
    app.run_server()

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [04/Mar/2023 12:36:44] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/Mar/2023 12:36:44] "GET /_dash-component-suites/dash/deps/polyfill@7.v2_8_1m1677788500.12.1.min.js HTTP/1.1" 200 -
127.0.0.1 - - [04/Mar/2023 12:36:44] "GET /_dash-component-suites/dash/deps/react@16.v2_8_1m1677788500.14.0.min.js HTTP/1.1" 200 -
127.0.0.1 - - [04/Mar/2023 12:36:44] "GET /_dash-component-suites/dash/deps/react-dom@16.v2_8_1m1677788500.14.0.min.js HTTP/1.1" 200 -
127.0.0.1 - - [04/Mar/2023 12:36:44] "GET /_dash-component-suites/dash/deps/prop-types@15.v2_8_1m1677788500.8.1.min.js HTTP/1.1" 200 -
127.0.0.1 - - [04/Mar/2023 12:36:44] "GET /_dash-component-suites/dash/dash-renderer/build/dash_renderer.v2_8_1m1677788499.min.js HTTP/1.1" 200 -
127.0.0.1 - - [04/Mar/2023 12:36:44] "GET /_dash-component-suites/dash/dcc/dash_core_components.v2_8_0m1677788500.js HTTP/1.1" 200 -
127.0.0.1 - - [04/Mar/2023 12:36:44] "GET /_dash-component-suites/d