In [1]:
import os
import csv
import pandas as pd
import numpy as np
from scipy import stats
from scipy.stats import linregress
import scipy.stats as st
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Step 1: Reference to csv data files
# Relative path to the NIH alcohol sales CSV; it is resolved against the
# notebook's working directory when the file is read.
alcohol_path = "Data/NIH_Alcohol_Sales_Thru_Jun2020.csv"

In [3]:
# Step 2: Load the CSV into a DataFrame and preview its first five rows
alcohol_df = pd.read_csv(filepath_or_buffer=alcohol_path)
alcohol_df.head(n=5)

Unnamed: 0,Year,Month,FIPS,Beverage,Gallons,Ethanol,Population,PerCapita,PerCapita3yr,PctChange
0,2017,1,2,1,103645,42598,593378,0.0718,,
1,2017,1,5,1,402885,165586,2455022,0.0674,,
2,2017,1,8,1,733857,301615,4634346,0.0651,,
3,2017,1,9,1,412100,169373,3015481,0.0562,,
4,2017,1,12,1,2770686,1138752,17722275,0.0643,,


#### Rows with Year = 2020 include the 3-year per-capita average of ethanol consumption (gallons) and the percent change
* Keep only the rows where Year is 2020

In [None]:
# Keep only the 2020 rows. The file is re-read here so this cell does not
# depend on how `alcohol_df` has been reshaped by other cells in the notebook.
alcohol_2020_df = (
    pd.read_csv(alcohol_path)
    .loc[lambda frame: frame["Year"] == 2020]  # Year is read as an integer column
    .reset_index(drop=True)
)

In [None]:
# Step 3: Keep only the columns needed for merging and analysis.
# NOTE(review): the following cell repeats this same selection; running both is
# harmless because the same columns are simply selected again.
# .copy() returns an independent frame, so column assignments made in later
# cells do not raise SettingWithCopyWarning.
analysis_columns = ['FIPS', 'Month', 'Year', 'Beverage', 'Ethanol', 'PerCapita']
alcohol_df = alcohol_df[analysis_columns].copy()
alcohol_df.head()

In [4]:
# Step 3: Keep only the columns needed for merging and analysis.
# The row index is left as-is; only the column set (and its order) changes.
# .copy() makes the result an independent frame, so the column assignments in
# later cells do not raise SettingWithCopyWarning (which would otherwise be
# hidden by the warnings filter set in the imports cell).
analysis_columns = ['FIPS', 'Month', 'Year', 'Beverage', 'Ethanol', 'PerCapita']
alcohol_df = alcohol_df[analysis_columns].copy()
alcohol_df.head()

Unnamed: 0,FIPS,Month,Year,Beverage,Ethanol,PerCapita
0,2,1,2017,1,42598,0.0718
1,5,1,2017,1,165586,0.0674
2,8,1,2017,1,301615,0.0651
3,9,1,2017,1,169373,0.0562
4,12,1,2017,1,1138752,0.0643


In [5]:
# Scale PerCapita by 100 and round to two decimals, overwriting the column.
# NOTE(review): this cell is not idempotent - re-running it scales the
# already-scaled values again.
scaled_per_capita = alcohol_df["PerCapita"].mul(100).round(2)
alcohol_df["PerCapita"] = scaled_per_capita
alcohol_df.head()

Unnamed: 0,FIPS,Month,Year,Beverage,Ethanol,PerCapita
0,2,1,2017,1,42598,7.18
1,5,1,2017,1,165586,6.74
2,8,1,2017,1,301615,6.51
3,9,1,2017,1,169373,5.62
4,12,1,2017,1,1138752,6.43


In [6]:
# Show the dtype of every column in the trimmed frame
alcohol_df.dtypes

FIPS           int64
Month          int64
Year           int64
Beverage       int64
Ethanol        int64
PerCapita    float64
dtype: object

In [7]:
# Cast Year and Month to strings with a single vectorized astype call
# (instead of calling str() on each element through .apply), then display
# the dtypes to confirm the conversion.
alcohol_df = alcohol_df.astype({"Year": str, "Month": str})
alcohol_df.dtypes

FIPS           int64
Month         object
Year          object
Beverage       int64
Ethanol        int64
PerCapita    float64
dtype: object

In [8]:
# Preview the first rows of the frame
alcohol_df.head()

Unnamed: 0,FIPS,Month,Year,Beverage,Ethanol,PerCapita
0,2,1,2017,1,42598,7.18
1,5,1,2017,1,165586,6.74
2,8,1,2017,1,301615,6.51
3,9,1,2017,1,169373,5.62
4,12,1,2017,1,1138752,6.43


In [9]:
# Rename the FIPS column to State; the values themselves are not changed here.
alcohol_df = alcohol_df.rename({'FIPS': 'State'}, axis='columns')
alcohol_df.head()

Unnamed: 0,State,Month,Year,Beverage,Ethanol,PerCapita
0,2,1,2017,1,42598,7.18
1,5,1,2017,1,165586,6.74
2,8,1,2017,1,301615,6.51
3,9,1,2017,1,169373,5.62
4,12,1,2017,1,1138752,6.43


In [10]:
# Translate the numeric codes in the State column into state names.
# Any code without an entry in the mapping is left unchanged by .replace().
fips_to_state = {
    2: 'Alaska',
    5: 'Arkansas',
    8: 'Colorado',
    9: 'Connecticut',
    12: 'Florida',
    17: 'Illinois',
    20: 'Kansas',
    21: 'Kentucky',
    22: 'Louisiana',
    25: 'Massachusetts',
    29: 'Missouri',
    38: 'North Dakota',
    41: 'Oregon',
    48: 'Texas',
    51: 'Virginia',
    55: 'Wisconsin',
}
alcohol_df['State'] = alcohol_df['State'].replace(fips_to_state)
alcohol_df.head()

Unnamed: 0,State,Month,Year,Beverage,Ethanol,PerCapita
0,Alaska,1,2017,1,42598,7.18
1,Arkansas,1,2017,1,165586,6.74
2,Colorado,1,2017,1,301615,6.51
3,Connecticut,1,2017,1,169373,5.62
4,Florida,1,2017,1,1138752,6.43


In [13]:
# Translate the numeric Beverage codes into readable labels.
# Any code without an entry in the mapping is left unchanged by .replace().
beverage_labels = {1: 'Spirits', 2: 'Wine', 3: 'Beer'}
alcohol_df['Beverage'] = alcohol_df['Beverage'].replace(beverage_labels)
alcohol_df.head()

Unnamed: 0,State,Month,Year,Beverage,Ethanol,PerCapita
0,Alaska,1,2017,Spirits,42598,7.18
1,Arkansas,1,2017,Spirits,165586,6.74
2,Colorado,1,2017,Spirits,301615,6.51
3,Connecticut,1,2017,Spirits,169373,5.62
4,Florida,1,2017,Spirits,1138752,6.43


In [18]:
# Single-column frame holding only the State values; preview up to 25 rows.
state_df = alcohol_df.loc[:, ['State']]
state_df.head(n=25)

Unnamed: 0,State
0,Alaska
1,Arkansas
2,Colorado
3,Connecticut
4,Florida
5,Illinois
6,Kansas
7,Kentucky
8,Louisiana
9,Massachusetts


In [None]:
# DataFrame.head() takes a row count, not a column label, so head("State")
# raises a TypeError. Select the State column first, then preview its first rows.
alcohol_df["State"].head()

In [None]:
# Write the cleaned alcohol data to a CSV file (uncomment the line below to save it)
# alcohol_df.to_csv(r'Data\alcohol_df.csv', index=False)