In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from sklearn import preprocessing
import csv

In [None]:
# Load the raw CSV export into rawDS.
rawDS = pd.read_csv("../FromAnil/UNM/Read/State_2020_9_9.csv")

# Discard every column that contains at least one missing value.
rawDS.dropna(axis=1, inplace=True)

# DSCR: day-of-year number parsed from the 'date' column ('%m/%d/%Y' strings).
parsed_dates = [dt.datetime.strptime(date_text, '%m/%d/%Y') for date_text in rawDS['date']]
rawDS['DSCR'] = [stamp.timetuple().tm_yday for stamp in parsed_dates]
del parsed_dates

# Running total of 'New cases/1000 people' within each state (STFIPS).
# GroupBy.cumsum keeps the frame's own row index, so the result can be
# assigned straight back as a column.  The previous apply(lambda x: x.cumsum())
# form returns a (STFIPS, row) MultiIndex on pandas >= 2.0, which cannot be
# assigned to the frame.
rawDS['cumNewCases'] = rawDS.groupby('STFIPS')['New cases/1000 people'].cumsum()

# Drop the rows whose running total is still exactly zero.
rawDS.drop(rawDS.index[rawDS['cumNewCases'] == 0], inplace=True)
# reset_index() returns a new frame; called without inplace/assignment it had
# no effect.  drop=True avoids adding the old labels as an 'index' column.
rawDS.reset_index(drop=True, inplace=True)
print(rawDS.shape)

# Create DSFC attribute: DSCR counted from each state's smallest remaining
# DSCR value, starting at 1.
# transform('min') broadcasts the per-state minimum onto the frame's own index,
# so the subtraction aligns row by row.  The previous apply(lambda ...) form
# returns a group-keyed MultiIndex on pandas >= 2.0 and cannot be assigned back.
rawDS['DSFC'] = rawDS['DSCR'] - rawDS.groupby('STFIPS')['DSCR'].transform('min') + 1

# Create daily covid exposure from the 'COVID exposure/1000 people' column.
#
# Within each state (STFIPS) the daily value is the increase over the highest
# value seen so far; a value below that running maximum contributes 0 and does
# not lower the baseline.  The first row of each state keeps its own value.
# That is exactly the row-to-row difference of the per-state running maximum.
exposure_peak = rawDS.groupby('STFIPS')['COVID exposure/1000 people'].cummax()
rawDS['DailyCovidExposure'] = (
    exposure_peak.groupby(rawDS['STFIPS']).diff()
    # diff() is NaN on each state's first row; there the running maximum is
    # the first value itself, which is what the daily series starts with.
    .fillna(exposure_peak)
)

# Print "min max" of the daily values for each state, one line per state.
exposure_range = rawDS.groupby('STFIPS')['DailyCovidExposure'].agg(['min', 'max'])
print(*(f"{lo} {hi}" for lo, hi in zip(exposure_range['min'], exposure_range['max'])), sep='\n')

# Only names that are always bound are deleted here; deleting loop variables
# raised NameError whenever the loop body never ran.
del exposure_peak, exposure_range

# Compute test positivity rate (percent): 100 * new cases / tests done.
# Rows where no tests were recorded keep the default of 0.0, which also avoids
# dividing by zero.
rawDS['Positivity Rate'] = 0.0

# The comparison must be applied to the tests column itself.  The previous
# expression ((STNAME == sn) & (tests)) > 0 AND-ed a boolean Series with the
# raw tests values instead of with (tests > 0).  The calculation is row-wise,
# so no per-state loop is needed.
has_tests = rawDS['Tests done/1000 people'] > 0
rawDS.loc[has_tests, 'Positivity Rate'] = 100 * (
    rawDS.loc[has_tests, 'New cases/1000 people']
    / rawDS.loc[has_tests, 'Tests done/1000 people']
)
del has_tests

# Smooth each indicator with a centred rolling mean over a window of 7 rows,
# computed separately for every (STFIPS, STNAME) group.
def _centered_week_mean(values):
    """Return the centred rolling mean of `values` over a window of 7."""
    return values.rolling(7, center=True).mean()

# Smoothed output column -> raw input column.
smoothed_from = {
    'SDI': 'Social distancing index',
    'NewCases': 'New cases/1000 people',
    'TestingDone': 'Tests done/1000 people',
    'HospitalBedUtil': '% hospital bed utilization',
    'CovidExposure': 'DailyCovidExposure',
    'PositivityRate': 'Positivity Rate',
}
for target_col, source_col in smoothed_from.items():
    rawDS[target_col] = (
        rawDS.groupby(['STFIPS','STNAME'])[source_col].transform(_centered_week_mean)
    )

# Keep the identifier, day and smoothed columns; rows with any missing value
# (including those left empty by the centred window) are removed.
kept_columns = ['STFIPS', 'STNAME','DSCR','DSFC','SDI','NewCases','TestingDone','HospitalBedUtil','CovidExposure','PositivityRate']
covidDS = rawDS[kept_columns].dropna()
del rawDS, smoothed_from, kept_columns, target_col, source_col, _centered_week_mean

# Attach the columns from the state-code lookup file, matching rows on the
# STFIPS and STNAME key columns.
stateDS = pd.read_csv("../FromAnil/UNM/Read/state_code.csv")
covidDS = covidDS.merge(stateDS, on=['STFIPS','STNAME'])
del stateDS

# Order the rows by state name, then by DSFC, both ascending.
covidDS = covidDS.sort_values(by=['STNAME','DSFC'], ascending=True)
# The old row labels become an 'index' column, which the selection below drops.
covidDS = covidDS.reset_index()

# Columns written to the output file, in output order.
export_columns = [
    'STFIPS','STNAME','STNAMELONG','Region','Division','rColor','dColor',
    'DSCR','DSFC','SDI','NewCases','TestingDone','HospitalBedUtil',
    'CovidExposure','PositivityRate',
]
covidDS = covidDS[export_columns]
del export_columns

# Number the rows from 1; the row number is saved under the 'rID' header.
covidDS.index = covidDS.index + 1
covidDS.to_csv("../FromAnil/UNM/Write/covid_state_MA.csv", sep=',', index_label="rID")
covidDS.head()

In [3]:
## Analysis on selected nodes
# Load the analysis file and discard its 'rID' column before previewing it.
rawDS = pd.read_csv("../FromAnil/UNM/Analysis/covid_state_7MA_2_br.csv")
rawDS.drop(columns=['rID'], inplace=True)
rawDS.head()

Unnamed: 0,STFIPS,STNAME,Region,Division,DSCR,DSFC,SDI,NewCases,TestingDone,HospitalBedUtil,CovidExposure,PositivityRate,ClusterID
0,2,AK,West,Pacific,174,101,26.571429,0.018714,123.577143,38.0,0.05,0.015182,73
1,2,AK,West,Pacific,175,102,26.428571,0.020729,127.094286,38.0,0.051429,0.016185,73
2,2,AK,West,Pacific,176,103,26.0,0.021829,130.71,38.0,0.051429,0.016508,73
3,2,AK,West,Pacific,177,104,26.0,0.023257,134.632857,38.0,0.051429,0.017026,73
4,2,AK,West,Pacific,177,104,26.0,0.023257,134.632857,38.0,0.051429,0.017026,75


In [7]:
# Create class feature from ClusterID:
#   'A' for the clusters listed below, 'C' for cluster 88, 'B' for all others.
class_a_clusters = [74,76,78,80,82,84,86,89,91,93]
rawDS['class'] = 'B'
rawDS.loc[rawDS['ClusterID'].eq(88), 'class'] = 'C'
rawDS.loc[rawDS['ClusterID'].isin(class_a_clusters), 'class'] = 'A'

In [11]:
# Decision tree based analysis


array(['B'], dtype=object)

In [None]:
# Plot the min-max scaled indicators of ONE state against DSFC.
# `st` is the position of that state in the array of unique STNAME values;
# change it to look at a different state.

st=10
sIDs = covidDS['STNAME'].unique()
print(sIDs)
scaler = preprocessing.MinMaxScaler()

indicator_cols = ['SDI','NewCases','TestingDone','HospitalBedUtil','CovidExposure','PositivityRate']
line_colors = ['red','green','blue','darkorange','darkcyan','purple']

# Use the SAME state for the x values as for the y values and the title.
# x was previously taken from the hard-coded state 'LA', so it only matched
# the plotted data when sIDs[st] happened to be 'LA'.
state_rows = covidDS['STNAME']==sIDs[st]
x = covidDS.loc[state_rows, 'DSFC']
# Each indicator is rescaled independently by the scaler so that all six
# curves share one axis.
y=pd.DataFrame(scaler.fit_transform(covidDS.loc[state_rows, indicator_cols]), columns=indicator_cols)

for indicator, color in zip(indicator_cols, line_colors):
    plt.plot(x, y[indicator], color, label=indicator)
plt.xlabel('DSFC')
plt.ylabel('Min-max scaled value')
plt.legend()
plt.title('State code: '+sIDs[st])

del(y)


In [None]:
# Print np.std of the smoothed new-case and positivity-rate columns on one line.
print(*(np.std(covidDS[column]) for column in ('NewCases', 'PositivityRate')))