In [None]:
import os
import json
import requests
import csv
import numpy as np
import pandas as pd
import geopandas as gpd
from zipfile import ZipFile
from datetime import date, timedelta, datetime, timezone
from arcgis.gis import GIS
from arcgis.features import FeatureLayerCollection

In [None]:
# Log the start of the run and derive the MMDDYY stamps used in file and folder names.
print(str(datetime.now()) + " Running 1_COVID_Daily_Cases_Process")
# Read the clock once so "today" and "yesterday" stay exactly one day apart
# even if the cell happens to run across midnight.
today = date.today()
yearDate = today.strftime('%m%d%y')
print("today: "+yearDate)
yesterday = today - timedelta(days=1)
yearDate_yesterday = yesterday.strftime('%m%d%y')
print("yesterday: "+yearDate_yesterday)

### Download data

In [None]:
# Item id of the daily COVID-by-county service to download.
daily_covid_county_service_id = 'a7887f1940b34bf5a02c6f7f27a5cb2c'
# GIS() is created without a URL or credentials; the item is then looked up by id.
gis = GIS()
covid_cases = gis.content.get(daily_covid_county_service_id)
covid_cases

In [None]:
# Query the item's first layer with the match-everything filter "1=1" and
# take the result as a DataFrame through its .sdf attribute.
query = covid_cases.layers[0].query(where="1=1")
df_covid_cases = query.sdf
# Preview the first rows.
df_covid_cases.head()

In [None]:
# Remove the 'SHAPE' column, then order the rows by DEPCODE.
df_covid_cases_drop = df_covid_cases.drop('SHAPE', axis=1)
df_Case = df_covid_cases_drop.sort_values('DEPCODE')
df_Case.head()

In [None]:
# lastEditDate is divided by 1000 before fromtimestamp(), i.e. it is handled
# as an epoch value in milliseconds.
covid_data_updated = covid_cases.layers[0].properties.editingInfo.lastEditDate
# NOTE(review): fromtimestamp() without a tz argument converts to the host's
# local time zone, while the format string always appends "ET" - confirm the
# machine running this notebook is on Eastern Time.
covid_update_time = datetime.fromtimestamp(covid_data_updated/1000).strftime('%m/%d/%y %H:%M ET')
print("COVID Cases by County data is updated at " + covid_update_time)

In [None]:
# Stamp every row with the service's update time; the column is written to the
# "Florida_COVID19_Cases_by_County" sheet of the Excel workbook later on.
df_Case = df_Case.assign(UpdateTime=covid_update_time)
df_Case

In [None]:
# Item id of the COVID-by-population service; fetched through the same `gis`
# connection used for the county cases item.
covid_by_population_id = '09e0d01d8e8e46b6ae678c256d918178'
covid_popu = gis.content.get(covid_by_population_id)
# Display the item.
covid_popu

In [None]:
# Query the population item's first layer with the match-everything filter
# "1=1" and take the result as a DataFrame through its .sdf attribute.
query_popu = covid_popu.layers[0].query(where="1=1")
df_cases_popu = query_popu.sdf
# Preview the first rows.
df_cases_popu.head()

In [None]:
# Remove the 'SHAPE' column, then order the rows by FID.
df_cases_popu_drop = df_cases_popu.drop('SHAPE', axis=1)
df_PP = df_cases_popu_drop.sort_values('FID')
df_PP.head()

In [None]:
# lastEditDate is divided by 1000 before fromtimestamp(), i.e. it is handled
# as an epoch value in milliseconds.
pp_data_updated = covid_popu.layers[0].properties.editingInfo.lastEditDate
# NOTE(review): fromtimestamp() without a tz argument converts to the host's
# local time zone, while the format string always appends "ET" - confirm the
# machine running this notebook is on Eastern Time.
pp_update_time = datetime.fromtimestamp(pp_data_updated/1000).strftime('%m/%d/%y %H:%M ET')
# Log message spelling corrected ("popolation" -> "population").
print("COVID by population data is updated at {}".format(pp_update_time))

In [None]:
# Stamp every row with the population service's update time; the column is
# written to the "Sheet1" sheet of the Excel workbook later on.
df_PP = df_PP.assign(PPUpdateTime=pp_update_time)
df_PP

In [None]:
# Persist today's raw tables as one workbook with two sheets. A later cell reads
# the 'Florida_COVID19_Cases_by_County' sheet back to compare with yesterday's file.
# Make sure the target folder exists; ExcelWriter does not create it.
os.makedirs('data', exist_ok=True)
print("Saving Florida_COVID19_Cases_by_County{}.xlsx in data/".format(yearDate))
with pd.ExcelWriter('data/Florida_COVID19_Cases_by_County{}.xlsx'.format(yearDate)) as writer:
    df_Case.to_excel(writer, sheet_name='Florida_COVID19_Cases_by_County')
    df_PP.to_excel(writer, sheet_name='Sheet1')

### Extract data, process data, generate shapefile

In [None]:
# Keep only the case columns needed downstream, as an independent copy.
# Note the naming: df_case (lower-case c) is the subset of df_Case.
df_case = df_Case.loc[:, [
    'OBJECTID',
    'COUNTYNAME',
    'County_1',
    'State',
    'NewPos',
    'NewTested',
    'PUIsTotal',
    'CasesAll',
    'UpdateTime',
]].copy()
df_case.head()

In [None]:
# Compare the join keys of the two tables: names present in the population
# table (NAME) but missing from the case table (County_1).
a = set(df_PP['NAME'].values)
b = set(df_case['County_1'].values)
a - b

In [None]:
# Reverse check: names in the case table that the population table lacks.
b - a

In [None]:
# Rename the population table's 'A State' entry to 'State' (done in place on
# df_PP, before the merge on NAME / County_1).
df_PP.loc[df_PP['NAME'].eq('A State'), 'NAME'] = 'State'

In [None]:
# Outer-join population rows (NAME) with case rows (County_1) so that names
# found in only one table are kept as well.
df_merge = df_PP.merge(df_case, how='outer', left_on='NAME', right_on='County_1')
# Show every row of the merged table.
df_merge.head(len(df_merge))

In [None]:
# Expose the population table's NAME column as "County" and remove the id /
# shape-measure columns from the merged table.
df_merge_rename = df_merge.rename(columns={"NAME": "County"})
df_merge_drop = df_merge_rename.drop(
    ['OBJECTID', 'FID_1', 'County_1', 'Shape__Area', 'Shape__Length'],
    axis=1,
)
# Show every row.
df_merge_drop.head(len(df_merge_drop))

In [None]:
# Derived metrics, added in place as new columns of df_merge_drop.
# P100K_* / T100K_Y: the source column divided by Pop, scaled to 100,000.
df_merge_drop['P100K_Y'] = df_merge_drop['NewPos'] / df_merge_drop['Pop']*100000
df_merge_drop['P100K_7'] = df_merge_drop['Css7Dys'] / df_merge_drop['Pop']*100000
df_merge_drop['P100K_14'] = df_merge_drop['Css14Dy'] / df_merge_drop['Pop']*100000
df_merge_drop['T100K_Y'] = df_merge_drop['NewTested'] / df_merge_drop['Pop']*100000
# PRate: NewPos divided by NewTested. A row with NewTested == 0 would otherwise
# produce inf (or NaN for 0/0); masking the zero denominator makes such rows
# consistently missing instead of writing an infinite rate downstream.
df_merge_drop['PRate'] = df_merge_drop['NewPos'] / df_merge_drop['NewTested'].replace(0, np.nan)
# TestGoal: Css14Dy averaged over 14 and multiplied by 30.
df_merge_drop['TestGoal'] = df_merge_drop['Css14Dy'] / 14 * 30

In [None]:
# Remove the row whose index label is 67.
# NOTE(review): the variable name suggests this is meant to be the "Unknown"
# row, but the label is hard-coded and depends on the row order produced by the
# outer merge - confirm it still points at the intended row.
df_merge_drop_unknown = df_merge_drop.drop(index=[67])
df_merge_drop_unknown

In [None]:
# Cast the derived metric columns to int, one column at a time.
# astype(int) raises if a column still contains NaN or inf values.
for _int_col in ('P100K_Y', 'P100K_7', 'P100K_14', 'T100K_Y', 'TestGoal'):
    df_merge_drop_unknown[_int_col] = df_merge_drop_unknown[_int_col].astype(int)

In [None]:
# Preview the table after the integer casts.
df_merge_drop_unknown.head()

In [None]:
# Load the county polygons from the local shapefile (path is relative to the
# notebook's working directory).
gdf_county_polygon = gpd.read_file('shp/Florida_Counties.shp')
# Preview the first rows.
gdf_county_polygon.head()

In [None]:
# Compare join keys again (a and b are reused): County values in the metrics
# table that have no matching COUNTYNAME in the polygon table.
a = set(df_merge_drop_unknown['County'].values)
b = set(gdf_county_polygon['COUNTYNAME'].values)
a - b

In [None]:
# Reverse check: polygon names without a matching metrics row.
b - a

In [None]:
# Outer-join the metrics (County) with the county polygons (COUNTYNAME) so
# rows found on only one side are kept.
df_merge_poly = df_merge_drop_unknown.merge(
    gdf_county_polygon,
    how='outer',
    left_on='County',
    right_on='COUNTYNAME',
)
df_merge_poly.head()

In [None]:
# Remove the polygon-side bookkeeping columns and the duplicated county name,
# then restore the metrics-side COUNTYNAME_x column to its plain name.
df_merge_poly1 = df_merge_poly.drop(
    ['DEPCODE', 'OBJECTID', 'COUNTYNAME_y', 'COUNTY', 'DATESTAMP', 'ShapeSTAre', 'ShapeSTLen'],
    axis=1,
)
df_merge_poly2 = df_merge_poly1.rename(columns={"COUNTYNAME_x": "COUNTYNAME"})
df_merge_poly2

In [None]:
# Per-day output folder: results/<MMDDYY>.
result_folder = 'results/{}'.format(yearDate)
# exist_ok=True replaces the separate exists() check: one call, no window
# between checking and creating, and safe to re-run.
os.makedirs(result_folder, exist_ok=True)

In [None]:
# Build the GeoDataFrame from the merged table and write today's shapefile
# into the per-day result folder.
shpfile_name = "covid_impact_poly"+ yearDate
print("Saving {}.shp in results/".format(shpfile_name))
# Take the CRS from the polygons the geometries came from instead of assuming
# EPSG:4326; fall back to EPSG:4326 only when the source shapefile declares none.
source_crs = gdf_county_polygon.crs if gdf_county_polygon.crs is not None else "EPSG:4326"
gdf_merge_poly = gpd.GeoDataFrame(df_merge_poly2, geometry='geometry', crs=source_crs)
gdf_merge_poly.to_file(os.path.join(result_folder,"{}.shp".format(shpfile_name)))

In [None]:
# Bundle every file of today's shapefile into publish/<name>.zip; the archive
# is what the publish step later uploads.
# Make sure the target folder exists; ZipFile does not create it.
os.makedirs('publish', exist_ok=True)
print("Saving {}.zip in publish/".format(shpfile_name))
with ZipFile(os.path.join('publish', '{}.zip'.format(shpfile_name)), 'w') as zipObj:
    for file in os.listdir(result_folder):
        # Pick up every shapefile component sharing today's base name.
        if shpfile_name in file:
            file_path = os.path.join(result_folder, file)
            print(file)
            try:
                # Store the file flat, without the results/<date>/ prefix.
                zipObj.write(file_path, os.path.basename(file_path))
            except OSError as err:
                # Best effort: report the failure and continue with the next file.
                print("OS error: {0}".format(err))

### Use the shapefile to update Florida COVID 19 Health Metrics AGOL layer

In [None]:
# Load yesterday's and today's saved workbooks and keep only the columns used
# to decide whether the source data changed since yesterday.
# Both read_excel calls need the file to exist, including yesterday's workbook.
yesterday_covid_file = "data/Florida_COVID19_Cases_by_County{}.xlsx".format(yearDate_yesterday)
today_covid_file = "data/Florida_COVID19_Cases_by_County{}.xlsx".format(yearDate)

df_yester_covid = pd.read_excel(yesterday_covid_file, sheet_name='Florida_COVID19_Cases_by_County')
df_today_covid = pd.read_excel(today_covid_file, sheet_name='Florida_COVID19_Cases_by_County')

df_yester_newpos = df_yester_covid.loc[:, ['County_1', 'NewPos']]
df_today_newpos = df_today_covid.loc[:, ['County_1', 'NewPos']]

In [None]:
# Stop before publishing when today's County_1/NewPos values are identical to
# yesterday's; raising here halts a Run All before the upload cells.
if df_yester_newpos.equals(df_today_newpos):
    raise Exception("Today's data is not updated yet. Exit the script.")
print("Today's data is updated.")

In [None]:
# Announce the publish step (stray pilcrow character removed from the message).
print("Updating Florida COVID 19 Health Metrics layer")
# NOTE(review): uflURL is assigned here but no later cell visible in this
# notebook passes it to GIS() - confirm whether the connection should use it.
uflURL = "https://ufl.maps.arcgis.com/"
# Credentials are read from a local JSON file rather than hard-coded; its
# "token" entry is consumed by the following cell.
with open("env.json", "r") as f:
    tokenfile = json.load(f)

In [None]:
# Open a GIS connection using the token loaded from env.json.
my_token = tokenfile['token']
AGOLConnection = GIS(token=my_token)
# Debug output: dumps the attribute dictionary of the connection's users object.
# NOTE(review): check that this output exposes nothing sensitive before sharing
# the executed notebook.
print(vars(AGOLConnection.users))

In [None]:
# Path of today's zipped shapefile (same publish/<name>.zip pattern the zip
# cell writes) and the existing item, fetched by id, that will be overwritten.
covid_daily_zip_file = "publish/covid_impact_poly{}.zip".format(yearDate)
covid_daily_content = AGOLConnection.content.get('2214f2f45aa24cd5bb4ff0175fcf8a2c')
# Display the item.
covid_daily_content

In [None]:
# Overwrite the hosted layer collection behind the item with today's zip file.
covid_daily_collection = FeatureLayerCollection.fromitem(covid_daily_content)
response_covid = covid_daily_collection.manager.overwrite(covid_daily_zip_file)
# The response is only printed, not inspected.
# NOTE(review): consider failing the run when it does not report success.
print(response_covid)

In [None]:
# Log the end of the run with the current timestamp.
print("{} Done with 1_COVID_Daily_Cases_Process".format(datetime.now()))