# Employment dataset & COVID-19 data

## Calculate COVID-19 data trends for last week of the data (Florida)

### Lets start by importing the COVID-19 Cases .csv file

In [1]:
import pandas as pd

cases = pd.read_csv('../../Team/covid_confirmed_usafacts.csv')
cases.head()

Unnamed: 0,countyFIPS,County Name,State,StateFIPS,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,...,2023-07-14,2023-07-15,2023-07-16,2023-07-17,2023-07-18,2023-07-19,2023-07-20,2023-07-21,2023-07-22,2023-07-23
0,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1001,Autauga County,AL,1,0,0,0,0,0,0,...,19913,19913,19913,19913,19913,19913,19913,19913,19913,19913
2,1003,Baldwin County,AL,1,0,0,0,0,0,0,...,70521,70521,70521,70521,70521,70521,70521,70521,70521,70521
3,1005,Barbour County,AL,1,0,0,0,0,0,0,...,7582,7582,7582,7582,7582,7582,7582,7582,7582,7582
4,1007,Bibb County,AL,1,0,0,0,0,0,0,...,8149,8149,8149,8149,8149,8149,8149,8149,8149,8149


### Since we are only interested in the data trend for Florida, we will create a new dataframe for only Florida data and find the Trend on that dataframe

In [2]:
florida_df = cases[cases['State'] == 'FL']
date_columns = florida_df.columns[-7:]
trends_df = florida_df[date_columns].diff(axis=1)

overall_trend = trends_df.sum().sum()

if overall_trend > 0:
    print("Cases are increasing.")
elif overall_trend < 0:
    print("Cases are decreasing.")
else:
    print("Cases are stable, no significant change.")

Cases are stable, no significant change.


## Now, we will read the Employment data and display it

In [3]:
employment_df = pd.read_excel('allhlcn231.xlsx')
employment_df.head()

Unnamed: 0,Area\nCode,St,Cnty,Own,NAICS,Year,Qtr,Area Type,St Name,Area,...,Industry,Status Code,Establishment Count,January Employment,February Employment,March Employment,Total Quarterly Wages,Average Weekly Wage,Employment Location Quotient Relative to U.S.,Total Wage Location Quotient Relative to U.S.
0,US000,US,0.0,0,10,2023,1,Nation,,U.S. TOTAL,...,"10 Total, all industries",,11883196,150176434,150957408,151418052,2873835484925,1465,1.0,1.0
1,US000,US,0.0,1,10,2023,1,Nation,,U.S. TOTAL,...,"10 Total, all industries",,60862,2864978,2878828,2883282,69903511841,1870,1.0,1.0
2,US000,US,0.0,2,10,2023,1,Nation,,U.S. TOTAL,...,"10 Total, all industries",,71582,4524712,4616278,4635444,87116336812,1459,1.0,1.0
3,US000,US,0.0,3,10,2023,1,Nation,,U.S. TOTAL,...,"10 Total, all industries",,171871,14247053,14368698,14432079,222765651053,1194,1.0,1.0
4,US000,US,0.0,5,10,2023,1,Nation,,U.S. TOTAL,...,"10 Total, all industries",,11578881,128539691,129093604,129467247,2494049985219,1487,1.0,1.0


## Perform COVID-19 data merges with the Employment data

### Merge Cases, Deaths, and County population to make a Super COVID dataset

In [4]:
cases_df = pd.read_csv("../../Team/covid_confirmed_usafacts.csv")
deaths_df = pd.read_csv("../../Team/covid_deaths_usafacts.csv")
population_df = pd.read_csv("../../Team/covid_county_population_usafacts.csv")

cases_deaths = pd.merge(cases_df, deaths_df, on='countyFIPS', suffixes=('_cases', '_deaths'))
covid_df = pd.merge(cases_deaths, population_df, on='countyFIPS')
covid_df.head()

Unnamed: 0,countyFIPS,County Name_cases,State_cases,StateFIPS_cases,2020-01-22_cases,2020-01-23_cases,2020-01-24_cases,2020-01-25_cases,2020-01-26_cases,2020-01-27_cases,...,2023-07-17_deaths,2023-07-18_deaths,2023-07-19_deaths,2023-07-20_deaths,2023-07-21_deaths,2023-07-22_deaths,2023-07-23_deaths,County Name,State,population
0,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,Statewide Unallocated,AL,0
1,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,Statewide Unallocated,AK,0
2,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,Statewide Unallocated,AZ,0
3,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,Statewide Unallocated,AR,0
4,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,Statewide Unallocated,CA,0


### Merge the Super COVID dataset with the Employment dataset

In [7]:
# First, we will extract the county name from the "Area" column which is in the format of "County, State"
employment_df['County Name'] = employment_df['Area'].str.extract(r'^(.*?),')
# A new "County Name" column is created in the Employment dataset

# Now, we can merge Super COVID19 dataset with the Employment dataset based on the "County Name" column
covid_employment = employment_df.merge(covid_df, on='County Name', how='inner')

# Finally, drop the created "County Name" column so that we are left with the original data
covid_employment = covid_employment.drop(columns='County Name')
covid_employment.head()

Unnamed: 0,Area\nCode,St,Cnty,Own,NAICS,Year,Qtr,Area Type,St Name,Area,...,2023-07-16_deaths,2023-07-17_deaths,2023-07-18_deaths,2023-07-19_deaths,2023-07-20_deaths,2023-07-21_deaths,2023-07-22_deaths,2023-07-23_deaths,State,population
0,1001,1,1.0,0,10,2023,1,County,Alabama,"Autauga County, Alabama",...,235,235,235,235,235,235,235,235,AL,55869
1,1001,1,1.0,1,10,2023,1,County,Alabama,"Autauga County, Alabama",...,235,235,235,235,235,235,235,235,AL,55869
2,1001,1,1.0,2,10,2023,1,County,Alabama,"Autauga County, Alabama",...,235,235,235,235,235,235,235,235,AL,55869
3,1001,1,1.0,3,10,2023,1,County,Alabama,"Autauga County, Alabama",...,235,235,235,235,235,235,235,235,AL,55869
4,1001,1,1.0,5,10,2023,1,County,Alabama,"Autauga County, Alabama",...,235,235,235,235,235,235,235,235,AL,55869
