# ETL_Project

In [None]:
import pandas as pd
from sqlalchemy import create_engine, inspect

### Load the SARS CSV file into a DataFrame

In [None]:
# Location of the cleaned 2003 SARS dataset, relative to the notebook.
sars_file = "Resources/sars_2003_complete_dataset_clean.csv"

# Read the raw CSV into a DataFrame; the trailing expression previews the
# first rows as the cell output.
df_sars = pd.read_csv(filepath_or_buffer=sars_file)
df_sars.head()

### Transform SARS data

In [None]:
# Source column -> target column. Only these three columns are kept, in
# this order.
sars_columns = {
    'Country': 'country',
    'Cumulative number of case(s)': 'total_cases_sars',
    'Number of deaths': 'total_deaths_sars',
}

# Select and rename the columns, then collapse to one row per country by
# taking the maximum of each remaining column within the country group.
df_sars = (
    df_sars[list(sars_columns)]
    .rename(columns=sars_columns)
    .groupby('country')
    .max()
    .reset_index()
)
df_sars.head()

In [None]:
# Preview the first rows of the transformed SARS frame.
df_sars.head()

### Load the nCoV CSV file into a DataFrame

In [None]:
# Location of the 2019 nCoV dataset, relative to the notebook.
ncov_file = "Resources/2019_nCoV_data.csv"

# Read the raw CSV into a DataFrame; the trailing expression previews the
# first rows as the cell output.
df_ncov = pd.read_csv(filepath_or_buffer=ncov_file)
df_ncov.head()

### Transform nCoV data

In [None]:
# Rewrite selected place names to longer labels. Each replacement is an
# exact, whole-cell match (regex=False) applied to every column of the
# frame, in the order listed.
# NOTE(review): presumably the targets mirror the naming used in the SARS
# dataset so the two tables line up; verify against that file.
name_replacements = (
    ('China', 'Mainland China'),
    ('Hong Kong', 'Hong Kong SAR, China'),
    ('Macau', 'Macau SAR, China'),
    ('Taiwan', 'Taiwan, China'),
)
for old_name, new_name in name_replacements:
    df_ncov = df_ncov.replace(to_replace=old_name, value=new_name, regex=False)

In [None]:
# Cast the count columns to integers before aggregating.
df_ncov[['Confirmed', 'Deaths']] = df_ncov[['Confirmed', 'Deaths']].astype(int)

# Collapse to one row per country, keeping the maximum of each count.
# The columns are selected with a list (double brackets): indexing a
# GroupBy with a bare tuple of keys, ['Confirmed', 'Deaths'], was
# deprecated by pandas and raises in pandas 2.x.
# NOTE(review): if the source has several rows per country for the same
# date (e.g. one per province), max() yields the largest single row rather
# than a country-wide total -- confirm this is the intended figure.
df_ncov = df_ncov.groupby('Country')[['Confirmed', 'Deaths']].max().reset_index()

# Rename to the column names used for the database load.
df_ncov.columns = ['country', 'total_cases_ncov', 'total_deaths_ncov']

In [None]:
# Preview the first rows of the aggregated nCoV frame.
df_ncov.head()

### Connect to local database

In [None]:
# Connection details in user:password@host:port/database form, pointing at
# a PostgreSQL server on localhost.
# NOTE(review): the credentials are hard-coded; consider reading them from
# the environment before sharing this notebook.
rds_connection_string = "postgres:postgres@localhost:5432/virus_db"

# Prefix the dialect to form the full SQLAlchemy URL and build the engine.
engine = create_engine("postgresql://" + rds_connection_string)

In [None]:
# List the tables currently present in the database. This goes through the
# Inspector API because Engine.table_names() was deprecated in
# SQLAlchemy 1.4 and removed in 2.0.
inspect(engine).get_table_names()

### Load

In [None]:
# Append each transformed frame to its table, SARS first and then nCoV.
# The DataFrame index is not written. Because rows are appended, running
# this cell again adds the same rows a second time.
for table_name, frame in (('sars', df_sars), ('ncov', df_ncov)):
    frame.to_sql(name=table_name, con=engine, if_exists='append', index=False)