In [5]:
import numpy as np
import pandas as pd

### Database Setup

To download the data, uncomment and run the cell below:

In [6]:
# !gdown 1NOhnBmfMpJqv3kJ-z9klC0JPmAc2g-6K

In [7]:
# (Re)load the `sql` IPython extension so the SQL magics used in later cells are registered.
# `reload_ext` rather than `load_ext` keeps this cell safe to re-run in a live kernel.
%reload_ext sql

In [8]:
# Start from a clean slate: drop any existing `db` database, then recreate it empty.
# WARNING: this destroys whatever `db` currently holds on the local Postgres server.
# The commands connect to the `postgres` database so that `db` itself is not the
# database in use while it is being dropped.
!psql postgresql://localhost/postgres -c 'DROP DATABASE IF EXISTS db'
!psql postgresql://localhost/postgres -c 'CREATE DATABASE db'

DROP DATABASE
CREATE DATABASE


In [9]:
# Read the whole CSV into memory. `low_memory=False` has pandas parse the file in a
# single pass instead of internal chunks; drop that argument if loading is too slow
# or exhausts memory on your machine.
df = pd.read_csv(
    'Covid-19_Ver2.csv',
    low_memory=False,
)
df.head()

Unnamed: 0,cdc_report_dt,pos_spec_dt,onset_dt,current_status,sex,age_group,Race and ethnicity (combined),hosp_yn,icu_yn,death_yn,...,cdc_report_dtIs_year_end,cdc_report_dtIs_year_start,cdc_report_dtElapsed,pos_spec_dt_was_missing,onset_dt_was_missing,sex_was_missing,age_group_was_missing,Race and ethnicity (combined)_was_missing,pos_difference,onset_difference
0,2020-11-10,2020-11-10,,Laboratory-confirmed case,Male,10 - 19 Years,"Black, Non-Hispanic",No,Unknown,No,...,False,False,1604966400,False,True,False,False,False,0.0,
1,2020-11-14,2020-11-10,2020-11-10,Laboratory-confirmed case,Male,10 - 19 Years,"Black, Non-Hispanic",No,No,No,...,False,False,1605312000,False,False,False,False,False,4.0,4.0
2,2020-11-19,2020-11-10,2020-11-09,Laboratory-confirmed case,Male,10 - 19 Years,"Black, Non-Hispanic",No,No,No,...,False,False,1605744000,False,False,False,False,False,9.0,10.0
3,2020-11-14,2020-11-10,,Laboratory-confirmed case,Male,10 - 19 Years,"Black, Non-Hispanic",Missing,Missing,No,...,False,False,1605312000,False,True,False,False,False,4.0,
4,2020-11-13,2020-11-10,2020-11-10,Laboratory-confirmed case,Male,10 - 19 Years,"Black, Non-Hispanic",No,No,No,...,False,False,1605225600,False,False,False,False,False,3.0,3.0


In [10]:
# Draw a random subset of up to one million rows. The fixed seed makes the sample
# reproducible across runs. The size is capped at len(df) because sampling without
# replacement raises ValueError when more rows are requested than the frame holds.
df_sample = df.sample(n=min(1_000_000, len(df)), random_state=42)

In [11]:
from sqlalchemy import create_engine

# Write the sampled rows into a `covid` table in the `db` database.
# The DataFrame index is written too (pandas default), as an `index` column.
engine = create_engine("postgresql://localhost/db")
try:
    df_sample.to_sql(
        name="covid",
        con=engine,
        if_exists="replace",  # re-running this cell rebuilds the table instead of raising
        chunksize=10_000,     # insert in batches rather than all rows in a single write
    )
finally:
    # Release the engine's pooled connections even if the load fails part-way.
    engine.dispose()

In [12]:
# Open the SQL-magic connection to the `db` database; later %%sql cells run against it.
%sql postgresql://localhost/db

In [13]:
# Sanity check: list the tables on the current connection (`covid` should be present).
%sqlcmd tables

Name
covid


In [14]:
%%sql
-- Peek at ten rows to confirm the load worked and to see the column layout
SELECT * FROM covid LIMIT 10;

index,cdc_report_dt,pos_spec_dt,onset_dt,current_status,sex,age_group,Race and ethnicity (combined),hosp_yn,icu_yn,death_yn,medcond_yn,cdc_report_dtYear,cdc_report_dtMonth,cdc_report_dtWeek,cdc_report_dtDay,cdc_report_dtDayofweek,cdc_report_dtDayofyear,cdc_report_dtIs_month_end,cdc_report_dtIs_month_start,cdc_report_dtIs_quarter_end,cdc_report_dtIs_quarter_start,cdc_report_dtIs_year_end,cdc_report_dtIs_year_start,cdc_report_dtElapsed,pos_spec_dt_was_missing,onset_dt_was_missing,sex_was_missing,age_group_was_missing,Race and ethnicity (combined)_was_missing,pos_difference,onset_difference
6547292,2020-10-21,,,Laboratory-confirmed case,Male,50 - 59 Years,"White, Non-Hispanic",Missing,Missing,Missing,Missing,2020,10,43,21,2,295,False,False,False,False,False,False,1603238400,True,True,False,False,False,,
3820813,2020-10-23,,2020-10-23,Laboratory-confirmed case,Female,60 - 69 Years,"White, Non-Hispanic",No,No,No,Yes,2020,10,43,23,4,297,False,False,False,False,False,False,1603411200,True,False,False,False,False,,0.0
822866,2020-10-25,,2020-10-25,Laboratory-confirmed case,Male,20 - 29 Years,Unknown,No,Missing,No,Missing,2020,10,43,25,6,299,False,False,False,False,False,False,1603584000,True,False,False,False,False,,0.0
879510,2020-07-08,,2020-06-28,Laboratory-confirmed case,Male,20 - 29 Years,Unknown,Missing,Missing,Missing,Missing,2020,7,28,8,2,190,False,False,False,False,False,False,1594166400,True,False,False,False,False,,10.0
1257160,2020-07-18,2020-07-20,2020-07-18,Laboratory-confirmed case,Female,20 - 29 Years,"White, Non-Hispanic",No,No,No,Yes,2020,7,29,18,5,200,False,False,False,False,False,False,1595030400,False,False,False,False,False,-2.0,0.0
7870120,2020-08-19,,,Laboratory-confirmed case,Male,30 - 39 Years,Missing,Missing,Missing,Missing,Missing,2020,8,34,19,2,232,False,False,False,False,False,False,1597795200,True,True,False,False,False,,
8379211,2020-09-04,,2020-08-24,Laboratory-confirmed case,Female,30 - 39 Years,Hispanic/Latino,Missing,Missing,Missing,Missing,2020,9,36,4,4,248,False,False,False,False,False,False,1599177600,True,False,False,False,False,,11.0
6283188,2020-11-15,,,Laboratory-confirmed case,Male,50 - 59 Years,Unknown,Missing,Missing,Missing,Missing,2020,11,46,15,6,320,False,False,False,False,False,False,1605398400,True,True,False,False,False,,
7556282,2020-10-11,,,Probable Case,Male,30 - 39 Years,Hispanic/Latino,Yes,Unknown,No,Missing,2020,10,41,11,6,285,False,False,False,False,False,False,1602374400,True,True,False,False,False,,
2235544,2020-04-27,2020-04-27,,Probable Case,Female,40 - 49 Years,Missing,Missing,Missing,Missing,Missing,2020,4,18,27,0,118,False,False,False,False,False,False,1587945600,False,True,False,False,False,0.0,


In [16]:
# Lift the SQL magic's display row limit so query results below are shown in full.
# NOTE(review): assumes `None` means "no limit" for the installed SQL-magic version - confirm.
%config SqlMagic.displaylimit = None

In [17]:
%%sql
-- Number of laboratory-confirmed cases among males, broken down by age group
SELECT age_group,
       COUNT(*) AS male_lab_confirmed_cases
FROM covid
WHERE current_status = 'Laboratory-confirmed case'
  AND sex = 'Male'
GROUP BY age_group
ORDER BY age_group;


age_group,male_lab_confirmed_cases
0 - 9 Years,16792
10 - 19 Years,43973
20 - 29 Years,86051
30 - 39 Years,74621
40 - 49 Years,67883
50 - 59 Years,66010
60 - 69 Years,47671
70 - 79 Years,26230
80+ Years,15837
Unknown,1056
