In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
import matplotlib.colors as mcolors
import pandas as pd 
import random
import math
import time
plt.style.use('fivethirtyeight')
%matplotlib inline

# 문제 1

In [None]:

# Load the CSSE COVID-19 global time series (confirmed / deaths / recovered),
# the 10-12-2021 daily reports (global and US) and the Apple mobility trends CSV.
# NOTE(review): every path is absolute under /content — presumably a Colab
# session with Google Drive mounted; confirm before running anywhere else.
confirmed_df = pd.read_csv('/content/drive/MyDrive/zerobase/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
deaths_df = pd.read_csv('/content/drive/MyDrive/zerobase/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
recoveries_df = pd.read_csv('/content/drive/MyDrive/zerobase/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
latest_data = pd.read_csv('/content/drive/MyDrive/zerobase/COVID-19/csse_covid_19_data/csse_covid_19_daily_reports/10-12-2021.csv')
us_medical_data = pd.read_csv('/content/drive/MyDrive/zerobase/COVID-19/csse_covid_19_data/csse_covid_19_daily_reports_us/10-12-2021.csv')
apple_mobility = pd.read_csv('/content/applemobilitytrends-2021-10-11.csv')

In [None]:
# Show the distinct values found in the "Country/Region" column.
confirmed_df["Country/Region"].unique()

# 문제 2

In [None]:
# Keep only the rows of the five countries under study.
# `isin` compares the whole country name, whereas a `str.contains` regex keeps
# any row whose name merely contains one of these strings as a substring.
selected_countries = ["Korea, South", "US", "Japan", "United Kingdom", "France"]
total_data = confirmed_df[confirmed_df["Country/Region"].isin(selected_countries)]
# Display the surviving country names as a sanity check of the filter.
total_data["Country/Region"].unique()

In [None]:
# Preview the first rows of the filtered frame.
total_data.head()

In [None]:
# Column labels of confirmed_df; used afterwards to label-slice all three frames.
cols = confirmed_df.keys()
cols

In [None]:
# Label-slice every frame from the 5th column through the last one, dropping the
# four leading columns.  The labels come from confirmed_df (`cols`), so this
# assumes deaths_df and recoveries_df carry the same column labels — TODO confirm.
# NOTE(review): these slices keep every row of the full frames, not the
# five-country `total_data` subset, although a later plot legend reads
# "5 Country" — confirm which scope was intended.
confirmed = confirmed_df.loc[:, cols[4]:cols[-1]]
deaths = deaths_df.loc[:, cols[4]:cols[-1]]
recoveries = recoveries_df.loc[:, cols[4]:cols[-1]]

In [None]:
dates = confirmed.keys()

# One list per metric, filled in the order of the date columns.
total_cases, total_deaths, total_recovered = [], [], []
total_active, mortality_rate, recovery_rate = [], [], []

for i in dates:
    # Sum the current date column over all rows of each frame.
    confirmed_sum, death_sum, recovered_sum = (
        frame[i].sum() for frame in (confirmed, deaths, recoveries)
    )

    # cumulative confirmed, deaths and recovered, plus the derived active count
    total_cases += [confirmed_sum]
    total_deaths += [death_sum]
    total_recovered += [recovered_sum]
    total_active += [confirmed_sum - death_sum - recovered_sum]

    # deaths and recoveries as a fraction of the confirmed total
    mortality_rate += [death_sum / confirmed_sum]
    recovery_rate += [recovered_sum / confirmed_sum]
    

In [None]:
# Rebind each totals list as an (n, 1) NumPy column vector.
total_cases, total_deaths, total_recovered = (
    np.array(series).reshape(-1, 1)
    for series in (total_cases, total_deaths, total_recovered)
)

In [None]:
# Getting daily increases 

In [None]:
def daily_increase(data):
    """Return the step-to-step change of a cumulative series.

    Parameters
    ----------
    data : sequence or array of numbers
        Cumulative values in chronological order.  The input is flattened
        first, so an ``(n, 1)`` column vector is handled like a flat series.

    Returns
    -------
    list
        One scalar per input value: the first entry is the first value itself,
        every later entry is that value minus the previous one.  An empty
        input yields an empty list.
    """
    values = np.ravel(data)
    # prepend=0 makes the first difference equal to the first value itself.
    return list(np.diff(values, prepend=0))

In [None]:
# confirmed cases
world_daily_increase = daily_increase(total_cases)
# deaths
world_daily_death = daily_increase(total_deaths)
# recoveries
world_daily_recovery = daily_increase(total_recovered)



In [None]:
# Plot against an integer day offset (0 = first date column) rather than the raw
# date strings: string x-values are treated as categories, which yields one tick
# label per date, while the axis label below already reads "Days Since ...".
days_since_start = np.arange(len(dates))

# Cumulative confirmed cases.
plt.figure(figsize=(16, 10))
# ravel: total_cases may be an (n, 1) column vector at this point.
plt.plot(days_since_start, np.ravel(total_cases))
plt.title('# of Coronavirus Cases Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
# NOTE(review): the legend says "5 Country", but total_cases is summed over
# every row of confirmed_df — confirm which scope was intended.
plt.legend(['5 Country Coronavirus Cases'], prop={'size': 20})
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

# Daily increase in confirmed cases.
plt.figure(figsize=(16, 10))
# ravel: bar heights must be a flat sequence, not a list of 1-element arrays.
plt.bar(days_since_start, np.ravel(world_daily_increase))
plt.title('World Daily Increases in Confirmed Cases', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()


# 문제 3

In [None]:
def country_plot(x, y1, y2, y3, country):
    """Show three figures for one country.

    Parameters
    ----------
    x : sequence
        Shared x-values for all three figures.
    y1 : sequence
        Values drawn as a line in the "Confirmed Cases" figure.
    y2 : sequence
        Bar heights for the "Daily Increases in Confirmed Cases" figure.
    y3 : sequence
        Bar heights for the "Daily Increases in Deaths" figure.
    country : str
        Name interpolated into each legend entry and title.

    Returns
    -------
    None
        The figures are displayed through ``plt.show()``.
    """
    # (drawing function, y-values, legend template, title template) per figure
    panels = (
        (plt.plot, y1, '{} Confirmed Cases', '{} Confirmed Cases'),
        (plt.bar, y2, '{} Daily Increase in Confirmed Cases',
         '{} Daily Increases in Confirmed Cases'),
        (plt.bar, y3, '{} Daily Increase in Confirmed Deaths',
         '{} Daily Increases in Deaths'),
    )
    for draw, y, legend_template, title_template in panels:
        plt.figure(figsize=(16, 10))
        draw(x, y)
        plt.legend([legend_template.format(country)], prop={'size': 20})
        plt.title(title_template.format(country), size=30)
        plt.xlabel('Days Since 1/22/2020', size=30)
        plt.ylabel('# of Cases', size=30)
        plt.xticks(size=20)
        plt.yticks(size=20)
        plt.show()

 
def get_country_info(country_name):
    """Sum confirmed cases and deaths per date for one country.

    Reads the notebook-level ``confirmed_df``, ``deaths_df`` and ``dates``.

    Parameters
    ----------
    country_name : str
        Value compared for equality against the 'Country/Region' column.

    Returns
    -------
    tuple of (list, list)
        ``(country_cases, country_deaths)``, each holding one column sum per
        entry of ``dates``, taken over the rows matching ``country_name``.
    """
    # Filter the rows once; the selection does not depend on the date.
    confirmed_rows = confirmed_df[confirmed_df['Country/Region'] == country_name]
    death_rows = deaths_df[deaths_df['Country/Region'] == country_name]

    country_cases = [confirmed_rows[day].sum() for day in dates]
    country_deaths = [death_rows[day].sum() for day in dates]
    return (country_cases, country_deaths)

def country_visualizations(country_name):
    """Compute one country's series and hand them to ``country_plot``.

    Parameters
    ----------
    country_name : str
        Country passed to ``get_country_info`` and shown in the plot labels.
    """
    country_cases, country_deaths = get_country_info(country_name)

    country_daily_increase = daily_increase(country_cases)
    country_daily_death = daily_increase(country_deaths)

    # Integer day offsets instead of the date strings: string x-values become a
    # categorical axis with one tick per date, and country_plot labels the
    # x-axis "Days Since ..." anyway.
    days_since_start = np.arange(len(dates))
    country_plot(days_since_start, country_cases, country_daily_increase,
                 country_daily_death, country_name)

In [None]:
# Run the per-country visualisation for each country in turn.
countries = [
    'US',
    'Korea, South',
    'Japan',
    'United Kingdom',
    'France',
]
for country in countries:
    country_visualizations(country)

# 문제 4

In [None]:
def country_plot(x, y1, y2, y3, country, num=None):
    """Show three figures for one country.

    Parameters
    ----------
    x : sequence
        Shared x-values for all three figures.
    y1 : sequence
        Values drawn as a line in the "Confirmed Cases" figure.
    y2 : sequence
        Bar heights for the "Daily Increases in Confirmed Cases" figure.
    y3 : sequence
        Bar heights for the "Daily Increases in Deaths" figure.
    country : str
        Name interpolated into each legend entry and title.
    num : optional
        Accepted for the callers that pass it; the plotting itself does not
        use it.  Defaults to None so five-argument calls keep working.

    Returns
    -------
    None
        The figures are displayed through ``plt.show()``.
    """
    # (drawing function, y-values, legend template, title template) per figure
    panels = (
        (plt.plot, y1, '{} Confirmed Cases', '{} Confirmed Cases'),
        (plt.bar, y2, '{} Daily Increase in Confirmed Cases',
         '{} Daily Increases in Confirmed Cases'),
        (plt.bar, y3, '{} Daily Increase in Confirmed Deaths',
         '{} Daily Increases in Deaths'),
    )
    for draw, y, legend_template, title_template in panels:
        plt.figure(figsize=(16, 10))
        draw(x, y)
        plt.legend([legend_template.format(country)], prop={'size': 20})
        plt.title(title_template.format(country), size=30)
        plt.xlabel('Days Since 1/22/2020', size=30)
        plt.ylabel('# of Cases', size=30)
        plt.xticks(size=20)
        plt.yticks(size=20)
        plt.show()

 
def get_country_info(country_name, country_num=1):
    """Sum confirmed cases and deaths per date for one country, scaled by a divisor.

    Reads the notebook-level ``confirmed_df``, ``deaths_df`` and ``dates``.

    Parameters
    ----------
    country_name : str
        Value compared for equality against the 'Country/Region' column.
    country_num : number, optional
        Divisor applied to every per-date sum.  Defaults to 1 so that calls
        passing only ``country_name`` keep working.

    Returns
    -------
    tuple of (list, list)
        ``(country_cases, country_deaths)``, each holding one value per entry
        of ``dates``: the column sum over the matching rows divided by
        ``country_num``.
    """
    # Filter the rows once; the selection does not depend on the date.
    confirmed_rows = confirmed_df[confirmed_df['Country/Region'] == country_name]
    death_rows = deaths_df[deaths_df['Country/Region'] == country_name]

    country_cases = [confirmed_rows[day].sum() / country_num for day in dates]
    country_deaths = [death_rows[day].sum() / country_num for day in dates]
    return (country_cases, country_deaths)

def country_visualizations(country_name, country_num=1):
    """Compute one country's scaled series and hand them to ``country_plot``.

    Parameters
    ----------
    country_name : str
        Country passed to ``get_country_info`` and shown in the plot labels.
    country_num : number, optional
        Divisor forwarded to ``get_country_info`` and to ``country_plot``.
        Defaults to 1 so that calls passing only ``country_name`` keep working.
    """
    country_cases, country_deaths = get_country_info(country_name, country_num)

    country_daily_increase = daily_increase(country_cases)
    country_daily_death = daily_increase(country_deaths)

    # Integer day offsets instead of the date strings: string x-values become a
    # categorical axis with one tick per date, and country_plot labels the
    # x-axis "Days Since ..." anyway.
    days_since_start = np.arange(len(dates))
    country_plot(days_since_start, country_cases, country_daily_increase,
                 country_daily_death, country_name, country_num)

In [None]:
# Country name -> divisor handed to country_visualizations as `country_num`
# (presumably approximate population figures — TODO confirm source and year).
countries = {
    "US": 329_500_000,
    'Korea, South': 51_780_000,
    'Japan': 125_800_000,
    'United Kingdom': 67_220_000,
    'France': 67_390_000,
}
for country, num in countries.items():
    country_visualizations(country, num)

# 문제 5

In [None]:
#print(type(world_daily_increase))
# Print the position of every entry above 1,400,000, then the series length.
for i, increase in enumerate(world_daily_increase):
    if increase > 1400000:
        print(i)
print(len(world_daily_increase))

In [None]:
total_data.columns # Of the 634 columns in total, the first 4 were left out of the calculation, so the date at position 323 of the series is column 327 here.

In [None]:
# Column label at position 327 (series index 323 plus the 4 leading columns skipped by the date slice).
total_data.columns[327]

#### 1) 12/10/20에 총 5개 나라의 일일 증가량 합이 가장 높았다.

#### 2) daily_increase 비율을 보면 일본을 제외한 나머지 국가의 그래프 추세는 비슷하다

# 문제 6

#### - France는 Daily increase와 confirmed된 수가 줄어들고 있다. 하지만 나머지 네개의 나라는 daily increase도 다시 감소추세에 있다.
#### - 한국 역시 다른 나라처럼 daily increase 비율을 봤을 때 감소하는 양상이고 사망률도 감소하는 추세이다.