In [154]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Data source: https://www.mhlw.go.jp/stf/covid-19/open-data.html
# (number of people tested by PCR)

download_file = 'pcr_tested_daily.csv'
download_url = 'https://www.mhlw.go.jp/content/pcr_tested_daily.csv'

# Treat the local CSV as a cache: read it when present, otherwise fetch the
# data from the remote URL.
if os.path.isfile(download_file):
    df = pd.read_csv(download_file, encoding='UTF-8', index_col=False)
else:
    df = pd.read_csv(download_url, encoding='UTF-8', index_col=False)
    # Save only freshly downloaded data. Rewriting the cache after reading
    # it back from the same file is redundant I/O.
    df.to_csv(download_file, index=False)

# Build the working frame with short ASCII column names; `df` itself keeps
# the original headers because `rename` returns a new frame.
# Missing values are replaced with 0.0, so a 0 in `num` can stand for either
# a reported zero or a missing value.
# NOTE(review): confirm that treating missing values as 0 is intended.
df_copy = df.rename(
    columns={'日付': 'date', 'PCR 検査実施件数(単日)': 'num'}).fillna(0.0)


In [155]:
# Show the first five rows of the working frame.
df_copy.head(n=5)


Unnamed: 0,date,num
0,2020/2/5,4.0
1,2020/2/6,19.0
2,2020/2/7,9.0
3,2020/2/8,4.0
4,2020/2/9,10.0


In [156]:
# Show the last five rows of the working frame.
df_copy.tail(n=5)


Unnamed: 0,date,num
547,2021/8/5,114844.0
548,2021/8/6,112653.0
549,2021/8/7,71501.0
550,2021/8/8,41841.0
551,2021/8/9,28421.0


In [157]:
# Show five randomly chosen rows. The fixed seed makes the cell display the
# same rows on every "Restart & Run All"; change or drop `random_state` to
# draw a different sample.
df_copy.sample(5, random_state=0)


Unnamed: 0,date,num
298,2020/11/29,11975.0
406,2021/3/17,84919.0
316,2020/12/17,62439.0
85,2020/4/30,8126.0
428,2021/4/8,70959.0


In [158]:
# Count the non-missing values in each column.
df_copy.count(axis=0)

date    552
num     552
dtype: int64

In [159]:
# Sort the rows by `num` in ascending order (returns a new frame; `df_copy`
# itself is left in its original order).
df_copy.sort_values('num', ascending=True)


Unnamed: 0,date,num
56,2020/4/1,0.0
48,2020/3/24,0.0
42,2020/3/18,0.0
6,2020/2/11,4.0
0,2020/2/5,4.0
...,...,...
478,2021/5/28,118523.0
463,2021/5/13,122502.0
460,2021/5/10,130677.0
469,2021/5/19,149234.0


In [160]:
# Sort the rows by `num` in descending order (returns a new frame; `df_copy`
# itself is left in its original order).
df_copy.sort_values('num', ascending=False)


Unnamed: 0,date,num
484,2021/6/3,176379.0
469,2021/5/19,149234.0
460,2021/5/10,130677.0
463,2021/5/13,122502.0
478,2021/5/28,118523.0
...,...,...
3,2020/2/8,4.0
0,2020/2/5,4.0
42,2020/3/18,0.0
48,2020/3/24,0.0


In [161]:
# Summary statistics (count, mean, std, min, quartiles, max) for the frame.
df_copy.describe(percentiles=[0.25, 0.5, 0.75])


Unnamed: 0,num
count,552.0
mean,33063.541667
std,31805.330219
min,0.0
25%,5139.5
50%,21904.5
75%,58550.75
max,176379.0


In [162]:
# Convert the first five `date` strings to datetime values for display.
# The result is not assigned, so `df_copy['date']` itself is left unchanged.
pd.to_datetime(df_copy['date'].head(n=5))


0   2020-02-05
1   2020-02-06
2   2020-02-07
3   2020-02-08
4   2020-02-09
Name: date, dtype: datetime64[ns]

In [163]:
# List the dates on which 100,000 or more people were tested.
df_copy.loc[df_copy["num"].ge(100000)]


Unnamed: 0,date,num
237,2020/9/29,103676.0
344,2021/1/14,101255.0
349,2021/1/19,103313.0
352,2021/1/22,101273.0
457,2021/5/7,109758.0
460,2021/5/10,130677.0
463,2021/5/13,122502.0
469,2021/5/19,149234.0
471,2021/5/21,117145.0
477,2021/5/27,116584.0


In [164]:
# List the dates on which the tested count is 0. Missing values were replaced
# with 0.0 when `df_copy` was built, so these rows may include dates that had
# no value in the source data.
df_copy.loc[df_copy["num"].eq(0.0)]


Unnamed: 0,date,num
42,2020/3/18,0.0
48,2020/3/24,0.0
56,2020/4/1,0.0
