<a href="https://colab.research.google.com/github/yoshizow/covid19-analysis/blob/main/covid19_local_charts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
from io import StringIO

import numpy as np
import pandas as pd
import requests

In [13]:
TOKYO_CASES_URL = "https://stopcovid19.metro.tokyo.lg.jp/data/130001_tokyo_covid19_positive_cases_by_municipality.csv"
TOKYO_POPULATION_URL = "https://www.toukei.metro.tokyo.lg.jp/kurasi/2021/csv/ku21rv2310.csv"

# 市区町村別新規感染者数

In [14]:
resp = requests.get(TOKYO_CASES_URL)
csv_content = resp.text

In [15]:
pref_df = pd.read_csv(StringIO(csv_content), parse_dates=['公表_年月日'])
# 集計区分 = 都外, 調査中 となっているレコードは市区町村名が設定されていないので削除
pref_df.dropna(subset=['市区町村名'], inplace=True)
# ソート済みのようだが一応ソート
pref_df.sort_values(by=['公表_年月日'], inplace=True)
# 不要なカラム削除
pref_df = pref_df.loc[:, ['市区町村名', '公表_年月日', '陽性者数']]
pref_df

Unnamed: 0,市区町村名,公表_年月日,陽性者数
0,千代田区,2020-03-31,3
33,小平市,2020-03-31,1
34,日野市,2020-03-31,3
35,東村山市,2020-03-31,0
36,国分寺市,2020-03-31,0
...,...,...,...
32025,武蔵野市,2021-08-13,2215
32026,三鷹市,2021-08-13,2625
32027,青梅市,2021-08-13,1095
32013,中野区,2021-08-13,8973


In [16]:
# CSVのデータは累積なので、差分を取って日毎にする
cities = pref_df['市区町村名'].unique()
city_dfs = {}
for city in cities:
  city_df = pref_df[pref_df['市区町村名'] == city].copy()
  city_df['陽性者数'] = city_df['陽性者数'].diff()
  city_dfs[city] = city_df
pref_df = pd.concat(city_dfs.values())
pref_df

Unnamed: 0,市区町村名,公表_年月日,陽性者数
0,千代田区,2020-03-31,
64,千代田区,2020-04-01,0.0
128,千代田区,2020-04-02,1.0
192,千代田区,2020-04-03,0.0
256,千代田区,2020-04-04,1.0
...,...,...,...
31766,江戸川区,2021-08-09,123.0
31830,江戸川区,2021-08-10,83.0
31894,江戸川区,2021-08-11,181.0
31958,江戸川区,2021-08-12,327.0


In [17]:
import plotly.express as px

In [18]:
fig = px.line(pref_df, x='公表_年月日', y='陽性者数', color='市区町村名',
              height=600, width=800, title='東京都各市区町村の陽性者数')
fig.update_layout(xaxis_rangeslider_visible=True)
fig.update_xaxes(tickformat='%Y-%m-%d', tickangle=45)
fig.show()

# 市区町村別新規感染者数(人口比)

In [19]:
resp = requests.get(TOKYO_POPULATION_URL)
resp.encoding = 'SHIFT_JIS'
csv_content = resp.text

In [20]:
pop_df = pd.read_csv(StringIO(csv_content), skiprows=5)
pop_df = pop_df[pop_df['地域階層'] == 2]
pop_df = pop_df.rename(columns={'地域': '市区町村名', '人口／総数（人）': 'population'}).loc[:, ['市区町村名', 'population']]
pop_df

Unnamed: 0,市区町村名,population
5,千代田区,67165
6,中央区,170016
7,港区,258415
8,新宿区,346467
9,文京区,236296
...,...,...
62,三宅村,2228
63,御蔵島村,326
64,八丈町,6994
65,青ヶ島村,173


In [21]:
cases_pop_df = pd.merge(pref_df, pop_df, on='市区町村名')
cases_pop_df['陽性者数/人口1万'] = cases_pop_df['陽性者数'] / cases_pop_df['population'] * 100 * 10000
cases_pop_df

Unnamed: 0,市区町村名,公表_年月日,陽性者数,population,陽性者数/人口1万
0,千代田区,2020-03-31,,67165,
1,千代田区,2020-04-01,0.0,67165,0.000000
2,千代田区,2020-04-02,1.0,67165,14.888707
3,千代田区,2020-04-03,0.0,67165,0.000000
4,千代田区,2020-04-04,1.0,67165,14.888707
...,...,...,...,...,...
31057,江戸川区,2021-08-09,123.0,691841,177.786515
31058,江戸川区,2021-08-10,83.0,691841,119.969762
31059,江戸川区,2021-08-11,181.0,691841,261.620806
31060,江戸川区,2021-08-12,327.0,691841,472.651953


In [22]:
fig = px.line(cases_pop_df, x='公表_年月日', y='陽性者数/人口1万', color='市区町村名',
              height=600, width=800, title='東京都各市区町村の陽性者数(人口1万人当たり)')
fig.update_layout(xaxis_rangeslider_visible=True)
fig.update_xaxes(tickformat='%Y-%m-%d', tickangle=45)
fig.show()