<a href="https://colab.research.google.com/github/z-guard/analysis/blob/main/notebooks/traffic_accident.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 交通事故
- [交通事故統計情報のオープンデータ](https://www.npa.go.jp/publications/statistics/koutsuu/opendata/index_opendata.html)

In [1]:
# Mount Google Drive inside the Colab runtime; later cells build paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [70]:
import os
import requests
import json
import pandas as pd
from urllib.parse import urlparse, quote
import io
import numpy as np
import time

# Raise pandas' display limits so wide / long frames are shown without truncation
# (up to 50 columns and 100 rows).
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100

In [3]:
# Folder on the mounted Google Drive where this notebook writes its CSV output.
DIR_NAME = '/content/drive/MyDrive/z-gard/data'

In [4]:
def get_data_csv_url(url, encoding='utf-8', excel=False, timeout=60):
    """Download a CSV or Excel file over HTTP and load it into a DataFrame.

    Args:
        url: Address of the file to download.
        encoding: Text encoding handed to ``pd.read_csv``; not used when
            ``excel`` is true.
        excel: When true, parse the payload with ``pd.read_excel`` instead of
            ``pd.read_csv``.
        timeout: Seconds passed to ``requests.get`` as its timeout, so that a
            stalled server cannot block the notebook indefinitely.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        Exception: If the response status code is anything other than 200.
            Errors raised by ``requests`` itself (connection failure, timeout)
            propagate unchanged.
    """
    res = requests.get(url, timeout=timeout)
    if res.status_code != 200:
        raise Exception(f'status_code: {res.status_code} ({url})')

    # Parse from memory; nothing is written to disk.
    payload = io.BytesIO(res.content)
    if excel:
        return pd.read_excel(payload)
    return pd.read_csv(payload, encoding=encoding)

In [6]:
# 2021 "honhyo" CSV published on the NPA open-data site; it is decoded as cp932.
accident_url = 'https://www.npa.go.jp/publications/statistics/koutsuu/opendata/2021/honhyo_2021.csv'

df_accident = get_data_csv_url(accident_url, encoding='cp932')
# Quick size check of the downloaded table (rows, columns).
print(df_accident.shape)

(305196, 58)


In [73]:
# Narrow the nationwide table down to the rows and columns used by the rest of
# the notebook, and give the coordinate columns shorter names.
is_tokyo = df_accident['都道府県コード'] == 30        # Tokyo
is_ward_area = df_accident['市区町村コード'] <= 123    # within the 23 wards

kept_columns = [
    '事故内容',
    '死者数',
    '負傷者数',
    '事故類型',
    '地点\u3000緯度（北緯）',
    '地点\u3000経度（東経）',
]
renamed_columns = {
    '地点\u3000緯度（北緯）': '世界測地系_緯度',
    '地点\u3000経度（東経）': '世界測地系_経度',
}

df_accident_tokyo = (
    df_accident
    .loc[is_tokyo & is_ward_area, kept_columns]
    .rename(columns=renamed_columns)
    .reset_index(drop=True)  # positions 0..n-1 are reused as row keys later
)
print(df_accident_tokyo.shape)

(19201, 6)


In [74]:
# Add scaled coordinate columns and readable labels for the coded columns.
# NOTE(review): the raw coordinates are divided by 1e7 as-is. If the source
# packs degrees/minutes/seconds into one integer, the result is not decimal
# degrees -- confirm it matches the input format the conversion API expects.
coordinate_scale = 10000000
content_labels = {
    1: '死亡',
    2: '負傷',
}
type_labels = {
    1: '人対車両',
    21: '車両相互',
    41: '車両単独',
    61: '列車',
}

df_accident_tokyo['lat'] = df_accident_tokyo['世界測地系_緯度'].div(coordinate_scale)
df_accident_tokyo['lon'] = df_accident_tokyo['世界測地系_経度'].div(coordinate_scale)
# Codes missing from a lookup table become NaN in the label column.
df_accident_tokyo['内容'] = df_accident_tokyo['事故内容'].map(content_labels)
df_accident_tokyo['種別'] = df_accident_tokyo['事故類型'].map(type_labels)

In [75]:
# Number of rows per outcome label.
df_accident_tokyo['内容'].value_counts()

負傷    19111
死亡       90
Name: 内容, dtype: int64

In [76]:
# Number of rows per accident-type label.
df_accident_tokyo['種別'].value_counts()

車両相互    13555
人対車両     3243
車両単独     2396
列車          7
Name: 種別, dtype: int64

In [79]:
# Column totals of the fatality and injury counts over the selected rows.
df_accident_tokyo[['死者数', '負傷者数']].sum()

死者数        91
負傷者数    21409
dtype: int64

### 測量計算サイト
- [世界測地系座標変換「TKY2JGD」](https://vldb.gsi.go.jp/sokuchi/surveycalc/api_help.html)
- リクエストは10s間に3回まで

In [88]:
# Base request URL for the TKY2JGD conversion API with JSON output requested;
# latitude/longitude are appended per request.
# NOTE(review): the meaning of sokuti=2 and Place=1 is not visible here -- confirm
# the conversion direction and input type against the API help page.
TKY2JGD_URL = 'http://vldb.gsi.go.jp/sokuchi/surveycalc/tky2jgd/tky2jgd.pl?outputType=json&sokuti=2&Place=1&'

In [89]:
# Work on the first 10,000 rows only (positions 0-9999); the API loop is slow.
df_accident_extract = df_accident_tokyo.iloc[0:10000]

In [90]:
# Send every extracted point to the TKY2JGD API, one request per row, and
# collect the returned 'OutputData' records into df_output.
#
# The service accepts at most 3 requests per 10 seconds. A fixed 2 s pause only
# respects that while each response is slow, so the pause is stretched when a
# response comes back quickly. Requests also carry a timeout so that a stalled
# connection cannot freeze this multi-hour loop.
min_request_interval_sec = 3.5   # > 10 s / 3, leaves a small safety margin
request_timeout_sec = 30

start_time = time.time()
output = []
failed_indices = []  # row positions with no result, kept so they can be retried
for index, (lat, lon) in enumerate(zip(df_accident_extract['lat'].values, df_accident_extract['lon'].values)):
    if (index % 100) == 0:
        print(f'{index}: {int(time.time() - start_time)}s')

    request_started = time.monotonic()
    url = f'{TKY2JGD_URL}latitude={lat}&longitude={lon}'
    try:
        res = requests.get(url, timeout=request_timeout_sec)
        if res.status_code == 200:
            converted = json.loads(res.content)['OutputData']
            # Row position in df_accident_extract, used to join results back.
            converted['index'] = index
            output.append(converted)
        else:
            failed_indices.append(index)
            print(f'{index}: status_code: {res.status_code}')
    except Exception as e:
        # Best effort: report the failure and continue with the next row.
        failed_indices.append(index)
        print(f'{index}: {e}')

    # Pause at least 2 s as before, and longer when the request was quick, so
    # consecutive requests start no closer than min_request_interval_sec apart.
    elapsed = time.monotonic() - request_started
    time.sleep(max(2, min_request_interval_sec - elapsed))

df_output = pd.DataFrame(output)
print(df_output.shape)
if failed_indices:
    print(f'failed: {len(failed_indices)} rows')

0: 0s
100: 561s
200: 1121s
300: 1681s
400: 2241s
500: 2801s
600: 3362s
700: 3922s
800: 4482s
900: 5043s
1000: 5604s
1100: 6164s
1200: 6724s
1300: 7285s
1400: 7845s
1500: 8405s
1600: 8965s
1700: 9526s
1800: 10087s
1900: 10649s
2000: 11209s
2100: 11775s
2200: 12338s
2300: 12901s
2400: 13464s
2500: 14025s
2600: 14586s
2700: 15146s
2800: 15706s
2900: 16267s
3000: 16827s
3100: 17388s
3200: 17948s
3300: 18509s
3400: 19070s
3500: 19632s
3600: 20194s
3700: 20755s
3800: 21318s
3900: 21883s
4000: 22448s
4100: 23011s
4200: 23574s
4300: 24136s
4400: 24699s
4500: 25263s
4600: 25828s
4700: 26393s
4800: 26961s
4900: 27527s
5000: 28093s
5100: 28657s
5200: 29223s
5300: 29786s
5400: 30350s
5500: 30916s
5600: 31480s
5700: 32045s
5800: 32612s
5900: 33180s
6000: 33745s
6100: 34313s
6200: 34879s
6300: 35442s
6400: 36007s
6500: 36572s
6600: 37137s
6700: 37703s
6800: 38267s
6900: 38830s
7000: 39392s
7100: 39955s
7200: 40517s
7300: 41080s
7400: 41643s
7500: 42209s
7600: 42775s
7700: 43341s
7800: 43907s
7900: 4

In [93]:
# Write the row position and the converted latitude/longitude to Drive, without the DataFrame index.
df_output[['index', 'latitude', 'longitude']].to_csv(os.path.join(DIR_NAME, 'traffic_accident_1.csv'), index=False)