<a href="https://colab.research.google.com/github/z-guard/analysis/blob/main/notebooks/traffic_accident.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 交通事故
- [交通事故統計情報のオープンデータ](https://www.npa.go.jp/publications/statistics/koutsuu/opendata/index_opendata.html)

In [1]:
# Colab-only: mount Google Drive at /content/drive (the output directory used
# by this notebook lives under that mount point).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
import requests
import json
import pandas as pd
from urllib.parse import urlparse, quote
import io
import numpy as np
import time

# Widen the notebook display limits so wide/long frames are not truncated.
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100

In [3]:
# Output directory under the mounted Google Drive; the master CSV is written here.
# NOTE(review): the folder is spelled 'z-gard' while the repository linked in the
# Colab badge is 'z-guard' — confirm the spelling is intentional.
DIR_NAME = '/content/drive/MyDrive/z-gard/data'

In [4]:
def get_data_csv_url(url, encoding='utf-8', excel=False, timeout=60):
    """Download a CSV (or Excel) file over HTTP and load it into a DataFrame.

    Args:
        url: Address of the file to download.
        encoding: Text encoding handed to ``pd.read_csv``. Not used when
            ``excel`` is true.
        excel: When true, parse the payload with ``pd.read_excel`` instead of
            ``pd.read_csv``.
        timeout: Value passed to ``requests.get(..., timeout=...)``. Without a
            timeout a stalled server would block the notebook indefinitely.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        Exception: If the response status code is not 200. The message carries
            the status code and the URL.
    """
    res = requests.get(url, timeout=timeout)

    # Fail fast on anything other than a plain 200 so that an error page is
    # never fed to the parser.
    if res.status_code != 200:
        raise Exception(f'status_code: {res.status_code} ({url})')

    payload = io.BytesIO(res.content)
    if excel:
        return pd.read_excel(payload)
    return pd.read_csv(payload, encoding=encoding)

In [5]:
# 2021 main accident table ("honhyo") published as open data on npa.go.jp.
accident_url = 'https://www.npa.go.jp/publications/statistics/koutsuu/opendata/2021/honhyo_2021.csv'

# The file is decoded as cp932 rather than the helper's UTF-8 default.
df_accident = get_data_csv_url(url=accident_url, encoding='cp932')
print(df_accident.shape)

(305196, 58)


In [6]:
# Keep only the accidents located in Tokyo's 23 wards, restricted to the
# columns used below, and give the coordinate columns shorter names.
df_accident_tokyo = (
    df_accident
    .loc[
        df_accident['都道府県コード'].eq(30)       # prefecture code 30: Tokyo
        & df_accident['市区町村コード'].le(123),   # municipality code <= 123: inside the 23 wards
        [
            '事故内容',
            '死者数',
            '負傷者数',
            '事故類型',
            '地点\u3000緯度（北緯）',
            '地点\u3000経度（東経）',
        ],
    ]
    .rename(columns={
        '地点\u3000緯度（北緯）': '緯度(度分秒)',
        '地点\u3000経度（東経）': '経度(度分秒)',
    })
    .reset_index(drop=True)
)
print(df_accident_tokyo.shape)

(19201, 6)


In [7]:
# Derive readable label columns from the numeric codes and turn the packed
# coordinate values into strings so they can be sliced by character position.
df_accident_tokyo = df_accident_tokyo.assign(**{
    # Accident outcome: 1 = fatal, 2 = injury.
    '内容': lambda frame: frame['事故内容'].map({
        1: '死亡',
        2: '負傷',
    }),
    # Accident type: pedestrian-vs-vehicle, vehicle-vs-vehicle,
    # single vehicle, train.
    '種別': lambda frame: frame['事故類型'].map({
        1: '人対車両',
        21: '車両相互',
        41: '車両単独',
        61: '列車',
    }),
    '緯度(度分秒)': lambda frame: frame['緯度(度分秒)'].astype(str),
    '経度(度分秒)': lambda frame: frame['経度(度分秒)'].astype(str),
})

In [8]:
# Number of accidents per outcome label.
df_accident_tokyo.loc[:, '内容'].value_counts()

負傷    19111
死亡       90
Name: 内容, dtype: int64

In [9]:
# Number of accidents per accident-type label.
df_accident_tokyo.loc[:, '種別'].value_counts()

車両相互    13555
人対車両     3243
車両単独     2396
列車          7
Name: 種別, dtype: int64

In [10]:
# Total fatalities and total injured across the selected accidents.
df_accident_tokyo.loc[:, ['死者数', '負傷者数']].sum()

死者数        91
負傷者数    21409
dtype: int64

In [11]:
# Preview the first five rows.
df_accident_tokyo.head(n=5)

Unnamed: 0,事故内容,死者数,負傷者数,事故類型,緯度(度分秒),経度(度分秒),内容,種別
0,2,0,1,21,354048614,1394655772,負傷,車両相互
1,2,0,1,21,353928232,1394207917,負傷,車両相互
2,2,0,1,21,353500951,1394519866,負傷,車両相互
3,2,0,2,21,353305748,1394443580,負傷,車両相互
4,2,0,2,21,354506518,1394235399,負傷,車両相互


### 度分秒から十進度への変換

In [12]:
def _dms_digits_to_dec(deg, deg_digits):
    """Convert a packed degree/minute/second value to decimal degrees.

    The packed layout is ``<degrees><MM><SSsss>``: ``deg_digits`` digits of
    degrees, two digits of minutes, then seconds expressed in thousandths.

    Args:
        deg: Packed value as a string or an integer.
        deg_digits: Number of digits reserved for the degree field.

    Returns:
        The angle in decimal degrees as a float.
    """
    # Left-pad to the full fixed width: when the value went through an integer
    # column its leading zeros are gone, which would otherwise shift every field.
    text = str(deg).zfill(deg_digits + 7)
    d = int(text[:deg_digits])
    m = int(text[deg_digits:deg_digits + 2])
    s = int(text[deg_digits + 2:]) / 1000  # thousandths of a second -> seconds
    return (d + m/60 + s/3600)


def lat_deg_to_dec(deg):
    """Convert a packed latitude (``DDMMSSsss``) to decimal degrees.

    Args:
        deg: Packed latitude as a string or an integer, e.g. ``'354048614'``
            for 35 deg 40 min 48.614 s.

    Returns:
        Latitude in decimal degrees as a float.
    """
    return _dms_digits_to_dec(deg, 2)


def lon_deg_to_dec(deg):
    """Convert a packed longitude (``DDDMMSSsss``) to decimal degrees.

    Args:
        deg: Packed longitude as a string or an integer, e.g. ``'1394655772'``
            for 139 deg 46 min 55.772 s.

    Returns:
        Longitude in decimal degrees as a float.
    """
    return _dms_digits_to_dec(deg, 3)

In [13]:
# Add decimal-degree coordinate columns computed from the packed string columns.
df_accident_tokyo = df_accident_tokyo.assign(**{
    '緯度': lambda frame: frame['緯度(度分秒)'].apply(lat_deg_to_dec),
    '経度': lambda frame: frame['経度(度分秒)'].apply(lon_deg_to_dec),
})

In [14]:
# Final table: labels, casualty counts and decimal coordinates only.
df_accident_master = df_accident_tokyo.loc[
    :, ['内容', '種別', '死者数', '負傷者数', '緯度', '経度']
]
df_accident_master.head(n=5)

Unnamed: 0,内容,種別,死者数,負傷者数,緯度,経度
0,負傷,車両相互,0,1,35.680171,139.782159
1,負傷,車両相互,0,1,35.657842,139.702199
2,負傷,車両相互,0,1,35.583598,139.755518
3,負傷,車両相互,0,2,35.551597,139.745439
4,負傷,車両相互,0,2,35.751811,139.709833


In [15]:
# Write the master table to the output directory, without the index column.
df_accident_master.to_csv(
    os.path.join(DIR_NAME, 'traffic_accident_master.csv'),
    encoding='utf-8_sig',
    index=False,
)

In [16]:
# Sanity check: summary statistics of the converted coordinates.
df_accident_master.loc[:, ['緯度', '経度']].describe()

Unnamed: 0,緯度,経度
count,19201.0,19201.0
mean,35.69108,139.735573
std,0.059227,0.075153
min,35.525612,139.566415
25%,35.653391,139.681702
50%,35.694507,139.730282
75%,35.735286,139.789327
max,35.817358,139.917634
