調布市の合成人口データ分析

- 合成人口データ　2015_008_8_13208.csvの分析
- 世帯種別
- 年齢分布
- 職業分布
    
    

# 1. ライブラリ導入

In [None]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import seaborn as sns
import json
import glob
import matplotlib.pyplot as plt
from matplotlib import animation, rc, gridspec
from IPython.display import HTML
import numpy as np
from math import *
import os
from collections import defaultdict
import time
import matplotlib.font_manager as fm
plt.rcParams['font.family'] = ['Hiragino Sans']
from matplotlib.animation import FuncAnimation
import folium




# 2. 調布市の合成人口データの分析
## 2-1. 合成人口データ　2015_008_8_13208.csv

In [None]:
# Experimental input file (original note: "阪上ファイル実験ファイル").
# NOTE(review): the section heading names 2015_008_8_13208.csv, but this
# path points at 2015_001_8_13208_01.csv -- confirm which file is intended.
people_path = '/Users/y-osamu/study/poi_sim/data/processed/2015_001_8_13208_01.csv'

# English column name in the CSV -> Japanese column name used by the
# rest of the notebook.
rename_dict = {
    # Location
    "prefecture_code": "都道府県コード",
    "prefecture_name": "都道府県名",
    "city_code": "市区町村コード",
    "city_name": "市区町村名",
    "town_code": "町丁目コード",
    "town_name": "町丁目名",
    "latitude": "緯度",
    "longitude": "経度",
    # Household
    "household_id": "世帯ID",
    "family_type_id": "世帯類型ID",
    "family_type": "世帯類型",
    "num_member": "世帯人数",
    "abnormal_household": "異常世帯フラグ",
    # Person
    "person_id": "個人ID",
    "age": "年齢",
    "gender_id": "性別ID",
    "gender": "性別",
    "role_household_type_id": "世帯内役割ID",
    "role_household_type": "世帯内役割",
    # Work
    "industry_type_id": "産業分類ID",
    "industry_type": "産業分類",
    "employment_type_id": "就業形態ID",
    "employment_type": "就業形態",
    "company_size_id": "企業規模ID",
    "company_size": "企業規模"
}

# Load the CSV (its first column becomes the row index) and switch to the
# Japanese column names in one step.
people_df = pd.read_csv(people_path, index_col=0).rename(columns=rename_dict)

# Last expression of the cell, so the notebook shows the frame.
people_df


## 2-2 世帯単位の特徴抽出
### 世帯類型の割合

In [None]:
# One row per household: keep the first record found for each household ID.
df_household = people_df.drop_duplicates(subset="世帯ID")

# Percentage share of each household type among households.
# The household type column is used here (matching this section's heading)
# rather than the role-within-household column: the latter is a per-person
# value, so on this de-duplicated frame it would only describe whichever
# member happened to be listed first for each household.
family_ratio = (
    df_household["世帯類型"]
    .value_counts(normalize=True)
    .mul(100)
)

plt.figure(figsize=(6, 6))
plt.pie(
    family_ratio.values,
    labels=family_ratio.index,
    autopct="%.1f%%",  # print each wedge's share with one decimal place
)
plt.title("世帯類型の割合")
plt.show()


### 世帯人数

In [None]:

# Distribution of household size, counted over households (one row each).
# NOTE(review): with discrete=True seaborn appears to derive unit-width bins
# on its own, so the explicit `bins` may have no effect -- confirm against
# the seaborn version in use before removing either argument.
fig, ax = plt.subplots(figsize=(6, 4))
sns.histplot(
    data=df_household,
    x="世帯人数",
    bins=range(1, 8),
    discrete=True,
    ax=ax,
)
ax.set_xlabel("世帯人数")
ax.set_ylabel("世帯数")
ax.set_title("世帯人数の分布")
plt.show()

## 2-3 年齢の特徴抽出


In [None]:
# Age distribution over all persons, grouped into 20 bins.
fig, ax = plt.subplots(figsize=(6, 4))
sns.histplot(data=people_df, x="年齢", bins=20, ax=ax)
ax.set_xlabel("年齢")
ax.set_ylabel("人数")
ax.set_title("年齢分布")
plt.show()

In [None]:
# Head-count for every (age, gender) combination, flattened into a
# long-format table with the count in the "人数" column.
age_gender_sizes = people_df.groupby(["年齢", "性別"]).size()
age_gender = age_gender_sizes.reset_index(name="人数")

# Grouped bar chart: one group per age, one bar per gender.
fig, ax = plt.subplots(figsize=(16, 7))
sns.barplot(data=age_gender, x="年齢", y="人数", hue="性別", ax=ax)
ax.set_title("年齢 × 性別分布")
plt.show()

## 2-4.産業・就業


In [None]:
# Share of each employment type among all persons, in percent.
employment_share = people_df["就業形態"].value_counts(normalize=True)
employment_ratio = employment_share.mul(100)

display(employment_ratio)

# Mean age per industry, oldest industry first.
mean_age_by_industry = people_df.groupby("産業分類")["年齢"].mean()
industry_age = mean_age_by_industry.sort_values(ascending=False)

display(industry_age)

# Cross-tabulation of household type (rows) by employment type (columns);
# combinations that never occur are filled with 0.
family_employment_sizes = people_df.groupby(["世帯類型", "就業形態"]).size()
family_employment = family_employment_sizes.unstack(fill_value=0)

display(family_employment)

## 2-5. 丁目別の世帯数

In [None]:
# Number of household IDs recorded per town block, largest first.
households_per_town = df_household.groupby("町丁目名")["世帯ID"].count()
town_households = households_per_town.sort_values(ascending=False)

# Last expression of the cell: the ten town blocks with the most households.
town_households.head(10)

In [None]:
# Per-household columns needed for the spatial plots.
geo_columns = ["緯度", "経度", "世帯人数", "世帯類型"]
geo_household = df_household[geo_columns]

# Scatter plot of household positions (longitude on x, latitude on y),
# coloured by household size.
fig, ax = plt.subplots(figsize=(12, 12))
household_points = ax.scatter(
    geo_household["経度"],
    geo_household["緯度"],
    c=geo_household["世帯人数"],
    cmap="viridis",
    s=10,
    alpha=0.6,
)
fig.colorbar(household_points, ax=ax, label="世帯人数")
ax.set_xlabel("経度")
ax.set_ylabel("緯度")
ax.set_title("世帯人数の空間分布")
plt.show()

In [None]:
# Latitude/longitude extent of the household points (labelled in the
# original comment as the coordinates of Chofu City).
household_lat = geo_household["緯度"]
household_lon = geo_household["経度"]
min_lat, max_lat = household_lat.min(), household_lat.max()
min_lon, max_lon = household_lon.min(), household_lon.max()

m = folium.Map()

# Margin added on every side of the bounding box, in the same units as the
# coordinates; adjust to taste.
padding = 0.005

# Zoom the map to the padded bounding box: [south-west corner, north-east corner].
south_west = [min_lat - padding, min_lon - padding]
north_east = [max_lat + padding, max_lon + padding]
m.fit_bounds([south_west, north_east])

# Collapse the detailed household type into four coarse categories.
def normalize_family_type_4(x):
    """Map a household-type label onto one of four coarse categories.

    Args:
        x: Household-type label. Values for which ``pd.isna`` is true are
            treated as missing.

    Returns:
        "単独世帯", "家族世帯（子供あり）" or "家族世帯（子供なし）" when the
        label contains the corresponding keyword, otherwise "その他"
        (also returned for missing values).
    """
    if pd.isna(x):
        return "その他"

    # Checked in order; the first keyword contained in the label wins.
    keyword_to_category = (
        ("単独", "単独世帯"),
        ("子供", "家族世帯（子供あり）"),
        ("夫婦のみ", "家族世帯（子供なし）"),
    )
    for keyword, category in keyword_to_category:
        if keyword in x:
            return category

    return "その他"

# Attach the four-way category as a new column.
# `assign` returns a new DataFrame instead of writing into geo_household in
# place: geo_household was obtained by selecting columns from another frame,
# and in-place column assignment on such a selection can trigger pandas'
# SettingWithCopyWarning. The dict form keeps the column name as an exact
# string rather than a Python identifier.
geo_household = geo_household.assign(
    **{"世帯類型_4区分": geo_household["世帯類型"].apply(normalize_family_type_4)}
)

# Marker colour for each of the four categories.
color_map = {
    "単独世帯": "red",
    "家族世帯（子供あり）": "green",
    "家族世帯（子供なし）": "orange",
    "その他": "blue"
}

# Draw one circle marker per household, coloured by its four-way category.
# The four needed columns are walked in lockstep instead of using
# DataFrame.iterrows(), which constructs a Series object for every row.
household_records = zip(
    geo_household["緯度"],
    geo_household["経度"],
    geo_household["世帯人数"],
    geo_household["世帯類型_4区分"],
)
for lat, lon, num_member, family_type_4 in household_records:
    # A category missing from color_map raises KeyError here.
    c = color_map[family_type_4]

    folium.CircleMarker(
        location=[lat, lon],
        radius=3,
        color=c,
        fill=True,
        fill_color=c,
        fill_opacity=0.6,
        # Popup text is HTML; <br> separates the two lines.
        popup=(
            f"世帯人数: {num_member}<br>"
            f"世帯類型: {family_type_4}"
        ),
    ).add_to(m)


# Last expression of the cell, so the notebook shows the map.
m

In [None]:
# Save the map to an HTML file; the path is relative, so the file is
# written to the current working directory.
m.save("chofu_map.html")