## 发病率(incidence)统计
统计每个洲发病率前五的国家

纵坐标：incidence rate

横坐标：年龄段(0-80)

数据间隔: 10

数据表达形式: 折线图形式

### 统计内容:
1. 黑色素肿瘤：5张图
2. 非黑色素肿瘤：5张图


### 导入依赖及配置

In [None]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# 内嵌矢量图
%config InlineBackend.figure_format = 'svg'

### 合并北美洲和南美洲的数据

In [None]:

# Build the combined "america" data set by concatenating, file by file, the
# records found in the `na` and `lac` input directories.
na_dir_name = "data/input/na_incidence_melanoma/"
lac_dir_name = "data/input/lac_incidence_melanoma/"

# File names present in each source directory.
na_dir = os.listdir(na_dir_name)
lac_dir = os.listdir(lac_dir_name)

america_directory = "data/input/america_incidence_melanoma/"

# Create the merged-data directory; exist_ok keeps re-running the cell safe.
os.makedirs(america_directory, exist_ok=True)

# Pair every `na` file with the `lac` file carrying the same suffix and write
# the concatenation of both record lists to the america directory.
for nafile in na_dir:
    # Fourth "_"-separated token without its extension,
    # e.g. "na_incidence_melanoma_80.json" -> "80".
    year = nafile.split('_')[3].split('.')[0]

    lacfile = f"lac_incidence_melanoma_{year}.json"

    # Destination file for the merged records.
    output_file_name = f"america_incidence_melanoma_{year}.json"
    output_file_path = os.path.join(america_directory, output_file_name)

    # Load the `na` records.
    filepath = os.path.join(na_dir_name, nafile)
    with open(filepath, 'r', encoding='utf-8') as na_filp:
        data_na = json.load(na_filp)

    # Load the matching `lac` records, if that file exists.
    if lacfile in lac_dir:
        filepath = os.path.join(lac_dir_name, lacfile)
        with open(filepath, 'r', encoding='utf-8') as lac_filp:
            data_lac = json.load(lac_filp)
    else:
        # Without this reset a missing counterpart would either raise
        # NameError (first iteration) or silently reuse the records loaded
        # for the previous file.
        data_lac = []

    # NOTE(review): `+` assumes each file holds a top-level JSON array — confirm.
    data_merge = data_na + data_lac
    with open(output_file_path, "w", encoding="utf-8") as fout:
        json.dump(data_merge, fout, ensure_ascii=False, indent=4)

### 确定每个洲的前五国家函数

In [None]:

def top_five(region_name):
    """Return the labels of the five records with the highest ``asr``.

    The ranking is read from
    ``data/input/{region_name}_incidence_melanoma/{region_name}_incidence_melanoma_80.json``.
    NOTE(review): the ``80`` suffix is presumably the oldest age bucket —
    confirm against the data set.

    Args:
        region_name: Region prefix used in both the directory and file name.

    Returns:
        A list of at most five ``label`` values, ordered by descending
        ``asr``.
    """
    folder = f'data/input/{region_name}_incidence_melanoma/'
    ranking_file = f'{region_name}_incidence_melanoma_80.json'
    ranking_path = os.path.join(folder, ranking_file)

    with open(ranking_path, "r", encoding="utf-8") as handle:
        records = json.load(handle)

    def by_asr(record):
        return record['asr']

    # Highest rate first, then keep only the leading five entries.
    ranked = sorted(records, key=by_asr, reverse=True)
    leaders = ranked[:5]
    return [record['label'] for record in leaders]


### 画图函数

In [None]:
# markers = {
#     '1': {'marker': 'o', 'size': 8},
#     '2': {'marker': 's', 'size': 10},
#     '3': {'marker': 'D', 'size': 6},
#     '4': {'marker': '^', 'size': 12},
#     '5': {'marker': 'v', 'size': 8},
# }

def image_show(results, region):
    """Draw one line per country, save the chart as SVG, and display it.

    Args:
        results: Nested mapping ``{x_value: {country_label: rate}}``. Outer
            keys must be convertible to ``int`` and the rates to ``float``.
        region: Region name; shown capitalized as the title and used in the
            output file name ``data/output/{region}_incidence_melanoma.svg``.
    """
    # After the transpose, rows are the outer keys and columns are countries.
    df = pd.DataFrame(results).T.astype(float)
    df.index = df.index.astype(int)
    # Sort numerically so each line is drawn left to right even when the
    # caller did not pre-sort the outer keys.
    df = df.sort_index()

    fig, ax = plt.subplots(figsize=(8, 4.5))

    for country in df.columns:
        ax.plot(df.index, df[country], marker='o', markersize=4, label=country)

    ax.set_title(region.capitalize())
    ax.set_xlabel('Age(Years)')
    ax.set_ylabel('Incidence rate(1/100,000 population)')

    # Frameless legend centred underneath the axes, one row of five entries.
    ax.legend(
        loc='upper center',
        bbox_to_anchor=(0.5, -0.15),
        frameon=False,
        shadow=False,
        ncol=5,
    )

    # Hide the top and right border lines.
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    ax.grid(False)

    # The output directory is not created anywhere else before saving.
    os.makedirs('data/output', exist_ok=True)

    # bbox_inches='tight' grows the saved canvas to include the legend, which
    # sits below the axes and would otherwise be cropped out of the SVG.
    fig.savefig(
        f'data/output/{region}_incidence_melanoma.svg',
        format='svg',
        bbox_inches='tight',
    )

    plt.show()

### 根据前五国家的Label，统计各个年龄段的数据，并绘制图像

In [None]:

# For every region: pick its top-five labels, collect their `asr` value from
# each data file, dump the table as JSON, and plot it.
regions = ['asia', 'america', 'africa', 'europe', 'oceania']

# The JSON summaries below are written into data/output/; create it up front
# so a fresh checkout does not fail on the first open().
os.makedirs('data/output', exist_ok=True)

for region in regions:
    # Labels of the five entries ranked highest by top_five().
    top_five_labels = top_five(region)

    # results maps file suffix -> {label: asr}.
    results = {}
    data_dir = f'data/input/{region}_incidence_melanoma/'

    for filename in os.listdir(data_dir):
        # Fourth "_"-separated token without its extension, e.g. "80".
        # Named `year`, but image_show() plots it on the 'Age(Years)' axis.
        year = filename.split('_')[3].split('.')[0]

        filepath = os.path.join(data_dir, filename)
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        results.setdefault(year, {})

        # Keep only the asr values of the top-five labels.
        for item in data:
            if item['label'] in top_five_labels:
                results[year][item['label']] = item['asr']

    # Order the outer keys numerically ("5" before "10"), not lexically.
    years = sorted(results, key=int)
    results = {year: results[year] for year in years}

    with open(f'data/output/{region}_incidence_melanoma.json', 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

    image_show(results, region)
