# 正在使用的文件

## 未解决问题

- 历史数据的获取、存储
- 数据获取方法通用化

## 导入依赖的包

In [1]:
import openmeteo_requests
import requests_cache
from retry_requests import retry
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

## 城市信息、指标样式

In [2]:
# *******************
# City names and the latitude/longitude of their sample points.
#
# Every entry is a dict with these keys:
#   "city"      - region name; the main loop uses it to filter the CSV history
#                 and as the first part of each chart title
#   "rank"      - suffix appended to "city" in chart titles
#                 (presumably production rank and share -- TODO confirm)
#   "file"      - file-name prefix of the chart images saved for this region
#   "latitude"  - latitudes of the sample points sent to the weather API
#   "longitude" - longitudes of the sample points; same length as "latitude"

# 1. Indonesia: 7 regions, 40 sample points
id_cities = [
    {
        "city" : "Riau, Indonesia",
        "rank" : "(#1 20%)",
        "file" : "ID(01)Riau",
        "latitude": [1.553974, 0.806145, 0.919427, -0.067045, -0.523711, -0.250341, 0.589762],
        "longitude": [100.728349, 100.668972, 101.269972, 102.16127, 102.360924, 101.633008, 100.975263]
    },
    {
        "city" : "North Sumatra(Sumatera Utara), Indonesia",
        "rank" : "(#2 12%)",
        "file" : "ID(02)North_Sumatra",
        "latitude": [3.924106, 3.262224, 2.581545, 1.998695, 1.483714],
        "longitude": [98.175585, 99.213301, 99.867643, 100.148443, 99.943489]
    },
    {
        "city" : "Central Kalimantan(Kalimantan Tengah), Indonesia",
        "rank" : "(#3 12%)",
        "file" : "ID(03)Central_Kalimantan",
        "latitude": [-2.253934, -2.365038, -2.289209, -2.719467, -2.037487, -3.362721],
        "longitude": [111.376936, 111.785881, 112.293945, 112.587835, 112.889265, 113.766025]
    },
    {
        "city" : "West Kalimantan(Kalimantan Barat), Indonesia",
        "rank" : "(#4 10%)",
        "file" : "ID(04)West_Kalimantan",
        "latitude": [1.47967, 0.329089, 0.138312, 0.261922, -1.657287, -2.535276],
        "longitude": [109.677382, 109.824987, 110.438844, 111.385051, 110.458016, 110.797532]
    },
    {
        "city" : "East Kalimantan(Kalimantan Timur), Indonesia",
        "rank" : "(#5 9%)",
        "file" : "ID(05)East_Kalimantan",
        "latitude": [-2.107315, -1.62545, 0.189201, 1.292902, 1.223743],
        "longitude": [116.031492, 116.182897, 116.900165, 116.93554, 117.829622]
    },
    {
        "city" : "South Sumatra(Sumatera Selatan), Indonesia",
        "rank" : "(#6)",
        "file" : "ID(06)South_Sumatra",
        "latitude": [-3.757042, -3.558638, -2.812771, -2.890193, -2.534812, -2.09579],
        "longitude": [105.158038, 103.838161, 103.177101, 105.019148, 104.245696, 103.776292]
    },
    {
        "city" : "Jambi, Indonesia",
        "rank" : "(#7)",
        "file" : "ID(07)Jambi",
        "latitude": [-0.937376, -2.111836, -1.828045, -1.554998, -1.339655],
        "longitude": [103.195025, 102.680019, 103.436483, 102.829648, 103.726253]
    }
]
# 2. Malaysia: 5 regions, 22 sample points
# Entries use the same keys as id_cities: "city" (region name), "rank" (title
# suffix), "file" (chart file-name prefix) and the equally long "latitude" /
# "longitude" lists of sample points.
my_cities = [
    {
        "city" : "Sabah, Malaysia",
        "rank" : "(#1 24%)",
        "file" : "MY(01)Sabah",
        "latitude": [5.802839, 5.574102, 5.28119, 4.783628, 4.579199],
        "longitude": [117.54368, 118.266649, 119.108477, 118.005032, 117.746018]
    },{
        "city" : "Sarawak, Malaysia",
        "rank" : "(#2 21%)",
        "file" : "MY(02)Sarawak",
        "latitude": [4.230258, 3.660208, 3.212916, 2.729693, 2.446493],
        "longitude": [114.085347, 113.68672, 113.272647, 112.423702, 111.762531]
    },{
        "city" : "Johor, Malaysia",
        "rank" : "(#3 16%)",
        "file" : "MY(03)Johor",
        "latitude": [1.77747, 1.594528, 2.383085, 2.31583],
        "longitude": [104.03914, 103.564952, 103.128471, 102.515939]
    },{
        "city" : "Pahang, Malaysia",
        "rank" : "(#4 16%)",
        "file" : "MY(04)Pahang",
        "latitude": [2.886642, 3.349747, 3.610569, 3.982571],
        "longitude": [102.801372, 102.22922, 103.061951, 102.377541]
    },{
        "city" : "Perak, Malaysia",
        "rank" : "(#5 10%)",
        "file" : "MY(05)Perak",
        "latitude": [4.370213, 4.159839, 4.234578, 4.90561],
        "longitude": [100.754673, 100.934989, 100.701692, 100.695199]
    }
]

# **************************
# Shared chart styles for the Southeast Asia weather metrics (5 in total).
# Each style carries: the data column to plot, the earliest year that is drawn
# as its own line, the y-axis label, the chart-title prefix (the city name is
# appended to it) and the suffix used in the saved file name.
styles = [
    # Cumulative precipitation within each year
    dict(
        column="cum_sum_precipitation_sum",
        min_history_year=2022,
        ylabel="Precipitation (mm)",
        title="Cumulative Annual Precipitation of ",
        pathname="a_Cumulative_Precip",
    ),
    # Rolling 7-day precipitation total
    dict(
        column="precip_sum7",
        min_history_year=2023,
        ylabel="Precipitation (mm)",
        title="Last 7 days Precipitation Summary of ",
        pathname="b_Precip_SUM(7)",
    ),
    # Rolling 30-day precipitation total
    dict(
        column="precip_sum30",
        min_history_year=2022,
        ylabel="Precipitation (mm)",
        title="Last 30 days Precipitation Summary of ",
        pathname="c_Precip_SUM(30)",
    ),
    # Soil moisture
    dict(
        column="soil_moisture_28_to_100cm_mean",
        min_history_year=2022,
        ylabel="Soil Moisture (m³/m³)",
        title="Mean Soil Moisture (28-100cm) of ",
        pathname="d_Soil_Moisture",
    ),
    # Rolling 5-day mean temperature
    dict(
        column="temper_ma5",
        min_history_year=2022,
        ylabel="Temperature (°C)",
        title="Last 5 days Mean Temperature of ",
        pathname="e_Mean_Temper",
    ),
]


## def 获取CSV数据

In [3]:
def read_csv(dataset_path, city_name, start_date, end_date):
    """Load the history CSV and keep one city's rows inside a date window.

    Args:
        dataset_path: Path of the CSV file, e.g. './dataset/MY.csv'.
        city_name: Value the 'city' column must equal.
        start_date: Lower bound for the 'date' column (inclusive).
        end_date: Upper bound for the 'date' column (inclusive).

    Returns:
        The matching rows with all columns and their original index labels.

    The 'date' column is compared exactly as pandas read it (no date parsing
    is requested here), so the bounds should use the same textual format as
    the file.
    """
    frame = pd.read_csv(dataset_path)

    dates = frame['date']
    not_before_start = dates >= start_date
    not_after_end = dates <= end_date
    is_requested_city = frame['city'] == city_name

    return frame.loc[not_before_start & not_after_end & is_requested_city]

## def 获取OM数据 （open-meteo api）

In [4]:
def request_openmeteo_data(start_date, end_date, latitude, longitude, daily):
    """Query the Open-Meteo archive API for daily variables at the given points.

    Args:
        start_date: Start date of the query, forwarded as "start_date".
        end_date: End date of the query, forwarded as "end_date".
        latitude: Latitude(s) of the sample points.
        longitude: Longitude(s) of the sample points.
        daily: Names of the daily variables to request. Their order matters:
            consumers read the returned variables by position.

    Returns:
        Whatever ``openmeteo_requests.Client.weather_api`` returns for the
        request (iterated as one response per sample point by the caller).
    """
    archive_url = "https://archive-api.open-meteo.com/v1/archive"
    query = dict(
        start_date=start_date,
        end_date=end_date,
        latitude=latitude,
        longitude=longitude,
        daily=daily,
    )

    # HTTP session: on-disk cache named '.cache' (expire_after=-1) wrapped
    # with up to 5 retries and a 0.2 back-off factor.
    http_session = retry(
        requests_cache.CachedSession('.cache', expire_after=-1),
        retries=5,
        backoff_factor=0.2,
    )
    client = openmeteo_requests.Client(session=http_session)
    return client.weather_api(archive_url, params=query)

## def OM样本数据→城市数据

In [5]:
def process_sample_data(responses):
    """Average the per-sample-point daily series into one city-level series.

    Args:
        responses: Iterable of Open-Meteo responses, one per sample point.
            Each must expose ``Daily()`` whose variables 0, 1 and 2 are, in
            this order, mean temperature, precipitation sum and 28-100 cm
            soil moisture (i.e. the order in which they were requested).

    Returns:
        DataFrame with the columns 'date', 'temperature_2m_mean',
        'precipitation_sum' and 'soil_moisture_28_to_100cm_mean', holding for
        every date the mean over all sample points. When ``responses`` is
        empty, an empty DataFrame with those columns is returned.
    """
    value_columns = [
        'temperature_2m_mean',
        'precipitation_sum',
        'soil_moisture_28_to_100cm_mean',
    ]

    sample_frames = []
    for response in responses:
        daily = response.Daily()
        # Rebuild the date axis from the response's start / end / step;
        # the end timestamp itself is excluded (inclusive="left").
        sample = {"date": pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left"
        )}
        # Variables are positional: index i corresponds to value_columns[i].
        for position, column in enumerate(value_columns):
            sample[column] = daily.Variables(position).ValuesAsNumpy()
        sample_frames.append(pd.DataFrame(data=sample))

    if not sample_frames:
        # Nothing to average; return the expected schema instead of failing
        # on a missing 'date' column.
        return pd.DataFrame(columns=['date', *value_columns])

    # Concatenate once instead of growing a DataFrame inside the loop.
    all_samples = pd.concat(sample_frames, ignore_index=True)
    # Merge the sample points: mean of every metric per date.
    return all_samples.groupby('date', as_index=False)[value_columns].mean()

## def CSV、OM数据加工

In [6]:
def polt_data_prepare(merge_pd):
    """Add the calendar and rolling-metric columns that the charts plot.

    The columns are written onto ``merge_pd`` itself, so the caller's frame
    is modified in place. Rolling windows run over the rows in their current
    order.

    Args:
        merge_pd: DataFrame with at least the columns 'date',
            'precipitation_sum' and 'temperature_2m_mean'.

    Returns:
        The rows of ``merge_pd`` whose day of year is at most 365, i.e. day
        366 of a leap year is left out of the returned frame (it stays in
        ``merge_pd``).
    """
    # Parse 'date' as datetimes; values that cannot be parsed become NaT.
    merge_pd['date'] = pd.to_datetime(merge_pd['date'], errors='coerce')

    # Calendar helpers: year and day-of-year.
    date_parts = merge_pd['date'].dt
    merge_pd['year'] = date_parts.year
    merge_pd['day_of_year'] = date_parts.dayofyear

    # Precipitation accumulated within each year.
    precip_by_year = merge_pd.groupby('year')['precipitation_sum']
    merge_pd['cum_sum_precipitation_sum'] = precip_by_year.cumsum()

    # Trailing 7-row and 30-row precipitation totals.
    precip = merge_pd['precipitation_sum']
    merge_pd['precip_sum7'] = precip.rolling(window=7, min_periods=1).sum()
    merge_pd['precip_sum30'] = precip.rolling(window=30, min_periods=1).sum()

    # Trailing 5-row mean temperature.
    temperature = merge_pd['temperature_2m_mean']
    merge_pd['temper_ma5'] = temperature.rolling(window=5, min_periods=1).mean()

    # Leap years: keep only days 1..365 so every year shares one x-axis.
    within_common_year = merge_pd['day_of_year'] <= 365
    return merge_pd[within_common_year]


## def 城市数据→绘图（年度日数据）
- 01 降水（年累计）
- 02 降水（滚动七日总和）
- 03 降水（滚动卅日总和）
- 04 土壤墒情
- 05 气温（滚动五日均值）

In [7]:
def dig(df_l2, style, city_name, file_city_name):
    """Draw one metric of one city as year-over-year daily curves and save a JPG.

    The chart contains, in this order: the 5%-95% band of all historical
    years, the historical mean (dashed), one line per historical year from
    ``style["min_history_year"]`` onwards, and the current year in red.
    "Current year" is the module-level ``year_of_today``; every other year
    counts as history.

    Args:
        df_l2: DataFrame with the columns 'year', 'day_of_year' and the
            column named by ``style["column"]``.
        style: Dict with the keys "column", "min_history_year", "ylabel",
            "title" and "pathname".
        city_name: Text appended to ``style["title"]`` in the chart title.
        file_city_name: Prefix of the output file name.

    Returns:
        None. The figure is written to
        './diagram/{file_city_name}_{pathname}.jpg' and then closed.
    """
    # Style settings
    column = style["column"]                      # metric column to plot
    min_history_year = style["min_history_year"]  # earliest year drawn as its own line
    ylabel = style["ylabel"]                      # y-axis label
    title = style["title"]                        # chart title prefix
    pathname = style["pathname"]                  # output file-name suffix

    # Historical data: every year except the current one.
    df_l2_history = df_l2[df_l2['year'] != year_of_today]
    ## Mean of the historical years per day of year
    average_accumulated = df_l2_history.groupby('day_of_year')[column].mean().reset_index()
    ## 5%-95% range of the historical years per day of year
    quantiles = df_l2_history.groupby('day_of_year')[column].quantile([0.05, 0.95]).unstack().reset_index()

    # Visualisation
    fig = plt.figure(figsize=(10.5, 6))
    # Semi-transparent watermark across the middle of the figure.
    fig.text(0.5, 0.5, '© Xiamen Xiangyu', fontsize=30, color='gray',
             alpha=0.2, ha='center', va='center', rotation=30)
    ## (1) Historical 5%-95% band
    plt.fill_between(quantiles['day_of_year'],
                     quantiles[0.05],
                     quantiles[0.95],
                     color='skyblue',
                     alpha=0.4,
                     label='5%-95%')
    ## (2) Historical mean
    plt.plot(average_accumulated['day_of_year'],
             average_accumulated[column],
             "k--",
             linewidth=1.2,
             label='10yr_average')
    ## (3) One line per historical year that is recent enough
    for year in df_l2['year'].unique():
        if year != year_of_today and year >= min_history_year:
            year_data = df_l2[df_l2['year'] == year]
            plt.plot(year_data['day_of_year'],
                     year_data[column],
                     alpha=0.6,
                     linewidth=1.2,
                     label=year)
    ## (4) Current year
    current_year_data = df_l2[df_l2['year'] == year_of_today]
    plt.plot(current_year_data['day_of_year'],
             current_year_data[column],
             alpha=1.0,
             linewidth=1.5,
             color='red',
             # Label with the year actually plotted, not with whatever value
             # the loop variable above happened to end on.
             label=year_of_today)

    # X axis: first day-of-year of each month in a 365-day year.
    month_ticks = [1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335]
    month_labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    plt.xticks(month_ticks, month_labels)
    plt.xlabel('Months')
    plt.ylabel(ylabel)
    plt.title(f'{title}{city_name}')
    plt.grid(True, alpha=0.3)
    plt.legend()
    plt.tight_layout()
    fig.savefig(f'./diagram/{file_city_name}_{pathname}.jpg', dpi=300)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

## def 合并图片

In [8]:
def dig_merge(city_list, col_num, output_path):
    """Tile the saved per-city charts into a single grid image.

    For every city in ``city_list`` and every entry of the module-level
    ``styles`` list, the image './diagram/{file}_{pathname}.jpg' is pasted
    into the grid row by row, ``col_num`` images per row. All images are
    assumed to have the size of the first one.

    Args:
        city_list: City dicts; only the "file" key is read.
        col_num: Number of images per grid row.
        output_path: Where the merged image is saved.

    Raises:
        ValueError: If there is no image to merge.
    """
    # Mixed quote styles keep this f-string valid on Python versions older
    # than 3.12 as well.
    image_paths = [
        f"./diagram/{city['file']}_{style['pathname']}.jpg"
        for city in city_list
        for style in styles
    ]
    if not image_paths:
        raise ValueError("dig_merge: no chart images to merge")

    # The first image defines the cell size of the grid.
    with Image.open(image_paths[0]) as first_img:
        img_width, img_height = first_img.size

    # Ceiling division: a partially filled last row still needs its own row.
    row_num = -(-len(image_paths) // col_num)
    grid_img = Image.new('RGB', (img_width * col_num, img_height * row_num))

    for idx, path in enumerate(image_paths):
        row, col = divmod(idx, col_num)
        # Open each file only while it is pasted so no handle stays open.
        with Image.open(path) as img:
            grid_img.paste(img, (col * img_width, row * img_height))

    grid_img.save(output_path)

# 主程序

In [9]:
# *******************
# Basic settings
# Year treated as "this year" by the charts; all other years are history.
year_of_today = 2025
# Query parameters: the CSV archive covers csv_start_date..csv_end_date and
# the API request covers api_start_date..api_end_date.
# NOTE(review): api_start_date equals csv_end_date and the CSV filter keeps
# both bounds, so that day may be loaded from both sources -- confirm.
csv_start_date = "2015-01-01"
csv_end_date   = "2025-08-01"
api_start_date = csv_end_date
api_end_date   = "2025-08-14"
# Daily mean temperature, precipitation sum (rain + snow), soil moisture (28-100 cm).
# The order matters: process_sample_data reads these variables by position 0, 1, 2.
daily = ["temperature_2m_mean",
         "precipitation_sum",
         "soil_moisture_28_to_100cm_mean"]

# One entry per country: the history CSV to read, the CSV to write the merged
# data to, the path of the merged chart grid, and the list of cities to process.
countries = [
    {
        # Malaysia
        "country_name" : "Malaysia",
        "dataset_input_path"  : "./dataset/MY.csv",
        "dataset_output_path" : "./dataset/MY(2).csv",
        "grid_dig_output_path" : "./diagram_grid/MY.jpg",
        "city_list": my_cities,
    },{
        # Indonesia
        "country_name" : "Indonesia",
        "dataset_input_path"  : "./dataset/ID.csv",
        "dataset_output_path" : "./dataset/ID(2).csv",
        "grid_dig_output_path" : "./diagram_grid/ID.jpg",
        "city_list": id_cities,
    }
]


# Per country: merge the CSV history with fresh API data for every city, draw
# all charts, write the merged data set, then tile the charts into one grid.
for country in countries:
    # Country settings
    dataset_input_path = country["dataset_input_path"]
    dataset_output_path = country["dataset_output_path"]
    grid_dig_output_path = country["grid_dig_output_path"]
    city_list = country["city_list"]

    # Merged frames of all cities; concatenated once after the loop instead
    # of growing a DataFrame city by city.
    city_frames = []
    for city in city_list:
        # City settings
        city_name = city['city']
        rank = city['rank']
        file_city_name = city["file"]
        latitude = city["latitude"]
        longitude = city["longitude"]

        # CSV history, filtered by city and date window
        csv_pd = read_csv(dataset_input_path, city_name, csv_start_date, csv_end_date)

        # Recent data from the API, one response per sample point
        responses = request_openmeteo_data(api_start_date, api_end_date, latitude, longitude, daily)

        # Average the sample points into one city-level series
        api_pd = process_sample_data(responses)

        # Combine both sources; polt_data_prepare adds the derived columns to
        # merge_pd in place and returns the rows used for plotting.
        merge_pd = pd.concat([csv_pd, api_pd], ignore_index=True)
        polt_data_pd = polt_data_prepare(merge_pd)

        # Rows coming from the API have no city yet; tag the whole frame
        # before it goes into the archive.
        merge_pd["city"] = city_name
        city_frames.append(merge_pd)

        # Draw one chart per style
        for style in styles:
            dig(polt_data_pd, style, city_name+rank, file_city_name)

    # Single quotes inside the f-string keep it valid before Python 3.12.
    print(f"Finish：{country['country_name']}")

    # Write the updated history archive
    output_pd = pd.concat(city_frames, ignore_index=True) if city_frames else pd.DataFrame()
    output_pd.to_csv(dataset_output_path, index=False, encoding='utf-8-sig')

    # Merge the charts into one grid, one column per style
    dig_merge(city_list, len(styles), grid_dig_output_path)


Finish：Malaysia
Finish：Indonesia
