In [12]:
import xarray as xr
import pandas as pd
import numpy as np
from scipy import stats
import calendar
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = 'plotly_white'
from scipy.stats import lognorm
plot_template = dict(
    layout=go.Layout({
        'font_size': 8,
        'xaxis_title_font_size': 8,
        'yaxis_title_font_size': 8,
        }   
))
from ConvertChineseToPinyin import convert_to_pinyin_capitalized
stationinfo = pd.read_excel('../data/fork.xls', sheet_name='Sheet1')
name_long_dict = dict(zip(stationinfo['NAME'], stationinfo['LONG']))
name_lat_dict = dict(zip(stationinfo['NAME'], stationinfo['LAT']))
name_pinyin_dict = {
    '玛多': 'MaDuo',
    '达日': 'DaRi',
    '久治': 'JiuZhi',
    '红原': 'HongYuan',
    '若尔盖': 'RuoErGai',
    '玛曲': 'MaQu',
    '玛沁': 'MaQin',
    '河南': 'HeNan',
    '兴海': 'XingHai',
    '贵南': 'GuiNan',
    '共和': 'GongHe',
    '贵德': 'GuiDe',
    '同仁': 'TongRen'
}
# Open the dataset
ds = xr.open_dataset('I:/ECMWFSeasonalForecast/ECMWF_sys51_allVars_EnsMean_2017_allMonth_lead1To6_ChinaArea.grib')
ds


In [2]:
ecmwf_season_pcp = {}

for station in stationinfo['NAME']:
    long = name_long_dict[station]
    lat = name_lat_dict[station]

    df = pd.DataFrame(index=pd.date_range(start='2017-01-01', end='2023-12-31', freq='MS'), columns=[f'lead{i}' for i in range(1, 7)])
    df.index.name = 'date'
    Jan_forecasts = []
    Feb_forecasts = []
    Mar_forecasts = []
    Apr_forecasts = []
    May_forecasts = []
    Jun_forecasts = []
    Jul_forecasts = []
    Aug_forecasts = []
    Sep_forecasts = []
    Oct_forecasts = []
    Nov_forecasts = []
    Dec_forecasts = []
    years = np.arange(2017,2024)
    for year in years:
        ds = xr.open_dataset(f'I:/ECMWFSeasonalForecast/ECMWF_sys51_allVars_EnsMean_{year}_allMonth_lead1To6_ChinaArea.grib')
        dates = pd.date_range(start=f'{year}-01-01', end=f'{year}-12-31', freq='MS')

        for date in dates:
            forecast_dates = pd.date_range(start=date, periods=6, freq='MS')
            days_ = 0
            month_precip = []
            for forecast_date in forecast_dates:
                num_days = calendar.monthrange(forecast_date.year, forecast_date.month)[1]
                days_ += num_days
                tprate_forecast = ds['tprate'].sel(
                    time=f'{date.year}-{date.month:02d}-01',
                    latitude=lat,
                    longitude=long,
                    method='nearest',
                    step=f'{days_} days',
                )
                tprate_forecast = tprate_forecast.values*num_days*24*3600*1000
                month_precip.append(tprate_forecast)
                if forecast_date.month == 1:
                    Jan_forecasts.append(tprate_forecast)
                elif forecast_date.month == 2:
                    Feb_forecasts.append(tprate_forecast)
                elif forecast_date.month == 3:
                    Mar_forecasts.append(tprate_forecast)
                elif forecast_date.month == 4:
                    Apr_forecasts.append(tprate_forecast)
                elif forecast_date.month == 5:
                    May_forecasts.append(tprate_forecast)
                elif forecast_date.month == 6:
                    Jun_forecasts.append(tprate_forecast)
                elif forecast_date.month == 7:
                    Jul_forecasts.append(tprate_forecast)
                elif forecast_date.month == 8:
                    Aug_forecasts.append(tprate_forecast)
                elif forecast_date.month == 9:
                    Sep_forecasts.append(tprate_forecast)
                elif forecast_date.month == 10:
                    Oct_forecasts.append(tprate_forecast)
                elif forecast_date.month == 11:
                    Nov_forecasts.append(tprate_forecast)
                elif forecast_date.month == 12:
                    Dec_forecasts.append(tprate_forecast)

            df.loc[date,:] = month_precip
    ecmwf_season_pcp[station] = df
    df.to_csv(f'../result/ECMWFSeasonalForecasts/RawECMWFSeasonalPrecipitationForecast_{station}.csv')


In [6]:
ecmwf_season_max_temp = {}
for station in stationinfo['NAME']:
    long = name_long_dict[station]
    lat = name_lat_dict[station]
    years = np.arange(2017,2024)
    df = pd.DataFrame(index=pd.date_range(start='2017-01-01', end='2023-12-31', freq='MS'), columns=[f'lead{i}' for i in range(1, 7)])
    df.index.name = 'date'
    Jan_forecasts = []
    Feb_forecasts = []
    Mar_forecasts = []
    Apr_forecasts = []
    May_forecasts = []
    Jun_forecasts = []
    Jul_forecasts = []
    Aug_forecasts = []
    Sep_forecasts = []
    Oct_forecasts = []
    Nov_forecasts = []
    Dec_forecasts = []
    for year in years:
        ds = xr.open_dataset(f'I:/ECMWFSeasonalForecast/ECMWF_sys51_allVars_EnsMean_{year}_allMonth_lead1To6_ChinaArea.grib')
        dates = pd.date_range(start=f'{year}-01-01', end=f'{year}-12-31', freq='MS')

        for date in dates:
            forecast_dates = pd.date_range(start=date, periods=6, freq='MS')
            days_ = 0
            month_mx2t24 = []
            for forecast_date in forecast_dates:
                num_days = calendar.monthrange(forecast_date.year, forecast_date.month)[1]
                days_ += num_days
                mx2t24_forecast = ds['mx2t24'].sel(
                    time=f'{date.year}-{date.month:02d}-01',
                    latitude=lat,
                    longitude=long,
                    method='nearest',
                    step=f'{days_} days',
                )
                mx2t24_forecast = mx2t24_forecast.values-273.15
                # mx2t24_forecast = (mx2t24_forecast.values - 273.15) * 9/5 + 32
                month_mx2t24.append(mx2t24_forecast)
                if forecast_date.month == 1:
                    Jan_forecasts.append(mx2t24_forecast)
                elif forecast_date.month == 2:
                    Feb_forecasts.append(mx2t24_forecast)
                elif forecast_date.month == 3:
                    Mar_forecasts.append(mx2t24_forecast)
                elif forecast_date.month == 4:
                    Apr_forecasts.append(mx2t24_forecast)
                elif forecast_date.month == 5:
                    May_forecasts.append(mx2t24_forecast)
                elif forecast_date.month == 6:
                    Jun_forecasts.append(mx2t24_forecast)
                elif forecast_date.month == 7:
                    Jul_forecasts.append(mx2t24_forecast)
                elif forecast_date.month == 8:
                    Aug_forecasts.append(mx2t24_forecast)
                elif forecast_date.month == 9:
                    Sep_forecasts.append(mx2t24_forecast)
                elif forecast_date.month == 10:
                    Oct_forecasts.append(mx2t24_forecast)
                elif forecast_date.month == 11:
                    Nov_forecasts.append(mx2t24_forecast)
                elif forecast_date.month == 12:
                    Dec_forecasts.append(mx2t24_forecast)

            df.loc[date,:] = month_mx2t24
    ecmwf_season_max_temp[station] = df
    df.to_csv(f'../result/ECMWFSeasonalForecasts/RawECMWFSeasonalMaxTempForecast_{station}.csv')


In [3]:

ecmwf_season_min_temp = {}
for station in stationinfo['NAME']:
    long = name_long_dict[station]
    lat = name_lat_dict[station]
    years = np.arange(2017,2024)
    df = pd.DataFrame(index=pd.date_range(start='2017-01-01', end='2023-12-31', freq='MS'), columns=[f'lead{i}' for i in range(1, 7)])
    df.index.name = 'date'
    Jan_forecasts = []
    Feb_forecasts = []
    Mar_forecasts = []
    Apr_forecasts = []
    May_forecasts = []
    Jun_forecasts = []
    Jul_forecasts = []
    Aug_forecasts = []
    Sep_forecasts = []
    Oct_forecasts = []
    Nov_forecasts = []
    Dec_forecasts = []
    for year in years:
        ds = xr.open_dataset(f'I:/ECMWFSeasonalForecast/ECMWF_sys51_allVars_EnsMean_{year}_allMonth_lead1To6_ChinaArea.grib')
        dates = pd.date_range(start=f'{year}-01-01', end=f'{year}-12-31', freq='MS')

        for date in dates:
            forecast_dates = pd.date_range(start=date, periods=6, freq='MS')
            days_ = 0
            month_mn2t24 = []
            for forecast_date in forecast_dates:
                num_days = calendar.monthrange(forecast_date.year, forecast_date.month)[1]
                days_ += num_days
                mn2t24_forecast = ds['mn2t24'].sel(
                    time=f'{date.year}-{date.month:02d}-01',
                    latitude=lat,
                    longitude=long,
                    method='nearest',
                    step=f'{days_} days',
                )
                mn2t24_forecast = mn2t24_forecast.values-273.15
                # mn2t24_forecast = (mn2t24_forecast.values - 273.15) * 9/5 + 32
                month_mn2t24.append(mn2t24_forecast)
                if forecast_date.month == 1:
                    Jan_forecasts.append(mn2t24_forecast)
                elif forecast_date.month == 2:
                    Feb_forecasts.append(mn2t24_forecast)
                elif forecast_date.month == 3:
                    Mar_forecasts.append(mn2t24_forecast)
                elif forecast_date.month == 4:
                    Apr_forecasts.append(mn2t24_forecast)
                elif forecast_date.month == 5:
                    May_forecasts.append(mn2t24_forecast)
                elif forecast_date.month == 6:
                    Jun_forecasts.append(mn2t24_forecast)
                elif forecast_date.month == 7:
                    Jul_forecasts.append(mn2t24_forecast)
                elif forecast_date.month == 8:
                    Aug_forecasts.append(mn2t24_forecast)
                elif forecast_date.month == 9:
                    Sep_forecasts.append(mn2t24_forecast)
                elif forecast_date.month == 10:
                    Oct_forecasts.append(mn2t24_forecast)
                elif forecast_date.month == 11:
                    Nov_forecasts.append(mn2t24_forecast)
                elif forecast_date.month == 12:
                    Dec_forecasts.append(mn2t24_forecast)

            df.loc[date,:] = month_mn2t24
    ecmwf_season_min_temp[station] = df
    df.to_csv(f'../result/ECMWFSeasonalForecasts/RawECMWFSeasonalMinTempForecast_{station}.csv')

In [8]:
print(ecmwf_season_max_temp)


{'MaDuo':                 lead1      lead2      lead3      lead4      lead5      lead6
date                                                                        
2017-01-01 -10.457642   -7.80127  -5.604584  -0.672943   6.131012  10.960358
2017-02-01  -6.151764  -5.520355  -1.167328    5.75235  10.721344  13.619293
2017-03-01  -6.982147  -1.940521   5.439606  10.890778  13.872711  12.802643
2017-04-01  -1.231049   6.163971  10.707428  13.626617  12.996002   8.429108
2017-05-01   4.663727  11.227203  13.899078  12.981354   8.948883    1.07608
...               ...        ...        ...        ...        ...        ...
2023-08-01  13.180084   9.685455   2.005524  -3.567719  -7.441528  -8.739349
2023-09-01  10.036041   2.299957  -4.086029  -7.738739  -9.158905  -7.497589
2023-10-01    1.13028  -4.402435  -8.552521  -9.149017  -8.181793  -4.915375
2023-11-01  -3.691986   -7.33902  -8.672668  -7.273621  -4.040131   0.434967
2023-12-01   -6.46582  -9.459747  -7.977539   -5.19516   0.287506 

In [13]:
hydro_mete_stations = {
    "Tangnaihai": [
        '玛多', '达日', '久治', '红原', '若尔盖', '玛曲', '玛沁', '河南', '兴海'
    ],
    "Guide": [
        '玛多', '达日', '久治', '红原', '若尔盖', '玛曲', '玛沁', '河南', '兴海',
        '贵南', '共和', '贵德', '同仁'
    ],
    "Xunhua": [
        '玛多', '达日', '久治', '红原', '若尔盖', '玛曲', '玛沁', '河南', '兴海',
        '贵南', '共和', '贵德', '同仁'
    ]
}

station_area = pd.read_csv('../data/MeteGaugeStationControlArea.csv')
station_area_dict = dict(zip(station_area['station'], station_area['Shape_Area']))

# Compute ratios for each location
ratios = {}
for location, stations in hydro_mete_stations.items():
    total_area = sum(station_area_dict[s] for s in stations)
    ratios[location] = {s: station_area_dict[s] / total_area for s in stations}

# Display the ratios
for location, station_ratios in ratios.items():
    print(f"\n{location} ratios:")
    for station, ratio in station_ratios.items():
        print(f"  {station}: {ratio:.4f}")
index_ = pd.date_range(start='2017-01-01',end='2023-12-31',freq='MS')
tangnaihai_pcp_df = pd.DataFrame(index=index_,columns=['lead1','lead2','lead3','lead4','lead5','lead6'])
guide_pcp_df = pd.DataFrame(index=index_,columns=['lead1','lead2','lead3','lead4','lead5','lead6'])
xunhua_pcp_df = pd.DataFrame(index=index_,columns=['lead1','lead2','lead3','lead4','lead5','lead6'])

tangnaihai_max_temp_df = pd.DataFrame(index=index_,columns=['lead1','lead2','lead3','lead4','lead5','lead6'])
guide_max_temp_df = pd.DataFrame(index=index_,columns=['lead1','lead2','lead3','lead4','lead5','lead6'])
xunhua_max_temp_df = pd.DataFrame(index=index_,columns=['lead1','lead2','lead3','lead4','lead5','lead6'])

tangnaihai_min_temp_df = pd.DataFrame(index=index_,columns=['lead1','lead2','lead3','lead4','lead5','lead6'])
guide_min_temp_df = pd.DataFrame(index=index_,columns=['lead1','lead2','lead3','lead4','lead5','lead6'])
xunhua_min_temp_df = pd.DataFrame(index=index_,columns=['lead1','lead2','lead3','lead4','lead5','lead6'])

tangnaihai_pcp_df.index.name='date'
guide_pcp_df.index.name='date'
xunhua_pcp_df.index.name='date'

tangnaihai_max_temp_df.index.name='date'
guide_max_temp_df.index.name='date'
xunhua_max_temp_df.index.name='date'

tangnaihai_min_temp_df.index.name='date'
guide_min_temp_df.index.name='date'
xunhua_min_temp_df.index.name='date'

# Calculate weighted averages for each location and variable
for location in ['Tangnaihai', 'Guide', 'Xunhua']:
    for lead in range(1, 7):
        # Precipitation

        print(name_pinyin_dict)
        
        pcp_weighted_sum = sum(
            ecmwf_season_pcp[name_pinyin_dict[station]][f'lead{lead}'] * 
            ratios[location][station] 
            for station in hydro_mete_stations[location]
        )
        eval(f"{location.lower()}_pcp_df")[f'lead{lead}'] = pcp_weighted_sum
        
        # Maximum Temperature
        max_temp_weighted_sum = sum(ecmwf_season_max_temp[name_pinyin_dict[station]][f'lead{lead}'] * ratios[location][station] 
                                    for station in hydro_mete_stations[location])
        eval(f"{location.lower()}_max_temp_df")[f'lead{lead}'] = max_temp_weighted_sum
        
        # Minimum Temperature
        min_temp_weighted_sum = sum(ecmwf_season_min_temp[name_pinyin_dict[station]][f'lead{lead}'] * ratios[location][station] 
                                    for station in hydro_mete_stations[location])
        eval(f"{location.lower()}_min_temp_df")[f'lead{lead}'] = min_temp_weighted_sum

# Save results to CSV files
for location in ['Tangnaihai', 'Guide', 'Xunhua']:
    # Precipitation
    eval(f"{location.lower()}_pcp_df").to_csv(f'../result/ECMWFSeasonalForecasts/WeightedECMWFSeasonalPrecipitationForecast_{location}.csv')
    
    # Maximum Temperature
    eval(f"{location.lower()}_max_temp_df").to_csv(f'../result/ECMWFSeasonalForecasts/WeightedECMWFSeasonalMaxTempForecast_{location}.csv')
    
    # Minimum Temperature
    eval(f"{location.lower()}_min_temp_df").to_csv(f'../result/ECMWFSeasonalForecasts/WeightedECMWFSeasonalMinTempForecast_{location}.csv')

print("Weighted averages calculated and saved to CSV files.")



Tangnaihai ratios:
  玛多: 0.2682
  达日: 0.1422
  久治: 0.0790
  红原: 0.0618
  若尔盖: 0.0500
  玛曲: 0.0455
  玛沁: 0.1180
  河南: 0.0750
  兴海: 0.1604

Guide ratios:
  玛多: 0.2199
  达日: 0.1166
  久治: 0.0648
  红原: 0.0507
  若尔盖: 0.0410
  玛曲: 0.0373
  玛沁: 0.0967
  河南: 0.0615
  兴海: 0.1315
  贵南: 0.0496
  共和: 0.0410
  贵德: 0.0405
  同仁: 0.0489

Xunhua ratios:
  玛多: 0.2199
  达日: 0.1166
  久治: 0.0648
  红原: 0.0507
  若尔盖: 0.0410
  玛曲: 0.0373
  玛沁: 0.0967
  河南: 0.0615
  兴海: 0.1315
  贵南: 0.0496
  共和: 0.0410
  贵德: 0.0405
  同仁: 0.0489
{'玛多': 'MaDuo', '达日': 'DaRi', '久治': 'JiuZhi', '红原': 'HongYuan', '若尔盖': 'RuoErGai', '玛曲': 'MaQu', '玛沁': 'MaQin', '河南': 'HeNan', '兴海': 'XingHai', '贵南': 'GuiNan', '共和': 'GongHe', '贵德': 'GuiDe', '同仁': 'TongRen'}
{'玛多': 'MaDuo', '达日': 'DaRi', '久治': 'JiuZhi', '红原': 'HongYuan', '若尔盖': 'RuoErGai', '玛曲': 'MaQu', '玛沁': 'MaQin', '河南': 'HeNan', '兴海': 'XingHai', '贵南': 'GuiNan', '共和': 'GongHe', '贵德': 'GuiDe', '同仁': 'TongRen'}
{'玛多': 'MaDuo', '达日': 'DaRi', '久治': 'JiuZhi', '红原': 'HongYuan', '若尔盖': 'Ruo

In [None]:
maduo = pd.read_csv('D:/DataSpace/HydroMeteAnthropicDatabase/7.FilledRawMeteObsInfo/ChinaLandDailyMeteV3(InsertSolarRadiation)/玛多.csv', index_col=['DATE'], parse_dates=['DATE'])
maduo = maduo.loc[:'2016-12-31','P2020(mm)']
maduo = maduo.resample('MS').sum()

month_params = pd.DataFrame(index=range(1, 13), columns=['obs_mean', 'obs_std', 'pre_mean', 'pre_std'])
for month in range(1, 13):
    obs_data = maduo[maduo.index.month == month]
    pre_data = each_monthly_forecasts[month]
    # compute average and standard deviation
    pre_mean = np.mean(pre_data)
    pre_std = np.std(pre_data)
    obs_mean = np.mean(obs_data)
    obs_std = np.std(obs_data)
    month_params.loc[month, 'obs_mean'] = obs_mean
    month_params.loc[month, 'obs_std'] = obs_std
    month_params.loc[month, 'pre_mean'] = pre_mean
    month_params.loc[month, 'pre_std'] = pre_std

dates = pd.date_range(start='2017-01-01', end='2023-12-31', freq='MS')
for date in dates:
    forecast_dates = pd.date_range(start=date, periods=6, freq='MS')
    forecast_dict = {i:forecast_dates[i-1] for i in range(1,7)}
    for forecast_date,i in zip(forecast_dates,range(1,7)):
        month = forecast_dict[i].month
        obs_mean = month_params.loc[month,'obs_mean']
        obs_std = month_params.loc[month,'obs_std']
        pre_mean = month_params.loc[month,'pre_mean']
        pre_std = month_params.loc[month,'pre_std']
        # 使用对数修正方法
        df.loc[date,f'lead{i}'] = np.exp(np.log(obs_mean) + (np.log(obs_std) / np.log(pre_std)) * (np.log(df.loc[date,f'lead{i}']) - np.log(pre_mean)))

df.to_csv('../result/ECMWFSeasonalForecasts/CorrectedECMWFSeasonalForecast_Maduo.csv')

# Visualize df using plotly, plot by row
fig = go.Figure()

for index, row in df.iterrows():
    fig.add_trace(go.Scatter(
        x=['lead1', 'lead2', 'lead3', 'lead4', 'lead5', 'lead6'],
        y=row,
        mode='lines+markers',
        name=index.strftime('%Y-%m-%d')
    ))

fig.update_layout(
    title='Corrected ECMWF Seasonal Forecast for Maduo',
    xaxis_title='Lead Time',
    yaxis_title='Precipitation (mm)',
    legend_title='Forecast Date',
    template=plot_template
)

fig.show()


In [None]:
# Read and process data
maduo = pd.read_csv('D:/DataSpace/HydroMeteAnthropicDatabase/7.FilledRawMeteObsInfo/ChinaLandDailyMeteV3(InsertSolarRadiation)/玛多.csv', index_col=['DATE'], parse_dates=['DATE'])
maduo = maduo.loc[:'2016-12-31','P2020(mm)']
maduo = maduo.resample('MS').sum()



# Define distributions for each month
month_distributions = {
    1: stats.lognorm,
    2: stats.lognorm,
    3: stats.lognorm,
    4: stats.lognorm,
    5: stats.weibull_min,
    6: stats.lognorm,
    7: stats.t,
    8: stats.beta,
    9: stats.lognorm,
    10: stats.gamma,
    11: stats.lognorm,
    12: stats.t,
}

# Initialize results DataFrame
results_df = pd.DataFrame(columns=['Month', 'Distribution', 'Parameters'])

# Fit distributions and visualize for each month
for month in range(1, 13):
    monthly_data = maduo[maduo.index.month == month]
    dist = month_distributions[month]
    
    # Fit distribution to data
    params = dist.fit(monthly_data)

    # print(params)
    
    # Add results to DataFrame
    results_df = pd.concat([results_df, pd.DataFrame({
        'Month': month,
        'Distribution': [dist.name],
        'Parameters': [params]
    })], ignore_index=True)
    
    # Visualize the results
    fig = go.Figure()
    
    print('##',each_monthly_forecasts[month])
    # Create histogram
    fig.add_trace(go.Histogram(x=monthly_data, name='Histogram', opacity=0.7, nbinsx=20, histnorm='density'))
    fig.add_trace(go.Histogram(x=each_monthly_forecasts[month], name='Histogram', opacity=0.7, nbinsx=20, histnorm='density'))
    
    # Add fitted distribution curve
    x_range = np.linspace(monthly_data.min(), monthly_data.max(), 100)
    y_fit = dist.pdf(x_range, *params)
    fig.add_trace(go.Scatter(x=x_range, y=y_fit, mode='lines', name=f'{dist.name} fit'))

    # Add KDE curve
    kde = stats.gaussian_kde(monthly_data)
    y_kde = kde(x_range)
    fig.add_trace(go.Scatter(x=x_range, y=y_kde, mode='lines', name='KDE'))

    fig.update_layout(
        title=f'Precipitation Distribution Fit for {calendar.month_abbr[month]} in Maduo',
        xaxis_title='Precipitation (mm)',
        yaxis_title='Density',
        template=plot_template
    )

    fig.show()

# Display results
# print(results_df)

# Save results to CSV
results_df.to_csv('../result/ECMWFSeasonalForecasts/MonthlyPrecipitationDistributionFit_Maduo.csv', index=False)

# 添加注释，解释不同分布拟合参数的含义
# 对于 lognorm 分布 (1, 2, 3, 4, 6, 9, 11月):
# params[0] = s: 形状参数
# params[1] = loc: 位置参数
# params[2] = scale: 尺度参数

# 对于 weibull_min 分布 (5月):
# params[0] = c: 形状参数
# params[1] = loc: 位置参数
# params[2] = scale: 尺度参数

# 对于 t 分布 (7, 12月):
# params[0] = df: 自由度
# params[1] = loc: 位置参数
# params[2] = scale: 尺度参数

# 对于 beta 分布 (8月):
# params[0] = a: 形状参数
# params[1] = b: 形状参数
# params[2] = loc: 位置参数
# params[3] = scale: 尺度参数

# 对于 gamma 分布 (10月):
# params[0] = a: 形状参数
# params[1] = loc: 位置参数
# params[2] = scale: 尺度参数

# 定义修正预测值的函数
def correct_forecast(month, params, forecast_value):
    dist = month_distributions[month]
    
    if dist.name == 'lognorm':
        s, loc, scale = params[0], params[1], params[2]
        z = (np.log(forecast_value) - loc) / scale
        corrected_value = np.exp(z * scale + loc)
        # cdf = dist.cdf(forecast_value, s, loc, scale)
        # corrected_value = dist.ppf(cdf, s, loc, scale)
    elif dist.name == 'weibull_min':
        c, loc, scale = params[0], params[1], params[2]
        cdf = dist.cdf(forecast_value, c, loc, scale)
        corrected_value = dist.ppf(cdf, c, loc, scale)
    elif dist.name == 't':
        df, loc, scale = params[0], params[1], params[2]
        cdf = dist.cdf(forecast_value, df, loc, scale)
        corrected_value = dist.ppf(cdf, df, loc, scale)
    elif dist.name == 'beta':
        a, b, loc, scale = params[0], params[1], params[2], params[3]
        cdf = dist.cdf(forecast_value, a, b, loc, scale)
        corrected_value = dist.ppf(cdf, a, b, loc, scale)
    elif dist.name == 'gamma':
        a, loc, scale = params[0], params[1], params[2]
        cdf = dist.cdf(forecast_value, a, loc, scale)
        corrected_value = dist.ppf(cdf, a, loc, scale)
    else:
        raise ValueError(f"Unsupported distribution: {dist.name}")
    
    return corrected_value

# 使用示例
# 假设我们有一个1月份的预测值
january_forecast = 10.0
january_params = results_df.loc[results_df['Month'] == 1, 'Parameters'].values[0]
print(january_params)
corrected_january_forecast = correct_forecast(1, january_params, january_forecast)
print(f"Original January forecast: {january_forecast}")
print(f"Corrected January forecast: {corrected_january_forecast}")