Datawhale 笔记

市场博弈价格评估

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.linear_model import LinearRegression

# Directory that holds the competition CSV files. This is a machine-specific
# absolute path; point it at your own local data folder before running.
base_path = Path(r"D:\aliyun\Round_1 data_new\Round_1 data_new")

# Market data (one row per time slot, including demand and clearing price).
electricity_price = pd.read_csv(base_path / "electricity price.csv")
# Market participants (one row per generating unit).
unit = pd.read_csv(base_path / "unit.csv")

# Rows without a clearing price are the ones to be predicted. They form the
# submission template once the demand column is removed.
price_missing = electricity_price["clearing price (CNY/MWh)"].isna()
sample_submit = electricity_price.loc[price_missing].drop(columns=["demand"])
sample_submit.to_csv(base_path / "sample_submit.csv", index=False)

# Merge the `day` and `time` columns into a single `timestamp` column so that
# time-based features can be extracted later.
#
# "24:00:00" is not a valid clock time: it denotes midnight at the END of the
# day. Remember which rows carry it BEFORE rewriting the string, so that only
# those rows are rolled forward (a genuine 00:00 row must not be shifted).
mask = electricity_price["time"].str.contains("24:00:00", regex=False, na=False)

electricity_price["timestamp"] = pd.to_datetime(
    electricity_price["day"] + " "
    + electricity_price["time"].str.replace("24:00:00", "00:00", regex=False))

# 24:00:00 on day D is 00:00:00 on day D + 1.
electricity_price.loc[mask, 'timestamp'] += pd.Timedelta(days=1)

# Fix the column order and drop the now-redundant `day` and `time` columns.
electricity_price = electricity_price[["timestamp", "demand", "clearing price (CNY/MWh)"]]
# Scale every unit's coal consumption by (1 - power consumption rate / 100),
# overwriting the original column.
own_use_fraction = unit['power consumption rate (%)'].div(100)
unit['coal consumption (g coal/KWh)'] = unit['coal consumption (g coal/KWh)'].mul(1 - own_use_fraction)

# Merit order: units sorted by ASCENDING coal consumption per kWh, which is
# used here as an approximation of marginal cost.
sorted_unit = unit.sort_values(by="coal consumption (g coal/KWh)", ascending=True)
sorted_unit['used_time'] = 0
# Running total of capacity along the merit order.
sorted_unit['cumulative_capacity'] = sorted_unit['Capacity（MW）'].cumsum()
sorted_unit


In [None]:
# Marginal-unit lookup: for every demand value, take the coal-consumption
# figure of the first unit (in merit order) whose cumulative capacity covers
# that demand.
demand_values = electricity_price["demand"].to_numpy(dtype=float)
cumulative_capacity = sorted_unit["cumulative_capacity"].to_numpy(dtype=float)
unit_costs = sorted_unit["coal consumption (g coal/KWh)"].to_numpy()

# Fail with a clear message (instead of an opaque IndexError) when a demand
# value is NaN or larger than the total installed capacity.
total_capacity = cumulative_capacity[-1] if cumulative_capacity.size else -np.inf
uncovered = ~(demand_values <= total_capacity)
if uncovered.any():
    raise ValueError(
        f"{int(uncovered.sum())} demand value(s) are missing or exceed the "
        f"total unit capacity ({total_capacity}); no marginal unit exists."
    )

# side="left" yields the first index with cumulative_capacity >= demand, which
# is the row the per-row boolean filter + .iloc[0] used to select.
# Assumes capacities are non-negative so the cumulative sum is non-decreasing.
marginal_idx = np.searchsorted(cumulative_capacity, demand_values, side="left")
prices = unit_costs[marginal_idx].tolist()

print(len(prices))
prices[:5]

In [None]:
from sklearn.metrics import mean_squared_error

model = LinearRegression()

target_col = "clearing price (CNY/MWh)"

# Rows with a known clearing price are the training rows; rows where it is
# missing are the ones to predict. This is the same criterion used to build
# `sample_submit`, so the prediction vector always lines up with that frame.
has_price = electricity_price[target_col].notna().to_numpy()
# Length of the training set (the original notebook hard-coded 55392).
train_length = int(has_price.sum())

prices = np.array(prices).reshape(-1, 1)
# NOTE(review): X (log of the marginal-unit cost) is computed but NOT used by
# the fit below, which relies on log-demand only — confirm this is intended.
X = np.log(prices[has_price])

log_demand = np.log(electricity_price["demand"].to_numpy().reshape(-1, 1))
X1 = log_demand[has_price]
y = electricity_price.loc[has_price, target_col].to_numpy()

model.fit(X1, y)
y_pred = model.predict(X1)  # 1-D because y is 1-D

# In-sample error; arguments follow the documented (y_true, y_pred) order.
mse = mean_squared_error(y, y_pred)
print(mse)

# Predict the rows that have no clearing price and write the submission file.
y_pred4 = model.predict(log_demand[~has_price])
sample_submit[target_col] = np.round(y_pred4, 4)
sample_submit.to_csv("submit.csv", index=False)