In [None]:
import pandas as pd
import numpy as np

In [None]:
# ========== 时间特征 ==========
def add_date_features(df, date_col="date"):
    """Derive calendar columns from a date column, modifying ``df`` in place.

    The column named by ``date_col`` is converted with ``pd.to_datetime`` and
    written back, then the following columns are added (in this order):
    ``year``, ``month``, ``day``, ``dayofweek`` (Monday == 0),
    ``weekofyear`` (ISO week number) and ``is_weekend`` (1 for Saturday or
    Sunday, otherwise 0).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend; it is mutated and also returned.
    date_col : str, default "date"
        Name of the column holding the dates.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    stamps = pd.to_datetime(df[date_col])
    df[date_col] = stamps

    weekday = stamps.dt.dayofweek
    derived = {
        "year": stamps.dt.year,
        "month": stamps.dt.month,
        "day": stamps.dt.day,
        "dayofweek": weekday,
        # isocalendar() yields a nullable UInt32 column; cast to plain int.
        "weekofyear": stamps.dt.isocalendar().week.astype(int),
        # dayofweek 5 and 6 are Saturday and Sunday.
        "is_weekend": weekday.isin([5, 6]).astype(int),
    }
    for name, values in derived.items():
        df[name] = values
    return df


    

In [None]:
# ========== 滑动窗口特征 ==========

def add_lag_features(df, group_cols, target_col, lags=(1, 7, 14), date_col="date"):
    """Add per-group lagged copies of ``target_col``.

    The frame is sorted by ``group_cols`` and then ``date_col``; for every
    ``lag`` a column ``f"{target_col}_lag{lag}"`` is added that holds the
    target value ``lag`` rows earlier within the same group. Rows with fewer
    than ``lag`` predecessors in their group get NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified; ``sort_values`` returns a new frame
        and the lag columns are added to that one.
    group_cols : str or sequence of str
        Column(s) identifying a group.
    target_col : str
        Column to lag.
    lags : iterable of int, default (1, 7, 14)
        Row offsets to shift by.
    date_col : str, default "date"
        Column used to order rows inside each group.

    Returns
    -------
    pandas.DataFrame
        Sorted copy of ``df`` with the lag columns appended. The original
        index labels are kept (in sorted order).
    """
    # Accept a single column name or any sequence, not only a list.
    group_keys = [group_cols] if isinstance(group_cols, str) else list(group_cols)
    df = df.sort_values(group_keys + [date_col])

    # The grouping does not depend on the lag, so build it once.
    grouped_target = df.groupby(group_keys)[target_col]
    for lag in lags:
        df[f"{target_col}_lag{lag}"] = grouped_target.shift(lag)
    return df

def add_rolling_features(df, group_cols, target_col, windows=(7, 14, 28), date_col="date"):
    """Add rolling mean/std of the previous rows' ``target_col`` per group.

    The frame is sorted by ``group_cols`` and then ``date_col``. The target is
    shifted by one row inside each group (so the current row's own value is
    never part of its window), and for every ``win`` two columns are added:
    ``f"{target_col}_rollmean{win}"`` and ``f"{target_col}_rollstd{win}"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified; ``sort_values`` returns a new frame
        and the feature columns are added to that one.
    group_cols : str or sequence of str
        Column(s) identifying a group.
    target_col : str
        Column to aggregate.
    windows : iterable of int, default (7, 14, 28)
        Window lengths, in rows.
    date_col : str, default "date"
        Column used to order rows inside each group.

    Returns
    -------
    pandas.DataFrame
        Sorted copy of ``df`` with the rolling columns appended.
    """
    # Accept a single column name or any sequence, not only a list.
    group_keys = [group_cols] if isinstance(group_cols, str) else list(group_cols)
    df = df.sort_values(group_keys + [date_col])

    # The shifted series is the same for every window and statistic, so
    # compute the group-wise shift once instead of twice per window.
    shifted = df.groupby(group_keys)[target_col].shift(1)

    for win in windows:
        # Rolling over the whole (group-sorted) series does not mix groups:
        # shift(1) leaves a NaN at the first row of every group, and with an
        # integer window min_periods defaults to the window size, so any
        # window that reaches across a group boundary contains that NaN and
        # evaluates to NaN.
        roller = shifted.rolling(win)
        df[f"{target_col}_rollmean{win}"] = roller.mean()
        df[f"{target_col}_rollstd{win}"] = roller.std()
    return df

In [None]:
# ========== 类别特征编码 ==========
from sklearn.preprocessing import LabelEncoder
def label_encode(train, test, cols):
    """Replace categorical columns with integer codes shared by both frames.

    For each column in ``cols`` a ``LabelEncoder`` is fitted on the
    concatenation of the train and test values, so both frames use the same
    value-to-code mapping. The columns are overwritten in place.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames to encode; both are mutated and also returned.
    cols : iterable of str
        Names of the columns to encode; each must exist in both frames.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test)`` — the same objects that were passed in.
    """
    for column in cols:
        encoder = LabelEncoder()
        # Fit on train + test together so values seen in only one frame
        # still receive a code.
        encoder.fit(pd.concat([train[column], test[column]]))
        for frame in (train, test):
            frame[column] = encoder.transform(frame[column])
    return train, test

In [None]:
# ========== 外部数据 join 示例 ==========
def merge_external(train, test, oil, holidays, stores):
    """Left-join oil prices, a holiday flag and store attributes onto train/test.

    Steps, applied identically to ``train`` and ``test``:

    1. Forward-fill gaps in ``oil["dcoilwtico"]`` and join on ``date``.
    2. Build ``is_holiday`` (1 when ``holidays["type"]`` is ``"Holiday"``,
       ``"Additional"`` or ``"Bridge"``), reduce it to one row per date with
       ``max``, join on ``date``, and set unmatched dates to 0.
    3. Join ``stores`` on ``store_nbr``.

    None of the input frames is modified.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames with ``date`` and ``store_nbr`` columns.
    oil : pandas.DataFrame
        Frame with ``date`` and ``dcoilwtico`` columns.
    holidays : pandas.DataFrame
        Frame with ``date`` and ``type`` columns.
    stores : pandas.DataFrame
        Frame with a ``store_nbr`` column plus store attributes.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test)`` as new, enriched frames. ``is_holiday`` is always
        an integer column.
    """
    # Fill missing oil prices. ``Series.ffill()`` replaces the deprecated
    # ``fillna(method="ffill")``; ``assign`` keeps the caller's frame intact.
    # NOTE(review): forward fill presumably expects ``oil`` to be ordered by
    # date, and a leading NaN stays NaN — confirm against the data.
    oil = oil.assign(dcoilwtico=oil["dcoilwtico"].ffill())

    # Holidays: one 0/1 flag per date (1 if any matching entry on that date).
    flagged = holidays.assign(
        is_holiday=holidays["type"].isin(["Holiday", "Additional", "Bridge"]).astype(int)
    )
    holiday_flags = flagged.groupby("date")["is_holiday"].max().reset_index()

    def _enrich(frame):
        """Apply the oil, holiday and store joins to one frame."""
        merged = frame.merge(oil, on="date", how="left")
        merged = merged.merge(holiday_flags, on="date", how="left")
        # Dates missing from the holiday table become NaN in the left join
        # (which turns the column into float); treat them as non-holidays and
        # restore the integer dtype so it does not depend on the data.
        merged = merged.fillna({"is_holiday": 0}).astype({"is_holiday": int})
        # Stores
        return merged.merge(stores, on="store_nbr", how="left")

    return _enrich(train), _enrich(test)