<a href="https://colab.research.google.com/github/yeonyeo/LG_Aimers/blob/main/08_22_submission.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import glob
import re
import random
import numpy as np
import pandas as pd
from datetime import timedelta

import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

In [2]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
set_seed(42)

In [3]:
LOOKBACK, PREDICT = 28, 7
BATCH_SIZE, EPOCHS = 16, 40
LR = 1e-3
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
def _read_train():
    path = '/content/drive/MyDrive/open/train/train.csv'
    if not os.path.exists(path):
        path = './train.csv'
    df = pd.read_csv(path)
    df['영업일자'] = pd.to_datetime(df['영업일자'])
    return df

In [5]:
def _dow_one_hot(dates: np.ndarray) -> np.ndarray:
    dows = pd.to_datetime(dates).weekday.values if hasattr(dates, "values") else pd.to_datetime(dates).weekday
    out = np.zeros((len(dates), 7), dtype=np.float32)
    for i, d in enumerate(dows):
        out[i, int(d)] = 1.0
    return out

In [6]:
# ✅ 공휴일 제외 요일별 중앙값
def same_dow_median_from_last28(dates_28: np.ndarray, values_28: np.ndarray, horizon=7, holiday_mask=None):
    dates_28 = pd.to_datetime(dates_28)
    if holiday_mask is None:
        holiday_mask = np.zeros(len(dates_28), dtype=bool)

    med = {}
    for dow in range(7):
        mask = (dates_28.weekday == dow) & (~holiday_mask)  # ✅ 공휴일 제외
        arr = values_28[mask]
        if len(arr) > 0:
            med[dow] = float(np.median(arr))
    overall = float(np.median(values_28[~holiday_mask])) if np.any(~holiday_mask) else float(np.median(values_28))

    last_date = dates_28.max()
    preds = []
    for h in range(1, horizon+1):
        dow = int((last_date + timedelta(days=h)).weekday())
        preds.append(med.get(dow, overall))
    return np.array(preds, dtype=np.float32)

In [7]:
def postprocess_from_last28(yhat: np.ndarray, last28_vals: np.ndarray, eps_floor=1.0, cap_quantile=99, cap_scale=1.2):
    yhat = np.maximum(yhat, 0.0)
    yhat = np.where(yhat == 0.0, eps_floor, yhat)
    if len(last28_vals) > 0:
        cap = np.percentile(last28_vals, cap_quantile) * cap_scale
        if np.isfinite(cap) and cap > 0:
            yhat = np.minimum(yhat, cap)
    return yhat

In [8]:
holidays = pd.to_datetime([
    "2023-01-01","2023-03-01","2023-05-05","2023-05-27","2023-08-15",
    "2023-09-28","2023-09-29","2023-09-30","2023-10-03","2023-10-09",
    "2023-12-25","2024-01-01","2024-02-09","2024-02-10","2024-02-11","2024-02-12",
    "2024-03-01","2024-04-10","2024-05-05","2024-05-06","2024-05-15",
    "2024-06-06"
])

In [9]:
def add_features(df):
    df['is_weekend'] = (df['영업일자'].dt.weekday >= 5).astype(int)
    df['is_holiday'] = df['영업일자'].isin(holidays).astype(int)
    return df

In [10]:
class MultiOutputLSTM(nn.Module):
    def __init__(self, input_dim=10, hidden_dim=64, num_layers=2, output_dim=7, dropout=0.1):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1])
        return out

In [11]:
class MultiOutputGRU(nn.Module):
    def __init__(self, input_dim=10, hidden_dim=64, num_layers=2, output_dim=7, dropout=0.1):
        super().__init__()
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.gru(x)
        out = self.fc(out[:, -1])
        return out

In [12]:
def train_model(train_df: pd.DataFrame, model_type="lstm"):
    trained = {}
    for key, g in tqdm(train_df.groupby('영업장명_메뉴명'), desc=f'Training {model_type.upper()} (per item)'):
        g = g.sort_values('영업일자').copy()
        if len(g) < LOOKBACK + PREDICT:
            continue

        scaler = MinMaxScaler()
        qty = g[['매출수량']].values.astype(np.float32)
        qty_scaled = scaler.fit_transform(qty)

        dows = _dow_one_hot(g['영업일자'].values)
        feats = np.concatenate([qty_scaled, dows,
                                g[['is_weekend','is_holiday']].values.astype(np.float32)], axis=1)  # (N,10)

        X_list, Y_list = [], []
        for i in range(len(g) - LOOKBACK - PREDICT + 1):
            X = feats[i:i+LOOKBACK]    # (28,10)
            y = qty_scaled[i+LOOKBACK:i+LOOKBACK+PREDICT, 0]
            X_list.append(X)
            Y_list.append(y)

        X_train = torch.tensor(np.stack(X_list)).float().to(DEVICE)
        y_train = torch.tensor(np.stack(Y_list)).float().to(DEVICE)

        if model_type == "lstm":
            model = MultiOutputLSTM(input_dim=10, output_dim=PREDICT).to(DEVICE)
        else:
            model = MultiOutputGRU(input_dim=10, output_dim=PREDICT).to(DEVICE)

        opt = torch.optim.Adam(model.parameters(), lr=LR)
        criterion = nn.MSELoss()

        model.train()
        for epoch in range(EPOCHS):
            idx = torch.randperm(X_train.size(0), device=DEVICE)
            for i in range(0, X_train.size(0), BATCH_SIZE):
                b = idx[i:i+BATCH_SIZE]
                xb, yb = X_train[b], y_train[b]
                pred = model(xb)
                loss = criterion(pred, yb)
                opt.zero_grad()
                loss.backward()
                opt.step()

        trained[key] = {'model': model.eval(), 'scaler': scaler}
    return trained

In [13]:
def predict(test_df, trained_models: dict, model_type: str, test_prefix: str, alpha=0.6):
    results = []
    for key, g in test_df.groupby('영업장명_메뉴명'):
        if key not in trained_models: continue
        g = g.sort_values('영업일자').copy()
        if len(g) < LOOKBACK: continue

        model = trained_models[key]['model']
        scaler = trained_models[key]['scaler']

        last28_vals = g['매출수량'].values[-LOOKBACK:].astype(np.float32)
        last28_dates = pd.to_datetime(g['영업일자']).values[-LOOKBACK:]

        x_qty_scaled = scaler.transform(last28_vals.reshape(-1,1))
        x_dow = _dow_one_hot(last28_dates)
        x_other = g[['is_weekend','is_holiday']].values[-LOOKBACK:].astype(np.float32)
        x_input = np.concatenate([x_qty_scaled, x_dow, x_other], axis=1)[None, ...]

        with torch.no_grad():
            pred_scaled = model(torch.tensor(x_input).float().to(DEVICE)).squeeze(0).cpu().numpy()

        restored = [scaler.inverse_transform([[s]])[0,0] for s in pred_scaled]
        restored = np.maximum(restored, 0.0)

        dow_med = same_dow_median_from_last28(last28_dates, last28_vals, horizon=PREDICT)
        blended = alpha * restored + (1 - alpha) * dow_med
        blended = postprocess_from_last28(blended, last28_vals)

        pred_dates = [f"{test_prefix}+{i+1}일" for i in range(PREDICT)]
        for d, val in zip(pred_dates, blended.tolist()):
            results.append({'영업일자': d, '영업장명_메뉴명': key, '매출수량': val})
    return pd.DataFrame(results)

In [14]:
def ensemble_predict(test_df, trained_lstm, trained_gru, test_prefix):
    lstm_pred = predict(test_df, trained_lstm, "lstm", test_prefix, alpha=0.6)
    gru_pred = predict(test_df, trained_gru, "gru", test_prefix, alpha=0.6)
    merged = lstm_pred.copy()
    merged['매출수량'] = (lstm_pred['매출수량'] + gru_pred['매출수량']) / 2
    return merged

In [15]:
# =====================
# Submission
# =====================
def convert_to_submission_format(pred_df: pd.DataFrame, sample_submission: pd.DataFrame):
    pred_df['영업일자'] = pred_df['영업일자'].astype(str)
    pred_df['영업장명_메뉴명'] = pred_df['영업장명_메뉴명'].astype(str)
    pred_dict = dict(zip(zip(pred_df['영업일자'], pred_df['영업장명_메뉴명']), pred_df['매출수량']))
    final_df = sample_submission.copy()
    for row_idx in final_df.index:
        date = final_df.loc[row_idx, '영업일자']
        for col in final_df.columns[1:]:
            final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
    return final_df

In [16]:
def main():
    train_df = _read_train()                # ✅ train.csv Colab 경로로 읽기
    train_df = add_features(train_df)

    trained_lstm = train_model(train_df, model_type="lstm")
    trained_gru = train_model(train_df, model_type="gru")

    all_preds = []
    test_files = sorted(glob.glob('/content/drive/MyDrive/open/test/TEST_*.csv'))   # ✅ test 파일 경로
    for path in test_files:
        test_df = pd.read_csv(path)
        test_df['영업일자'] = pd.to_datetime(test_df['영업일자'])
        test_df = add_features(test_df)
        test_prefix = re.search(r'(TEST_\d+)', os.path.basename(path)).group(1)
        pred_df = ensemble_predict(test_df, trained_lstm, trained_gru, test_prefix)
        all_preds.append(pred_df)

    full_pred_df = pd.concat(all_preds, ignore_index=True) if len(all_preds) else pd.DataFrame(columns=['영업일자','영업장명_메뉴명','매출수량'])

    sample_path = '/content/drive/MyDrive/open/sample_submission.csv'   # ✅ Colab 경로
    sample = pd.read_csv(sample_path)
    submission = convert_to_submission_format(full_pred_df, sample)
    submission.to_csv('/content/drive/MyDrive/open/0822submission.csv', index=False, encoding='utf-8-sig')  # ✅ 저장경로

if __name__ == "__main__":
    main()



Training LSTM (per item): 100%|██████████| 193/193 [10:24<00:00,  3.23s/it]
Training GRU (per item): 100%|██████████| 193/193 [09:46<00:00,  3.04s/it]
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] =