In [1]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Change the working directory to the project folder on the mounted Drive so the
# relative ./data paths used later in the notebook resolve.
# NOTE(review): this is a hard-coded, user-specific path.
%cd /content/drive/MyDrive/DSML/DACON/LG Aimers/3기/Online

/content/drive/MyDrive/DSML/DACON/LG Aimers/3기/Online


In [3]:
import random
import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split

In [4]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [5]:
# Load the training table and drop the 'ID' and '제품' columns before any further processing.
train_data = pd.read_csv('./data/train.csv').drop(columns=['ID', '제품'])

In [6]:
# Hyperparameters shared by the dataset, the model and the training loop.
CFG = dict(
    TRAIN_WINDOW_SIZE=7,   # number of past timesteps fed to the model per sample
    PREDICT_SIZE=21,       # number of future timesteps predicted per sample
    EPOCHS=10,             # number of passes over the training loader
    LEARNING_RATE=1e-4,    # learning rate handed to the optimizer
    BATCH_SIZE=2048,       # batch size used by every DataLoader
    SEED=50,               # seed passed to seed_everything
)

In [7]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and every CUDA device), exports ``PYTHONHASHSEED`` and
    configures cuDNN for deterministic execution.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Exported for any subprocess started later; hash randomisation of the
    # already-running interpreter is not changed by setting this here.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds all visible GPUs; silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick kernels at run time, which
    # can differ between runs and undermines the determinism requested above.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the random seed for reproducibility

In [8]:
# Data Scaling: per-row min-max normalisation of the series columns.
# Columns 0-3 are left untouched; every column from position 4 onward is scaled.
# The whole table is processed in one vectorised pass instead of one .iloc row
# assignment per row, which is far faster and avoids writing floats into
# integer-typed columns one row at a time.
# NOTE: this cell is not idempotent -- re-running it on already-scaled data
# overwrites the stored extrema with those of the scaled values.
sales_columns = train_data.columns[4:]
sales_frame = train_data[sales_columns].astype(float)

row_max = sales_frame.max(axis=1)
row_min = sales_frame.min(axis=1)
row_range = row_max - row_min
is_constant_row = row_range == 0

# Rows whose max equals their min cannot be scaled (division by zero): divide
# them by a placeholder of 1 and then force the entire row to zero.
scaled_frame = sales_frame.sub(row_min, axis=0).div(row_range.mask(is_constant_row, 1.0), axis=0)
scaled_frame.loc[is_constant_row] = 0.0
train_data[sales_columns] = scaled_frame

# Original per-row extrema keyed by positional row index, kept so predictions
# can be mapped back to the original scale.
scale_max_dict = dict(enumerate(row_max.tolist()))
scale_min_dict = dict(enumerate(row_min.tolist()))

  0%|          | 0/15890 [00:00<?, ?it/s]

In [9]:
# Label Encoding: replace each categorical column with integer codes.
categorical_columns = ['대분류', '중분류', '소분류', '브랜드']
label_encoder = LabelEncoder()

for col in categorical_columns:
    # The single encoder is refitted for every column, so after the loop it
    # only holds the classes of the last column.
    train_data[col] = label_encoder.fit_transform(train_data[col])

In [10]:
class CustomDataset(Dataset):
    """Sliding-window dataset over a table of per-row series.

    Each row of ``data`` is read as four leading feature columns followed by
    the series values. In training mode every row yields one sample per window
    position: the first ``train_size`` values of the window (each paired with
    the row's four leading features) form the input and the remaining
    ``predict_size`` values form the target. In inference mode each row yields
    a single input built from its last ``train_size`` values.
    """

    def __init__(self, data, train_size=CFG['TRAIN_WINDOW_SIZE'], predict_size=CFG['PREDICT_SIZE'], is_inference=False):
        # Work on the raw NumPy array rather than the DataFrame.
        self.data = data.values
        self.train_size = train_size
        self.predict_size = predict_size
        self.window_size = self.train_size + self.predict_size
        self.is_inference = is_inference

    def _windows_per_row(self):
        """Number of window start positions available in a single row."""
        return self.data.shape[1] - self.window_size - 3

    def __len__(self):
        # Inference: one sample per row. Training: one sample per (row, window start).
        if self.is_inference:
            return len(self.data)
        return self.data.shape[0] * self._windows_per_row()

    def __getitem__(self, idx):
        if self.is_inference:
            # Inference: use the trailing train_size values of the row.
            static_features = self.data[idx, :4]
            recent_values = self.data[idx, -self.train_size:]
            repeated_features = np.tile(static_features, (self.train_size, 1))
            return np.column_stack((repeated_features, recent_values))

        # Training: decode the flat index into (row, window start).
        per_row = self._windows_per_row()
        row = idx // per_row
        col = idx % per_row

        static_features = self.data[row, :4]
        series = self.data[row, 4:]
        window = series[col : col + self.window_size]

        repeated_features = np.tile(static_features, (self.train_size, 1))
        input_data = np.column_stack((repeated_features, window[:self.train_size]))
        target_data = window[self.train_size:]
        return input_data, target_data

In [11]:
# Build the sliding-window dataset over the training table.
dataset = CustomDataset(train_data)

# Total number of (input, target) samples.
total_size = len(dataset)

# Sizes for an 80/20 train/validation split.
train_size = int(total_size * 0.8)
val_size = total_size - train_size

# Split with an explicitly seeded generator so this cell yields the same
# partition every time it runs, independent of how much of the global torch
# RNG stream has been consumed beforehand.
# NOTE(review): samples are assigned at random, so overlapping windows of the
# same row can land on both sides of the split; the validation loss is
# therefore probably optimistic.
split_generator = torch.Generator().manual_seed(CFG['SEED'])
train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator=split_generator)

# Batch loaders: shuffle only the training data.
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

In [12]:
class TemporalAttention(nn.Module):
    """Dot-product self-attention over the second-to-last axis of the input.

    Learned linear projections turn the input into queries and keys. The
    softmax-normalised query-key scores are then used to mix the *unprojected*
    input, so the output has the same shape as the input.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.query_transform = nn.Linear(hidden_size, hidden_size)
        self.key_transform = nn.Linear(hidden_size, hidden_size)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        queries = self.query_transform(x)
        keys = self.key_transform(x)

        # weights[..., i, j]: how strongly position i attends to position j,
        # normalised over j.
        weights = self.softmax(queries @ keys.transpose(-2, -1))
        return weights @ x

In [17]:
class BaseModelWithTemporalAttention(nn.Module):
    """LSTM encoder followed by temporal attention and a fully connected head.

    The input sequence is encoded by a single-layer LSTM, re-weighted by
    ``TemporalAttention``, and the attended features of the last timestep are
    mapped to ``output_size`` values. A final ReLU makes every output
    non-negative.
    """

    def __init__(self, input_size=5, hidden_size=1024, output_size=CFG['PREDICT_SIZE']):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        # Attention applied across the timesteps of the LSTM output.
        self.temporal_attention = TemporalAttention(hidden_size)
        half_width = hidden_size // 2
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, half_width),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(half_width, output_size),
        )
        self.actv = nn.ReLU()

    def forward(self, x):
        # Start the LSTM from an all-zero hidden/cell state on x's device.
        initial_state = self.init_hidden(x.size(0), x.device)
        sequence_features, _ = self.lstm(x, initial_state)

        # Re-weight the timesteps, then keep only the last one.
        attended = self.temporal_attention(sequence_features)
        final_step = attended[:, -1, :]

        prediction = self.actv(self.fc(final_step))
        return prediction.squeeze(1)

    def init_hidden(self, batch_size, device):
        """Return zero-filled (hidden, cell) states for a single-layer LSTM."""
        state_shape = (1, batch_size, self.hidden_size)
        hidden_state = torch.zeros(*state_shape, device=device)
        cell_state = torch.zeros(*state_shape, device=device)
        return (hidden_state, cell_state)

In [18]:
def train(model, optimizer, train_loader, val_loader, device):
    """Train ``model`` and return it carrying the weights of its best epoch.

    Runs ``CFG['EPOCHS']`` epochs of MSE training. After every epoch the model
    is scored with ``validation``; whenever the validation loss improves, a
    copy of the weights is taken. Storing only a reference to ``model`` would
    alias the live, still-training network, so the "best" model would always
    end up holding the last epoch's weights.

    Args:
        model: Network to train; moved to ``device`` in place.
        optimizer: Optimizer already bound to ``model``'s parameters.
        train_loader: Iterable of ``(X, Y)`` training batches.
        val_loader: Iterable of ``(X, Y)`` validation batches.
        device: Device on which batches and the model are placed.

    Returns:
        ``model`` with the best snapshot loaded, or ``None`` when no epoch
        produced a validation loss that compared as an improvement (for
        example when ``CFG['EPOCHS']`` is 0 or every loss is NaN).
    """
    model.to(device)
    criterion = nn.MSELoss().to(device)
    best_loss = float('inf')
    best_state = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for X, Y in tqdm(iter(train_loader)):
            X = X.float().to(device)
            Y = Y.float().to(device)

            optimizer.zero_grad()

            output = model(X)
            loss = criterion(output, Y)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}]')

        if best_loss > val_loss:
            best_loss = val_loss
            # Clone every tensor: state_dict() returns references to the live
            # parameters, which later optimizer steps would overwrite.
            best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
            print('Model Saved')

    if best_state is None:
        return None

    # Restore the best epoch's weights before handing the model back.
    model.load_state_dict(best_state)
    return model

In [19]:
def validation(model, val_loader, criterion, device):
    """Return the mean per-batch loss of ``model`` over ``val_loader``.

    Switches the model to eval mode and disables gradient tracking while the
    batches are scored with ``criterion``.
    """
    model.eval()
    batch_losses = []

    with torch.no_grad():
        for inputs, targets in tqdm(iter(val_loader)):
            inputs = inputs.float().to(device)
            targets = targets.float().to(device)

            batch_loss = criterion(model(inputs), targets)
            batch_losses.append(batch_loss.item())

    return np.mean(batch_losses)

In [20]:
# Build the network with its default sizes and show its layer structure.
model = BaseModelWithTemporalAttention()
print(model)

# Adam over all model parameters, then run the training loop.
optimizer = optim.Adam(model.parameters(), lr=CFG['LEARNING_RATE'])
infer_model = train(model, optimizer, train_loader, val_loader, device)

BaseModelWithTemporalAttention(
  (lstm): LSTM(5, 1024, batch_first=True)
  (temporal_attention): TemporalAttention(
    (query_transform): Linear(in_features=1024, out_features=1024, bias=True)
    (key_transform): Linear(in_features=1024, out_features=1024, bias=True)
    (softmax): Softmax(dim=-1)
  )
  (fc): Sequential(
    (0): Linear(in_features=1024, out_features=512, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=512, out_features=21, bias=True)
  )
  (actv): ReLU()
)


  0%|          | 0/2682 [00:00<?, ?it/s]

  0%|          | 0/671 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.02722] Val Loss : [0.03025]
Model Saved


  0%|          | 0/2682 [00:00<?, ?it/s]

  0%|          | 0/671 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.02290] Val Loss : [0.01859]
Model Saved


  0%|          | 0/2682 [00:00<?, ?it/s]

  0%|          | 0/671 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.01950] Val Loss : [0.01896]


  0%|          | 0/2682 [00:00<?, ?it/s]

  0%|          | 0/671 [00:00<?, ?it/s]

Epoch : [4] Train Loss : [0.01907] Val Loss : [0.01911]


  0%|          | 0/2682 [00:00<?, ?it/s]

  0%|          | 0/671 [00:00<?, ?it/s]

Epoch : [5] Train Loss : [0.01882] Val Loss : [0.01880]


  0%|          | 0/2682 [00:00<?, ?it/s]

  0%|          | 0/671 [00:00<?, ?it/s]

Epoch : [6] Train Loss : [0.01871] Val Loss : [0.01815]
Model Saved


  0%|          | 0/2682 [00:00<?, ?it/s]

  0%|          | 0/671 [00:00<?, ?it/s]

Epoch : [7] Train Loss : [0.01864] Val Loss : [0.01815]


  0%|          | 0/2682 [00:00<?, ?it/s]

  0%|          | 0/671 [00:00<?, ?it/s]

Epoch : [8] Train Loss : [0.01849] Val Loss : [0.01789]
Model Saved


  0%|          | 0/2682 [00:00<?, ?it/s]

  0%|          | 0/671 [00:00<?, ?it/s]

Epoch : [9] Train Loss : [0.01843] Val Loss : [0.01787]
Model Saved


  0%|          | 0/2682 [00:00<?, ?it/s]

  0%|          | 0/671 [00:00<?, ?it/s]

Epoch : [10] Train Loss : [0.01838] Val Loss : [0.01788]


In [21]:
# Inference inputs: one sample per row of the training table, built from each
# row's trailing window; order is preserved so predictions line up with rows.
test_dataset = CustomDataset(train_data, is_inference=True)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
)

In [22]:
def inference(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and collect the outputs in one array.

    Args:
        model: Trained network; switched to eval mode by this function.
        test_loader: Iterable of input batches (no targets).
        device: Device on which each batch is placed before the forward pass.

    Returns:
        NumPy array holding one prediction row per input sample, in loader order.
    """
    # Switch to eval mode explicitly so layers such as Dropout are disabled
    # regardless of the mode the caller left the model in.
    model.eval()
    predictions = []

    with torch.no_grad():
        for X in tqdm(iter(test_loader)):
            X = X.float().to(device)

            output = model(X)

            # Move the model output to the CPU and convert it to a NumPy array.
            output = output.cpu().numpy()

            predictions.extend(output)

    return np.array(predictions)

In [23]:
# Predict for every row of the inference loader using the model returned by train().
pred = inference(infer_model, test_loader, device)

  0%|          | 0/8 [00:00<?, ?it/s]

In [24]:
# Undo the per-row min-max scaling using the extrema recorded for each row.
for idx in range(len(pred)):
    pred[idx, :] = pred[idx, :] * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]

# Post-process the results: round to the nearest whole number and cast to int.
pred = np.round(pred, 0).astype(int)

In [25]:
# Sanity check: one row per series and one column per predicted step.
pred.shape

(15890, 21)

In [26]:
# Load the sample submission file to use as the output template.
submit = pd.read_csv('./data/sample_submission.csv')

In [27]:
# Overwrite every column after the first with the predictions, then preview the result.
submit.iloc[:,1:] = pred
submit.head()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2,2,0,0,0,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
3,3,0,0,1,1,1,1,1,2,2,...,2,2,2,2,2,2,2,2,2,2
4,4,0,0,0,0,0,0,1,1,1,...,1,1,1,1,1,1,1,1,1,1


In [28]:
# Write the submission file without the DataFrame index.
# NOTE(review): the file name says "hidden_90", but the model above was built
# with its default hidden_size of 1024 -- confirm the name is intended.
submit.to_csv('./data/hidden_90_Temporal.csv', index=False)