데이터 전처리

In [11]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch


# 데이터 병합 및 전처리 함수
def preprocess_data(df1, df2, test = False):
    # 데이터 병합
    df = pd.merge(df1, df2, on='건물번호', how='left')
    
    # 필요한 전처리 수행 (예: 결측치 처리, 데이터 형변환 등)
    # 결측치 처리
    df = df.fillna(0)

    # '건물번호' 원-핫 인코딩
    df = pd.get_dummies(df, columns=['건물번호'])
    
    # '일시'를 연, 월, 일, 시간으로 분리
    df['일시'] = pd.to_datetime(df['일시'], format='%Y%m%d %H')
    df['연'] = df['일시'].dt.year
    df['월'] = df['일시'].dt.month
    df['일'] = df['일시'].dt.day
    df['시간'] = df['일시'].dt.hour
    df = df.drop('일시', axis=1)  # '일시' 열 제거



    X = df.drop('전력소비량(kWh)', axis=1, errors='ignore')  # '전력소비량(kWh)' 열이 없는 경우 무시
    
    if test:
        return X
    else:
        y = df['전력소비량(kWh)']
        return X, y





# 예측에 사용할 피처 선택
features = ['건물번호', '기온(C)', '강수량(mm)', '풍속(m/s)', '습도(%)', '일조(hr)', '일사(MJ/m2)', '연면적(m2)', '냉방면적(m2)', '시간']
target = '전력소비량(kWh)'

# 데이터셋 클래스 정의
class MyDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X.values, dtype=torch.float32)
        self.y = torch.tensor(y.values, dtype=torch.float32)
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


# 데이터 불러오기
df1_train = pd.read_csv('C:\Workspace\power_consumption_comp/data/train.csv')
df2_train = pd.read_csv('C:\Workspace\power_consumption_comp\data/building_info.csv')
df1_test = pd.read_csv('C:\Workspace\power_consumption_comp\data/test.csv')

# 데이터 전처리
X_train, y_train = preprocess_data(df1_train, df2_train)
X_test = preprocess_data(df1_test, df2_train, test = True)  # 테스트 데이터에는 '전력소비량(kWh)'가 없으므로 df2_train을 사용

# 데이터셋 객체 생성
train_dataset = MyDataset(X_train, y_train)
test_dataset = MyDataset(X_test,  torch.zeros(len(X_test))) # 타겟 데이터가 없는 경우 0으로 채움



# DataLoader 객체 생성
batch_size = 32
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.

네트워크

In [None]:
import torch
from torch import nn

class ConvLSTM(nn.Module):
    def __init__(self):
        super(ConvLSTM, self).__init__()
        self.conv = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.lstm = nn.LSTM(input_size=64, hidden_size=64, num_layers=1, batch_first=True)
        self.fc = nn.Linear(64, 1)

    def forward(self, x):
        x = x.unsqueeze(1)  # Add channel dimension
        x = self.conv(x)
        x = x.transpose(1, 2)  # Swap time and channel dimensions for LSTM
        x, _ = self.lstm(x)
        x = self.fc(x[:, -1, :])  # Use only the last output of the sequence
        return x


학습

In [None]:
import torch
from torch import optim

# Initialize the model, loss function, and optimizer
model = ConvLSTM(X)
criterion = nn.MSELoss()  # For regression tasks
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Move the model to the device (CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Training loop
for epoch in range(100):  # 100 epochs
    model.train()
    for inputs, targets in train_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        valid_loss = 0
        for inputs, targets in test_dataloader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets)
            valid_loss += loss.item()

    print(f'Epoch {epoch+1}, Validation Loss: {valid_loss/len(valid_loader)}')


예측