<a href="https://colab.research.google.com/github/yyyewon/walkvsrun/blob/main/encoder_visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt
import numpy as np

# === Dataset preparation === #
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over the rows of a tabular time series.

    Sample ``i`` is the window of ``seq_length`` consecutive feature rows
    starting at row ``i``; its label is the target value of the row that
    immediately follows the window (row ``i + seq_length``).

    Args:
        data: Table indexable as ``data[features]`` / ``data[target]`` whose
            results expose ``.values`` (e.g. a pandas DataFrame).
        features: Column names used as model inputs.
        target: Column name holding the class label.
        seq_length: Number of consecutive rows in each window.
    """

    def __init__(self, data, features, target, seq_length=50):
        self.features = data[features].values
        self.targets = data[target].values
        self.seq_length = seq_length

    def __len__(self):
        # Clamp at zero: when there are fewer rows than ``seq_length`` the raw
        # difference is negative, and ``len()`` raises ValueError on negative
        # results instead of reporting an empty dataset.
        return max(0, len(self.features) - self.seq_length)

    def __getitem__(self, idx):
        """Return ``(window, label)`` as a float32 and a long tensor.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        original_idx = idx
        if idx < 0:
            # Support Python-style negative indexing instead of silently
            # slicing a truncated or empty window.
            idx += size
        if not 0 <= idx < size:
            raise IndexError(
                f"index {original_idx} out of range for dataset of size {size}"
            )
        X = self.features[idx:idx + self.seq_length]  # [seq_length, input_dim]
        y = self.targets[idx + self.seq_length]
        return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.long)

# === Transformer-based classifier === #
class TransformerClassifier(nn.Module):
    """Sequence classifier: linear embedding -> Transformer encoder -> linear head.

    Each time step is projected from ``input_dim`` to ``d_model``, the sequence
    is passed through a stack of ``num_layers`` encoder layers, and the encoder
    output at the final time step is mapped to ``num_classes`` logits.
    No positional encoding is added to the embedded sequence.

    Args:
        input_dim: Number of features per time step.
        num_classes: Number of output logits.
        d_model: Width of the embedding and encoder layers.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, input_dim, num_classes, d_model=128, nhead=8, num_layers=4):
        super().__init__()
        # Sub-modules are created in a fixed order so that seeded weight
        # initialisation stays reproducible.
        self.embedding = nn.Linear(in_features=input_dim, out_features=d_model)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead)
        self.transformer = nn.TransformerEncoder(
            self.encoder_layer,
            num_layers=num_layers,
        )
        self.fc = nn.Linear(in_features=d_model, out_features=num_classes)

    def forward(self, x):
        """Map ``x`` of shape [batch_size, seq_length, input_dim] to class logits."""
        embedded = self.embedding(x)  # [batch_size, seq_length, d_model]
        # The encoder is built without batch_first, so it expects sequence-major
        # input: [seq_length, batch_size, d_model].
        sequence_major = embedded.transpose(0, 1)
        encoded = self.transformer(sequence_major)  # [seq_length, batch_size, d_model]
        final_step = encoded[-1]  # representation of the last time step
        return self.fc(final_step)

# === Load data === #
file_path = "walkvsrun_sorted.csv"
data = pd.read_csv(file_path)

# === Preprocessing === #
# Unparseable timestamps become NaT and are dropped together with any other
# row that contains a missing value.
data['full_datetime'] = pd.to_datetime(data['full_datetime'], errors='coerce')
data = data.dropna().reset_index(drop=True)

# Keep only rows recorded at most 1 second after the previous row. The first
# row has no predecessor (NaN difference) and is therefore dropped as well.
# NOTE(review): this removes the row that follows a gap but does not stop a
# sliding window from spanning the gap itself.
data['time_diff'] = data['full_datetime'].diff().dt.total_seconds()
data = data[data['time_diff'] <= 1].reset_index(drop=True)

data.drop(columns=['date', 'time', 'username', 'wrist', 'full_datetime', 'time_diff'], inplace=True)

features = ['acceleration_x', 'acceleration_y', 'acceleration_z', 'gyro_x', 'gyro_y', 'gyro_z']
target = 'activity'

# Train/Test split.
# The datasets below cut windows of *consecutive* rows, so the row order must
# be preserved: a shuffled row-level split would build every window from
# time-scrambled samples. shuffle=False keeps the first 80% of rows for
# training and the last 20% for testing (stratify cannot be combined with
# shuffle=False in scikit-learn).
train_data, test_data = train_test_split(data, test_size=0.2, shuffle=False)
train_data = train_data.copy()
test_data = test_data.copy()

# Normalization.
# Statistics come from the training portion only, so nothing about the test
# distribution leaks into training. A zero standard deviation (constant
# column) is replaced by 1 to avoid dividing by zero.
feature_mean = train_data[features].mean()
feature_std = train_data[features].std().replace(0, 1)
train_data[features] = (train_data[features] - feature_mean) / feature_std
test_data[features] = (test_data[features] - feature_mean) / feature_std
# Keep the full frame on the same scale for any later use of `data`.
data[features] = (data[features] - feature_mean) / feature_std

# Build the Datasets and DataLoaders.
seq_length = 100  # window size (rows per sample)
input_dim = len(features)
num_classes = data[target].nunique()

train_dataset = TimeSeriesDataset(train_data, features, target, seq_length)
test_dataset = TimeSeriesDataset(test_data, features, target, seq_length)

# Shuffling here only reorders whole windows between batches; the rows inside
# each window stay in chronological order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# === Model training === #
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TransformerClassifier(input_dim=input_dim, num_classes=num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

# Training loop: one optimisation pass over train_loader per epoch, followed
# by an evaluation pass over test_loader. Per-epoch results are appended to
# the three history lists.
epochs = 10
loss_values, accuracy_values, f1_values = [], [], []

for epoch in range(epochs):
    # ---- optimisation pass ----
    model.train()
    batch_losses = []

    for X, y in train_loader:
        X = X.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        output = model(X)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    # Mean of the per-batch losses for this epoch.
    train_loss = sum(batch_losses)
    loss_values.append(train_loss / len(train_loader))

    # ---- evaluation pass (no gradients) ----
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for X, y in test_loader:
            X = X.to(device)
            y = y.to(device)
            output = model(X)
            # Predicted class = index of the largest logit in each row.
            predicted = torch.max(output, 1).indices
            y_true.extend(y.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')
    accuracy_values.append(accuracy)
    f1_values.append(f1)

    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss_values[-1]:.4f}, Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}")

# === Visualization === #
# Draw training loss, accuracy and F1 score per epoch on one shared axis.
epoch_axis = range(1, epochs + 1)
curves = (
    # (values, marker, linestyle, label, color)
    (loss_values, 'o', '-', 'Training Loss', 'b'),
    (accuracy_values, 's', '--', 'Accuracy', 'g'),
    (f1_values, 'd', '-.', 'F1 Score', 'r'),
)

plt.figure(figsize=(10, 6))
for series, marker, linestyle, label, color in curves:
    plt.plot(epoch_axis, series, marker=marker, linestyle=linestyle, label=label, color=color)
plt.xlabel('Epochs')
plt.ylabel('Metric Values')
plt.title('Transformer Training Metrics (Loss, Accuracy, F1 Score)')
plt.legend()
plt.grid()
plt.show()





Epoch 1/10, Loss: 0.6976, Accuracy: 0.5002, F1 Score: 0.3336
Epoch 2/10, Loss: 0.6938, Accuracy: 0.4998, F1 Score: 0.3331
