In [None]:
import os

import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.io as io
from torch.utils.data import DataLoader, Dataset, Subset, random_split
from torchvision import transforms
from tqdm import tqdm

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Load the training annotations and derive the ordered list of distinct
# class names; its length is the number of classes.
labels_df = pd.read_csv('/kaggle/input/bhw-1-dl-2024-2025/bhw1/labels.csv')
unique_categories = labels_df['Category'].unique()
categories = sorted(unique_categories)
n_classes = len(categories)

In [None]:
# Forward lookup: category value -> position in the sorted `categories` list.
category_to_idx = dict(zip(categories, range(len(categories))))
# Reverse lookup, built by inverting the forward mapping.
idx_to_category = dict(zip(category_to_idx.values(), category_to_idx.keys()))

In [None]:
def conv_block(in_channels, out_channels):
    """Build a 3x3 convolution -> batch norm -> ReLU unit.

    The convolution uses padding=1, so the spatial size of the input is kept.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.

    Returns:
        An ``nn.Sequential`` holding the three layers in order.
    """
    layers = [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*layers)

class DenseNet(nn.Module):
    """Three-stage convolutional classifier with space-to-depth skip paths.

    Each stage is five ``conv_block`` units followed by a 2x2 max-pool. The
    input of stages 2 and 3 is additionally rearranged (2x2 spatial patches
    folded into channels) and concatenated with that stage's output, so the
    channel count grows 512 -> 3072 -> 13312 before the 1x1 classification
    head.

    Args:
        num_classes: number of output logits per sample.
    """

    def __init__(self, num_classes):
        super(DenseNet, self).__init__()

        self.conv1 = nn.Sequential(
            conv_block(3, 32),
            conv_block(32, 64),
            conv_block(64, 128),
            conv_block(128, 256),
            conv_block(256, 512),
            nn.MaxPool2d(2)
        )

        self.conv2 = nn.Sequential(
            conv_block(512, 64),
            conv_block(64, 128),
            conv_block(128, 256),
            conv_block(256, 512),
            conv_block(512, 1024),
            nn.MaxPool2d(2)
        )

        # Input: 1024 channels from conv2 + 4 * 512 rearranged conv1 channels.
        self.conv3 = nn.Sequential(
            conv_block(3072, 32),
            conv_block(32, 128),
            conv_block(128, 256),
            conv_block(256, 512),
            conv_block(512, 1024),
            nn.MaxPool2d(2)
        )

        # Input: 1024 channels from conv3 + 4 * 3072 rearranged channels.
        # The attribute name must match the one read in forward(); the head
        # width follows num_classes instead of a hard-coded 200.
        self.conv4 = nn.Conv2d(13312, num_classes, kernel_size=1, bias=False)
        self.norm = nn.BatchNorm2d(num_classes)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.AdaptiveAvgPool2d(output_size=1)

    @staticmethod
    def _space_to_depth(x):
        """Fold every non-overlapping 2x2 patch of ``x`` into the channel axis.

        Turns ``(N, C, H, W)`` into ``(N, 4 * C, H // 2, W // 2)`` so the
        result can be concatenated with a feature map that was max-pooled by 2.
        """
        N, C, H, W = x.size()
        return F.unfold(x, 2, stride=2).view(N, C * 4, H // 2, W // 2)

    def forward(self, x):
        """Compute class logits.

        Args:
            x: image batch of shape ``(N, 3, H, W)``; the spatial size is
                halved three times on the way through the network.

        Returns:
            Tensor of shape ``(N, num_classes)``.
        """
        x = self.conv1(x)

        skip1 = self._space_to_depth(x)
        x = self.conv2(x)
        x = torch.cat((x, skip1), dim=1)

        skip2 = self._space_to_depth(x)
        x = self.conv3(x)
        x = torch.cat((x, skip2), dim=1)

        x = self.conv4(x)
        x = self.norm(x)
        x = self.relu(x)
        x = self.pool(x)

        # Keep the batch dimension: a plain flatten() would merge all samples
        # into one 1-D vector, which the loss and argmax cannot consume.
        return x.flatten(1)

In [None]:
# Size the classifier from the label file rather than a literal, so the
# output width always agrees with category_to_idx / idx_to_category.
model = DenseNet(n_classes).to(device)
print(model)

In [None]:
# Training pipeline: light geometric and photometric augmentation, then
# normalisation of each channel with mean 0.5 and std 0.5.
train_transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.2),
        transforms.RandomVerticalFlip(p=0.2),
        transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 0.5)),
        transforms.RandomRotation(degrees=5),
        transforms.ColorJitter(
            brightness=0.1,
            contrast=0.1,
            saturation=0.1,
            hue=0.05,
        ),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Validation / test pipeline: the same normalisation, no augmentation.
val_transform = transforms.Compose(
    [
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [None]:
class TrainDataset(Dataset):
    """Labelled image dataset backed by an annotations CSV.

    The CSV is read with pandas and must provide an ``Id`` column (image file
    name relative to ``img_dir``) and a ``Category`` column. Rows whose file
    is missing on disk are dropped, and categories are replaced by their
    integer index via the module-level ``category_to_idx`` mapping.

    Args:
        annotations_file: path to the CSV with ``Id`` and ``Category``.
        img_dir: directory containing the image files.
        transform: optional callable applied to the float image tensor.
    """

    def __init__(self, annotations_file, img_dir, transform=None):
        self.img_dir = img_dir
        self.transform = transform

        annotations = pd.read_csv(annotations_file)
        file_exists = annotations['Id'].apply(
            lambda x: os.path.isfile(os.path.join(self.img_dir, x)))
        # .copy() detaches the filtered rows from `annotations`, so the column
        # assignment below is not a chained assignment on a slice.
        self.img_labels = annotations[file_exists].copy()
        self.img_labels['Category'] = self.img_labels['Category'].map(category_to_idx)

    def __len__(self):
        """Return the number of annotated images found on disk."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        The image is decoded with ``torchvision.io.read_image`` and scaled to
        floats in [0, 1] before the optional transform is applied.
        """
        # Address columns by name, matching __init__, instead of assuming
        # their position in the CSV.
        row = self.img_labels.iloc[idx]
        img_path = os.path.join(self.img_dir, row['Id'])
        image = io.read_image(img_path).float() / 255.0
        label = row['Category']
        if self.transform:
            image = self.transform(image)
        return image, label

In [None]:
class TestDataset(Dataset):
    """Unlabelled image dataset over every PNG/JPEG file in a directory.

    File names are collected once at construction time and kept in sorted
    order; each item is the image tensor together with its file name.

    Args:
        img_dir: directory to scan for ``.png``, ``.jpg`` and ``.jpeg`` files
            (extension match is case-insensitive).
        transform: optional callable applied to the float image tensor.
    """

    def __init__(self, img_dir, transform=None):
        image_suffixes = ('.png', '.jpg', '.jpeg')
        self.img_ids = sorted(
            name for name in os.listdir(img_dir)
            if name.lower().endswith(image_suffixes)
        )
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of image files found."""
        return len(self.img_ids)

    def __getitem__(self, idx):
        """Return ``(image, file_name)`` for the file at position ``idx``."""
        file_name = self.img_ids[idx]
        # Decode, then scale the pixel values to floats in [0, 1].
        pixels = io.read_image(os.path.join(self.img_dir, file_name)).float() / 255.0
        if not self.transform:
            return pixels, file_name
        return self.transform(pixels), file_name

In [None]:
# Untransformed view of the whole labelled set; used to size the split.
full_train_dataset = TrainDataset(
    annotations_file='/kaggle/input/bhw-1-dl-2024-2025/bhw1/labels.csv',
    img_dir='/kaggle/input/bhw-1-dl-2024-2025/bhw1/trainval',
)

# 80 % of the samples go to training, the remainder to validation.
train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

# A fixed seed makes the partition reproducible between runs.
train_dataset, val_dataset = random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

In [None]:
# The seeded random_split in the previous cell decided which samples belong to
# training and which to validation. Capture those index lists first: the names
# train_dataset / val_dataset are rebound below, and without the Subset
# wrappers both loaders would see the entire labelled set (i.e. the model
# would be validated on its own training images).
train_indices = train_dataset.indices
val_indices = val_dataset.indices

labels_csv = '/kaggle/input/bhw-1-dl-2024-2025/bhw1/labels.csv'
trainval_dir = '/kaggle/input/bhw-1-dl-2024-2025/bhw1/trainval'

# Same files, different transform per split: augmentation for training only.
train_dataset = Subset(
    TrainDataset(
        annotations_file=labels_csv,
        img_dir=trainval_dir,
        transform=train_transform
    ),
    train_indices
)

val_dataset = Subset(
    TrainDataset(
        annotations_file=labels_csv,
        img_dir=trainval_dir,
        transform=val_transform
    ),
    val_indices
)

train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False, num_workers=4)

test_dataset = TestDataset(img_dir='/kaggle/input/bhw-1-dl-2024-2025/bhw1/test', transform=val_transform)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=4)

In [None]:
# Multi-class classification loss on the raw model outputs.
criterion = nn.CrossEntropyLoss()
# Adam over every model parameter.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Plateau scheduler in 'min' mode with a patience of 5.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=5,
)

In [None]:
# Train for a fixed number of epochs, validating after each one and keeping a
# snapshot of the weights that reached the highest validation accuracy.
best_val_accuracy = 0.0
best_model_state = None

num_epochs = 30

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    loop = tqdm(train_loader, desc=f'epoch [{epoch+1}/{num_epochs}] - train')
    for images, labels in loop:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # loss.item() is the batch mean; weight it by the batch size so the
        # epoch figure is a per-sample average even with a short last batch.
        running_loss += loss.item() * images.size(0)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        loop.set_postfix(loss=loss.item(), accuracy=100 * correct / total)

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = 100 * correct / total
    print(f'epoch [{epoch+1}/{num_epochs}], loss: {epoch_loss:.4f}, accuracy: {epoch_acc:.2f}%')

    # ---- validation pass ----
    model.eval()
    val_running_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        loop = tqdm(val_loader, desc=f'epoch [{epoch+1}/{num_epochs}] - val')
        for images, labels in loop:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = outputs.argmax(dim=1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

            loop.set_postfix(loss=loss.item(), accuracy=100 * val_correct / val_total)

    val_epoch_loss = val_running_loss / len(val_dataset)
    val_epoch_acc = 100 * val_correct / val_total
    print(f'val_loss: {val_epoch_loss:.4f}, val_accuracy: {val_epoch_acc:.2f}%')

    # The plateau scheduler only acts when it is fed the monitored metric;
    # it was created in 'min' mode, so give it the validation loss.
    scheduler.step(val_epoch_loss)

    if val_epoch_acc > best_val_accuracy:
        best_val_accuracy = val_epoch_acc
        # state_dict() returns references to the live parameter tensors, which
        # later optimizer steps overwrite in place. Clone them so the snapshot
        # really holds the best weights.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        torch.save(best_model_state, 'best_model.pth')
        print(f'val_accuracy: {best_val_accuracy:.2f}%')

# Restore the best snapshot; skip if no epoch ever beat the initial 0.0.
if best_model_state is not None:
    model.load_state_dict(best_model_state)

In [None]:
# Predict a category for every test image, collecting the predictions and
# the matching file names in parallel lists.
model.eval()
test_predictions = []
test_image_ids = []

with torch.no_grad():
    loop = tqdm(test_loader, desc='Test')
    for batch_images, batch_ids in loop:
        batch_outputs = model(batch_images.to(device))
        # Index of the largest output per row = predicted class index.
        batch_class_indices = batch_outputs.data.max(dim=1).indices
        test_predictions.extend(
            idx_to_category[class_idx] for class_idx in batch_class_indices.tolist()
        )
        test_image_ids.extend(batch_ids)

In [None]:
# The sample file defines the row order expected in the submission.
sample_submission = pd.read_csv('/kaggle/input/bhw-1-dl-2024-2025/bhw1/sample_submission.csv')

predictions_by_id = pd.DataFrame(
    {'Id': test_image_ids, 'Category': test_predictions}
).set_index('Id')

# Reorder the predictions to follow the sample file, then restore 'Id' as a
# regular column.
submission = predictions_by_id.loc[sample_submission['Id']].reset_index()

submission.to_csv('/kaggle/working/submission.csv', index=False)