# Trenbalon Team: Kaggle Submission Notebook

This notebook demonstrates our solution for the datathon challenge. The goal of the task is to classify images effectively using Swin Transformers, leveraging transfer learning techniques and efficient training strategies.

### Team Members
- **İlker Yetimoğlu**
- **Yusuf Demir**
- **Ahmet Emin Ersoy**

The notebook is organized into sections for better readability and to ensure seamless reproducibility across different environments.

## Environment Setup

In this section, we set up the environment by installing the required libraries (`datasets`, `transformers`, `evaluate`) and importing the modules used throughout the notebook.

In [1]:
!pip install datasets transformers evaluate

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m4.

In [2]:
import os
import numpy as np
from datasets import load_dataset, Features, ClassLabel, Value, Image
from PIL import Image as PILImage
import torch
import evaluate

from transformers import (AutoFeatureExtractor, SwinForImageClassification,
                          Trainer, TrainingArguments, EarlyStoppingCallback)

from torchvision import transforms

from sklearn.model_selection import train_test_split

## Data Loading and Preprocessing

Here, we load the dataset and preprocess it for training. This includes image normalization and resizing using torchvision transforms.

In [None]:
# Dataset locations: adjust these three values to match your environment.
train_csv_path = "datathon-ai-qualification-round/train_data.csv" # path to YOUR train_data.csv
test_csv_path = "datathon-ai-qualification-round/test.csv" # path to YOUR test.csv
global_path = "datathon-ai-qualification-round" # root folder of YOUR competition data


# Class names; a city's position in this list is used as its integer label.
cities = ["Istanbul", "Ankara", "Izmir"]


# Both CSV files are read with the "csv" builder; split="train" is requested for the test file as well.
train_dataset = load_dataset("csv", data_files=train_csv_path, split="train")
test_dataset = load_dataset("csv", data_files=test_csv_path, split="train")

def add_image_path_train(examples):
    """Add an "image" column holding the file path of every training image.

    Args:
        examples: A batch (dict of columns) that contains a "filename" list.

    Returns:
        The same batch object, with "image" set to
        ``<global_path>/train/train/<filename>`` for each entry.
    """
    train_dir = os.path.join(global_path, "train", "train")
    image_paths = []
    for name in examples["filename"]:
        image_paths.append(os.path.join(train_dir, name))
    examples["image"] = image_paths
    return examples

def add_image_path_test(examples):
    """Add an "image" column holding the file path of every test image.

    The test images are addressed as ``<global_path>/test/test/<filename>``,
    mirroring the doubled ``train/train`` folder used for the training images
    and the path used by the inference section of this notebook.

    Args:
        examples: A batch (dict of columns) that contains a "filename" list.

    Returns:
        The same batch object with the "image" column added.
    """
    test_dir = os.path.join(global_path, "test", "test")
    examples["image"] = [os.path.join(test_dir, fname) for fname in examples["filename"]]
    return examples

# Add the "image" path column to both datasets, processing rows in batches.
train_dataset = train_dataset.map(add_image_path_train, batched=True)
test_dataset = test_dataset.map(add_image_path_test, batched=True)

def city_to_id(example):
    """Attach the integer class id of the example's city as "label".

    Args:
        example: A single row (dict) containing a "city" value.

    Returns:
        The same row with "label" set to the city's index in ``cities``.

    Raises:
        ValueError: If the city is not one of the known classes. Failing here
            avoids rows without a label reaching the later casting and
            stratified-split steps.
    """
    city = example["city"]
    try:
        example["label"] = cities.index(city)
    except ValueError:
        raise ValueError(
            f"Unknown city {city!r}; expected one of {cities}"
        ) from None
    return example

# Add the integer "label" column to the training rows (one row at a time).
train_dataset = train_dataset.map(city_to_id)

# Target schema for the training set: "label" becomes a ClassLabel whose names
# are the entries of `cities`, and "image" becomes an Image feature.
features_train = Features({
    "filename": Value("string"),
    "city": Value("string"),
    "label": ClassLabel(names=cities),
    "image": Image()
})

# Target schema for the test set: same columns, but without "label".
features_test = Features({
    "filename": Value("string"),
    "city": Value("string"),
    "image": Image()
})

# Apply the schemas. The transform functions defined later call
# img.convert("RGB") on the "image" column, so they expect PIL-style images.
train_dataset = train_dataset.cast(features=features_train)
test_dataset = test_dataset.cast(features=features_test)


In [None]:
# Labels of every training row, used to keep class proportions equal in both splits.
labels = train_dataset["label"]

# Stratified 80/20 split over row indices; random_state fixes the shuffle so the split is repeatable.
train_indices, val_indices = train_test_split(
    range(len(train_dataset)),
    test_size=0.2,
    stratify=labels,
    random_state=42
)

# Materialise the two subsets from the selected row indices.
train_ds = train_dataset.select(train_indices)
val_ds = train_dataset.select(val_indices)

In [None]:
def transform_train(examples):
    """Apply the training transform to a batch of images.

    Every image in ``examples["image"]`` is converted to RGB and passed
    through ``train_transform``; the results are stored under "pixel_values".

    Returns:
        The same batch object with the "pixel_values" column added.
    """
    pixel_values = []
    for picture in examples["image"]:
        rgb_picture = picture.convert("RGB")
        pixel_values.append(train_transform(rgb_picture))
    examples["pixel_values"] = pixel_values
    return examples

def transform_val(examples):
    """Apply the validation transform to a batch of images.

    Every image in ``examples["image"]`` is converted to RGB and passed
    through ``val_transform``; the results are stored under "pixel_values".

    Returns:
        The same batch object with the "pixel_values" column added.
    """
    pixel_values = []
    for picture in examples["image"]:
        rgb_picture = picture.convert("RGB")
        pixel_values.append(val_transform(rgb_picture))
    examples["pixel_values"] = pixel_values
    return examples

def transform_test(examples):
    """Apply the evaluation-time transform to a batch of test images.

    Uses the same ``val_transform`` pipeline as the validation set: each image
    is converted to RGB, transformed, and stored under "pixel_values".

    Returns:
        The same batch object with the "pixel_values" column added.
    """
    pixel_values = []
    for picture in examples["image"]:
        rgb_picture = picture.convert("RGB")
        pixel_values.append(val_transform(rgb_picture))
    examples["pixel_values"] = pixel_values
    return examples

# Register the per-split transform functions on each dataset.
# NOTE(review): the transform functions reference train_transform / val_transform,
# which are only defined in a later cell. This relies on with_transform deferring
# the call until rows are actually accessed; confirm, or move that cell above this one.
train_ds = train_ds.with_transform(transform_train)
val_ds = val_ds.with_transform(transform_val)
test_dataset = test_dataset.with_transform(transform_test)

## Model Setup
We initialize the Swin Transformer model for image classification using pre-trained weights from Hugging Face.

In [None]:
# Drop the text columns; the collator defined below only reads "pixel_values" and "label".
train_ds = train_ds.remove_columns(["filename", "city"])
val_ds = val_ds.remove_columns(["filename", "city"])
test_dataset = test_dataset.remove_columns(["filename", "city"])

In [None]:
# Pre-trained checkpoint and its matching preprocessing configuration.
model_name = "microsoft/swin-large-patch4-window7-224"
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)

# Target (height, width), taken from the checkpoint's preprocessing config.
resize_size = (feature_extractor.size["height"], feature_extractor.size["width"])

# Final two steps shared by both pipelines: tensor conversion, then
# normalisation with the checkpoint's own mean/std statistics.
_tensor_and_normalize = [
    transforms.ToTensor(),
    transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
]

# Training pipeline: random crop (80-100% of the image area) resized to the
# target size, plus a random horizontal flip, as augmentation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(resize_size, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    *_tensor_and_normalize,
])

# Validation/test pipeline: deterministic resize and centre crop only.
val_transform = transforms.Compose([
    transforms.Resize(resize_size),
    transforms.CenterCrop(resize_size),
    *_tensor_and_normalize,
])

In [None]:
# Load the pre-trained Swin checkpoint with one output class per city.
# ignore_mismatched_sizes=True is passed so that loading tolerates weight
# shapes in the checkpoint that differ from the requested num_labels.
model = SwinForImageClassification.from_pretrained(
    model_name,
    num_labels=len(cities),
    id2label=dict(enumerate(cities)),
    label2id={city: idx for idx, city in enumerate(cities)},
    ignore_mismatched_sizes=True,
)

In [None]:
# Load the "f1" metric once; compute_metrics below reuses this object on every evaluation.
f1_metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    """Compute the macro-averaged F1 score for one evaluation pass.

    Args:
        eval_pred: A pair ``(logits, labels)``.

    Returns:
        A dict with the single key "macro_f1".
    """
    logits, references = eval_pred
    # The predicted class is the index of the largest logit along the last axis.
    predictions = np.argmax(logits, axis=-1)
    scores = f1_metric.compute(predictions=predictions, references=references, average="macro")
    return {"macro_f1": scores["f1"]}

In [None]:
def my_data_collator(features):
    """Collate a list of dataset rows into one model-ready batch.

    Args:
        features: A list of rows, each providing a "pixel_values" tensor and a "label".

    Returns:
        A dict with "pixel_values" (the row tensors stacked along a new
        leading dimension) and "labels" (a tensor built from the row labels).
    """
    images = []
    targets = []
    for row in features:
        images.append(row["pixel_values"])
        targets.append(row["label"])
    return {"pixel_values": torch.stack(images), "labels": torch.tensor(targets)}

## Training
The training loop is configured using the Hugging Face `Trainer` API. It supports distributed training and includes early stopping for optimal performance.

In [None]:
# Hyperparameters.
batch_size = 32
num_epochs = 15
learning_rate = 5e-5
weight_decay = 0.01

training_args = TrainingArguments(
    output_dir="swin_large_2",
    # Optimisation schedule.
    num_train_epochs=num_epochs,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Evaluate, checkpoint and log on the same 200-step cadence.
    evaluation_strategy="steps",
    eval_steps=200,
    save_strategy="steps",
    save_steps=200,
    logging_steps=200,
    # Keep the checkpoint with the highest validation macro-F1.
    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",
    greater_is_better=True,
    # The custom collator reads "pixel_values"/"label" directly, so columns must not be pruned.
    remove_unused_columns=False,
    report_to="none",
    # Mixed precision only when a CUDA device is present.
    fp16=torch.cuda.is_available(),
)
print(f"GPU IS AVALIABLE: {torch.cuda.is_available()}")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    data_collator=my_data_collator,
    compute_metrics=compute_metrics,
    tokenizer=feature_extractor,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

In [None]:
from transformers.trainer_utils import get_last_checkpoint

# Resume only when a checkpoint already exists in the output directory.
# Passing resume_from_checkpoint=True on a fresh run (no checkpoint yet) raises
# an error, which would break "Restart Kernel -> Run All".
_last_checkpoint = (
    get_last_checkpoint(training_args.output_dir)
    if os.path.isdir(training_args.output_dir)
    else None
)
train_output = trainer.train(resume_from_checkpoint=_last_checkpoint)

# Persist the final model (the best one, since load_best_model_at_end=True)
# to the directory that the inference section loads from.
trainer.save_model("swin_large_2/swin_large_2_best")

train_output

## Evaluation and Results

In [None]:
import os
import pandas as pd
from transformers import AutoModelForImageClassification, AutoFeatureExtractor
from datasets import load_dataset
from PIL import Image
import torch

In [None]:
# Class names; the predicted class index is mapped back to a city through this list.
cities = ["Istanbul", "Ankara", "Izmir"]

test_csv_path = "datathon-ai-qualification-round/test.csv" # path to YOUR test.csv
# Reload the raw test CSV (the "csv" builder exposes it under the "train" split).
test_dataset = load_dataset("csv", data_files=test_csv_path, split="train")
global_path = "datathon-ai-qualification-round" # root folder of YOUR competition data

def add_image_path_test(examples):
    """Add an "image" column holding the file path of every test image.

    Args:
        examples: A batch (dict of columns) that contains a "filename" list.

    Returns:
        The same batch object, with "image" set to
        ``<global_path>/test/test/<filename>`` for each entry.
    """
    test_dir = os.path.join(global_path, "test", "test")
    image_paths = []
    for name in examples["filename"]:
        image_paths.append(os.path.join(test_dir, name))
    examples["image"] = image_paths
    return examples

# Add the "image" path column (plain path strings) to the test rows, in batches.
test_dataset = test_dataset.map(add_image_path_test, batched=True)

In [None]:
# Directory holding the fine-tuned weights and the preprocessing config;
# it must exist before this cell is run.
model_path = "swin_large_2/swin_large_2_best"
model = AutoModelForImageClassification.from_pretrained(model_path)
feature_extractor = AutoFeatureExtractor.from_pretrained(model_path)

# Switch the model to evaluation mode before running predictions.
model.eval()

In [None]:
def predict_city(example):
    """Predict the city shown in a single test image.

    Args:
        example: A row (dict) whose "image" value is the path of the image file.

    Returns:
        The same row with "city" set to the predicted class name from ``cities``.
    """
    image_path = example["image"]
    # Open inside a context manager so the file handle is released promptly;
    # convert() returns a new, fully loaded image that stays valid afterwards.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    inputs = feature_extractor(images=image, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)
        # Softmax is monotonic, so argmax over the raw logits picks the same class.
        predicted_label = torch.argmax(outputs.logits, dim=-1).item()

    example["city"] = cities[predicted_label]
    return example

## Submission Preparation

Finally, the predictions are saved in the required format for submission to Kaggle.

In [None]:
# Destination of the submission file.
output_csv_path = "swin_large_2/test_with_predictions.csv"

# Run the model on every test row; predict_city fills in the "city" column.
test_dataset = test_dataset.map(predict_city)

# Keep only the two columns written to the submission file, without the index.
test_results = test_dataset.to_pandas()[["filename", "city"]]
test_results.to_csv(output_csv_path, index=False)

print(f"Sonuçlar {output_csv_path} dosyasına kaydedildi.")
