In [None]:
import os

# Configure the CUDA caching allocator (max_split_size_mb limits which cached
# blocks may be split). Set here, ahead of the torch import in the next cell.
os.environ.update(PYTORCH_CUDA_ALLOC_CONF='max_split_size_mb:64')

In [None]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torchvision.models.efficientnet as efficientnet
from tqdm import tqdm
from torch.nn import Linear
from torch.nn.functional import l1_loss
from torch import optim
from torch.utils.data import DataLoader
from statistics import mean
from copy import deepcopy
from utils import (
    Dataset,
    TrainResult,
    TestResult,
    weighted_mse_loss,
    train_transform,
    test_transform
)

In [None]:
# Hand cached, unused CUDA memory back to the driver before starting.
torch.cuda.empty_cache()

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Load the training table from a Parquet file in the working directory.
# The fold-splitting cell reads a 'weight' column from this frame.
df = pd.read_parquet('train.pqt')

In [None]:
# Split the table into 5 disjoint folds drawn by weighted sampling without
# replacement; the last fold absorbs the rows left over by the integer division.
#
# The remainder is `len(df) % 5`. The previous `len(df) % (len(df) // 5)` only
# matched that for tables of 25+ rows; smaller tables silently lost rows, and
# fewer than 5 rows raised ZeroDivisionError.
#
# Rows are removed from a working copy rather than from `df` itself, so this
# cell can be re-run without reloading the data.
#
# NOTE(review): no random_state is passed to `sample`, so fold membership
# depends on NumPy's global RNG state and differs between runs.
n_folds = 5
fold_size, leftover = divmod(len(df), n_folds)
samples_sizes = [fold_size] * n_folds
samples_sizes[-1] += leftover

remaining = df.copy()
samples = []
for size in samples_sizes:
    fold = remaining.sample(size, weights=remaining['weight'])
    # Drop by the sampled index labels before they are reset below.
    remaining = remaining.drop(fold.index)
    samples.append(fold.reset_index(drop=True))

In [None]:
# EfficientNetV2-S backbone initialised from the ImageNet-1k (V1) checkpoint.
pretrained_weights = efficientnet.EfficientNet_V2_S_Weights.IMAGENET1K_V1
model = efficientnet.efficientnet_v2_s(weights=pretrained_weights)

# Replace the final classifier layer with a single-output linear head that
# keeps the same input width.
head_in_features = model.classifier[1].in_features
model.classifier[1] = Linear(head_in_features, 1)

In [None]:
# Make every parameter trainable, switch to training mode, and move the
# network to the selected device.
model.requires_grad_(True)
model.train()
model = model.to(device)

# Report the total parameter count followed by the trainable parameter count.
param_counts = [(p.numel(), p.requires_grad) for p in model.parameters()]
all_params = sum(count for count, _ in param_counts)
grad_params = sum(count for count, trainable in param_counts if trainable)
print(all_params, grad_params, sep='\n')

In [None]:
# AdamW over every model parameter at the initial learning rate. The epoch
# loop later replaces this object with a new AdamW using lr=1e-5.
optimizer = optim.AdamW(model.parameters(), lr=1e-4)

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer, batch_size) -> TrainResult:
    """Train ``model`` for one pass over ``dataloader``.

    Every 100 batches, and once more after the final batch, the loss and the
    L1 difference averaged over the batches since the previous report are
    printed together with the smallest and largest prediction seen in that
    window, and the two averages are recorded.

    Args:
        dataloader: Sized iterable yielding ``(x, y, w)`` batches and exposing
            ``.dataset`` with a length (used only for the progress counter).
        model: Network to optimise; put into training mode here.
        loss_fn: Callable invoked as ``loss_fn(pred, y, w)``; its result is
            back-propagated.
        optimizer: Optimizer stepped once per batch.
        batch_size: Nominal batch size, used only for the progress counter.

    Returns:
        ``TrainResult(losses, diffs)`` with one plain-float entry per report
        window in each list.
    """
    model.train()

    size = len(dataloader.dataset)
    size_batches = len(dataloader)
    checkpoint = 100
    running_loss = 0.0
    running_diff = 0.0
    losses = []
    diffs = []
    min_rate = float('inf')
    max_rate = float('-inf')

    for batch, (x, y, w) in enumerate(dataloader):
        x = x.to(device)
        y = y.to(device)
        w = w.to(device)

        pred = model(x)
        loss = loss_fn(pred, y, w)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # All monitoring statistics are taken from a detached view and reduced
        # to Python floats. Accumulating the raw l1_loss tensor would keep
        # autograd graph nodes reachable from `running_diff` / `diffs` and
        # would store tensors next to the float losses.
        detached = pred.detach()
        running_loss += loss.item()
        running_diff += l1_loss(detached, y).item()
        min_rate = min(min_rate, detached.min().item())
        max_rate = max(max_rate, detached.max().item())

        is_last_batch = batch + 1 == size_batches
        # Number of batches in the current report window: `checkpoint` on a
        # window boundary, otherwise the count since the last boundary.
        batch_edge = ((batch + 1) % checkpoint) or checkpoint

        if is_last_batch or batch_edge == checkpoint:
            current = (batch * batch_size) + len(x)
            point_loss = running_loss / batch_edge
            point_diff = running_diff / batch_edge
            losses.append(point_loss)
            diffs.append(point_diff)
            print(f"Avg diff: {point_diff:>1f}, Avg loss: {point_loss:>1f}, Min rate: {min_rate:>1f}, Max rate: {max_rate:>1f}  [{current:>5d}/{size:>5d}]")
            # Start a fresh window.
            running_loss = 0.0
            running_diff = 0.0
            min_rate = float('inf')
            max_rate = float('-inf')

    return TrainResult(losses, diffs)

In [None]:
def test_loop(dataloader, model, loss_fn) -> TestResult:
    """Evaluate ``model`` on ``dataloader`` with gradients disabled.

    Args:
        dataloader: Iterable yielding ``(x, y, w)`` batches and exposing
            ``.dataset`` with a length.
        model: Network to evaluate; put into eval mode here.
        loss_fn: Callable invoked as ``loss_fn(pred, y, w)``.

    Returns:
        ``TestResult(accuracy, loss, diff, min_rate, max_rate, max_diff, df)``
        where ``loss`` is the sample-weighted average of the batch losses,
        ``diff`` is the mean absolute ``pred - y``, ``accuracy`` is twice
        ``diff``, ``min_rate`` / ``max_rate`` are the extreme predictions,
        ``max_diff`` is the largest absolute error, and ``df`` holds the mean
        signed and absolute error per 0.05-wide target bin.

    NOTE(review): ``accuracy`` being ``2 * diff`` is kept exactly as it was;
    confirm that this is the intended metric.
    """
    model.eval()

    size = len(dataloader.dataset)
    running_loss = .0
    abs_diff_sum = .0
    min_rate = float('inf')
    max_rate = float('-inf')
    max_diff = float('-inf')
    # Per-batch arrays are collected and concatenated once after the loop;
    # concatenating inside the loop re-copies the whole history every batch.
    # The empty float32 seeds keep the result defined (and its dtype the same
    # as before) when the loader yields nothing.
    target_chunks = [np.array([], dtype=np.float32)]
    diff_chunks = [np.array([], dtype=np.float32)]

    with torch.no_grad():
        for x, y, w in tqdm(dataloader):
            x = x.to(device)
            y = y.to(device)
            w = w.to(device)

            pred = model(x)
            loss = loss_fn(pred, y, w)
            # Weight each batch loss by its sample count so the final division
            # by `size` gives a per-sample average; assumes loss_fn returns a
            # batch mean (TODO confirm).
            running_loss += loss.item() * y.shape[0]

            diffs = (pred - y).flatten()
            abs_diffs = diffs.absolute()
            max_diff = max(max_diff, abs_diffs.max().item())
            abs_diff_sum += abs_diffs.sum().item()

            min_rate = min(min_rate, pred.min().item())
            max_rate = max(max_rate, pred.max().item())

            target_chunks.append(y.flatten().cpu().numpy())
            diff_chunks.append(diffs.cpu().numpy())

    running_loss /= size
    running_diff = abs_diff_sum / size
    accuracy = running_diff * 2 if running_diff else 0.0

    plot_x = np.concatenate(target_chunks)
    plot_y = np.concatenate(diff_chunks)

    df = pd.DataFrame({'rate': plot_x, 'diffs': plot_y})
    df['abs_diffs'] = df['diffs'].abs()
    # Label every row with the right edge of its (left-open) 0.05-wide bin.
    # NOTE(review): with pd.cut's defaults a target of exactly 0.0, or one
    # outside (0, 1], gets no bin and drops out of the grouped means.
    df['range'] = pd.cut(df['rate'], bins=np.arange(0.0, 1.001, 0.05)).apply(lambda x: x.right)
    df = df.groupby('range', as_index=False).mean()

    return TestResult(accuracy, running_loss, running_diff, min_rate, max_rate, max_diff, df)

In [None]:
# Mini-batch size used for both the training and the evaluation loaders.
batch_size = 8

# Per-epoch history consumed by the loss-curve plot.
epochs, train_y, test_y = [], [], []

# Record of the best epoch so far. The infinite 'loss' sentinel means the
# first epoch's result always replaces it.
best_state = dict(
    accuracy=float('-inf'),
    loss=float('inf'),
    diff=float('inf'),
    min_rate=float('inf'),
    max_rate=float('-inf'),
    max_diff=float('-inf'),
    epoch=None,
    state=None,
)

# Summary line printed after every epoch.
results_template = (
    'Test Results: \n'
    ' Accuracy: {:>1f}, Avg loss: {:>1f},'
    ' Min rate: {:>1f}, Max rate: {:>1f},'
    ' Max diff {:>1f}'
)

In [None]:
# Main training run: 30 epochs, each cycling through every fold as the
# held-out set while training on the remaining folds.
#
# NOTE(review): the same `model` is trained across all folds, so from the
# second fold of the first epoch onward each "test" fold has already been used
# as training data. The reported test metrics are therefore not measured on
# unseen rows; confirm that this is intended.
for t in range(30):
    print(f"Epoch {t + 1}\n-------------------------------")

    if t == 20:
        # Lower the learning rate for the remaining epochs. This builds a new
        # optimizer, so AdamW's accumulated state starts over here.
        optimizer = optim.AdamW(model.parameters(), lr=1e-5)

    train_results = []
    test_results = []
    for k, test_df in enumerate(samples):
        print(f"\nk-{k + 1}")
        # Train on every fold except the one currently held out.
        train_df = pd.concat([sample for sample in samples if sample is not test_df], ignore_index=True)
        train_data = Dataset(train_df, transforms=train_transform)
        test_data = Dataset(test_df, transforms=test_transform)
        train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
        test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
        train_result = train_loop(train_dataloader, model, weighted_mse_loss, optimizer, batch_size)
        test_result = test_loop(test_dataloader, model, weighted_mse_loss)
        train_results.append(train_result)
        test_results.append(test_result)

    # Merge the per-fold results into one result per epoch.
    train_result = TrainResult.concat(train_results)
    test_result = TestResult.concat(test_results)
    epochs.append(t)
    train_y.append(mean(train_result.losses))
    test_y.append(test_result.loss)

    # Keep a snapshot whenever the combined test loss improves.
    if test_result.loss < best_state['loss']:
        best_state['accuracy'] = test_result.accuracy
        best_state['loss'] = test_result.loss
        best_state['diff'] = test_result.diff
        best_state['min_rate'] = test_result.min_rate
        best_state['max_rate'] = test_result.max_rate
        best_state['max_diff'] = test_result.max_diff
        best_state['epoch'] = t + 1
        best_state['state'] = deepcopy(model.state_dict())
        # NOTE(review): this pickles the whole module object, not only its
        # state_dict, so loading needs the same class definitions importable.
        torch.save(model, 'current-EfficientNetV2-S.pth')

    print(
        results_template.format(
            test_result.accuracy,
            test_result.loss,
            test_result.min_rate,
            test_result.max_rate,
            test_result.max_diff
        )
    )

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20,5))
    ax1.plot(epochs, train_y, label='train')
    ax1.plot(epochs, test_y, label='test')
    ax1.set(title='Loss per epoch', xlabel='epoch', ylabel='loss')
    ax1.legend()
    ax2.plot(test_result.df['range'], test_result.df['diffs'])
    ax2.set(title='Mean signed error per target bin', xlabel='range', ylabel='diffs')
    ax3.plot(test_result.df['range'], test_result.df['abs_diffs'])
    ax3.set(title='Mean absolute error per target bin', xlabel='range', ylabel='abs_diffs')
    plt.show()
    # One figure is created per epoch; release it explicitly so figures do not
    # pile up on backends that keep them open after show().
    plt.close(fig)