In [None]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split

import pytorch.core.models
import pytorch.core.models.utils
import schemas.climsim
import visualization.performance

In [None]:
# Read the training subset, then extract the column groups named by the
# climsim schema as plain NumPy arrays.
df = pd.read_feather("/home/data/subset_train.arrow")
X = df.loc[:, schemas.climsim.INPUT_COLUMNS].to_numpy()
y = df.loc[:, schemas.climsim.OUTPUT_COLUMNS].to_numpy()

# Hold out 20% of the rows; the fixed random_state keeps the split identical
# from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

class Dataset:
    """Pair two equally long indexable collections for row-by-row access.

    Provides ``__len__`` and ``__getitem__``, the two methods a
    ``torch.utils.data.DataLoader`` requires from a map-style dataset.

    Args:
        X: Indexable collection of inputs (must support ``len()`` and ``[]``).
        y: Indexable collection of targets, the same length as ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not have the same length.
    """

    def __init__(self, X, y):
        # Fail fast on misaligned data: otherwise a longer ``y`` is silently
        # truncated (length comes from ``X`` only) and a shorter ``y`` only
        # surfaces as an IndexError partway through iteration.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples, taken from ``X``."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(X[idx], y[idx])`` pair for the given index."""
        return self.X[idx], self.y[idx]

In [None]:
# Seed torch's global RNG (same value as the train/test split) so the
# training shuffle order -- and presumably the MLP's initial weights -- are
# repeatable after Restart & Run All.
torch.manual_seed(42)

model = pytorch.core.models.MLP()

# Settings shared by both loaders, kept in one place so they cannot drift apart.
loader_kwargs = dict(
    batch_size=3072,
    num_workers=4,
    prefetch_factor=4,
    pin_memory=True,
)

dataset_train = Dataset(X_train, y_train)
trainloader = torch.utils.data.DataLoader(
    dataset_train,
    shuffle=True,
    **loader_kwargs,
)

# Validation data is read in a fixed order: shuffling adds nothing to
# evaluation and would make batch composition differ from run to run.
dataset_test = Dataset(X_test, y_test)
valloader = torch.utils.data.DataLoader(
    dataset_test,
    shuffle=False,
    **loader_kwargs,
)

# NOTE(review): train() is a project helper; from this call it returns the
# model, a set of "best" weights and a loss record -- confirm the exact
# contents against pytorch.core.models.utils.
model, best_weights, loss = pytorch.core.models.utils.train(
    model=model,
    dataloaders={"Training": trainloader, "Validation": valloader},
    num_epochs=10,
)

# close=False presumably keeps the figure open so it renders inline -- TODO confirm.
visualization.performance.loss_curve(loss, close=False)