In [None]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split

import src.env
import src.pytorch.core.dataset
import src.pytorch.core.dataset.kaggle
import src.pytorch.core.models
import src.pytorch.core.models.utils
import src.schemas.climsim
import src.visualization.performance

In [None]:
# Filesystem locations used by the cells below.

# Where the trained model is saved after training and reloaded for inference.
MODEL_WEIGHTS_PATH = "./model.pt"

# Feather file read into `df_submission` (the data predictions are made on).
TESTSET_DATA_PATH = "/home/data/test.arrow"

# Feather file read into `df_weights` and passed as `weights` when the
# submission is written.
TESTSET_PREDICTION_WEIGHTS_PATH = "/home/data/sample_submission.arrow"

In [None]:
# Read the training subset from its Feather file.
df = pd.read_feather("/home/data/subset_train.arrow")

# Column lists come from the project's ClimSim schema module.
input_columns = src.schemas.climsim.INPUT_COLUMNS
output_columns = src.schemas.climsim.OUTPUT_COLUMNS

# Extract the model inputs and targets as plain arrays.
X = df.loc[:, input_columns].values
y = df.loc[:, output_columns].values

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Seed torch before the model and loaders are created so that weight
# initialisation and batch shuffling repeat across Restart & Run All, in line
# with the seeded train/test split. This does not by itself guarantee
# bit-identical results on every device.
torch.manual_seed(42)

model = src.pytorch.core.models.MLP()

# Settings shared by the training and validation loaders.
loader_kwargs = dict(
    batch_size=3072,
    num_workers=4,
    prefetch_factor=4,
    pin_memory=True,
)

# Training batches are reshuffled every epoch.
dataset_train = src.pytorch.core.dataset.Dataset(X_train, y_train)
trainloader = torch.utils.data.DataLoader(
    dataset_train,
    shuffle=True,
    **loader_kwargs,
)

# The held-out split is iterated in a fixed order: shuffling adds nothing to
# evaluation and makes per-batch results non-reproducible.
dataset_test = src.pytorch.core.dataset.Dataset(X_test, y_test)
valloader = torch.utils.data.DataLoader(
    dataset_test,
    shuffle=False,
    **loader_kwargs,
)

# train() returns the model, a `best_weights` object and the loss history.
# NOTE(review): `best_weights` is not used later in this notebook — confirm
# that the returned `model` already carries the weights you want to keep.
model, best_weights, loss = src.pytorch.core.models.utils.train(
    model=model,
    dataloaders={"Training": trainloader, "Validation": valloader},
    num_epochs=10,
)

# Plot the loss history returned by train().
# NOTE(review): close=False presumably leaves the figure open for inline display — confirm.
src.visualization.performance.loss_curve(loss, close=False)

In [None]:
# Persist the trained model to MODEL_WEIGHTS_PATH; the inference cells reload
# it from the same path.
# NOTE(review): train() also returned `best_weights`, which is not used here —
# confirm that `model` already holds the weights you intend to save.
src.pytorch.core.models.utils.save_weights(model, MODEL_WEIGHTS_PATH)

In [None]:
# Load data: the rows to predict on and the frame later passed as `weights`.
df_submission = pd.read_feather(TESTSET_DATA_PATH)
df_weights = pd.read_feather(TESTSET_PREDICTION_WEIGHTS_PATH)

# Load model: build a fresh MLP and restore the saved weights into it.
# NOTE(review): load_model's return value is ignored, so it presumably updates
# `model` in place — confirm.
model = src.pytorch.core.models.MLP()
src.pytorch.core.models.utils.load_model(model, MODEL_WEIGHTS_PATH)

# Move the model to the configured device and switch it to evaluation mode so
# that layers such as dropout or batch norm (if the MLP has any) behave
# deterministically at inference. Assumes MLP is a torch.nn.Module, for which
# both .to() and .eval() return the module itself.
model.to(src.env.DEVICE).eval()

In [None]:
# Produce the submission from the reloaded model, the submission frame and the
# weights frame.
# NOTE(review): going by its name this helper writes a compressed parquet file;
# the output location is decided inside the helper — confirm.
write_submission = src.pytorch.core.dataset.kaggle.output_compressed_parquet
write_submission(model=model, df=df_submission, weights=df_weights)