In [1]:
import sys

# Make the directory two levels up importable (presumably where the `emgrep`
# package lives -- confirm against the repo layout). The membership check keeps
# the cell idempotent: re-running it no longer stacks duplicate entries.
if "../.." not in sys.path:
    sys.path.append("../..")

In [3]:
import logging

import torch
import matplotlib.pyplot as plt

from emgrep.datasets.dataloader import Dataloader

In [4]:
# Configure the root logger at INFO so that INFO-level records (such as the
# "Getting sequences ..." lines shown under the loading cell) are displayed.
logging.basicConfig(level="INFO")

In [21]:
dataloader = Dataloader(
    # Recording sessions to load.
    days=[1, 2],
    times=[1, 2],
    # Windowing: with these values each sample shown further down has
    # 10 blocks of 300 steps (3000 / 300) -- exact semantics live in Dataloader.
    sec_len=3000,
    block_len=300,
    block_stride=300,
    # Presumably selects how positive pairs are drawn -- TODO confirm in Dataloader.
    positives="subject",
)

In [22]:
# Build the dataset for subjects 1 and 2 from the raw data directory.
ds_sample = dataloader.inter_subject_data(
    subjects=[1, 2],
    data_path="../../data/01_raw/",
)

INFO:root:Getting sequences for 1 at day 1 at time 1.
INFO:root:Getting sequences for 1 at day 1 at time 2.
INFO:root:Getting sequences for 1 at day 2 at time 1.
INFO:root:Getting sequences for 1 at day 2 at time 2.
INFO:root:Getting sequences for 2 at day 1 at time 1.
INFO:root:Getting sequences for 2 at day 1 at time 2.
INFO:root:Getting sequences for 2 at day 2 at time 1.
INFO:root:Getting sequences for 2 at day 2 at time 2.


In [23]:
# Take the first dataset item: an (anchor, positive, info) triple.
sample = ds_sample[0]
anchor, pos, info = sample

# Anchor and positive are each a pair; split both in one step.
(x, y), (px, py) = anchor, pos

# Show the metadata attached to this item.
info

{'subject': 1, 'day': 1, 'time': 1}

In [29]:
# Shapes of the anchor pair and the positive pair, followed by the item metadata.
tuple(t.shape for t in (x, y, pos[0], pos[1])) + (info,)

(torch.Size([10, 300, 16]),
 torch.Size([10, 300]),
 torch.Size([10, 300, 16]),
 torch.Size([10, 300]),
 {'subject': 1, 'day': 1, 'time': 1})

The training setup below starts from this example notebook: https://colab.research.google.com/github/louisfb01/examples/blob/master/colabs/pytorch/Simple_PyTorch_Integration.ipynb#scrollTo=8xZQizxsD-_q

In [52]:
import random
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
from tqdm.notebook import tqdm

from emgrep.models.cpc_model import CPCModel, CPCAR, CPCEncoder

In [48]:
# Run-level switches.
DEBUG = True        # shrinks the batch size for quick local iteration
USE_WANDB = False   # gates the Weights & Biases setup in the next cell

# Hyperparameters read by the cells below.
config = {
    "epochs": 5,
    "batch_size": 64 if not DEBUG else 8,
    "deterministic": True,
    "embed_size": 64,
    "autoreg_size": 64,
    "n_autoreg_layers": 1,
    "learning_rate": 1e-4,
}


In [50]:
if USE_WANDB:
    # wandb is only imported when tracking is switched on.
    import wandb
    wandb.login()
    run = wandb.init(project='emg-cpc')

if config["deterministic"]:
    # Use one fixed integer seed. The previous seeds came from hash() of string
    # literals, which Python salts per interpreter process, so they changed on
    # every kernel restart; "hash(...) % 2**32 - 1" could also evaluate to -1,
    # which np.random.seed rejects.
    SEED = config.get("seed", 42)

    # Ask cuDNN for deterministic kernels and disable its auto-tuner, which may
    # otherwise pick different algorithms from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

# Device configuration: first CUDA device when available, otherwise CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [51]:
def train_one_epoch(loader, optimizer, loss_fn, epoch_index, tb_writer, net=None, log_every=1000):
    """Run one optimisation pass over ``loader`` and return a training loss.

    Args:
        loader: Sized iterable yielding ``(inputs, labels)`` pairs.
        optimizer: Object exposing ``zero_grad()`` and ``step()``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` whose result supports
            ``backward()`` and ``item()``.
        epoch_index: Zero-based epoch number, used to compute the global step
            passed to ``tb_writer``.
        tb_writer: Object exposing ``add_scalar(tag, value, step)``.
        net: Callable mapping inputs to outputs. When omitted, the
            notebook-level ``model`` global is used (the original behaviour).
        log_every: Number of batches per reporting window.

    Returns:
        The mean loss of the last completed reporting window. If the epoch
        was shorter than one window, the mean over all batches seen; ``0.0``
        when ``loader`` is empty.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")
    if net is None:
        # Backward-compatible fallback to the model defined at notebook level.
        net = model

    running_loss = 0.
    last_loss = 0.
    batches_seen = 0
    window_reported = False

    # enumerate() gives the batch index needed for intra-epoch reporting.
    for i, (inputs, labels) in enumerate(loader):
        # Gradients accumulate by default, so clear them for every batch.
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        batches_seen += 1
        if i % log_every == log_every - 1:
            last_loss = running_loss / log_every  # mean loss per batch in this window
            print('  batch {} loss: {}'.format(i + 1, last_loss))
            # Global step across epochs; uses the loader that was passed in
            # (previously an undefined global `training_loader`).
            tb_x = epoch_index * len(loader) + i + 1
            tb_writer.add_scalar('Loss/train', last_loss, tb_x)
            running_loss = 0.
            window_reported = True

    if not window_reported and batches_seen:
        # Short epoch: no window completed, so report the mean over what ran
        # instead of the misleading initial 0.0.
        last_loss = running_loss / batches_seen

    return last_loss

In [34]:
# Build the CPC model from the shared config: encoder + autoregressive head.
encoder = CPCEncoder(config["embed_size"])
regressive_head = CPCAR(
    dimEncoded=config["embed_size"],
    dimOutput=config["autoreg_size"],  # was config["_size"], a key that does not exist
    numLayers=config["n_autoreg_layers"],
)
model = CPCModel(encoder=encoder, ar=regressive_head)
# Adam needs the parameters it should optimise; the learning rate comes from config.
optimizer = torch.optim.Adam(model.parameters(), lr=config["learning_rate"])

# NOTE(review): `training_loader`, `validation_loader` and `loss_fn` are not
# defined in any cell visible in this notebook -- they must be created before
# this cell can run. Both loaders are expected to yield (inputs, labels) pairs.

timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/fashion_trainer_{}'.format(timestamp))
epoch_number = 0

EPOCHS = config["epochs"]

best_vloss = float("inf")

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number + 1))

    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss = train_one_epoch(training_loader, optimizer, loss_fn, epoch_number, writer)

    # Switch to evaluation mode for reporting
    model.train(False)

    running_vloss = 0.0
    n_val_batches = 0
    # No gradients are needed for validation; .item() keeps the running sum a
    # plain float instead of a tensor.
    with torch.no_grad():
        for vinputs, vlabels in validation_loader:
            voutputs = model(vinputs)
            running_vloss += loss_fn(voutputs, vlabels).item()
            n_val_batches += 1

    # An empty validation loader yields NaN instead of crashing on an undefined index.
    avg_vloss = running_vloss / n_val_batches if n_val_batches else float("nan")
    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

    # Log the running loss averaged per batch
    # for both training and validation
    writer.add_scalars('Training vs. Validation Loss',
                    { 'Training' : avg_loss, 'Validation' : avg_vloss },
                    epoch_number + 1)
    writer.flush()

    # Track best performance, and save the model's state
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = 'model_{}_{}'.format(timestamp, epoch_number)
        torch.save(model.state_dict(), model_path)

    epoch_number += 1