In [0]:
import os
import logging
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
import pytorch_lightning as pl
import torch.optim as optimpip
import pdb
from torch.nn import functional as F
from pytorch_lightning import Trainer
from pathlib import Path
from torch.utils.data import DataLoader, IterableDataset

In [0]:
class CoolDataset(IterableDataset):
    """Iterable dataset that yields one fixed-length window per event.

    Every ``*.csv`` file found recursively below ``dir_path`` is read into a
    DataFrame whose columns are renamed to ``self.colnames``.  For every row
    whose ``event_col`` value equals 1, one ``(inp, out)`` pair is produced:

    * ``inp`` -- DataFrame holding the ``self.input_cols`` columns of the window
    * ``out`` -- Series holding the ``event_col`` values of the same window

    Args:
        dir_path: Directory that is searched recursively for CSV files.
        seq_length: Number of rows per window; must be even.
        event_col: Name of the column that marks events with the value 1.
    """

    def __init__(self, dir_path: str, seq_length: int = 120, event_col="event_heelstrike"):
        # TODO: the column names should come from the CSV header itself; once
        # they do, read_file() can stop overwriting them.
        self.colnames = ["marker_{}".format(i) for i in range(74)]
        self.colnames[-1] = "event_heelstrike"
        self.colnames[-2] = "event_footoff"

        self.input_cols = ["marker_1", "marker_13"]
        self.event_col = event_col
        super().__init__()
        # Sorted so that the iteration order is reproducible; glob() itself
        # gives no ordering guarantee.
        self.files = tuple(sorted(Path(dir_path).glob("**/*.csv")))
        assert seq_length % 2 == 0, "Please pass an even seq length"
        self.seq_length = seq_length
        self._reset_cursor()

    def _reset_cursor(self):
        """Point the iterator at the first event of the first file."""
        # Which file, and which event in that file, is served next?
        self.file_nr = 0
        self.event_in_file = 0
        # Parsed content of the file currently being served (see _current_file).
        self._cached_file_nr = None
        self._cached_file = None

    def __iter__(self):
        """Restart the iteration from the first file and return ``self``."""
        self._reset_cursor()
        return self

    def __next__(self):
        """Return the ``(inp, out)`` window of the next event.

        Raises:
            StopIteration: When every event of every file has been served.
                Also raised for a dataset without files and on every further
                call after exhaustion.
        """
        # A loop (instead of recursion) keeps long runs of event-free files
        # from growing the call stack.
        while self.file_nr < len(self.files):
            df, event_rows = self._current_file()
            if self.event_in_file < len(event_rows):
                event_frame = event_rows[self.event_in_file]
                sample = self.sample_seq_around_event_frame(df, event_frame)
                self.event_in_file += 1
                return sample
            logging.info("File is complete. Going to new file")
            self.file_nr += 1
            self.event_in_file = 0
        # processed the last file, we are done
        raise StopIteration

    def _current_file(self):
        """Return ``(df, event_rows)`` for ``self.file_nr``, parsing the file only once.

        ``event_rows`` lists the positional row numbers at which ``event_col``
        equals 1.
        """
        if self._cached_file_nr != self.file_nr:
            df = self.read_file(self.files[self.file_nr])
            # Positional indices, because the windows are cut with ``iloc``.
            event_rows = [pos for pos, hit in enumerate(df[self.event_col] == 1) if hit]
            self._cached_file = (df, event_rows)
            self._cached_file_nr = self.file_nr
        return self._cached_file

    def sample_seq_around_event_frame(self, df, event_idx):
        """Cut a window of ``self.seq_length`` rows around an event.

        Args:
            df: DataFrame containing ``self.input_cols`` and ``self.event_col``.
            event_idx: Positional row number of the event inside ``df``.

        Returns:
            Tuple ``(inp, out)``: ``inp`` is a DataFrame with the input columns
            of the window, ``out`` a Series with the event column of the window.
            Both have ``self.seq_length`` rows, or ``len(df)`` rows when ``df``
            is shorter than that.
        """
        # Fixed shift of the window relative to the event; per the original
        # note this is meant to become a random offset eventually.
        random_comp = 5
        n_rows = len(df)
        low_idx = event_idx - self.seq_length // 2 + random_comp
        high_idx = low_idx + self.seq_length
        # Slide the window back into the frame instead of letting a negative
        # start wrap around (which ``iloc`` would turn into an empty slice).
        if low_idx < 0:
            low_idx, high_idx = 0, self.seq_length
        if high_idx > n_rows:
            high_idx = n_rows
            low_idx = max(0, n_rows - self.seq_length)
        window = df.iloc[low_idx:high_idx]
        inp = window[self.input_cols]
        # Copy so that callers can never write through to the cached frame.
        out = window[self.event_col].copy()
        return inp, out

    def read_file(self, f):
        """Read one CSV file and label its columns with ``self.colnames``.

        NOTE(review): ``read_csv`` consumes the first line as a header; if the
        files carry no header row the first data row is lost -- confirm.

        Raises:
            ValueError: Raised by pandas when the file does not have exactly
                ``len(self.colnames)`` columns.
        """
        # The context manager closes the handle; it used to be leaked.
        with open(f, "r") as handle:
            df = pd.read_csv(handle)
        df.columns = self.colnames
        return df

    def count_events_per_file(self):
        """Return the number of events (rows with ``event_col == 1``) per file.

        The list follows the order of ``self.files``.
        """
        return [int((self.read_file(f)[self.event_col] == 1).sum()) for f in self.files]

In [0]:
class CoolSystem(pl.LightningModule):
    """LSTM followed by a linear read-out of the last time step.

    Args:
        input_dim: Number of features per time step fed to the LSTM.
        hidden_dim: Size of the LSTM hidden state.
        batch_size: Batch size used by the data loaders, by ``init_hidden`` and
            to reshape non-3-D inputs in ``forward``.
        output_dim: Number of outputs of the linear layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, batch_size, output_dim=1, num_layers=2):
        super().__init__()
        # configs
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.output_dim = output_dim
        self.num_layers = num_layers

        # model construct
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)
        # Read-out layer used by forward(); it used to be a second nn.LSTM that
        # overwrote ``self.lstm`` and left ``self.linear`` undefined.
        self.linear = nn.Linear(self.hidden_dim, self.output_dim)

    def init_hidden(self):
        """Return a zero ``(h_0, c_0)`` pair of shape (num_layers, batch_size, hidden_dim).

        The tensors are created with the dtype and device of the model
        parameters so they stay usable after the model has been moved.
        """
        reference = next(self.parameters())
        shape = (self.num_layers, self.batch_size, self.hidden_dim)
        return (torch.zeros(shape, dtype=reference.dtype, device=reference.device),
                torch.zeros(shape, dtype=reference.dtype, device=reference.device))

    def forward(self, input):
        """Run the LSTM and project its last time step through the linear layer.

        Args:
            input: Sequence-first tensor.  A 3-D tensor is used as is, i.e. as
                (seq_len, batch, input_dim), so a final batch smaller than
                ``self.batch_size`` also works.  Any other shape is reshaped to
                (len(input), self.batch_size, -1).

        Returns:
            The linear layer's output flattened to one dimension.

        NOTE(review): a default ``DataLoader`` stacks samples along dim 0
        (batch-first) while this method treats dim 0 as time -- confirm the
        layout of the batches passed in by the training/test steps.
        """
        if input.dim() == 3:
            sequence = input
        else:
            sequence = input.view(len(input), self.batch_size, -1)

        # lstm_out: (seq_len, batch, hidden_dim)
        # self.hidden: (h_n, c_n), each (num_layers, batch, hidden_dim)
        lstm_out, self.hidden = self.lstm(sequence)

        # Only take the output from the final timestep.
        # Can pass on the entirety of lstm_out to the next layer if it is a seq2seq prediction
        y_pred = self.linear(lstm_out[-1])
        return y_pred.view(-1)

    def training_step(self, batch, batch_idx):
        """REQUIRED. Compute the training loss for one batch and log it as ``train_loss``."""
        x, y = batch
        y_hat = self.forward(x)
        # NOTE(review): forward() returns a flattened 1-D tensor, whereas
        # F.cross_entropy expects per-class scores of shape (N, C) for
        # class-index targets -- confirm which loss is intended here.
        loss = F.cross_entropy(y_hat, y)
        tensorboard_logs = {'train_loss': loss}
        return {'loss': loss, 'log': tensorboard_logs}

    def test_step(self, batch, batch_idx):
        """OPTIONAL. Compute the test loss for one batch."""
        x, y = batch
        y_hat = self.forward(x)
        # NOTE(review): same loss/shape concern as in training_step.
        return {'test_loss': F.cross_entropy(y_hat, y)}

    def configure_optimizers(self):
        """REQUIRED. Return the Adam optimizer (lr=0.02) over all parameters."""
        # can return multiple optimizers and learning_rate schedulers
        # (LBFGS it is automatically supported, no need for closure function)
        return torch.optim.Adam(self.parameters(), lr=0.02)

    @pl.data_loader
    def train_dataloader(self):
        """REQUIRED. Loader over the CSV files below ``./train``."""
        return DataLoader(CoolDataset(Path("./train")), batch_size=self.batch_size)

    @pl.data_loader
    def test_dataloader(self):
        """OPTIONAL. Loader over the CSV files below ``./test``."""
        # batch_size belongs to the DataLoader; it used to be passed to
        # CoolDataset because of a misplaced parenthesis (TypeError).
        return DataLoader(CoolDataset(Path("./test")), batch_size=self.batch_size)

In [0]:
if __name__ == '__main__':
    # Smoke test of the dataset: print the event rows of every sampled window.
    # ds = CoolDataset(r"P:\Projects\NCM\NCM_EXP\NCM_STM\NCM_HRX_Walking")
    # Plain spaces in the path: shell-style "\ " escapes are kept literally by
    # Python, so the escaped path never matched the mounted directory and the
    # loop below silently ran over zero files.
    ds = CoolDataset("/content/drive/My Drive/Colab Notebooks/LMBTrain/data/csv/test")
    for _, out in ds:
        print(out[out == 1])

    model = CoolSystem(30, 4, 32, 1, 3)

    # NOTE(review): learning_rate, num_epochs, X_train and y_train are not
    # defined in any cell visible above this one; they must be provided before
    # this cell can run on a fresh kernel.
    # reduction='sum' is the documented replacement for the deprecated
    # size_average=False.
    loss_fn = torch.nn.MSELoss(reduction='sum')
    optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)

    #####################
    # Train model
    #####################

    hist = np.zeros(num_epochs)

    for t in range(num_epochs):
        # Initialise hidden state
        # Don't do this if you want your LSTM to be stateful
        model.hidden = model.init_hidden()

        # Forward pass
        y_pred = model(X_train)

        loss = loss_fn(y_pred, y_train)
        if t % 100 == 0:
            print("Epoch ", t, "MSE: ", loss.item())
        hist[t] = loss.item()

        # Zero out gradient, else they will accumulate between epochs.
        # The optimiser owns every model parameter, so a separate
        # model.zero_grad() is not needed.
        optimiser.zero_grad()

        # Backward pass
        loss.backward()

        # Update parameters
        optimiser.step()

    """