# Directory

In [0]:
from google.colab import drive
drive.mount('/content/drive')

In [2]:
%cd /content/drive/My\ Drive/Colab\ Notebooks

/content/drive/My Drive/Colab Notebooks


# Libraries

In [0]:
# Importing Libraries and Packages
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset

# calculate train time, writing train data to files etc.
import os
import pandas as pd
import numpy as np
from pathlib import Path
from scipy import signal
import pdb

# Dataset

In [0]:
# Extracting data from csv files
class CoolDataset(Dataset):

    # Initialise your data, download etc
    def __init__(self, dir_path: str, input_size: str, convolution=True):
        super().__init__()

        self.files = tuple(Path(dir_path).glob("**/*.csv"))
        self.input_size = input_size
        self.window = signal.gaussian(8, std=3)
        self.convolution = convolution

    def __len__(self):
        return len(self.files)


    def __getitem__(self, idx):
        if idx < len(self.files):
            data, filename = self.read_file(self.files[idx])

            indx = data.iloc[:, 0].to_numpy()
            input = data.iloc[0:, 0:self.input_size].to_numpy()
            output = data.iloc[:]['FS'].to_numpy()

            if self.convolution:
                output = signal.convolve(output, self.window, mode='same')

            input_data = torch.as_tensor(torch.from_numpy(input).float())
            output_data = torch.as_tensor(torch.from_numpy(output).float())
            assert input_data.shape[0] == output_data.shape[0]
        return indx, input_data, output_data, filename

        
    def read_file(self, f):

        df = pd.read_csv(open(f, "r"))
        fname = os.path.basename(f)
        if fname[0:2] == 'RT':
          df = df.drop(['ID','LHEE_X','LHEE_Y',	'LHEE_Z',	'LTOE_X',	'LTOE_Y',	'LTOE_Z',	'LHLX_X',	'LHLX_Y',	'LHLX_Z',	'LPMT5_X',	'LPMT5_Y',	'LPMT5_Z',
                      'LPMT1_X',	'LPMT1_Y',	'LPMT1_Z',	'LDMT1_X',	'LDMT1_Y',	'LDMT1_Z',	'LDMT5_X',	'LDMT5_Y',	'LDMT5_Z',	'LVHEE_X',	'LVHEE_Y',	
                      'LVHEE_Z',	'LVTOE_X',	'LVTOE_Y',	'LVTOE_Z',	'LVHLX_X','LVHLX_Y',	'LVHLX_Z',	'LVPMT5_X',	'LVPMT5_Y',	'LVPMT5_Z',	'LVPMT1_X',	
                      'LVPMT1_Y',	'LVPMT1_Z',	'LVDMT1_X',	'LVDMT1_Y',	'LVDMT1_Z',	'LVDMT5_X',	'LVDMT5_Y',	'LVDMT5_Z','SACR_LHEE',	'SACR_LTOE',	
                      'SACR_LHLX',	'SACR_LPMT5',	'SACR_LPMT1',	'SACR_LDMT1',	'SACR_LDMT5','RVDMT5_X','RVPMT5_X','RVDMT1_X','RVPMT1_X','RVHLX_X','RVTOE_X','RVHEE_X'],axis = 1)
        elif fname[0:2] == 'LT':
          df = df.drop(['ID','RHEE_X','RHEE_Y',	'RHEE_Z',	'RTOE_X',	'RTOE_Y',	'RTOE_Z','RHLX_X',	'RHLX_Y',	'RHLX_Z',	'RPMT5_X',	'RPMT5_Y',	'RPMT5_Z',
                      'RPMT1_X',	'RPMT1_Y',	'RPMT1_Z',	'RDMT1_X',	'RDMT1_Y','RDMT1_Z',	'RDMT5_X',	'RDMT5_Y',	'RDMT5_Z',	'RVHEE_X',	'RVHEE_Y',	
                      'RVHEE_Z',	'RVTOE_X',	'RVTOE_Y',	'RVTOE_Z',	'RVHLX_X','RVHLX_Y',	'RVHLX_Z',	'RVPMT5_X',	'RVPMT5_Y',	'RVPMT5_Z',	'RVPMT1_X',	
                      'RVPMT1_Y',	'RVPMT1_Z',	'RVDMT1_X',	'RVDMT1_Y',	'RVDMT1_Z',	'RVDMT5_X',	'RVDMT5_Y',	'RVDMT5_Z','SACR_RHEE',	'SACR_RTOE',	
                      'SACR_RHLX',	'SACR_RPMT5',	'SACR_RPMT1',	'SACR_RDMT1',	'SACR_RDMT5','LVDMT5_X','LVPMT5_X','LVDMT1_X','LVPMT1_X','LVHLX_X','LVTOE_X','LVHEE_X'],axis = 1)
        return df,fname

# Model

In [0]:
class Network(nn.Module):
    # TO DO
    def __init__(self, config):
        super(Network, self).__init__()

        # Model construct Configuration
        self.input_size = config.input_size
        self.hidden_size = config.hidden_size
        self.output_size = config.output_size
        self.batch_size = config.batch_size
        self.num_layers = config.num_layers
        self.drop_out = config.drop_out
        self.device = config.device

        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, dropout=self.drop_out, batch_first=True,bidirectional=True)
        self.linear = nn.Linear(self.hidden_size * 2, self.output_size)

    def forward(self, x):
        hidden, cell = self.init_hidden()
        out, (hn, cn) = self.lstm(x, (hidden, cell))
        logits = self.linear(out)

        return logits[:, :, -1]

    def init_hidden(self):
        weight = next((self.parameters())).data

        hidden, cell = (weight.new(self.num_layers * 2, self.batch_size, self.hidden_size).zero_().to(self.device),
                        weight.new(self.num_layers * 2, self.batch_size, self.hidden_size).zero_().to(self.device))
        return hidden, cell


# Peak Detection

In [0]:
class peak_detection:

    def peak_comp(self, annotated, predicted):
        dist = []

        if len(predicted) == 0 or len(annotated) == 0:
            return -1

        # if len(predicted) != len(annotated):
        #     return -1

        for a in annotated:
            dist = dist + [min(np.abs(predicted - a))]

        if not len(dist):
            return -1

        return min(dist)

    def width_comp(self, annotated, predicted):
        width = []
        pred_diff = np.abs(predicted[3]-predicted[2])
        true_diff = np.abs(annotated[3]-annotated[2])

        if len(true_diff) == 0 or len(pred_diff) == 0:
            return -1

        # if len(predicted) != len(annotated):
        #     return -1

        for a in true_diff:
            width = width + [min(np.abs(a - pred_diff))]

        if not len(width):
            return -1

        return min(width)

    def eval_prediction(self, y_pred, y_true, filename, plot=True, shift=0):
        sdist = []
        swidth = []

        peakind, _ = signal.find_peaks(y_pred,0.9)
        peakind_true, _ = signal.find_peaks(y_true,0.9)
        results = signal.peak_widths(y_pred, peakind, rel_height=0.5)
        results_true = signal.peak_widths(y_true, peakind_true, rel_height=0.5)

        for k in peakind:
            if plot:
                plt.axvline(x=k)
        sdist.append(self.peak_comp(self, peakind_true, [k + shift for k in peakind]))
        swidth.append(self.width_comp(self, results_true, [k for k in results]))

        if plot:
            plt.plot(y_pred)
            plt.plot(y_true)
            plt.title(filename)
            axes = plt.gca()
            axes.set_xlim([0, y_true.shape[0]])
            my_file = f"{filename[0][0:-9]}"
            plt.savefig(os.path.join(self.png_dir, my_file))
            plt.close()

        return sdist, swidth

    def plot_stats(self, sdist,filename):
        plt.hist(sdist, 100, [0, 100])
        filtered = [k for k in sdist if k >= 0]

        def off_by(threshold, filtered):
            ob = [k for k in filtered if k <= threshold]
            nel = float(len(filtered))
            print("<= %d: %f" % (threshold, len(ob) / float(nel)))

        print("Error distribution:")
        off_by(1, filtered)
        off_by(3, filtered)
        off_by(5, filtered)
        off_by(10, filtered)
        off_by(60, filtered)
        print("Mean distance: %f" % (np.mean(filtered)))
        plt.savefig(os.path.join(self.png_dir,f"{filename[0][0:-9]}_distance_error.png"))
        plt.close()

    def plot_width(self, swidth,filename):
        plt.hist(swidth, 100, [0, 100])
        filtered = [k for k in swidth if k >= 0]

        def off_by(threshold, filtered):
            ob = [k for k in filtered if k <= threshold]
            nel = float(len(filtered))
            print("<= %d: %f" % (threshold, len(ob) / float(nel)))

        print("Width Error distribution:")
        off_by(1, filtered)
        off_by(3, filtered)
        off_by(5, filtered)
        off_by(10, filtered)
        off_by(60, filtered)
        print("Mean Width: %f" % (np.mean(filtered)))
        plt.savefig(os.path.join(self.png_dir,f"{filename[0][0:-9]}_Width_error.png"))
        plt.close()

# Training

In [0]:
class Trainer:

    def __init__(self, model, config):

        # System configuration
        self.device = config.device

        # Model Construction
        self.model = Network(config).float()
        self.model.load_state_dict(model)
        self.model.to(self.device)
        print(self.model)

        # Peak detection and Evaluation
        self.eval_prediction = peak_detection.eval_prediction
        self.peak_comp = peak_detection.peak_comp
        self.width_comp = peak_detection.width_comp
        self.plot_stats = peak_detection.plot_stats
        self.plot_width = peak_detection.plot_width
        self.output_dir = config.output_dir
        self.png_dir = config.png_dir
        self.globaliter = 0

        # DataLoader
        self.test_loader = DataLoader(CoolDataset(r"data/iteration3/test/", config.input_size,convolution=True),batch_size=config.batch_size, drop_last=True, shuffle=False)

    def test(self):

        self.model.eval()
        with torch.no_grad():

            for indx, data, target,filename in self.test_loader:
                error_dist = []
                error_width = []

                data, target=  data.to(self.device), target.to(self.device)

                predictions = self.model(data.float())
                pred = torch.sigmoid(predictions)
                print(filename)
                for i in range(0, pred.shape[0]):
                    dist, width = (self.eval_prediction(self, pred[i], target[i], filename))
                    error_dist.extend(dist)
                    error_width.extend(width)
                    

                self.plot_stats(self, error_dist,filename)
                self.plot_width(self, error_width,filename)

                dist_file = np.column_stack((error_dist, filename))
                width_file = np.column_stack((error_width, filename))
                df_dist = pd.DataFrame(data=dist_file, )
                df_width = pd.DataFrame(data=width_file, )
                df_dist.columns = ["Error in distance", "FileName"]
                df_width.columns = ["Error in width", "FileName"]
                my_file = f"{filename[0][0:-9]}"
                df_dist.to_csv(os.path.join(self.output_dir, f"{my_file}_dist.csv"))
                df_width.to_csv(os.path.join(self.output_dir, f"{my_file}_width.csv"))

                for i in range(0, predictions.shape[0]):
                    indx_pred_target = np.transpose(np.row_stack((indx[i], pred[i], target[i])))
                    df = pd.DataFrame(data=indx_pred_target, )
                    df.columns = ["indx", "pred", "true_output"]
                    df.to_csv(os.path.join(self.output_dir, f"{my_file}.csv"))

                self.globaliter += 1

            

# Main

In [0]:
class Config:

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

In [9]:
def main(loaded_model, model_config):
    trainer = Trainer(loaded_model,  model_config)
    trainer.test()

if __name__ == '__main__':

    if torch.cuda.is_available():
        device = torch.device("cuda")
        print("Running on the GPU")
    else:
        device = torch.device("cpu")
        print("Running on the CPU")

    "Model"
    model_files = tuple(Path(r'data/TrainedModel/FS').glob("**/*pt"))
    model_name = os.path.basename((model_files[0]))
    print(model_name)
    loaded_model = torch.load(r'data/TrainedModel/FS' + model_name, map_location=torch.device('cpu'))
    
    model_config = Config(
          device=device,
          input_size=42,
          batch_size=1,
          hidden_size=512,
          num_layers=5,
          lr=0.0001,
          drop_out=0.2,
          output_size=1,
          output_dir=r'data/output/csv/',
          png_dir=r'data/output/pngs/',
      )
    main(loaded_model, model_config)


Running on the CPU
FS-1-SP-13-WF-42-IS-64-BS-128-SL-512-HS-5-NL-0.001-LR-200-epochs-FScheckpoint.pt


FileNotFoundError: ignored