In [23]:
# Author: MINDFUL
# Purpose: Experiment settings for the linear regression example

#--------------------------------
# Settings: Input / Output Paths
#--------------------------------

# Folder handed to the logger / trainer, and the CSV file read by the loader

path_results = "/develop/results/linear_regression/"
path_dataset = "/develop/data/regression/linear/data.csv"

#--------------------------------
# Settings: Training Run
#--------------------------------

# Validation: forwarded to the trainer as `check_val_every_n_epoch`

valid_rate = 1

# Reproducibility: value given to `seed_everything`

seed = 123

# CPU: worker count used by the data loaders

num_workers = 1

# GPU: `use_gpu` picks the GPU trainer when truthy, `gpu_list` is passed as its `gpus`

use_gpu = 0
gpu_list = [0, 1]

# Optimization: batch size, number of epochs, and optimizer step size

batch_size = 16
num_epochs = 100
learning_rate = 0.01

#--------------------------------
# Bundle: Parameter Container
#--------------------------------

# Every setting above, keyed by its own variable name

params = dict( path_results = path_results,
               path_dataset = path_dataset,
               valid_rate = valid_rate,
               seed = seed,
               num_workers = num_workers,
               use_gpu = use_gpu,
               gpu_list = gpu_list,
               batch_size = batch_size,
               num_epochs = num_epochs,
               learning_rate = learning_rate )


In [24]:

# Before we begin, let's import the required libraries (Basic & Custom). Below is a summary of their importance:

#--------------------------------
# Import: Basic Python Libraries
#--------------------------------

# 1) os: system operations (e.g., reading and writing files/folders) 
# 2) cv2: Basic image operations (e.g. reading images), image processing, computer vision, and more
# 3) shutil: system operations similar to os but with some extra utility
# 4) Numpy: Linear algebra, data representation (e.g., matrices, vectors), and more
# 5) Matplotlib: Visualizations / Plots
# 6) Torch: Why you are here today (pytorch) 
# 7) Pytorch Lightning: Pytorch but more organized and simplified. This is likely the future of pytorch.

#--------------------------------
# Import: Custom Python Libraries
#--------------------------------

# 1) Logger: Experiment logging tool for results


In [42]:
#--------------------------------
# Import: Basic Python Libraries
#--------------------------------

import os
import cv2
import torch
import shutil
import numpy as np
import matplotlib.pyplot as plt

from typing import Optional
from torch.utils.data import DataLoader
from pytorch_lightning.plugins import DDPPlugin
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning import LightningModule, LightningDataModule

#--------------------------------
# Import: Custom Python Libraries
#--------------------------------

from custom_logger import Logger


ModuleNotFoundError: No module named 'custom_logger'

In [43]:
# Now let's prepare our dataset loader. It's just a CSV file, so we can load it line by line.

In [33]:
#--------------------------------
# Initialize: Custom Dataset 
#--------------------------------

class Dataset:
    """In-memory container that pairs every sample with its label.

    The class also implements the map-style dataset protocol
    (``__len__`` and ``__getitem__``), so an instance can be handed
    directly to ``torch.utils.data.DataLoader``.

    Parameters
    ----------
    samples : indexable sequence of inputs, one entry per example
    labels : indexable sequence of targets, aligned with ``samples``
    """

    def __init__(self, samples, labels):

        self.labels = labels
        self.samples = samples

    #----------------------------
    # Report: Number Of Examples
    #----------------------------

    def __len__(self):

        return len(self.samples)

    #----------------------------
    # Fetch: One (Sample, Label)
    #----------------------------

    def __getitem__(self, index):

        return self.samples[index], self.labels[index]

#--------------------------------
# Load: Training Dataset (.CSV)
#--------------------------------

def load_data(path):
    """Read a comma-separated file of numbers into a ``Dataset``.

    Every non-blank line is split on "," and each field is parsed as a
    float. The last column becomes the labels; all preceding columns
    become the samples.

    Parameters
    ----------
    path : location of the CSV file

    Returns
    -------
    Dataset whose ``samples`` is the 2-D array ``data[:, :-1]`` and whose
    ``labels`` is the 1-D array ``data[:, -1]``.

    Raises
    ------
    ValueError : a field cannot be parsed as a float
    """

    rows = []

    # The context manager releases the file handle even if parsing fails

    with open(path, "r") as data_file:

        for line in data_file:

            line = line.strip()

            # Skip blank lines (e.g., a trailing newline at the end of the file)

            if not line:
                continue

            rows.append([ float(ele) for ele in line.split(",") ])

    data = np.asarray(rows)

    samples, labels = data[:, :-1], data[:, -1]

    return Dataset(samples, labels)


In [34]:
# For pytorch, datasets need a specific format in order to take advantage of pytorch utilities (e.g., dataloaders).
# Because of this, let's make a data module that accomplishes this requirement.

In [35]:
class PLDM(LightningDataModule):
    """Lightning data module that serves the loaded data to the trainer.

    Parameters
    ----------
    params : dict with the keys
        "train"       : object exposing ``samples`` and ``labels``
        "batch_size"  : examples per batch
        "num_workers" : worker processes used by each data loader

    Notes
    -----
    The validation loader iterates over the same examples as the training
    loader; no hold-out split is made.
    """

    def __init__(self, params):

        super().__init__()

        # Load: Dataset Parameters

        self.data = params["train"]

        # Load: Processing Parameters

        self.batch = params["batch_size"]
        self.workers = params["num_workers"]

    #----------------------------
    # Create: Training Datasets
    #----------------------------

    def setup(self, stage: Optional[str] = None):

        # Format: Samples & Labels ( float32 is torch's default parameter dtype )

        samples = torch.as_tensor(self.data.samples, dtype = torch.float32)
        labels = torch.as_tensor(self.data.labels, dtype = torch.float32)

        # Create: Pytorch Datasets ( each item is a (sample, label) pair )

        self.train = torch.utils.data.TensorDataset(samples, labels)
        self.valid = torch.utils.data.TensorDataset(samples, labels)

    #----------------------------
    # Create: Training DataLoader
    #----------------------------

    def train_dataloader(self):

        # persistent_workers is only legal when worker processes exist

        return DataLoader( self.train, batch_size = self.batch,
                           num_workers = self.workers, shuffle = True,
                           persistent_workers = self.workers > 0 )

    #----------------------------
    # Create: Validation Loader
    #----------------------------

    def val_dataloader(self):

        return DataLoader( self.valid, batch_size = self.batch,
                           num_workers = self.workers,
                           persistent_workers = self.workers > 0 )


In [39]:
#--------------------------------
# Initialize: Lightning Model
#--------------------------------

class Linear_Regression(LightningModule):
    """Single-feature linear regression trained with mean squared error.

    Parameters
    ----------
    params : dict with the keys
        "num_epochs"    : total number of training epochs
        "learning_rate" : step size given to the Adam optimizer
    """

    def __init__(self, params):

        super().__init__()

        # Load: Model Parameters

        self.max_epochs = params["num_epochs"]
        self.learning_rate = params["learning_rate"]

        # Initialize: Regression Model ( one input feature, one output )

        self.regressor = torch.nn.Linear(1, 1)

    #----------------------------
    # Create: Objective Function
    #----------------------------

    def objective(self, preds, labels):
        """Return the mean squared error between ``preds`` and ``labels``."""

        # Format: Labels ( same tensor type and shape as the predictions )
        # Without the reshape, (N,) labels against (N, 1) predictions
        # would broadcast to an (N, N) comparison inside the loss.

        labels = labels.type(preds.type()).reshape(preds.shape)

        # Objective: Mean Squared Error

        cost = torch.nn.MSELoss()

        return cost(preds, labels)

    #----------------------------
    # Create: Optimizer Function
    #----------------------------

    def configure_optimizers(self):
        """Return an Adam optimizer over all model parameters."""

        optimizer = torch.optim.Adam(self.parameters(), lr = self.learning_rate)

        return optimizer

    #----------------------------
    # Create: Model Forward Pass
    #----------------------------

    def forward(self, samples):
        """Apply the linear layer to ``samples`` and return the predictions."""

        # Format: Samples ( match the layer's dtype; numpy-loaded data is float64 )

        samples = samples.to(self.regressor.weight.dtype)

        return self.regressor(samples)

    #----------------------------
    # Create: Train Cycle (Epoch)
    #----------------------------

    def training_step(self, batch, batch_idx):
        """Compute the training loss for one ``(samples, labels)`` batch."""

        # Load: Data Batch

        samples, labels = batch

        preds = self(samples)

        # Calculate: Training Loss

        loss = self.objective(preds, labels)

        return loss

    #----------------------------
    # Run: Post Training Script
    #----------------------------

    def training_epoch_end(self, train_step_outputs):
        """Update the experiment logger once a training epoch has finished.

        Nothing is logged for epoch 0. ``train_step_outputs`` is not used.
        """

        # Update: Training Plots

        if(self.current_epoch > 0):

            # NOTE(review): log_training_loss / finalize belong to the custom
            # Logger, which is not visible here; confirm their behavior there.

            logger = self.logger.experiment

            logger.log_training_loss(self.current_epoch)

            # Finalize: Learned Features & Metrics ( Video )

            if(self.current_epoch == self.max_epochs - 1):

                logger.finalize()

    #----------------------------
    # Create: Validation Cycle
    #----------------------------

    def validation_step(self, batch, batch_idx):
        """Return ``(samples, labels, preds)`` for one validation batch."""

        samples, labels = batch

        preds = self(samples)

        return samples, labels, preds

    #----------------------------
    # Run: Post Validation Script
    #----------------------------

    def validation_epoch_end(self, val_step_outputs):
        """Concatenate all validation batches and pass them to the logger.

        ``val_step_outputs`` holds one ``(samples, labels, preds)`` tuple per
        validation batch, as returned by ``validation_step``.
        """

        # Organize: Validation Outputs

        all_samples, all_labels, all_preds = [], [], []

        for group in val_step_outputs:

            samples, labels, preds = group

            all_labels.append( labels )
            all_samples.append( samples )
            all_preds.append( preds.detach() )

        all_preds = torch.cat(all_preds)
        all_labels = torch.cat(all_labels)
        all_samples = torch.cat(all_samples)

        # Logger: Visualizations

        logger = self.logger.experiment
        logger.log_linear_regression(all_samples, all_labels, all_preds, self.current_epoch)


In [40]:
# Lastly, let's create a "Trainer" that will train and validate our model.

In [41]:
# Seed: Global Random State ( data loader workers included )

seed_everything(seed, workers = True)

# Load: Dataset (.CSV) & Register It In The Parameters

dataset = load_data(path_dataset)
params["train"] = dataset

# Wrap: Dataset In The Lightning Data Module

dataset = PLDM(params)

# Build: Model

model = Linear_Regression(params)

# Build: Logger

logger = Logger(path_results, name = "", version = 0)

# Configure: Options Shared By The CPU & GPU Trainers

trainer_options = dict( logger = logger,
                        deterministic = True,
                        max_epochs = num_epochs,
                        num_sanity_val_steps = 0,
                        default_root_dir = path_results,
                        check_val_every_n_epoch = valid_rate )

# Configure: GPU-Only Options ( single node, listed devices, DDP plugin )

if use_gpu:

    trainer_options.update( num_nodes = 1,
                            gpus = gpu_list,
                            plugins = DDPPlugin(find_unused_parameters = False) )

# Train: Model

trainer = Trainer(**trainer_options)

trainer.fit(model, dataset)


Global seed set to 123


NameError: name 'Logger' is not defined