<a href="https://colab.research.google.com/github/veerendra12/CS598-DL4H-Project/blob/main/notebooks/Configuration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torchvision.transforms as transforms

### Configuration
This notebook serves as the central configuration file for the entire model pipeline. It includes settings such as:


*   Data set and results storage directory paths
*   Hyperparameters such as *number of epochs*, *learning rate*, *mini-batch size*, etc.
*   Transformations for training and validation
*   And so on...



In [None]:
# Fraction of the overall data that is sampled for training and validation
# combined. Example: 0.5 means 50% of the whole data set is randomly sampled
# for the train and validation sets.
SAMPLE_RATIO = 0.5  # 50%

# Base directory for the NIH data set.
BASE_DIR = '/content/DataSets/NIHDataSet/'

# The 14 named class labels of the NIH CXR data set.
CLASS_LABELS = [
    'Atelectasis',
    'Cardiomegaly',
    'Effusion',
    'Infiltration',
    'Mass',
    'Nodule',
    'Pneumonia',
    'Pneumothorax',
    'Consolidation',
    'Edema',
    'Emphysema',
    'Fibrosis',
    'Pleural_Thickening',
    'Hernia',
]

# Central settings dictionary for the pipeline: hyperparameters, image size,
# data-loader options, file locations and normalization statistics.
CONFIG = {
    "RANDOM_SEED": 2022,

    "LEARNING_RATE": 1e-4,

    "BATCH_SIZE": 16,
    "NUM_EPOCHS": 2,
    "NUM_WORKERS": 2,

    "IMAGE_HEIGHT": 256,
    "IMAGE_WIDTH": 256,

    "PIN_MEMORY": True,
    "LOAD_MODEL": False,

    "IMAGE_DATA_SET_TYPE": '',
    "MASK_SUFFIX_NO_EXTN": '',

    "SAMPLE_RATIO": SAMPLE_RATIO,

    # Test-to-train ratio. Example: 0.2 means 80% training data and 20% test
    # data.
    # NOTE: keep this description as a `#` comment. Written as a string
    # literal after the comma, it is implicitly concatenated with the next
    # key's string literal, which replaces "TRAIN_CSV" with a junk key.
    "TEST_TRAIN_SPLIT": 0.2,

    # CSV file names carry the sample ratio they were generated with.
    "TRAIN_CSV": f"train_{SAMPLE_RATIO}.csv",
    "VALIDATION_CSV": f"validation_{SAMPLE_RATIO}.csv",

    "BASE_DIR": BASE_DIR,
    "IMAGE_DIR": BASE_DIR + "images/",
    "NIH_CXR_DATA_ONEHOT_FULL_CSV": BASE_DIR + 'NIH_CXR_Data_OneHot_Full.csv',

    # Mean and standard deviation of the ImageNet data set.
    "MEAN": [0.485, 0.456, 0.406],
    "STD": [0.229, 0.224, 0.225],

    "CLASS_LABELS": CLASS_LABELS,
    "NUM_CLASSES": len(CLASS_LABELS),

    # Saved model checkpoints.
    "SEGMENTATION_BEST_MODEL": '/content/DataSets/LungSegmentationDataset/results/unet_chesxray_mcu_8.pth',
    "FEM1_BEST_MODEL": BASE_DIR + "results/fem-1_nih_2.pth",
    "FEM2_BEST_MODEL": BASE_DIR + "results/fem-2_nih_2.pth",
}

# Transformations for the NIH CXR data set during classification training and
# validation.
#
# Both pipelines resize, center-crop and normalize with the same CONFIG values
# so that validation images reach the model with the same size and value
# distribution as training images. The only difference is the random
# horizontal flip, which is applied during training only.
NIH_TRANSFORMS = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.Resize(CONFIG['IMAGE_HEIGHT']),
        transforms.CenterCrop(CONFIG['IMAGE_WIDTH']),
        transforms.ToTensor(),
        transforms.Normalize(CONFIG['MEAN'], CONFIG['STD']),
    ]),
    'validation': transforms.Compose([
        transforms.Resize(CONFIG['IMAGE_HEIGHT']),
        transforms.CenterCrop(CONFIG['IMAGE_WIDTH']),
        transforms.ToTensor(),
        transforms.Normalize(CONFIG['MEAN'], CONFIG['STD']),
    ]),
}

# Transformations for the segmentation model: resize and center-crop to 224,
# convert to a tensor, then normalize with mean 0 and std 1 per channel.
SEGMENTATION_TRANSOFRM = transforms.Compose(
    [
        transforms.Resize(size=224),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0]),
    ]
)



# Expose the transform pipelines through CONFIG alongside the other settings.
CONFIG.update({
    'NIH_TRANSFORMS': NIH_TRANSFORMS,
    'SEGMENTATION_TRANSOFRM': SEGMENTATION_TRANSOFRM,
})