# Baseline Setup:

### Architecture
Run pre-training with contrastive learning (MoCo framework)
* Use backbone - Pretrained ResNet50
* Fine-tune backbone using contrastive learning (MoCo) -> augment medical imaging dataset (pretext task)
  * Produce: `moco_resnet50_encoder.pth`
* Fine tune ResNet for classification
  * Produce: `finetuned_resnet50_medical.pth`

### Dataset: CheXpert
* Subset: Pneumonia classification only; smaller dataset (to accommodate class imbalance)

### 

In [None]:
# import libraries
import os
import sys
import argparse
from tqdm import tqdm

import numpy as np
import seaborn as sns
import pandas as pd
import math
from copy import deepcopy

%matplotlib inline
import matplotlib.pyplot as plt
from IPython.display import display

import pickle
from datetime import datetime

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import random_split, DataLoader

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score, recall_score

from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.preprocessing import label_binarize

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Colab Needs

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Define filepaths to required input (scripts, data) and outputs

In [None]:
# ----------------------------------------------------
# Inputs root
# ----------------------------------------------------
# Every input and output path below is derived from FP_ROOT. FP_ROOT ends
# with "/", so the pieces appended to it must NOT start with one (a leading
# slash would produce ".../FP//Data/...").
# NOTE: the root contains a space ("Colab Notebooks"), so any path derived
# from it has to be quoted when it is passed to a shell command.

# FP_ROOT_INPUTS = "/content/drive/MyDrive/00 JHU/7_Py_DL/PyTorch_SimCLR_Team_Project/Data_Reduced/"
FP_ROOT = "/content/drive/MyDrive/Colab Notebooks/7_Py_DL/FP/"

# ----------------------------------------------------
# Dataset info
# ----------------------------------------------------
# Folder holding the image archive and the label CSVs (trailing "/" kept,
# matching ROOT_FP_SRC and ROOT_ARTIFACT_SAVE).
ROOT_FP_DATA = FP_ROOT + "Data/"

# Images zip files
DATASET_PATH_SPLIT = ROOT_FP_DATA + "CheXpert_reduced_dataset_split.zip"

# Labels (one CSV per split)
TRAIN_LABELS_CSV = ROOT_FP_DATA + "final_project_updated_names_train.csv"
VAL_LABELS_CSV = ROOT_FP_DATA + "final_project_updated_names_val.csv"
TEST_LABELS_CSV = ROOT_FP_DATA + "final_project_updated_names_test.csv"

# ----------------------------------------------------
# Model SRC
# ----------------------------------------------------
ROOT_FP_SRC = FP_ROOT + "src/"

# ----------------------------------------------------
# Outputs
# ----------------------------------------------------
ROOT_ARTIFACT_SAVE = FP_ROOT + "artifacts/"

In [None]:
# Add the project /src folder to the module search path so the project
# modules can be imported. The membership check keeps the cell idempotent:
# re-running it does not append the same entry again.
if ROOT_FP_SRC not in sys.path:
    sys.path.append(ROOT_FP_SRC)

In [None]:
# Import required module code

from moco.train_moco import train_moco
from moco.model_builder import MoCo
from moco.utils import save_state, load_state
# ----------------------------------------------------

import finetune_resnet

## Unzip data

In [None]:
# Unzip the dataset (image) files to /tmp
DATA_DEST_UNZIPPED = "/tmp/CheXpert_dataset/"
!unzip {DATASET_PATH_SPLIT} -d {DATA_DEST_UNZIPPED}

## 1) Run Pre-training - Contrastive Learning

In [None]:
! python src/train_moco.py \
    --csv_path "$SRC_VIDEO_DATA_UNZIPPED" \
    --root_dir "$TRAIN_LABELS_CSV" \
    --artifact_root "$ROOT_ARTIFACT_SAVE" 