# Environment and Preparation

In [1]:
#Hyperparameters
hard_label_weight=0.25
DS_weight=0.5
DI_weight=0.5
T=2

In [2]:
# OS and system path
import os
os.chdir("C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis18")

In [3]:
# OS and system path
import sys
sys.path.append('C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis18/Surgical_VQA')

In [4]:
#Warning related 
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [5]:
#Environments
import argparse
import pandas as pd
from lib2to3.pytree import convert
from torch import nn
from torch import optim
import torch.utils.data
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from transformers import BertTokenizer
from torch.utils.data  import DataLoader
from utils import *
from dataloaders.dataloaderClassification import *
from models.VisualBertClassification import VisualBertClassification
from models.VisualBertResMLPClassification import VisualBertResMLPClassification

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# functions to be used later
def seed_everything(seed=27):
    '''
    Set random seed for reproducible experiments
    Inputs: seed number
    '''
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def train(args, train_dataloader, model, criterion, optimizer, epoch, tokenizer, device):

    model.train()

    total_loss = 0.0
    label_true = None
    label_pred = None
    label_score = None


    for i, (_, visual_features, q, labels) in enumerate(train_dataloader,0):

        # prepare questions
        questions = []
        for question in q: questions.append(question)
        inputs = tokenizer(questions, return_tensors="pt", padding="max_length", max_length=args.question_len)


        # GPU / CPU
        visual_features = visual_features.to(device)
        labels = labels.to(device)

        outputs = model(inputs, visual_features)
        loss = criterion(outputs, labels)

        # zero the parameter gradients
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print statistics
        total_loss += loss.item()

        scores, predicted = torch.max(F.softmax(outputs, dim=1).data, 1)
        label_true = labels.data.cpu() if label_true == None else torch.cat((label_true, labels.data.cpu()), 0)
        label_pred = predicted.data.cpu() if label_pred == None else torch.cat((label_pred, predicted.data.cpu()), 0)
        label_score = scores.data.cpu() if label_score == None else torch.cat((label_score, scores.data.cpu()), 0)

    # loss and acc
    acc, c_acc = calc_acc(label_true, label_pred), calc_classwise_acc(label_true, label_pred)
    precision, recall, fscore = calc_precision_recall_fscore(label_true, label_pred)
    print('Train: epoch: %d loss: %.6f | Acc: %.6f | Precision: %.6f | Recall: %.6f | FScore: %.6f' %(epoch, total_loss, acc, precision, recall, fscore))
    return acc


def validate(args, val_loader, model, criterion, epoch, tokenizer, device, save_output = False):

    model.eval()

    total_loss = 0.0
    label_true = None
    label_pred = None
    label_score = None
    file_names = list()

    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        for i, (file_name, visual_features, q, labels) in enumerate(val_loader,0):
            # prepare questions
            questions = []
            for question in q: questions.append(question)
            inputs = tokenizer(questions, return_tensors="pt", padding="max_length", max_length=args.question_len)

            # GPU / CPU
            visual_features = visual_features.to(device)
            labels = labels.to(device)

            outputs = model(inputs, visual_features)
            loss = criterion(outputs,labels)

            total_loss += loss.item()

            scores, predicted = torch.max(F.softmax(outputs, dim=1).data, 1)
            label_true = labels.data.cpu() if label_true == None else torch.cat((label_true, labels.data.cpu()), 0)
            label_pred = predicted.data.cpu() if label_pred == None else torch.cat((label_pred, predicted.data.cpu()), 0)
            label_score = scores.data.cpu() if label_score == None else torch.cat((label_score, scores.data.cpu()), 0)
            for f in file_name: file_names.append(f)

    acc = calc_acc(label_true, label_pred)
    c_acc = 0.0
    precision, recall, fscore = calc_precision_recall_fscore(label_true, label_pred)

    print('Test: epoch: %d loss: %.6f | Acc: %.6f | Precision: %.6f | Recall: %.6f | FScore: %.6f' %(epoch, total_loss, acc, precision, recall, fscore))

    if save_output:
        '''
            Saving predictions
        '''
        if os.path.exists(args.checkpoint_dir + 'text_files') == False:
            os.mkdir(args.checkpoint_dir + 'text_files' )
        file1 = open(args.checkpoint_dir + 'text_files/labels.txt', 'w')
        file1.write(str(label_true))
        file1.close()

        file1 = open(args.checkpoint_dir + 'text_files/predictions.txt', 'w')
        file1.write(str(label_pred))
        file1.close()

        if args.dataset_type == 'med_vqa':
            if args.dataset_cat == 'cat1':
                convert_arr = ['cta - ct angiography', 'no', 'us - ultrasound', 'xr - plain film', 'noncontrast', 'yes', 't2', 'ct w/contrast (iv)', 'mr - flair', 'mammograph', 'ct with iv contrast',
                            'gi and iv', 't1', 'mr - t2 weighted', 'mr - t1w w/gadolinium', 'contrast', 'iv', 'an - angiogram', 'mra - mr angiography/venography', 'nm - nuclear medicine', 'mr - dwi diffusion weighted',
                            'ct - gi & iv contrast', 'ct noncontrast', 'mr - other pulse seq.', 'ct with gi and iv contrast', 'flair', 'mr - t1w w/gd (fat suppressed)', 'ugi - upper gi', 'mr - adc map (app diff coeff)',
                            'bas - barium swallow', 'pet - positron emission', 'mr - pdw proton density', 'mr - t1w - noncontrast', 'be - barium enema', 'us-d - doppler ultrasound', 'mr - stir', 'mr - flair w/gd',
                            'ct with gi contrast', 'venogram', 'mr t2* gradient,gre,mpgr,swan,swi', 'mr - fiesta', 'ct - myelogram', 'gi', 'sbft - small bowel', 'pet-ct fusion']
            elif args.dataset_cat == 'cat2':
                convert_arr = ['axial', 'longitudinal', 'coronal', 'lateral', 'ap', 'sagittal', 'mammo - mlo', 'pa', 'mammo - cc', 'transverse', 'mammo - mag cc', 'frontal', 'oblique', '3d reconstruction', 'decubitus', 'mammo - xcc']
            else:
                convert_arr = ['lung, mediastinum, pleura', 'skull and contents', 'genitourinary', 'spine and contents', 'musculoskeletal', 'heart and great vessels', 'vascular and lymphatic', 'gastrointestinal', 'face, sinuses, and neck', 'breast']
        elif args.dataset_type == 'c80':
            convert_arr = ['no', 'calot triangle dissection', 'yes', '1', '2', 'gallbladder dissection',
                            'clipping cutting', 'gallbladder retraction', '0', 'cleaning coagulation',
                            'gallbladder packaging', 'preparation', '3']
        elif args.dataset_type == 'm18':
            convert_arr = ['kidney', 'Idle', 'Grasping', 'Retraction', 'Tissue_Manipulation',
                            'Tool_Manipulation', 'Cutting', 'Cauterization', 'Suction',
                            'Looping', 'Suturing', 'Clipping', 'Staple', 'Ultrasound_Sensing',
                            'left-top', 'right-top', 'left-bottom', 'right-bottom']

        df = pd.DataFrame(columns=["Img", "Ground Truth", "Prediction"])
        for i in range(len(label_true)):
            df = df.append({'Img': file_names[i], 'Ground Truth': convert_arr[label_true[i]], 'Prediction': convert_arr[label_pred[i]]}, ignore_index=True)

        df.to_csv(args.checkpoint_dir + args.checkpoint_dir.split('/')[1] + '_' + args.checkpoint_dir.split('/')[2] + '_eval.csv')

    return (acc, c_acc, precision, recall, fscore)

In [7]:
# functions to be used later
def validate_18(args, val_loader, model, criterion, epoch, tokenizer, device, save_output = False):

    model.eval()

    total_loss = 0.0
    label_true = None
    label_pred = None
    label_score = None
    file_names = list()

    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        for i, (file_name, visual_features, q, labels, _) in enumerate(val_loader,0):
            # prepare questions
            questions = []
            for question in q: questions.append(question)
            inputs = tokenizer(questions, return_tensors="pt", padding="max_length", max_length=args.question_len)

            # GPU / CPU
            visual_features = visual_features.to(device)
            labels = labels.to(device)

            outputs = model(inputs, visual_features)
            loss = criterion(outputs,labels)

            total_loss += loss.item()

            scores, predicted = torch.max(F.softmax(outputs, dim=1).data, 1)
            label_true = labels.data.cpu() if label_true == None else torch.cat((label_true, labels.data.cpu()), 0)
            label_pred = predicted.data.cpu() if label_pred == None else torch.cat((label_pred, predicted.data.cpu()), 0)
            label_score = scores.data.cpu() if label_score == None else torch.cat((label_score, scores.data.cpu()), 0)
            for f in file_name: file_names.append(f)

    acc = calc_acc(label_true, label_pred)
    c_acc = 0.0
    # c_acc = calc_classwise_acc(label_true, label_pred)
    precision, recall, fscore = calc_precision_recall_fscore(label_true, label_pred)

    print('Test: epoch: %d loss: %.6f | Acc: %.6f | Precision: %.6f | Recall: %.6f | FScore: %.6f' %(epoch, total_loss, acc, precision, recall, fscore))

    if save_output:
        '''
            Saving predictions
        '''
        if os.path.exists(args.checkpoint_dir + 'text_files') == False:
            os.mkdir(args.checkpoint_dir + 'text_files' )
        file1 = open(args.checkpoint_dir + 'text_files/labels.txt', 'w')
        file1.write(str(label_true))
        file1.close()

        file1 = open(args.checkpoint_dir + 'text_files/predictions.txt', 'w')
        file1.write(str(label_pred))
        file1.close()

        if args.dataset_type == 'med_vqa':
            if args.dataset_cat == 'cat1':
                convert_arr = ['cta - ct angiography', 'no', 'us - ultrasound', 'xr - plain film', 'noncontrast', 'yes', 't2', 'ct w/contrast (iv)', 'mr - flair', 'mammograph', 'ct with iv contrast',
                            'gi and iv', 't1', 'mr - t2 weighted', 'mr - t1w w/gadolinium', 'contrast', 'iv', 'an - angiogram', 'mra - mr angiography/venography', 'nm - nuclear medicine', 'mr - dwi diffusion weighted',
                            'ct - gi & iv contrast', 'ct noncontrast', 'mr - other pulse seq.', 'ct with gi and iv contrast', 'flair', 'mr - t1w w/gd (fat suppressed)', 'ugi - upper gi', 'mr - adc map (app diff coeff)',
                            'bas - barium swallow', 'pet - positron emission', 'mr - pdw proton density', 'mr - t1w - noncontrast', 'be - barium enema', 'us-d - doppler ultrasound', 'mr - stir', 'mr - flair w/gd',
                            'ct with gi contrast', 'venogram', 'mr t2* gradient,gre,mpgr,swan,swi', 'mr - fiesta', 'ct - myelogram', 'gi', 'sbft - small bowel', 'pet-ct fusion']
            elif args.dataset_cat == 'cat2':
                convert_arr = ['axial', 'longitudinal', 'coronal', 'lateral', 'ap', 'sagittal', 'mammo - mlo', 'pa', 'mammo - cc', 'transverse', 'mammo - mag cc', 'frontal', 'oblique', '3d reconstruction', 'decubitus', 'mammo - xcc']
            else:
                convert_arr = ['lung, mediastinum, pleura', 'skull and contents', 'genitourinary', 'spine and contents', 'musculoskeletal', 'heart and great vessels', 'vascular and lymphatic', 'gastrointestinal', 'face, sinuses, and neck', 'breast']
        elif args.dataset_type == 'c80':
            convert_arr = ['no', 'calot triangle dissection', 'yes', '1', '2', 'gallbladder dissection',
                            'clipping cutting', 'gallbladder retraction', '0', 'cleaning coagulation',
                            'gallbladder packaging', 'preparation', '3']
        elif args.dataset_type == 'm18':
            convert_arr = ['kidney', 'Idle', 'Grasping', 'Retraction', 'Tissue_Manipulation',
                            'Tool_Manipulation', 'Cutting', 'Cauterization', 'Suction',
                            'Looping', 'Suturing', 'Clipping', 'Staple', 'Ultrasound_Sensing',
                            'left-top', 'right-top', 'left-bottom', 'right-bottom']

        df = pd.DataFrame(columns=["Img", "Ground Truth", "Prediction"])
        for i in range(len(label_true)):
            df = df.append({'Img': file_names[i], 'Ground Truth': convert_arr[label_true[i]], 'Prediction': convert_arr[label_pred[i]]}, ignore_index=True)

        df.to_csv(args.checkpoint_dir + args.checkpoint_dir.split('/')[1] + '_' + args.checkpoint_dir.split('/')[2] + '_eval.csv')

    return (acc, c_acc, precision, recall, fscore)

In [8]:
parser = argparse.ArgumentParser(description='VisualQuestionAnswerClassification')

In [9]:
parser.add_argument('--emb_dim',        type=int,   default=300,                                help='dimension of word embeddings.')

_StoreAction(option_strings=['--emb_dim'], dest='emb_dim', nargs=None, const=None, default=300, type=<class 'int'>, choices=None, required=False, help='dimension of word embeddings.', metavar=None)

In [10]:
parser.add_argument('--n_heads',        type=int,   default=8,                                  help='Multi-head attention.')
parser.add_argument('--dropout',        type=float, default=0.1,                                help='dropout')
parser.add_argument('--encoder_layers', type=int,   default=6,                                  help='the number of layers of encoder in Transformer.')

_StoreAction(option_strings=['--encoder_layers'], dest='encoder_layers', nargs=None, const=None, default=6, type=<class 'int'>, choices=None, required=False, help='the number of layers of encoder in Transformer.', metavar=None)

In [11]:
    # Training parameters
    parser.add_argument('--epochs',         type=int,   default=80,                                 help='number of epochs to train for (if early stopping is not triggered).') #80, 26
    parser.add_argument('--batch_size',     type=int,   default=64,                                 help='batch_size')
    parser.add_argument('--workers',        type=int,   default=1,                                  help='for data-loading; right now, only 1 works with h5pys.')
    parser.add_argument('--print_freq',     type=int,   default=100,                                help='print training/validation stats every __ batches.')

    # existing checkpoint
    parser.add_argument('--checkpoint',     default=None,                                           help='path to checkpoint, None if none.')

    parser.add_argument('--lr',             type=float, default=0.00001,                            help='0.000005, 0.00001, 0.000005')
    parser.add_argument('--checkpoint_dir', default= '/content/drive/MyDrive/Colab Notebooks/research/multi-modality/svqa/checkpoints/18/',    help='med_vqa_c$version$/m18/c80//m18_vid$temporal_size$/c80_vid$temporal_size$') #clf_v1_2_1x1/med_vqa_c3
    parser.add_argument('--dataset_type',   default= 'm18',                                     help='med_vqa/m18/c80/m18_vid/c80_vid')
    parser.add_argument('--dataset_cat',    default= 'None',                                        help='cat1/cat2/cat3')
    parser.add_argument('--transformer_ver',default= 'vbrm',                                        help='vb/vbrm')
    parser.add_argument('--tokenizer_ver',  default= 'v2',                                          help='v2/v3')
    parser.add_argument('--patch_size',     default= 5,                                             help='1/2/3/4/5')
    parser.add_argument('--temporal_size',  default= 3,                                             help='1/2/3/4/5')
    parser.add_argument('--question_len',   default= 25,                                            help='25')
    parser.add_argument('--num_class',      default= 2,                                             help='25')
    parser.add_argument('--validate',       default=False,                                          help='When only validation required False/True')

_StoreAction(option_strings=['--validate'], dest='validate', nargs=None, const=None, default=False, type=None, choices=None, required=False, help='When only validation required False/True', metavar=None)

In [12]:
parser.add_argument('-f')

_StoreAction(option_strings=['-f'], dest='f', nargs=None, const=None, default=None, type=None, choices=None, required=False, help=None, metavar=None)

In [13]:
args = parser.parse_args()

In [14]:
# load checkpoint, these parameters can't be modified
final_args = {"emb_dim": 300, "n_heads": 8, "dropout": 0.1, "encoder_layers": 6}

In [15]:
seed_everything()

In [16]:
# GPU or CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # sets device for model and PyTorch tensors
cudnn.benchmark = True  # set to true only if inputs to model are fixed size; otherwise lot of computational overhead
print('device =', device)

device = cuda


In [17]:
# best model initialize
start_epoch = 1
best_epoch = [0]
best_results = [0.0]
epochs_since_improvement = 0

In [18]:
# tokenizer
tokenizer = None
tokenizer = BertTokenizer.from_pretrained("C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis18")

In [19]:
tokenizer

BertTokenizer(name_or_path='C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis18', vocab_size=237, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True)

In [20]:
args.num_class = 18

In [22]:
model = VisualBertClassification(vocab_size=len(tokenizer), layers=6, n_heads=8, num_class = args.num_class)

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

In [None]:
def str2list(target_str):
  res=target_str.strip('[')
  res=res.strip(']')
  res=res.split(',')

  for i in range(len(res)):
    res[i] = res[i].strip() # remove the space

  new_list = [float(x) for x in res]
  return new_list[:20]

In [None]:
# training function
def train_lwf(args, train_dataloader, model, criterion, optimizer, epoch, tokenizer, device):

    model.train()

    total_loss = 0.0
    label_true = None
    label_pred = None
    label_score = None


    for i, (_, visual_features, q, labels, t5_loss) in enumerate(train_dataloader,0):

        label_number = labels.numpy()[0]

        # prepare questions
        questions = []
        for question in q: questions.append(question)
        inputs = tokenizer(questions, return_tensors="pt", padding="max_length", max_length=args.question_len)

        # t5 loss
        t5_loss_list = []
        for j in range(len(t5_loss)):
          tmp = str2list(t5_loss[j])
          t5_loss_list.append(tmp)
        check = np.reciprocal(t5_loss_list)
        t5_loss_tensor = torch.tensor(check)

        t5_loss_tensor = t5_loss_tensor.to(device)

        # GPU / CPU
        visual_features = visual_features.to(device)
        labels = labels.to(device)

        outputs = model(inputs, visual_features)
        soft_target = model_old(inputs, visual_features)

        loss1 = criterion(outputs, labels)

        outputs_S = F.softmax(outputs[:,:out_features]/T,dim=1)
        outputs_T = F.softmax(soft_target[:,:out_features]/T,dim=1)

        outputs_t5_loss = F.softmax(t5_loss_tensor[:,:out_features]/T,dim=1)

        loss2 = outputs_T.mul(-1*torch.log(outputs_S))
        loss2 = loss2.sum(1)
        loss2 = loss2.mean()*T*T

        loss3 = outputs_t5_loss.mul(-1*torch.log(outputs_S))
        loss3 = loss3.sum(1)
        loss3 = loss3.mean()*T*T

        loss = loss1 * acc_weight.at[label_number,'weight_true_label'] + loss2 * acc_weight.at[label_number,'weight_soft'] + loss3 * acc_weight.at[label_number,'weight_llm']

        # zero the parameter gradients
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print statistics
        total_loss += loss.item()

        scores, predicted = torch.max(F.softmax(outputs, dim=1).data, 1)
        label_true = labels.data.cpu() if label_true == None else torch.cat((label_true, labels.data.cpu()), 0)
        label_pred = predicted.data.cpu() if label_pred == None else torch.cat((label_pred, predicted.data.cpu()), 0)
        label_score = scores.data.cpu() if label_score == None else torch.cat((label_score, scores.data.cpu()), 0)

    # loss and acc
    acc, c_acc = calc_acc(label_true, label_pred), calc_classwise_acc(label_true, label_pred)
    precision, recall, fscore = calc_precision_recall_fscore(label_true, label_pred)
    print('Train: epoch: %d loss: %.6f | Acc: %.6f | Precision: %.6f | Recall: %.6f | FScore: %.6f' %(epoch, total_loss, acc, precision, recall, fscore))
    return acc


# Calculate imbalance ratio (IR) for the given dataset

In [None]:
def question2tool_18(question):
    question = question.strip('?') # remove the question mark
    split = question.split()
    tool = 'error'
    for i in range(len(split)):
        if split[i] in instrument_18:
            tool = split[i]

    return tool

instrument_18 = ['bipolar_forceps','prograsp_forceps','monopolar_curved_scissors','ultrasound_probe','large_needle_driver','suction','clip_applier','stapler']

def question2tool_17(question):
    question = question.strip('?') # remove the question mark
    split = question.split()
    tool = 'error'
    for i in range(len(split)):
        if split[i] in instrument_17:
            tool = split[i]
    
    return tool

instrument_17 = ['bipolar_forceps','prograsp_forceps','monopolar_curved_scissors','ultrasound_probe','large_needle_driver']

def question2tool_DAISI(question):
    question = question.strip('?') # remove the question mark
    split = question.split()
    tool = split[-1]
    return tool

In [None]:
train_seq = np.arange(1,380).tolist()
val_seq = np.arange(380,475).tolist()

folder_head = 'C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/new_data_daisi/seq_'
folder_tail = '/vqa/*.txt'

train_seq_new = np.arange(1,1500).tolist()
val_seq_new = np.arange(1500,1875).tolist()

folder_head_new = 'C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/DAISI_v1_0830/seq_'
folder_tail_new = '/vqa/*.txt'

train_dataset = DAISI_VQA_Combine(train_seq, folder_head, folder_tail, train_seq_new, folder_head_new, folder_tail_new, patch_size = 5)
train_dataloader = DataLoader(dataset=train_dataset, batch_size= 1, shuffle=True)
val_dataset = DAISI_VQA_Combine(val_seq, folder_head, folder_tail, val_seq_new, folder_head_new, folder_tail_new, patch_size = 5)
val_dataloader = DataLoader(dataset=val_dataset, batch_size= 1, shuffle=False)

In [None]:
q = []
a = []

for i in range(len(train_dataset.vqas)):
    q.append(train_dataset.vqas[i][1].split('|')[0])
    a.append(train_dataset.vqas[i][1].split('|')[1])
    
from pandas.core.frame import DataFrame

c={"question" : q,
   "answer" : a}
data_daisi=DataFrame(c)

for i in range(len(data_daisi)):
    data_daisi.at[i,'q_type'] = question2tool_DAISI(data_daisi.at[i,'question'])

data_daisi['type'] = 'daisi'

In [None]:
data_daisi

In [None]:
train_seq = [2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15]
val_seq = [1, 5, 16]

folder_head = 'C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis18/seq_'
folder_tail = '/vqa/Classification_t5_loss/*.txt'

train_dataset = EndoVis18VQAClassification(train_seq, folder_head, folder_tail, patch_size = 5)
train_dataloader = DataLoader(dataset=train_dataset, batch_size= 1, shuffle=True)

val_dataset = EndoVis18VQAClassification(val_seq, folder_head, folder_tail, patch_size = 5)
val_dataloader = DataLoader(dataset=val_dataset, batch_size= 1, shuffle=False)

In [None]:
q = []
a = []

for i in range(len(train_dataset.vqas)):
    q.append(train_dataset.vqas[i][1].split('|')[0])
    a.append(train_dataset.vqas[i][1].split('|')[1])
    
from pandas.core.frame import DataFrame

c={"question" : q,
   "answer" : a}
data=DataFrame(c)

for i in range(len(data)):
    data.at[i,'q_type'] = question2tool_18(data.at[i,'question'])

data['type'] = 18

In [None]:
data

In [None]:
train_seq = [1,2,6,9]
val_seq = [8]

folder_head = 'C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis17/seq_'
folder_tail = '/vqa/*.txt'

train_dataset = EndoVis17VQAClassification(train_seq, folder_head, folder_tail, patch_size = 5)
train_dataloader = DataLoader(dataset=train_dataset, batch_size= 64, shuffle=True)
val_dataset = EndoVis17VQAClassification(val_seq, folder_head, folder_tail, patch_size = 5)
val_dataloader = DataLoader(dataset=val_dataset, batch_size= 64, shuffle=False)

In [None]:
q = []
a = []

for i in range(len(train_dataset.vqas)):
    q.append(train_dataset.vqas[i][1].split('|')[0])
    a.append(train_dataset.vqas[i][1].split('|')[1])
    
from pandas.core.frame import DataFrame

c={"question" : q,
   "answer" : a}
data_17=DataFrame(c)

data_17['q_type'] = ''
for i in range(len(data_17)):
    data_17.at[i,'q_type'] = question2tool_17(data_17.at[i,'question'])

data_17['type'] = 17

In [None]:
data_17

In [None]:
data_tmp = data.append(data_17)
data_tmp = data_tmp.append(data_daisi)
data_all = data_tmp[data_tmp['q_type']!='error']

In [None]:
y=data_all['q_type'].value_counts().values
x=data_all['q_type'].value_counts().index
frequency = pd.DataFrame()
frequency.index = x
frequency['17+18+daisi'] = y

y_17=data_all[data_all['type']==17]['q_type'].value_counts().values
x=data_all[data_all['type']==17]['q_type'].value_counts().index
frequency_17 = pd.DataFrame()
frequency_17.index = x
frequency_17['17'] = y_17

y_daisi=data_all[data_all['type']=='daisi']['q_type'].value_counts().values
x=data_all[data_all['type']=='daisi']['q_type'].value_counts().index
frequency_daisi = pd.DataFrame()
frequency_daisi.index = x
frequency_daisi['daisi'] = y_daisi

y_18=data_all[data_all['type']==18]['q_type'].value_counts().values
x=data_all[data_all['type']==18]['q_type'].value_counts().index
frequency_18 = pd.DataFrame()
frequency_18.index = x
frequency_18['18'] = y_18

In [None]:
frequency_all = frequency.join(frequency_18).join(frequency_17).join(frequency_daisi)

frequency_all = frequency_all.fillna(0)
frequency_all['17+18'] = frequency_all['17'] + frequency_all['18']

frequency_all

In [None]:
tmp = frequency_all[frequency_all['17+18+daisi']!=1]

In [None]:
tmp

In [None]:
max_17_18_daisi=frequency_all['17+18+daisi'].max()
tmp = frequency_all[['17+18+daisi']].values
tmp1 = tmp.nonzero()
min_17_18_daisi = tmp[tmp1].min()

In [None]:
import math
IR_17_18_daisi = max_17_18_daisi / min_17_18_daisi
ln_IR_17_18_daisi = math.log(IR_17_18_daisi)

# train DAISI

In [None]:
train_seq = np.arange(1,380).tolist()
val_seq = np.arange(380,475).tolist()

folder_head = 'C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/new_data_daisi/seq_'
folder_tail = '/vqa/*.txt'

train_seq_new = np.arange(1,1500).tolist()
val_seq_new = np.arange(1500,1875).tolist()

folder_head_new = 'C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/DAISI_v1_0830/seq_'
folder_tail_new = '/vqa/*.txt'

train_dataset = DAISI_VQA_Combine(train_seq, folder_head, folder_tail, train_seq_new, folder_head_new, folder_tail_new, patch_size = 5)
train_dataloader = DataLoader(dataset=train_dataset, batch_size= 1, shuffle=True)
val_dataset = DAISI_VQA_Combine(val_seq, folder_head, folder_tail, val_seq_new, folder_head_new, folder_tail_new, patch_size = 5)
val_dataloader = DataLoader(dataset=val_dataset, batch_size= 1, shuffle=False)

In [None]:
# old model
checkpoint_old = torch.load('C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis18/ablation/checkpoints_all/17+18/checkpoint_1718.pth.tar')
model_old = checkpoint_old['model']

In [None]:
# new model
checkpoint = torch.load('C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis18/ablation/checkpoints_all/17+18/checkpoint_1718.pth.tar')

In [None]:
model = checkpoint['model']
optimizer = checkpoint['optimizer']

In [None]:
# change the last FC layer for new model (add the node for new classes)
num_new_class = 2

def kaiming_normal_init(m):
	if isinstance(m, nn.Conv2d):
		nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
	elif isinstance(m, nn.Linear):
		nn.init.kaiming_normal_(m.weight, nonlinearity='sigmoid')

# Old number of input/output channel of the last FC layer in old model
in_features = model.classifier.in_features
out_features = model.classifier.out_features

# Old weight/bias of the last FC layer
weight = model.classifier.weight.data
bias = model.classifier.bias.data

# New number of output channel of the last FC layer in new model
new_out_features = num_new_class + out_features

# Creat a new FC layer and initial it's weight/bias
new_fc = nn.Linear(in_features, new_out_features)
kaiming_normal_init(new_fc.weight)
new_fc.weight.data[:out_features] = weight
new_fc.bias.data[:out_features] = bias

# Replace the old FC layer
model.classifier = new_fc

In [None]:
model_old.classifier = new_fc

In [None]:
# Move to GPU, if available
model = model.to(device)
model_old = model_old.to(device)
print(final_args)
pytorch_total_params = sum(p.numel() for p in model.parameters())
print('model params: ', pytorch_total_params)

In [None]:
# Loss function
criterion = nn.CrossEntropyLoss().to(device)

In [None]:
args.checkpoint_dir = 'C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis18/ablation/checkpoints_all/17+18+daisi/'

In [None]:
# best model initialize
start_epoch = 1
best_epoch = [0]
best_results = [0.0]
epochs_since_improvement = 0

In [None]:
out_features = model.classifier.out_features

# Acc of the conventional teacher and the LLM teacher

In [None]:
# 计算soft_target的准确率等指标

label = []
label_soft_list = []

for i, (_, visual_features, q, labels, t5_loss) in enumerate(train_dataloader,0):

    label_number = labels.numpy()[0]

    label += labels.tolist()

    # prepare questions
    questions = []
    for question in q: questions.append(question)

    inputs = tokenizer(questions, return_tensors="pt", padding="max_length", max_length=args.question_len)
    
    # GPU / CPU
    visual_features = visual_features.to(device)
    labels = labels.to(device)
    
    soft_target = model_old(inputs, visual_features) # soft_target is the output of the old model
    output_class_ranks = torch.argsort(soft_target, dim=-1, descending=True)
    
    label_soft = []
    for j in range(len(output_class_ranks)):
        label_soft.append(int(output_class_ranks[j][0]))
    
    label_soft_list += label_soft

In [None]:
from pandas.core.frame import DataFrame
from sklearn.metrics import accuracy_score
import math

c={"label" : label,
   "label_soft_list" : label_soft_list}
data=DataFrame(c)

acc_soft = []

for i in range(20):
    label_part = []
    label_soft_part = []

    for j in range(len(data)):
        if data.at[j,'label'] == i:
            label_part.append(data.at[j,'label'])
            label_soft_part.append(data.at[j,'label_soft_list'])

    acc_soft.append(accuracy_score(label_part, label_soft_part))
acc_soft = [0 if math.isnan(x) else x for x in acc_soft]

In [None]:
acc_soft

In [None]:
# calculate the ACC of the LLM teacher

label = []
label_llm_list = []

for i, (_, visual_features, q, labels, t5_loss) in enumerate(train_dataloader,0):
  label += labels.tolist()

  t5_loss_list = []
  for j in range(len(t5_loss)):
    tmp = str2list(t5_loss[j])
    t5_loss_list.append(tmp)

  check = np.reciprocal(t5_loss_list)
  t5_loss_tensor = torch.tensor(check)
  output_class_ranks = torch.argsort(t5_loss_tensor, dim=-1, descending=True)

  label_llm = []
  for j in range(len(output_class_ranks)):
    label_llm.append(int(output_class_ranks[j][0]))

  label_llm_list += label_llm

In [None]:
#from pandas.core.frame import DataFrame

c={"label" : label,
   "label_llm_list" : label_llm_list}
data=DataFrame(c)

acc_llm = []

for i in range(20):
  label_part = []
  label_llm_part = []

  for j in range(len(data)):
    if data.at[j,'label'] == i:
      label_part.append(data.at[j,'label'])
      label_llm_part.append(data.at[j,'label_llm_list'])
        
  if len(label_part) == 0:
    acc_llm.append(0)
  else:
    acc_llm.append(accuracy_score(label_part, label_llm_part))
  
acc_llm = [0 if math.isnan(x) else x for x in acc_llm]

In [None]:
acc_llm

In [None]:
c={"acc_soft" : acc_soft,
   "acc_llm" : acc_llm}
weight_data_17_18_daisi=DataFrame(c)

for i in range(len(weight_data_17_18_daisi)):
  if weight_data_17_18_daisi.at[i,'acc_soft'] + weight_data_17_18_daisi.at[i,'acc_llm'] == 0:
    weight_data_17_18_daisi.at[i,'DS_soft'] = 0.5*(1 - hard_label_weight)
    weight_data_17_18_daisi.at[i,'DS_llm'] = 0.5*(1 - hard_label_weight)
  else:
    weight_data_17_18_daisi.at[i,'DS_soft'] = (1-hard_label_weight) * weight_data_17_18_daisi.at[i,'acc_soft'] / (weight_data_17_18_daisi.at[i,'acc_soft'] + weight_data_17_18_daisi.at[i,'acc_llm'])
    weight_data_17_18_daisi.at[i,'DS_llm'] = (1-hard_label_weight) * weight_data_17_18_daisi.at[i,'acc_llm'] / (weight_data_17_18_daisi.at[i,'acc_soft'] + weight_data_17_18_daisi.at[i,'acc_llm'])

In [None]:
weight_data_17_18_daisi

In [None]:
#weight processing
weight_data_17_18_daisi['DI_soft']=(1-hard_label_weight) * (0.5 / (1 + IR_17_18_daisi))
weight_data_17_18_daisi['DI_llm'] = (1-hard_label_weight) * ((0.5 + IR_17_18_daisi) / (1 + IR_17_18_daisi))

In [None]:
weight_data_17_18_daisi['weight_true_label']=hard_label_weight

weight_data_17_18_daisi['weight_soft'] = DS_weight * weight_data_17_18_daisi['DS_soft'] + DI_weight * weight_data_17_18_daisi['DI_soft']
weight_data_17_18_daisi['weight_llm'] = DS_weight * weight_data_17_18_daisi['DS_llm'] + DI_weight * weight_data_17_18_daisi['DI_llm']

weight_data_17_18_daisi

In [None]:
acc_weight = weight_data_17_18_daisi[['weight_true_label','weight_soft','weight_llm']]
acc_weight.weight_data_17_18_daisi = ['weight_true_label','weight_soft','weight_llm']

In [None]:
acc_weight

In [None]:
for epoch in range(start_epoch, 20):
  if epochs_since_improvement > 0 and epochs_since_improvement % 5 == 0:
    adjust_learning_rate(optimizer, 0.8)

  # train
  #train_acc = train(args, train_dataloader=train_dataloader, model = model, criterion=criterion, optimizer=optimizer, epoch=epoch, tokenizer = tokenizer, device = device)
  train_acc = train_lwf(args, train_dataloader=train_dataloader, model = model, criterion=criterion, optimizer=optimizer, epoch=epoch, tokenizer = tokenizer, device = device)

  # validation
  test_acc, test_c_acc, test_precision, test_recall, test_fscore = validate_18(args, val_loader=val_dataloader, model = model, criterion=criterion, epoch=epoch, tokenizer = tokenizer, device = device)

  if test_acc >= best_results[0]:
    epochs_since_improvement = 0

    best_results[0] = test_acc
    best_epoch[0] = epoch
    print('Best epoch: %d | Best acc: %.6f' %(best_epoch[0], best_results[0]))
    save_clf_checkpoint(args.checkpoint_dir, epoch, epochs_since_improvement, model, optimizer, best_results[0], final_args)

  else:
    epochs_since_improvement += 1
    print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))

  if train_acc >= 1.0: break

# testing process after the CL training

In [None]:
train_seq = [1,2,6,9]
val_seq = [8]

folder_head = 'C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis17/seq_'
folder_tail = '/vqa/*.txt'

train_dataset = EndoVis17VQAClassification(train_seq, folder_head, folder_tail, patch_size = 5)
train_dataloader = DataLoader(dataset=train_dataset, batch_size= 64, shuffle=True)

val_dataset = EndoVis17VQAClassification(val_seq, folder_head, folder_tail, patch_size = 5)
val_dataloader = DataLoader(dataset=val_dataset, batch_size= 64, shuffle=False)

In [None]:
checkpoint = torch.load('C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis18/ablation/checkpoints_all/17+18+daisi/Best.pth.tar')

model = checkpoint['model']

# Move to GPU, if available
model = model.to(device)
print(final_args)
pytorch_total_params = sum(p.numel() for p in model.parameters())
print('model params: ', pytorch_total_params)

# Loss function
criterion = nn.CrossEntropyLoss().to(device)

# validation
test_acc, test_c_acc, test_precision, test_recall, test_fscore = validate(args, val_loader=val_dataloader, model = model, criterion=criterion, epoch=0, tokenizer = tokenizer, device = device)

In [None]:
train_seq = [2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15]
val_seq = [1, 5, 16]

folder_head = 'C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis18/seq_'
folder_tail = '/vqa/Classification_t5_loss/*.txt'

val_dataset = EndoVis18VQAClassification(val_seq, folder_head, folder_tail, patch_size = 5)
val_dataloader = DataLoader(dataset=val_dataset, batch_size= 64, shuffle=False)

In [None]:
checkpoint = torch.load('C:/Users/kxchen/Desktop/Yuyang/multi-modality-20230721T022352Z-001/multi-modality/endovis18/ablation/checkpoints_all/17+18+daisi/Best.pth.tar')

model = checkpoint['model']

# Move to GPU, if available
model = model.to(device)
print(final_args)
pytorch_total_params = sum(p.numel() for p in model.parameters())
print('model params: ', pytorch_total_params)

# Loss function
criterion = nn.CrossEntropyLoss().to(device)

# validation
test_acc, test_c_acc, test_precision, test_recall, test_fscore = validate_18(args, val_loader=val_dataloader, model = model, criterion=criterion, epoch=0, tokenizer = tokenizer, device = device)