In [None]:
# Distillation temperature: logits are divided by T before the softmax in train_d4.
T=2
# Base of the logarithm applied to the class-imbalance ratio in the "Imbalance Issue" cell.
N_Balance = 2000
# Mixing coefficients used when the DS_* and DI_* columns are combined into
# weight_soft / weight_llm.
DS_weight=0.9
DI_weight=0.1
# Weight of the hard-label cross-entropy term; the remaining (1 - hard_label_weight)
# is split between the soft-target and LLM terms.
hard_label_weight=0.8
# Presumably the number of training epochs; not referenced in the cells visible here.
epoch_num=30

In [None]:
# Mount Google Drive: every dataset, checkpoint and tokenizer path used below
# lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
import os
os.chdir("/content/drive/MyDrive/Colab Notebooks/research/multi-modality/endovis18")
import argparse
import pandas as pd
from lib2to3.pytree import convert
from torch import nn
from torch import optim
import torch.utils.data
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

In [None]:
pip install transformers==4.18.0

In [None]:
from transformers import BertTokenizer
from torch.utils.data  import DataLoader
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/research/multi-modality/endovis18/Surgical_VQA')
from utils import *
from dataloaders.dataloaderClassification import *
from models.VisualBertClassification import VisualBertClassification
from models.VisualBertResMLPClassification import VisualBertResMLPClassification
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
def seed_everything(seed=27):
    '''
    Set random seeds for reproducible experiments.

    Seeds Python's `random` module, NumPy and PyTorch (CPU and every CUDA
    device), exports PYTHONHASHSEED and switches cuDNN to deterministic,
    non-autotuned kernels.

    Inputs: seed number
    '''
    import random
    import numpy as np

    # Python and NumPy generators were previously left unseeded, so any code
    # drawing from them was not reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Only affects interpreters started after this point (e.g. worker processes).
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [None]:
# training function for our CL algorithm
def _soft_target_loss(target_probs, student_log_probs, temperature):
    """Cross-entropy between soft targets and the student, scaled by temperature**2."""
    return -(target_probs * student_log_probs).sum(1).mean() * temperature * temperature


def train_d4(args, train_dataloader, model, criterion, optimizer, epoch, tokenizer, device):
    """Train `model` for one epoch with hard labels, teacher distillation and an LLM prior.

    The loss is a per-class weighted sum of
      * `criterion(outputs, labels)` on the hard labels,
      * three distillation terms against the teachers `model_17`, `model_18`
        and `model_old` (equal thirds of `weight_soft`),
      * one term against the softmax of the reciprocal T5 losses (`weight_llm`).

    Besides its arguments the function reads the notebook globals `model_17`,
    `model_18`, `model_old`, `out_features`, `T` and `acc_weight`.

    Args:
        args: parsed options; only `args.question_len` is used.
        train_dataloader: yields (file_name, visual_features, questions, labels, t5_loss).
        model: student network, called as `model(inputs, visual_features)`.
        criterion: hard-label loss.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the log line.
        tokenizer: tokenizer applied to the batch of questions.
        device: device the features and labels are moved to.

    Returns:
        The training accuracy computed by `calc_acc` over the whole epoch.
    """
    model.train()

    total_loss = 0.0
    true_batches = []
    pred_batches = []

    for _, visual_features, q, labels, t5_loss in train_dataloader:

        # The per-class loss weights are looked up with the first label of the
        # batch only, which is exact for the batch_size=1 loader used here.
        label_number = labels.numpy()[0]

        # prepare questions
        inputs = tokenizer(list(q), return_tensors="pt", padding="max_length", max_length=args.question_len)

        # LLM prior: reciprocal of the first 22 T5 losses of every sample, so a
        # low loss becomes a high score.
        t5_loss_tensor = torch.reciprocal(torch.stack([row[:22] for row in t5_loss])).to(device)

        # GPU / CPU
        visual_features = visual_features.to(device)
        labels = labels.to(device)

        outputs = model(inputs, visual_features)

        # The teachers only provide fixed targets: run them without autograd so
        # that no gradient reaches their parameters and no graph is kept for them.
        # NOTE(review): the teachers are not switched to eval() here - confirm
        # whether that is done elsewhere.
        with torch.no_grad():
            soft_target_17 = model_17(inputs, visual_features)
            soft_target_18 = model_18(inputs, visual_features)
            soft_target = model_old(inputs, visual_features)

        loss1 = criterion(outputs, labels)

        # log_softmax is the numerically stable form of log(softmax(x)).
        student_log_probs = F.log_softmax(outputs[:, :out_features] / T, dim=1)

        outputs_T_17 = F.softmax(soft_target_17[:, :out_features] / T, dim=1)
        outputs_T_18 = F.softmax(soft_target_18[:, :out_features] / T, dim=1)
        outputs_T = F.softmax(soft_target[:, :out_features] / T, dim=1)
        outputs_t5_loss = F.softmax(t5_loss_tensor[:, :out_features] / T, dim=1)

        loss2_17 = _soft_target_loss(outputs_T_17, student_log_probs, T)
        loss2_18 = _soft_target_loss(outputs_T_18, student_log_probs, T)
        loss2 = _soft_target_loss(outputs_T, student_log_probs, T)
        loss3 = _soft_target_loss(outputs_t5_loss, student_log_probs, T)

        weight_hard = acc_weight.at[label_number, 'weight_true_label']
        weight_soft = acc_weight.at[label_number, 'weight_soft']
        weight_llm = acc_weight.at[label_number, 'weight_llm']

        # The three teachers share weight_soft equally (an earlier variant used
        # 0.05 / 0.05 / 0.9 instead of thirds).
        loss = (loss1 * weight_hard
                + loss2_17 * weight_soft / 3
                + loss2_18 * weight_soft / 3
                + loss2 * weight_soft / 3
                + loss3 * weight_llm)

        # zero the parameter gradients
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print statistics
        total_loss += loss.item()

        _, predicted = torch.max(F.softmax(outputs.detach(), dim=1), 1)

        # Collect per-batch results and concatenate once after the loop.
        true_batches.append(labels.detach().cpu())
        pred_batches.append(predicted.cpu())

    label_true = torch.cat(true_batches, 0) if true_batches else None
    label_pred = torch.cat(pred_batches, 0) if pred_batches else None

    # loss and acc
    acc, c_acc = calc_acc(label_true, label_pred), calc_classwise_acc(label_true, label_pred)
    precision, recall, fscore = calc_precision_recall_fscore(label_true, label_pred)
    print('Train: epoch: %d loss: %.6f | Acc: %.6f | Precision: %.6f | Recall: %.6f | FScore: %.6f' %(epoch, total_loss, acc, precision, recall, fscore))
    return acc

In [None]:
def validate_17(args, val_loader, model, criterion, epoch, tokenizer, device, save_output = False):
    """Evaluate `model` on a loader that yields (file_name, visual_features, questions, labels).

    Args:
        args: parsed options; uses `question_len` and, when `save_output` is set,
            `checkpoint_dir`, `dataset_type` and `dataset_cat`.
        val_loader: validation DataLoader yielding 4-tuples.
        model: network called as `model(inputs, visual_features)`.
        criterion: ignored - a fresh `nn.CrossEntropyLoss()` is always used
            (kept from the original implementation).
        epoch: epoch number, used only in the log line.
        tokenizer: tokenizer applied to the batch of questions.
        device: device the features and labels are moved to.
        save_output: when True, writes labels.txt / predictions.txt and a CSV of
            decoded predictions below `args.checkpoint_dir`.

    Returns:
        (acc, c_acc, precision, recall, fscore); `c_acc` is always 0.0.
    """
    model.eval()

    total_loss = 0.0
    true_batches = []
    pred_batches = []
    file_names = list()

    criterion = nn.CrossEntropyLoss()

    # Class names for the 22-way label space; also used for dataset_type 'm18'.
    convert_arr = ['kidney', 'Idle', 'Grasping', 'Retraction', 'Tissue_Manipulation',
              'Tool_Manipulation', 'Cutting', 'Cauterization', 'Suction',
               'Looping', 'Suturing', 'Clipping', 'Staple', 'Ultrasound_Sensing',
              'left-top', 'right-top', 'left-bottom', 'right-bottom',
                   'no', 'yes','left', 'right']

    with torch.no_grad():
        for file_name, visual_features, q, labels in val_loader:
            # prepare questions
            inputs = tokenizer(list(q), return_tensors="pt", padding="max_length", max_length=args.question_len)

            # GPU / CPU
            visual_features = visual_features.to(device)
            labels = labels.to(device)

            outputs = model(inputs, visual_features)

            loss = criterion(outputs,labels)
            total_loss += loss.item()

            _, predicted = torch.max(F.softmax(outputs, dim=1).data, 1)

            # Collect per-batch results and concatenate once after the loop.
            true_batches.append(labels.data.cpu())
            pred_batches.append(predicted.data.cpu())
            file_names.extend(file_name)

    label_true = torch.cat(true_batches, 0) if true_batches else None
    label_pred = torch.cat(pred_batches, 0) if pred_batches else None

    acc = calc_acc(label_true, label_pred)
    c_acc = 0.0
    # c_acc = calc_classwise_acc(label_true, label_pred)
    precision, recall, fscore = calc_precision_recall_fscore(label_true, label_pred)

    print('Test: epoch: %d loss: %.6f | Acc: %.6f | Precision: %.6f | Recall: %.6f | FScore: %.6f' %(epoch, total_loss, acc, precision, recall, fscore))

    if save_output:
        # Saving predictions
        os.makedirs(args.checkpoint_dir + 'text_files', exist_ok=True)
        with open(args.checkpoint_dir + 'text_files/labels.txt', 'w') as label_file:
            label_file.write(str(label_true))

        with open(args.checkpoint_dir + 'text_files/predictions.txt', 'w') as pred_file:
            pred_file.write(str(label_pred))

        if args.dataset_type == 'med_vqa':
            if args.dataset_cat == 'cat1':
                convert_arr = ['cta - ct angiography', 'no', 'us - ultrasound', 'xr - plain film', 'noncontrast', 'yes', 't2', 'ct w/contrast (iv)', 'mr - flair', 'mammograph', 'ct with iv contrast',
                            'gi and iv', 't1', 'mr - t2 weighted', 'mr - t1w w/gadolinium', 'contrast', 'iv', 'an - angiogram', 'mra - mr angiography/venography', 'nm - nuclear medicine', 'mr - dwi diffusion weighted',
                            'ct - gi & iv contrast', 'ct noncontrast', 'mr - other pulse seq.', 'ct with gi and iv contrast', 'flair', 'mr - t1w w/gd (fat suppressed)', 'ugi - upper gi', 'mr - adc map (app diff coeff)',
                            'bas - barium swallow', 'pet - positron emission', 'mr - pdw proton density', 'mr - t1w - noncontrast', 'be - barium enema', 'us-d - doppler ultrasound', 'mr - stir', 'mr - flair w/gd',
                            'ct with gi contrast', 'venogram', 'mr t2* gradient,gre,mpgr,swan,swi', 'mr - fiesta', 'ct - myelogram', 'gi', 'sbft - small bowel', 'pet-ct fusion']
            elif args.dataset_cat == 'cat2':
                convert_arr = ['axial', 'longitudinal', 'coronal', 'lateral', 'ap', 'sagittal', 'mammo - mlo', 'pa', 'mammo - cc', 'transverse', 'mammo - mag cc', 'frontal', 'oblique', '3d reconstruction', 'decubitus', 'mammo - xcc']
            else:
                convert_arr = ['lung, mediastinum, pleura', 'skull and contents', 'genitourinary', 'spine and contents', 'musculoskeletal', 'heart and great vessels', 'vascular and lymphatic', 'gastrointestinal', 'face, sinuses, and neck', 'breast']
        elif args.dataset_type == 'c80':
            convert_arr = ['no', 'calot triangle dissection', 'yes', '1', '2', 'gallbladder dissection',
                            'clipping cutting', 'gallbladder retraction', '0', 'cleaning coagulation',
                            'gallbladder packaging', 'preparation', '3']
        # For 'm18' the 22-name list defined at the top is kept on purpose: its
        # first 18 names are exactly the former m18 list, and the extra four keep
        # labels 18-21 from raising IndexError.

        # Build all rows first and create the frame once; DataFrame.append was
        # removed in pandas 2.0 and was quadratic in the number of rows.
        rows = [
            {'Img': name, 'Ground Truth': convert_arr[truth], 'Prediction': convert_arr[pred]}
            for name, truth, pred in zip(file_names, label_true.tolist(), label_pred.tolist())
        ]
        df = pd.DataFrame(rows, columns=["Img", "Ground Truth", "Prediction"])

        df.to_csv(args.checkpoint_dir + args.checkpoint_dir.split('/')[1] + '_' + args.checkpoint_dir.split('/')[2] + '_eval.csv')

    return (acc, c_acc, precision, recall, fscore)

In [None]:
def validate_18_d3_d4(args, val_loader, model, criterion, epoch, tokenizer, device, save_output = False):
    """Evaluate `model` on a loader that yields (file_name, visual_features, questions, labels, t5_loss).

    The fifth element of every batch is ignored.

    Args:
        args: parsed options; uses `question_len` and, when `save_output` is set,
            `checkpoint_dir`, `dataset_type` and `dataset_cat`.
        val_loader: validation DataLoader yielding 5-tuples.
        model: network called as `model(inputs, visual_features)`.
        criterion: ignored - a fresh `nn.CrossEntropyLoss()` is always used
            (kept from the original implementation).
        epoch: epoch number, used only in the log line.
        tokenizer: tokenizer applied to the batch of questions.
        device: device the features and labels are moved to.
        save_output: when True, writes labels.txt / predictions.txt and a CSV of
            decoded predictions below `args.checkpoint_dir`.

    Returns:
        (acc, c_acc, precision, recall, fscore); `c_acc` is always 0.0.
    """
    model.eval()

    total_loss = 0.0
    true_batches = []
    pred_batches = []
    file_names = list()

    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        for file_name, visual_features, q, labels, _ in val_loader:
            # prepare questions
            inputs = tokenizer(list(q), return_tensors="pt", padding="max_length", max_length=args.question_len)

            # GPU / CPU
            visual_features = visual_features.to(device)
            labels = labels.to(device)

            outputs = model(inputs, visual_features)

            loss = criterion(outputs,labels)
            total_loss += loss.item()

            _, predicted = torch.max(F.softmax(outputs, dim=1).data, 1)

            # Collect per-batch results and concatenate once after the loop.
            true_batches.append(labels.data.cpu())
            pred_batches.append(predicted.data.cpu())
            file_names.extend(file_name)

    label_true = torch.cat(true_batches, 0) if true_batches else None
    label_pred = torch.cat(pred_batches, 0) if pred_batches else None

    acc = calc_acc(label_true, label_pred)
    c_acc = 0.0
    # c_acc = calc_classwise_acc(label_true, label_pred)
    precision, recall, fscore = calc_precision_recall_fscore(label_true, label_pred)

    print('Test: epoch: %d loss: %.6f | Acc: %.6f | Precision: %.6f | Recall: %.6f | FScore: %.6f' %(epoch, total_loss, acc, precision, recall, fscore))

    if save_output:
        # Saving predictions
        os.makedirs(args.checkpoint_dir + 'text_files', exist_ok=True)
        with open(args.checkpoint_dir + 'text_files/labels.txt', 'w') as label_file:
            label_file.write(str(label_true))

        with open(args.checkpoint_dir + 'text_files/predictions.txt', 'w') as pred_file:
            pred_file.write(str(label_pred))

        # Default (also used for 'm18'): the 22-name list from validate_17. Its
        # first 18 names are exactly the former m18 list; having a default
        # avoids a NameError for unlisted dataset types and an IndexError for
        # labels 18-21.
        # NOTE(review): confirm that indices 18-21 mean no/yes/left/right for this data.
        convert_arr = ['kidney', 'Idle', 'Grasping', 'Retraction', 'Tissue_Manipulation',
                        'Tool_Manipulation', 'Cutting', 'Cauterization', 'Suction',
                        'Looping', 'Suturing', 'Clipping', 'Staple', 'Ultrasound_Sensing',
                        'left-top', 'right-top', 'left-bottom', 'right-bottom',
                        'no', 'yes','left', 'right']

        if args.dataset_type == 'med_vqa':
            if args.dataset_cat == 'cat1':
                convert_arr = ['cta - ct angiography', 'no', 'us - ultrasound', 'xr - plain film', 'noncontrast', 'yes', 't2', 'ct w/contrast (iv)', 'mr - flair', 'mammograph', 'ct with iv contrast',
                            'gi and iv', 't1', 'mr - t2 weighted', 'mr - t1w w/gadolinium', 'contrast', 'iv', 'an - angiogram', 'mra - mr angiography/venography', 'nm - nuclear medicine', 'mr - dwi diffusion weighted',
                            'ct - gi & iv contrast', 'ct noncontrast', 'mr - other pulse seq.', 'ct with gi and iv contrast', 'flair', 'mr - t1w w/gd (fat suppressed)', 'ugi - upper gi', 'mr - adc map (app diff coeff)',
                            'bas - barium swallow', 'pet - positron emission', 'mr - pdw proton density', 'mr - t1w - noncontrast', 'be - barium enema', 'us-d - doppler ultrasound', 'mr - stir', 'mr - flair w/gd',
                            'ct with gi contrast', 'venogram', 'mr t2* gradient,gre,mpgr,swan,swi', 'mr - fiesta', 'ct - myelogram', 'gi', 'sbft - small bowel', 'pet-ct fusion']
            elif args.dataset_cat == 'cat2':
                convert_arr = ['axial', 'longitudinal', 'coronal', 'lateral', 'ap', 'sagittal', 'mammo - mlo', 'pa', 'mammo - cc', 'transverse', 'mammo - mag cc', 'frontal', 'oblique', '3d reconstruction', 'decubitus', 'mammo - xcc']
            else:
                convert_arr = ['lung, mediastinum, pleura', 'skull and contents', 'genitourinary', 'spine and contents', 'musculoskeletal', 'heart and great vessels', 'vascular and lymphatic', 'gastrointestinal', 'face, sinuses, and neck', 'breast']
        elif args.dataset_type == 'c80':
            convert_arr = ['no', 'calot triangle dissection', 'yes', '1', '2', 'gallbladder dissection',
                            'clipping cutting', 'gallbladder retraction', '0', 'cleaning coagulation',
                            'gallbladder packaging', 'preparation', '3']

        # Build all rows first and create the frame once; DataFrame.append was
        # removed in pandas 2.0 and was quadratic in the number of rows.
        rows = [
            {'Img': name, 'Ground Truth': convert_arr[truth], 'Prediction': convert_arr[pred]}
            for name, truth, pred in zip(file_names, label_true.tolist(), label_pred.tolist())
        ]
        df = pd.DataFrame(rows, columns=["Img", "Ground Truth", "Prediction"])

        df.to_csv(args.checkpoint_dir + args.checkpoint_dir.split('/')[1] + '_' + args.checkpoint_dir.split('/')[2] + '_eval.csv')

    return (acc, c_acc, precision, recall, fscore)

In [None]:
# Argument parser holding the run configuration; its options are registered in the following cells.
parser = argparse.ArgumentParser(description='VisualQuestionAnswerClassification')

In [None]:
# Model option: word-embedding size.
parser.add_argument('--emb_dim',     type=int,  default=300, help='dimension of word embeddings.')

In [None]:
# Transformer options; the defaults equal the values listed in `final_args` further down.
parser.add_argument('--n_heads',     type=int,  default=8,  help='Multi-head attention.')
parser.add_argument('--dropout',     type=float, default=0.1, help='dropout')
parser.add_argument('--encoder_layers',  type=int,  default=6,  help='the number of layers of encoder in Transformer.')

In [None]:
def _str2bool(value):
    """Turn a command-line string such as 'True', 'false', '1' or '0' into a bool."""
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() in ('true', '1', 'yes', 'y')


# Training parameters
parser.add_argument('--epochs',       type=int,   default=80,     help='number of epochs to train for (if early stopping is not triggered).') #80, 26
parser.add_argument('--batch_size',     type=int,   default=64,      help='batch_size')
parser.add_argument('--workers',      type=int,   default=1,     help='for data-loading; right now, only 1 works with h5pys.')
parser.add_argument('--print_freq',     type=int,   default=100,     help='print training/validation stats every __ batches.')

# existing checkpoint
parser.add_argument('--checkpoint',     default=None,             help='path to checkpoint, None if none.')

parser.add_argument('--lr', type=float,  default=0.00001, help='0.000005, 0.00001, 0.000005')
parser.add_argument('--checkpoint_dir',   default= '/content/drive/MyDrive/Colab Notebooks/research/multi-modality/endovis18/result/',    help='med_vqa_c$version$/m18/c80//m18_vid$temporal_size$/c80_vid$temporal_size$') #clf_v1_2_1x1/med_vqa_c3
parser.add_argument('--dataset_type',    default= 'm18',         help='med_vqa/m18/c80/m18_vid/c80_vid')
parser.add_argument('--dataset_cat',    default= 'None',         help='cat1/cat2/cat3')
parser.add_argument('--transformer_ver',  default= 'vbrm',         help='vb/vbrm')
parser.add_argument('--tokenizer_ver',   default= 'v2',          help='v2/v3')
# Numeric options get type=int so that a value given on the command line is
# not left as a string (the defaults are unchanged).
parser.add_argument('--patch_size',     type=int, default= 5,           help='1/2/3/4/5')
parser.add_argument('--temporal_size',   type=int, default= 3,           help='1/2/3/4/5')
parser.add_argument('--question_len',    type=int, default= 25,          help='25')
parser.add_argument('--num_class',     type=int, default= 2,           help='number of answer classes')
# Without a converter, "--validate False" would be the truthy string 'False'.
parser.add_argument('--validate',      type=_str2bool, default=False,          help='When only validation required False/True')

In [None]:
# Accept a `-f` option so that parse_args() does not reject it; presumably this is
# the connection-file flag the Jupyter/Colab kernel is launched with - verify.
parser.add_argument('-f')

In [None]:
# Parse the process arguments; later cells overwrite individual fields (e.g. args.num_class).
args = parser.parse_args()

In [None]:
# load checkpoint, these parameters can't be modified
final_args = {"emb_dim": 300, "n_heads": 8, "dropout": 0.1, "encoder_layers": 6}

In [None]:
seed_everything()

In [None]:
# GPU or CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # sets device for model and PyTorch tensors
# Keep cuDNN autotuning off. seed_everything() disables it for reproducibility;
# turning it back on here would let cuDNN select different kernels between
# runs and silently undo that setting.
cudnn.benchmark = False
print('device =', device)

In [None]:
# best model initialize
# Bookkeeping for model selection; the same four assignments are repeated in a later cell.
start_epoch = 1
best_epoch = [0]
best_results = [0.0]
epochs_since_improvement = 0

In [None]:
# tokenizer
# Loaded from a local Drive directory rather than from a hub model name.
tokenizer = None
tokenizer = BertTokenizer.from_pretrained("/content/drive/MyDrive/Colab Notebooks/research/multi-modality/endovis18")

In [None]:
tokenizer

In [None]:
# Override the parser default (2) with the class count used for the model built in the next cell.
args.num_class = 20

In [None]:
#if args.transformer_ver == 'vb':
# Freshly initialised model; `model` is reassigned later from the DAISI checkpoint.
model = VisualBertClassification(vocab_size=len(tokenizer), layers=6, n_heads=8, num_class = args.num_class)

In [None]:
# Adam over the freshly built model; `optimizer` is reassigned later from the loaded checkpoint.
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

In [None]:
# Convert a string that represents a list of floats into an actual list of floats
def str2list(target_str):
  """Parse a string such as "[0.1, 2, 3.5]" and return at most its first 20 values as floats."""
  # drop the surrounding brackets, then split on commas
  body = target_str.strip('[').strip(']')
  # strip the whitespace around every piece before converting it
  numbers = [float(piece.strip()) for piece in body.split(',')]
  return numbers[:20]

Imbalance Issue

In [None]:
# The datasets are fixed, so the label distribution is pre-computed and stored in a CSV file;
# loading all the data just to count labels would be very time-consuming in Colab.
import math

# Relative path: resolved against the working directory set by os.chdir above.
frequency_all = pd.read_csv("frequency_all.csv")
# Largest and smallest value of the combined '17+18+daisi+dataset_4' column.
max_17_18_daisi_d4 = frequency_all['17+18+daisi+dataset_4'].max()
min_17_18_daisi_d4 = frequency_all['17+18+daisi+dataset_4'].min()
# Imbalance ratio = max / min.
# NOTE(review): a minimum of 0 would make this division fail or return inf - confirm it cannot occur.
IR_17_18_daisi_d4 = max_17_18_daisi_d4 / min_17_18_daisi_d4
# Despite the `ln_` prefix this is the logarithm of the ratio in base N_Balance, not the natural log.
ln_IR_17_18_daisi_d4 = math.log(IR_17_18_daisi_d4,N_Balance)

print(ln_IR_17_18_daisi_d4)

Load dataset_4 and CL model trained on 17,18,DAISI


In [None]:
class LrspDataset(Dataset):
    """Rows of one split of the dataset_4 CSV, with pre-extracted visual features.

    Every item is the tuple (file_name, visual_features, question, label, t5_loss).
    """

    def __init__(self, csv_file, data_type, patch_size=5):
        """
        Args:
            csv_file: CSV with the columns 'type', 'path', 'q', 'labels' and 't5_loss'.
            data_type: value of the 'type' column to keep (e.g. 'train' or 'val').
            patch_size: selects the '<p>x<p>' feature sub-directory.
        """
        self.patch_size = patch_size
        tmp = pd.read_csv(csv_file)
        self.data_frame = tmp[tmp['type']==data_type]

    def __len__(self):
        """Number of rows in the selected split."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Load the idx-th sample of the split (positional index)."""
        sample = self.data_frame.iloc[idx]
        file_name = str(sample['path'])
        q = sample['q']
        labels = torch.tensor(sample['labels'])

        visual_feature_loc = '/' + os.path.join('content/drive/MyDrive/Colab Notebooks/research/multi-modality/dataset_4/vqa/img_features',(str(self.patch_size)+'x'+str(self.patch_size)),file_name+'.hdf5')
        # `[:]` copies the features into memory, so the HDF5 file can be closed
        # right away instead of leaking one open handle per sample.
        with h5py.File(visual_feature_loc, 'r') as frame_data:
            visual_features = torch.from_numpy(frame_data['visual_features'][:])

        # SECURITY NOTE(review): eval() executes whatever expression is stored in
        # the CSV cell. Acceptable only for trusted files; consider
        # ast.literal_eval if the column always holds a plain list literal.
        t5_loss = torch.tensor(eval(sample['t5_loss']))

        return file_name, visual_features, q, labels, t5_loss

In [None]:
# Training split of dataset_4. batch_size=1 matters: train_d4 selects the loss
# weights from the first label of each batch.
train_dataset_lrsp = LrspDataset('/content/drive/MyDrive/Colab Notebooks/research/multi-modality/dataset_4/data.csv','train',patch_size=5)
train_dataloader_lrsp = DataLoader(dataset=train_dataset_lrsp, batch_size=1, shuffle=True)
len(train_dataset_lrsp)

In [None]:
# NOTE(review): torch.load is called without map_location, so the checkpoints are
# restored to the device they were saved from - confirm this works on CPU-only runtimes.
# old model 17
checkpoint_17 = torch.load('/content/drive/MyDrive/Colab Notebooks/research/multi-modality/endovis17/Pretrain_t1.pth.tar')
model_17 = checkpoint_17['model']

# old model 18
checkpoint_18 = torch.load('/content/drive/MyDrive/Colab Notebooks/research/multi-modality/endovis17/Pretrain_t2.pth.tar')
model_18 = checkpoint_18['model']

# old model daisi
checkpoint_old = torch.load('/content/drive/MyDrive/Colab Notebooks/research/multi-modality/daisi_vqa_final/result/4980.pth.tar')
model_old = checkpoint_old['model']

# new model
# The same file is loaded a second time so that `model` is a separate object
# from `model_old`; `checkpoint_old` now refers to this second load.
checkpoint_old = torch.load('/content/drive/MyDrive/Colab Notebooks/research/multi-modality/daisi_vqa_final/result/4980.pth.tar')
model = checkpoint_old['model']

# NOTE(review): this optimizer was saved with the checkpoint, so presumably it
# tracks the parameters of the loaded model only. The classifier layer that a
# later cell swaps into `model` would then not be optimised - confirm.
optimizer = checkpoint_old['optimizer']

In [None]:
#change the last FC layer of model_17 / model_18 (add the nodes for new classes)
num_new_class = 4

def kaiming_normal_init(m):
    """Kaiming-normal initialise the weight of a Conv2d or Linear module; other objects are left untouched."""
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, nonlinearity='sigmoid')

# Old number of input/output channel of the last FC layer in old model
in_features = model_18.classifier.in_features
out_features = model_18.classifier.out_features

# Old weight/bias of the last FC layer
weight = model_18.classifier.weight.data
bias = model_18.classifier.bias.data

# New number of output channel of the last FC layer in new model
new_out_features = num_new_class + out_features

# Create a new FC layer and initialise its weight/bias.
# The module itself must be passed: with `new_fc.weight` (a Parameter) neither
# isinstance branch matched, so the initialisation silently did nothing.
new_fc = nn.Linear(in_features, new_out_features)
kaiming_normal_init(new_fc)
# Copy the old rows over the fresh initialisation; only the new rows stay random.
new_fc.weight.data[:out_features] = weight
new_fc.bias.data[:out_features] = bias

# Replace the old FC layer
# NOTE(review): both teachers receive the SAME layer, built from model_18's
# weights, so model_17's own classifier weights are discarded - confirm this is intended.
model_17.classifier = new_fc
model_18.classifier = new_fc

In [None]:
#change the last FC layer for new model (add the node for new classes)
import copy

num_new_class = 2

# kaiming_normal_init is defined in the preceding classifier-expansion cell.

# Old number of input/output channel of the last FC layer in old model
in_features = model_old.classifier.in_features
out_features = model_old.classifier.out_features

# Old weight/bias of the last FC layer
weight = model_old.classifier.weight.data
bias = model_old.classifier.bias.data

# New number of output channel of the last FC layer in new model
new_out_features = num_new_class + out_features

# Create a new FC layer and initialise its weight/bias.
# The module itself must be passed: with `new_fc.weight` (a Parameter) the
# initialiser's isinstance checks never matched and nothing was initialised.
new_fc = nn.Linear(in_features, new_out_features)
kaiming_normal_init(new_fc)
# Copy the old rows over the fresh initialisation; only the new rows stay random.
new_fc.weight.data[:out_features] = weight
new_fc.bias.data[:out_features] = bias

# Replace the old FC layer.
# The student and the frozen DAISI teacher must not share one module, otherwise
# every update of the student's classifier also changes the teacher's soft
# targets. The teacher therefore gets an independent copy with identical weights.
model.classifier = new_fc
model_old.classifier = copy.deepcopy(new_fc)

In [None]:
# Move to GPU, if available
# The three teachers (model_17, model_18, model_old) and the student (model) all go to `device`.
model_17 = model_17.to(device)
model_18 = model_18.to(device)
model_old = model_old.to(device)
model = model.to(device)
print(final_args)
# Parameter count of the student only.
pytorch_total_params = sum(p.numel() for p in model.parameters())
print('model params: ', pytorch_total_params)

In [None]:
# Loss function
# Hard-label loss (loss1 in train_d4); the validate_* functions create their own CrossEntropyLoss.
criterion = nn.CrossEntropyLoss().to(device)

In [None]:
args.checkpoint_dir = '/content/drive/MyDrive/Colab Notebooks/research/multi-modality/endovis18/result/'

In [None]:
# best model initialize
# Resets the same bookkeeping variables that an earlier cell already initialised.
start_epoch = 1
best_epoch = [0]
best_results = [0.0]
epochs_since_improvement = 0

In [None]:
# Width of the student's expanded classifier; train_d4 slices every logit tensor to
# this many columns. It overwrites the value left behind by the classifier-expansion cells.
out_features = model.classifier.out_features

Per-class accuracy of the soft targets (old models) and of the large language model (LLM)

In [None]:
from pandas.core.frame import DataFrame
from sklearn.metrics import accuracy_score
import math

In [None]:
# Per-class top-1 accuracy of the teacher model_17 on the dataset_4 training set.
label = []
label_soft_list = []  # soft labels (top-1 predictions of the old model)

# Pure inference: skip autograd so no graph is built for the teacher.
with torch.no_grad():
    for _, visual_features, q, labels, t5_loss in train_dataloader_lrsp:
        label += labels.tolist()

        # prepare questions
        inputs = tokenizer(list(q), return_tensors="pt", padding="max_length", max_length=args.question_len)

        # GPU / CPU
        visual_features = visual_features.to(device)

        soft_target = model_17(inputs, visual_features)
        # top-1 class of every sample in the batch
        label_soft_list += soft_target.argmax(dim=-1).tolist()

c={"label" : label, "label_soft_list" : label_soft_list}
data=DataFrame(c)
acc_soft = []
for i in range(22):
    class_rows = data[data['label'] == i]
    if len(class_rows) == 0:
        # Class absent from the split: report 0 directly instead of relying on
        # accuracy_score([], []) returning NaN.
        acc_soft.append(0)
    else:
        # Every true label in this subset is i, so accuracy = share of predictions equal to i.
        acc_soft.append(float((class_rows['label_soft_list'] == i).mean()))
acc_soft_17 = list(acc_soft)

In [None]:
# Per-class top-1 accuracy of the teacher model_18 on the dataset_4 training set.
label = []
label_soft_list = []  # soft labels (top-1 predictions of the old model)

# Pure inference: skip autograd so no graph is built for the teacher.
with torch.no_grad():
    for _, visual_features, q, labels, t5_loss in train_dataloader_lrsp:
        label += labels.tolist()

        # prepare questions
        inputs = tokenizer(list(q), return_tensors="pt", padding="max_length", max_length=args.question_len)

        # GPU / CPU
        visual_features = visual_features.to(device)

        soft_target = model_18(inputs, visual_features)
        # top-1 class of every sample in the batch
        label_soft_list += soft_target.argmax(dim=-1).tolist()

c={"label" : label, "label_soft_list" : label_soft_list}
data=DataFrame(c)
acc_soft = []
for i in range(22):
    class_rows = data[data['label'] == i]
    if len(class_rows) == 0:
        # Class absent from the split: report 0 directly instead of relying on
        # accuracy_score([], []) returning NaN.
        acc_soft.append(0)
    else:
        # Every true label in this subset is i, so accuracy = share of predictions equal to i.
        acc_soft.append(float((class_rows['label_soft_list'] == i).mean()))
acc_soft_18 = list(acc_soft)

In [None]:
# Per-class top-1 accuracy of the teacher model_old on the dataset_4 training set.
label = []
label_soft_list = []  # soft labels (top-1 predictions of the old model)

# Pure inference: skip autograd so no graph is built for the teacher.
with torch.no_grad():
    for _, visual_features, q, labels, t5_loss in train_dataloader_lrsp:
        label += labels.tolist()

        # prepare questions
        inputs = tokenizer(list(q), return_tensors="pt", padding="max_length", max_length=args.question_len)

        # GPU / CPU
        visual_features = visual_features.to(device)

        soft_target = model_old(inputs, visual_features)
        # top-1 class of every sample in the batch
        label_soft_list += soft_target.argmax(dim=-1).tolist()

c={"label" : label, "label_soft_list" : label_soft_list}
data=DataFrame(c)
acc_soft = []
for i in range(22):
    class_rows = data[data['label'] == i]
    if len(class_rows) == 0:
        # Class absent from the split: report 0 directly instead of relying on
        # accuracy_score([], []) returning NaN.
        acc_soft.append(0)
    else:
        # Every true label in this subset is i, so accuracy = share of predictions equal to i.
        acc_soft.append(float((class_rows['label_soft_list'] == i).mean()))
acc_soft_old = list(acc_soft)

In [None]:
# Per-class top-1 accuracy of the LLM prior: the predicted class is the one
# with the largest reciprocal T5 loss.
label = []
label_llm_list = []

for i, (_, visual_features, q, labels, t5_loss) in enumerate(train_dataloader_lrsp,0):
  label.extend(labels.tolist())

  # keep the first 22 loss values of every sample in the batch
  t5_loss_list = [t5_loss[j][:22] for j in range(len(t5_loss))]

  check = np.reciprocal(t5_loss_list)
  t5_loss_tensor = torch.tensor(check)
  output_class_ranks = torch.argsort(t5_loss_tensor, dim=-1, descending=True)

  # best-ranked class of each sample
  label_llm = [int(ranks[0]) for ranks in output_class_ranks]
  label_llm_list.extend(label_llm)

c={"label" : label, "label_llm_list" : label_llm_list}
data=DataFrame(c)

acc_llm = []

for i in range(22):
  class_rows = data[data['label'] == i]
  label_part = class_rows['label'].tolist()
  label_llm_part = class_rows['label_llm_list'].tolist()

  # a class without samples gets accuracy 0
  acc_llm.append(accuracy_score(label_part, label_llm_part) if label_part else 0)

acc_llm = [0 if math.isnan(x) else x for x in acc_llm]

In [None]:
acc_soft_17

In [None]:
acc_soft_18

In [None]:
acc_soft_old

In [None]:
# Average the per-class accuracies of the three teachers (model_17, model_18, model_old).
acc_soft = [(x + y + z) / 3 for x, y, z in zip(acc_soft_17, acc_soft_18, acc_soft_old)]

In [None]:
acc_soft

In [None]:
acc_llm

In [None]:
# Assessing the old models is slow, so the values obtained on the first run are
# kept below; uncomment them to skip the evaluation cells above.
# acc_soft_17=[0,0.9875,0.010638297872340425,0.0,0.0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0]
# acc_soft_18=[0,0.9875,0.0,0.0,0.0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0]
# acc_soft_old=[0,0.9375,0.0,0.0,0.0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0]
# acc_soft=[0.0,0.9708333333333333,0.0035460992907801418,0.0,0.0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0]
# acc_llm=[0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0.7647058823529411,0.6282051282051282,1.0,0.0]

from pandas.core.frame import DataFrame
from sklearn.metrics import accuracy_score
import math

c={"acc_soft" : acc_soft, "acc_llm" : acc_llm}

weight_data_17_18_daisi=DataFrame(c)

# Share of the loss that is not given to the hard label.
soft_budget = 1 - hard_label_weight
acc_total = weight_data_17_18_daisi['acc_soft'] + weight_data_17_18_daisi['acc_llm']
# Classes on which neither source is ever right: split the budget evenly.
no_signal = acc_total == 0

# Otherwise split the budget in proportion to the two accuracies.
weight_data_17_18_daisi['DS_soft'] = (soft_budget * weight_data_17_18_daisi['acc_soft'] / acc_total).mask(no_signal, 0.5 * soft_budget)
weight_data_17_18_daisi['DS_llm'] = (soft_budget * weight_data_17_18_daisi['acc_llm'] / acc_total).mask(no_signal, 0.5 * soft_budget)

In [None]:
#weight processing
# DI_* split the non-hard-label budget (1 - hard_label_weight) by the imbalance term:
# 1/(1+r) goes to the soft targets and r/(1+r) to the LLM, with r = ln_IR_17_18_daisi_d4.
# These two columns hold the same value for every class.
weight_data_17_18_daisi['DI_soft']=(1-hard_label_weight) * (1 / (1 + ln_IR_17_18_daisi_d4))
weight_data_17_18_daisi['DI_llm'] = (1-hard_label_weight) * ((ln_IR_17_18_daisi_d4) / (1 + ln_IR_17_18_daisi_d4))
weight_data_17_18_daisi['weight_true_label']=hard_label_weight
# Final per-class weights: blend of the accuracy-based (DS_*) and imbalance-based (DI_*) splits.
weight_data_17_18_daisi['weight_soft'] = DS_weight * weight_data_17_18_daisi['DS_soft'] + DI_weight * weight_data_17_18_daisi['DI_soft']
weight_data_17_18_daisi['weight_llm'] = DS_weight * weight_data_17_18_daisi['DS_llm'] + DI_weight * weight_data_17_18_daisi['DI_llm']

In [None]:
# Keep only the three per-class loss weights that train_d4 looks up by label index.
# .copy() makes acc_weight independent of weight_data_17_18_daisi.
# The former second line assigned a list to an attribute named
# `weight_data_17_18_daisi` on the frame; it did not rename any column (the
# selected columns already carry these names), so it has been removed.
acc_weight = weight_data_17_18_daisi[['weight_true_label','weight_soft','weight_llm']].copy()

In [None]:
len(weight_data_17_18_daisi)
acc_weight

In [None]:
# d-daisi
class DaisiVQADataset_old(Dataset):
    """Dataset with one item per question row of a CSV file, plus stored visual features.

    The CSV is expected to provide the columns read below: ``type``, ``path``,
    ``q``, ``labels`` and ``t5_loss``.

    Args:
        csv_file: path of the CSV file listing the questions.
        data_type: value of the ``type`` column to keep (e.g. ``'val'``).
        patch_size: selects the ``<patch_size>x<patch_size>`` feature folder.
        data_root: directory holding one sub-folder per ``path`` value.
            Defaults to ``DEFAULT_DATA_ROOT`` (the original Colab location).
    """

    # Location used when no data_root is given (previously hard-coded in __getitem__).
    DEFAULT_DATA_ROOT = '/content/drive/MyDrive/Colab Notebooks/research/multi-modality/daisi_vqa_final/data/'

    def __init__(self, csv_file, data_type, patch_size=5, data_root=None):
        self.patch_size = patch_size
        self.data_root = self.DEFAULT_DATA_ROOT if data_root is None else data_root
        all_rows = pd.read_csv(csv_file)
        # Keep only the rows of the requested split.
        self.data_frame = all_rows[all_rows['type']==data_type]
        unique_files = len(self.data_frame['path'].unique())
        total_questions = len(self.data_frame)
        print(f"Total files: {unique_files} | Total questions: {total_questions}")

    def __len__(self):
        """Return the number of question rows in the selected split."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Return ``(file_name, visual_features, q, labels, t5_loss)`` for row ``idx``.

        ``idx`` is positional (``iloc``), so the filtered frame does not need a
        reset index.
        """
        sample = self.data_frame.iloc[idx]
        q = sample['q']
        labels = torch.tensor(sample['labels'])
        file_name = str(sample['path'])

        # <data_root>/<file_name>/vqa/img_features/<P>x<P>/<file_name>.hdf5
        patch_dir = str(self.patch_size)+'x'+str(self.patch_size)
        visual_feature_loc = os.path.join(self.data_root, file_name, 'vqa/img_features', patch_dir, file_name+'.hdf5')
        # `[:]` reads the dataset into memory, so the file can be closed right away;
        # the context manager prevents leaking one open handle per sample.
        with h5py.File(visual_feature_loc, 'r') as frame_data:
            visual_features = torch.from_numpy(frame_data['visual_features'][:])

        # NOTE(review): eval() executes whatever text the CSV cell contains. Only use
        # trusted CSV files; consider ast.literal_eval if the cells are plain literals.
        t5_loss = torch.tensor(eval(sample['t5_loss']))

        return file_name, visual_features, q, labels, t5_loss

In [None]:
# Validation sets and loaders for the four datasets evaluated after each epoch.
# The d4 and daisi loaders use batch_size=1; the EndoVis17/18 loaders use 64.

# --- dataset 4 ---
D4_VAL_CSV = '/content/drive/MyDrive/Colab Notebooks/research/multi-modality/dataset_4/data.csv'
val_dataset_d4 = LrspDataset(D4_VAL_CSV, 'val', patch_size=5)
val_dataloader_d4 = DataLoader(dataset=val_dataset_d4, batch_size=1, shuffle=False)

# --- EndoVis17 (sequence 8) ---
ENDOVIS17_SEQ_PREFIX = '/content/drive/MyDrive/Colab Notebooks/research/multi-modality/endovis17/seq_'
ENDOVIS17_QA_GLOB = '/vqa/*.txt'
val_dataset_17 = EndoVis17VQAClassification([8], ENDOVIS17_SEQ_PREFIX, ENDOVIS17_QA_GLOB, patch_size=5)
val_dataloader_17 = DataLoader(dataset=val_dataset_17, batch_size=64, shuffle=False)

# --- EndoVis18 (sequences 1, 5, 16) ---
ENDOVIS18_SEQ_PREFIX = '/content/drive/MyDrive/Colab Notebooks/research/multi-modality/endovis18/seq_'
ENDOVIS18_QA_GLOB = '/vqa/Classification_t5_loss/*.txt'
val_dataset_18 = EndoVis18VQAClassification([1,5,16], ENDOVIS18_SEQ_PREFIX, ENDOVIS18_QA_GLOB, patch_size=5)
val_dataloader_18 = DataLoader(dataset=val_dataset_18, batch_size=64, shuffle=False)

# --- DAISI ---
DAISI_VAL_CSV = '/content/drive/MyDrive/Colab Notebooks/research/multi-modality/daisi_vqa_final/daisi_data.csv'
val_dataset_daisi = DaisiVQADataset_old(DAISI_VAL_CSV, 'val', patch_size=5)
val_dataloader_daisi = DataLoader(dataset=val_dataset_daisi, batch_size=1, shuffle=False)

In [None]:
# len(val_dataset_d4)

In [None]:
for epoch in range(start_epoch, epoch_num): # train only a few epochs to reduce training time

  # After every 5th consecutive epoch without improvement, call adjust_learning_rate
  # with factor 0.8 (presumably scales the learning rate; see its definition).
  if epochs_since_improvement > 0 and epochs_since_improvement % 5 == 0:
    adjust_learning_rate(optimizer, 0.8)

  # train
  train_acc = train_d4(
      args,
      train_dataloader=train_dataloader_lrsp,
      model=model,
      criterion=criterion,
      optimizer=optimizer,
      epoch=epoch,
      tokenizer=tokenizer,
      device=device,
  )

  # validation: keyword arguments shared by every evaluation call of this epoch
  shared_eval_kwargs = dict(model=model, criterion=criterion, epoch=epoch, tokenizer=tokenizer, device=device)

  # test 17
  test_acc_17, test_c_acc, test_precision, test_recall, test_fscore_17 = validate_17(
      args, val_loader=val_dataloader_17, **shared_eval_kwargs)

  # test 18
  test_acc_18, test_c_acc, test_precision, test_recall, test_fscore_18 = validate_18_d3_d4(
      args, val_loader=val_dataloader_18, **shared_eval_kwargs)

  # test daisi
  test_acc_daisi, test_c_acc, test_precision, test_recall, test_fscore_daisi = validate_18_d3_d4(
      args, val_loader=val_dataloader_daisi, **shared_eval_kwargs)

  # test d4 (test_c_acc / test_precision / test_recall end up holding the d4 values)
  test_acc, test_c_acc, test_precision, test_recall, test_fscore_d4 = validate_18_d3_d4(
      args, val_loader=val_dataloader_d4, **shared_eval_kwargs)
  test_acc_d4 = test_acc

  # unweighted mean over the four validation sets
  av_acc = (test_acc_d4+test_acc_17+test_acc_18+test_acc_daisi)/4
  av_fscore = (test_fscore_d4+test_fscore_17+test_fscore_18+test_fscore_daisi)/4
  print('epoch: %d | Average acc: %.6f' %(epoch, av_acc))
  print('epoch: %d | Average fscore: %.6f' %(epoch, av_fscore))

  improved = av_acc >= best_results[0]
  if improved:
    # New best (ties count): reset the patience counter and save a checkpoint.
    epochs_since_improvement = 0

    best_results[0] = av_acc
    best_epoch[0] = epoch
    save_clf_checkpoint(args.checkpoint_dir, epoch, epochs_since_improvement, model, optimizer, best_results[0], final_args)

  else:
    epochs_since_improvement += 1

  # Stop early once the value returned by train_d4 reaches 1.0.
  if train_acc >= 1.0:
    break