In [None]:
import json
from PIL import Image
import torch
import os
from torch import nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from tqdm import tqdm

#wandb
!pip install wandb -qqq
import wandb

#torchvision
#!pip install torchvision --upgrade
import torchvision

# Caricamento dei Dataset

In [None]:
# Load every Hateful Memes (HM) split from its JSON-lines file
train = pd.read_json("../input/hatefulmeme/hm_data/hm_data/train.jsonl",lines=True)
val_seen = pd.read_json('../input/hatefulmeme/hm_data/hm_data/dev_seen.jsonl',lines=True)
val_unseen = pd.read_json('../input/hatefulmeme/hm_data/hm_data/dev_unseen.jsonl',lines=True)
test_seen = pd.read_json('../input/hatefulmeme/hm_data/hm_data/test_seen.jsonl',lines=True)
test_unseen = pd.read_json('../input/hatefulmeme/hm_data/hm_data/test_unseen.jsonl',lines=True)

# Pool all HM splits (train, dev and test) into a single frame with a fresh 0..n-1 index
metadata_hateful = pd.concat([train, val_seen, val_unseen, test_seen, test_unseen], ignore_index=True)

# Load the Misogyny dataset
metadata_misogyny = pd.read_json("../input/misogynydataset/misogyny.jsonl",lines=True)
# 79% of the misogyny rows (fixed seed) go into the training pool
metadata_misogyny_train = metadata_misogyny.sample(frac=0.79, random_state=42)
# Half of the remaining rows becomes the misogyny test split; the other half is not bound to any name in this cell
metadata_misogyny_test = metadata_misogyny.drop(metadata_misogyny_train.index).sample(frac=0.5, random_state=42)



# Concatenate the HM metadata with the misogyny training rows (only metadata is merged here, no embeddings)
metadata = pd.concat([metadata_hateful, metadata_misogyny_train], ignore_index=True)

In [None]:
# Show (rows, columns) of the held-out misogyny test split
metadata_misogyny_test.shape

In [None]:
# 90% of the pooled metadata is sampled (fixed seed) for training
train_df = metadata.sample(frac=0.90, random_state=42)

# From the rows left out of training, a seeded half is kept for validation.
# reset_index() keeps the former index as an "index" column.
valid_df = (
    metadata
    .drop(train_df.index)
    .sample(frac=0.5, random_state=42)
    .reset_index()
)

# Same re-indexing for the training frame, done last because the validation
# split above relies on the original training index
train_df = train_df.reset_index()

In [None]:
# Show (rows, columns) of the training frame
train_df.shape

In [None]:
# Show (rows, columns) of the validation frame
valid_df.shape

# Dataloader

In [None]:
class JsonlDataset(torch.utils.data.Dataset):
  """Dataset over a metadata frame mixing Hateful Memes (HM) and Misogyny (MIS) rows.

  Each item is a ``(text, image, label)`` tuple:

  * ``text``  - FloatTensor loaded from ``<text_embeds_dir>/<id>.npy``
  * ``image`` - RGB image resized to 256x256 and converted with ``to_tensor``
  * ``label`` - FloatTensor holding the single value of the ``label`` column

  A row is treated as MIS when its ``img`` value contains ``'jpg'``, otherwise as HM.
  The graph-embedding paths are stored but not read by ``__getitem__``.

  Args:
    df: DataFrame with at least the columns ``id``, ``img`` and ``label``.
    HM_text_embeds_path: directory with the HM text embeddings (``.npy``).
    HM_graph_embeds_path: directory with the HM graph embeddings (stored only).
    MIS_text_embeds_path: directory with the MIS text embeddings (``.npy``).
    MIS_graph_embeds_path: directory with the MIS graph embeddings (stored only).
  """

  def __init__(self, df, HM_text_embeds_path, HM_graph_embeds_path, MIS_text_embeds_path, MIS_graph_embeds_path):
    self.data = df
    self.HM_text_embeds_path = HM_text_embeds_path
    self.MIS_text_embeds_path = MIS_text_embeds_path
    self.HM_graph_embeds = HM_graph_embeds_path
    self.MIS_graph_embeds = MIS_graph_embeds_path
    self.data_dir_mis = '../input/misogynydataset/img/'
    self.data_dir_hm = '../input/hatefulmeme/hm_data/hm_data/'

  def __len__(self):
    return len(self.data)

  def _value(self, column, index):
    """Return the value of ``column`` at row *position* ``index``."""
    # Positional lookup: samplers hand out 0..len-1, which only matches the
    # frame's labels when its index happens to be a fresh RangeIndex.
    return self.data[column].iloc[index]

  def _load_image(self, path):
    """Open ``path``, convert to RGB, resize to 256x256 and return it as a tensor."""
    # The context manager closes the underlying file once the pixels are copied
    with Image.open(path) as raw:
      rgb = raw.convert('RGB').resize((256,256))
    return torchvision.transforms.functional.to_tensor(rgb)

  def __getitem__(self, index):
    if torch.is_tensor(index):
      index = index.tolist()

    img_name = self._value('img', index)
    sample_id = self._value('id', index)

    # MIS images are the ones whose file name contains 'jpg'; everything else is HM
    if 'jpg' in img_name:
      text_dir, image_root = self.MIS_text_embeds_path, self.data_dir_mis
    else:
      text_dir, image_root = self.HM_text_embeds_path, self.data_dir_hm

    text = torch.FloatTensor(np.load(f'{text_dir}/{sample_id}.npy'))
    image = self._load_image(image_root + img_name)
    label = torch.FloatTensor([self._value('label', index)])

    return text,image,label

# Modello

In [None]:
class ImageEncoder(nn.Module):
    """Binary classifier fusing ResNet-50 image features with a text embedding.

    The image goes through a frozen, pretrained ResNet-50 truncated before its
    final fully connected layer (2048 features), is reduced to 768 features by
    two linear layers, concatenated with the text embedding and passed through
    an MLP ending in a single sigmoid unit.

    Args:
        **kwargs: accepted for call-site compatibility; not used.
    """

    def __init__(self, **kwargs):
        super().__init__()

        backbone = torchvision.models.resnet50(pretrained=True)

        # Keep everything up to (and including) the last average pool: 2048 features
        self.model = torch.nn.Sequential(*list(backbone.children())[:-1])

        # Freeze the backbone parameters
        for param in self.model.parameters():
            param.requires_grad = False

        # Reduce the image features 2048 -> 1024 -> 768
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, 768)

        # Head applied to the concatenated image (768) and text (768) features
        self.fc3 = nn.Linear(768 + 768, 1300)
        self.fc4 = nn.Linear(1300, 700)
        self.fc5 = nn.Linear(700, 250)
        self.fc6 = nn.Linear(250, 50)
        self.fc7 = nn.Linear(50, 1)

        self.dropout = nn.Dropout(0.3)

    def forward(self, image, text):
        """Return the sigmoid score for each sample.

        Args:
            image: batch of images fed to the backbone.
            text: text embeddings; a size-1 axis at dim 1 is squeezed away
                (assumes (batch, 1, 768) or (batch, 768) - TODO confirm).

        Returns:
            Tensor of shape (batch, 1) with values in (0, 1).
        """
        # Image branch. flatten(start_dim=1) keeps the batch axis even when the
        # batch holds a single sample; a bare squeeze() would drop it and make
        # the concatenation below fail.
        x1 = torch.flatten(self.model(image), start_dim=1)
        x1 = F.relu(self.fc1(x1))
        x1 = F.relu(self.fc2(x1))

        # Text branch
        x2 = text.squeeze(dim=1)

        # Fuse image and text features
        x = torch.cat((x1, x2), dim=1)

        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.dropout(x)
        x = F.relu(self.fc5(x))
        x = F.relu(self.fc6(x))
        x = self.dropout(x)
        x = self.fc7(x)

        return torch.sigmoid(x)

# Dataset/Dataloader

In [None]:
# Datasets: both splits read from the same embedding directories
train_dataset = JsonlDataset(train_df, '../input/text-hm-embeds/content/text_embeds_hateful', '../input/h2v-768-complete/content/embeddings','../input/text-mis-embeds/content/text_embeds_misogyny','../input/misogyny-graph-h2v-lvl-1-complete/misogyny_complete_lvl_1_embedding')
valid_dataset = JsonlDataset(valid_df, '../input/text-hm-embeds/content/text_embeds_hateful', '../input/h2v-768-complete/content/embeddings','../input/text-mis-embeds/content/text_embeds_misogyny','../input/misogyny-graph-h2v-lvl-1-complete/misogyny_complete_lvl_1_embedding')

# Dataloaders
BATCH_SIZE=32
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers = 2)
# Validation is not shuffled: the batch composition (and therefore the
# batch-averaged loss/accuracy) stays the same from one epoch to the next
valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers = 2)


# Iperparametri

In [None]:
# Use the GPU when one is available, otherwise the CPU
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Module.to() moves the parameters and returns the same module object
BertResModel = ImageEncoder().to(DEVICE)
print('Modello caricato!')

In [None]:
# Authenticate with Weights & Biases.
# NOTE(review): a credential-looking string used to be kept in a comment here.
# Secrets must not live in the notebook source; if it was a real key, rotate it.
wandb.login()

# Single source of truth for the learning rate (optimizer and logged config)
LEARNING_RATE = 0.0001

optimizer = torch.optim.AdamW(BertResModel.parameters(),lr=LEARNING_RATE)
criterion_ce = torch.nn.BCELoss()

EPOCHE = 30
# A checkpoint is written only when the validation F1 exceeds this value
best_F1 = 0.3

# mode='max': the learning rate is cut by `factor` after `patience` epochs
# without improvement of the metric passed to scheduler.step()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.1,
    patience=4,
    threshold=0.001,
    threshold_mode='rel',
    cooldown=0,
    min_lr=0,
    eps=1e-08,
    verbose=True,
)

# Per-epoch training history
train_logs = {
    'loss': [],
    'accuracy': [],
}

# Per-epoch validation history
valid_logs = {
    'loss': [],
    'accuracy': [],
    'F1': [],
    'AUC': [],
}

wandb.init(
      # Set the project where this run will be logged
      project="Augumented_HM+MIS",
      # Track hyperparameters and run metadata
      config={
      "learning_rate": LEARNING_RATE,
      "architecture": "CNN+transformer",
      "dataset": "HatefulMemeDetection",
      "epochs": EPOCHE,
      })

In [None]:
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, classification_report, confusion_matrix

def binary_acc(y_pred, y_gt):
    """Accuracy of thresholded predictions against the ground truth.

    Args:
        y_pred: tensor of scores; values above 0.5 count as the positive class.
        y_gt: tensor of ground-truth labels.

    Returns:
        The accuracy as a ``numpy.float64``. A NumPy scalar is returned on
        purpose: callers in this notebook call ``.item()`` on the result,
        which a plain Python float would not support.
    """
    tres = 0.5
    preds = y_pred.detach().cpu().numpy() > tres
    out_label_ids = y_gt.detach().cpu().numpy()

    return np.float64(accuracy_score(out_label_ids, preds))

def class_report(y_pred, y_gt):
    """Build the classification report for scores thresholded at 0.5.

    Args:
        y_pred: sequence of scores; values above 0.5 count as positive.
        y_gt: sequence of ground-truth labels.

    Returns:
        The report produced by ``classification_report`` with ``output_dict=True``.
    """
    hard_labels = np.array(y_pred) > 0.5
    return classification_report(y_gt, hard_labels, output_dict=True)

def f1(y_preds, y_gt):
    """Micro-averaged F1 of scores thresholded at 0.5.

    NOTE(review): for single-label targets a micro-averaged F1 is expected to
    coincide with accuracy - confirm that 'micro' is the intended average.

    Args:
        y_preds: sequence of scores; values above 0.5 count as positive.
        y_gt: sequence of ground-truth labels.

    Returns:
        The value returned by ``f1_score(..., average='micro')``.
    """
    hard_labels = np.array(y_preds) > 0.5
    return f1_score(y_gt, hard_labels, average='micro')

def AUC(y_preds, y_gt):
    """ROC AUC of the raw scores ``y_preds`` against the labels ``y_gt``.

    The scores are passed through unthresholded, with ``average='micro'``.
    """
    auc_value = roc_auc_score(y_gt, y_preds, average='micro')
    return auc_value

def binary_acc2(y_pred, y_gt):
    """Accuracy for already-collected (non-tensor) scores thresholded at 0.5.

    Args:
        y_pred: sequence of scores; values above 0.5 count as positive.
        y_gt: sequence of ground-truth labels.

    Returns:
        The value returned by ``accuracy_score``.
    """
    hard_labels = np.array(y_pred) > 0.5
    return accuracy_score(y_gt, hard_labels)

def unpack(lista):
    """Flatten one nesting level: a sequence of sequences becomes a flat list.

    Args:
        lista: sequence whose elements are themselves sequences.

    Returns:
        A new list with the inner elements in their original order.
    """
    return [element for group in lista for element in group]

In [None]:
def ModelEval(model, valid_dataloader, step=None):
    """Run one validation pass, update the scheduler and the logs.

    Relies on these notebook-level names: ``DEVICE``, ``criterion_ce``,
    ``scheduler``, ``valid_logs``, ``wandb`` and the metric helpers
    (``binary_acc``, ``f1``, ``AUC``, ``unpack``).

    Args:
        model: module called as ``model(image, text)``.
        valid_dataloader: loader yielding ``(text, image, label)`` batches.
        step: wandb step to log under. When ``None`` the notebook-level
            ``epoch`` variable of the training loop is used.

    Returns:
        The validation F1 of this pass (the value checkpoints are selected on).
    """
    print("VALIDATION...")
    loop_valid = tqdm(valid_dataloader)
    running_loss_valid = 0.0
    running_accuracy_valid = 0.0

    predizioni = []
    GT = []

    model.eval()

    with torch.no_grad():
        for text, image, label in loop_valid:
            text = text.to(device=DEVICE)
            image = image.to(device=DEVICE)
            label = label.to(device=DEVICE)

            # Forward pass
            outputs = model(image, text)
            valid_loss = criterion_ce(outputs, label)

            # float() accepts both NumPy scalars and plain Python floats
            valid_accuracy = float(binary_acc(outputs, label))

            # Running sums for the per-batch averages
            running_loss_valid += valid_loss.item()
            running_accuracy_valid += valid_accuracy

            # Keep every prediction / label for the epoch-level metrics
            predizioni.append(outputs.detach().cpu().numpy())
            GT.append(label.detach().cpu().numpy())

            loop_valid.set_postfix(loss = valid_loss.item(), accuracy = valid_accuracy)

    # Flatten once and compute each epoch-level metric a single time
    flat_preds = unpack(predizioni)
    flat_gt = unpack(GT)
    auc_value = AUC(flat_preds, flat_gt)
    f1_value = f1(flat_preds, flat_gt)

    # The learning-rate scheduler follows the validation AUC
    scheduler.step(auc_value)

    # Statistics
    n_batches = len(valid_dataloader)
    valid_logs['loss'].append(running_loss_valid/n_batches)
    valid_logs['accuracy'].append(running_accuracy_valid/n_batches)
    valid_logs['F1'].append(f1_value)
    valid_logs['AUC'].append(auc_value)

    print(f"F1: {valid_logs['F1'][-1]:.4f}")

    # Update the logger
    wandb.log(
        {
            "valid_loss": valid_logs['loss'][-1],
            "valid_accuracy": valid_logs['accuracy'][-1],
            "F1": valid_logs['F1'][-1],
            "AUC": valid_logs['AUC'][-1],
        },
        step=epoch if step is None else step
    )

    # The model is checkpointed on this value, hence it is returned
    return valid_logs['F1'][-1]

In [None]:
# TRAINING
model=BertResModel
LOADER = train_dataloader

for epoch in range(EPOCHE):  # loop over the dataset multiple times

    print(f"Epoca: {epoch+1}\n")
    print("TRAINING...")

    loop = tqdm(LOADER)
    running_loss = 0.0
    running_accuracy = 0.0

    model.train()

    for i, data in enumerate(loop):
        # Each batch is (text, image, label)
        text, image, label = data
        text = text.to(device=DEVICE)
        image = image.to(device=DEVICE)
        label = label.to(device=DEVICE)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(image, text)
        loss = criterion_ce(outputs, label)
        loss.backward()
        optimizer.step()

        # float() accepts both NumPy scalars and plain Python floats
        acc = float(binary_acc(outputs, label))

        running_loss += loss.item()
        running_accuracy += acc

        loop.set_postfix(loss = loss.item(),accuracy = acc)

    # End-of-epoch summary: averages over the batches of this epoch
    n_batches = len(LOADER)
    if n_batches:
        mean_loss = running_loss/n_batches
        mean_accuracy = running_accuracy/n_batches

        print('loss media sul minibatch training: %.3f' %(mean_loss))
        print('accuracy media sul minibatch training: %.3f' %(mean_accuracy))
        train_logs['loss'].append(mean_loss)
        train_logs['accuracy'].append(mean_accuracy)

        wandb.log({"train_loss": mean_loss, "train_accuracy": mean_accuracy,"epoch": epoch}, step=epoch)

    # Validation pass (also steps the LR scheduler and logs to wandb)
    valid_F1 = ModelEval(model, valid_dataloader)

    # Checkpoint whenever the validation F1 improves
    if valid_F1 > best_F1:
        best_F1 = valid_F1
        torch.save(model.state_dict(), f'./best_model_F1_{best_F1:.4f}_epoca_{epoch+1}.pth')
        print('Model saved!')

print('Finished Training')
wandb.finish()

In [None]:
# Close the wandb run; the training cell already calls this after its loop, so this is for manual use (e.g. after an interrupted run)
wandb.finish()

# Testing

In [None]:
import json
from PIL import Image
import torchvision
import torch
import os
from torch import nn
import torch.nn.functional as F
import pandas as pd
import numpy as np

In [None]:
# Re-number the test rows 0..n-1. drop=True discards the old index instead of
# inserting it as a column, so re-running this cell does not keep adding
# 'index' / 'level_0' columns (which eventually raises).
metadata_misogyny_test = metadata_misogyny_test.reset_index(drop=True)

In [None]:
# Test set: the held-out misogyny rows, read from the same embedding directories used for training
test_dataset = JsonlDataset(
    metadata_misogyny_test,
    HM_text_embeds_path='../input/text-hm-embeds/content/text_embeds_hateful',
    HM_graph_embeds_path='../input/h2v-768-complete/content/embeddings',
    MIS_text_embeds_path='../input/text-mis-embeds/content/text_embeds_misogyny',
    MIS_graph_embeds_path='../input/misogyny-graph-h2v-lvl-1-complete/misogyny_complete_lvl_1_embedding',
)

# No shuffling is requested: batches follow the row order of the frame
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=32,
    num_workers=2,
)

In [None]:
# Device used for the checkpoint mapping and for inference
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fresh network whose parameters are overwritten by the saved checkpoint
TestModel = ImageEncoder()
weights = torch.load('./best_model_F1_0.7984_epoca_9.pth',map_location=DEVICE)
TestModel.load_state_dict(weights)

# Evaluation state
running_accuracy_test = 0.0

# Module.to() returns the module itself, so model_val and TestModel are the same object
model_val = TestModel.to(DEVICE)
print("Fatto!")

In [None]:
# Preview the first rows of the misogyny test split
metadata_misogyny_test.head()

In [None]:
from tqdm import tqdm

# Accumulators: one array of scores / labels per batch
predizioni, GT, reports = [], [], []

TEST_LOADER = tqdm(test_loader)
model_val.eval()

with torch.no_grad():
    for i, data in enumerate(TEST_LOADER):
        # Each batch is (text, image, label); move all three to the device in that order
        text, image, label = (tensor.to(device=DEVICE) for tensor in data)

        # Forward pass only
        outputs = model_val(image, text)

        predizioni.append(outputs.detach().cpu().numpy())
        GT.append(label.detach().cpu().numpy())

In [None]:
 # Test metrics computed on the flattened predictions and labels
 metriche = dict(
     accuracy=binary_acc2(unpack(predizioni), unpack(GT)),
     AUC=AUC(unpack(predizioni), unpack(GT)),
     F1=f1(unpack(predizioni), unpack(GT)),
     report=class_report(unpack(predizioni), unpack(GT)),
 )

In [None]:
# Display the test metrics dictionary
metriche