In [None]:
# Mounts your Google Drive as a folder inside the Colab runtime (mount point: /content/drive).

from google.colab import drive
drive.mount('/content/drive');

Mounted at /content/drive


Installiamo la libreria transformers

In [None]:
!pip install transformers

Importiamo le librerie necessarie:


1.   **RobertaTokenizer**: tokenizer di HuggingFace dei modelli RoBERTa. CodeBERT (`microsoft/codebert-base`) è basato su RoBERTa, quindi carichiamo direttamente _RobertaTokenizer_ (è il tipo di tokenizer che il modulo _AutoTokenizer_ selezionerebbe automaticamente per questo modello).
Quest'ultimo divide il testo in token utilizzando un algoritmo BPE (Byte-Pair Encoding) a livello di byte, che suddivide le parole in parti più piccole e più comuni (subword), in modo da poter gestire parole sconosciute o poco frequenti durante l'addestramento.
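2.   **RobertaForSequenceClassification**: classe di HuggingFace che carica il modello pre-addestrato indicato dal nome passato come parametro (nel nostro caso CodeBERT, `microsoft/codebert-base`) e vi aggiunge una testa di classificazione. È il tipo di modello che i moduli _Auto Model_ (ad esempio `AutoModelForSequenceClassification`) selezionerebbero automaticamente in base al nome del modello.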
3.   **train_test_split**: questa funzione, proviene dalla libreria _sklearn_, e permette di dividere un dataset in due parti: _training-set_ e _test-set_. Rispettivamente, una verrà usata per allenare il modello, mentre l'altra verrà usata per valutare la sua capacità di generalizzazione.
4.   **pandas**: offre strumenti per l'analisi di dati in forma tabellare, dataframe e manipolazione di essi. Permette operazioni come applicazione di filtri a colonne, aggregazione e merge di dataframe.
5.  **numpy**: elaborazione di array numerici multidimensionali, per quest'ultimi, offre operazioni di algebra lineare e operazioni matematiche.
6.  **tabulate**: permette di trasformare un array in una tabella per visualizzare graficamente strutture dati.
7.  **tqdm**: permette di visualizzare progressbar a cicli di elaborazione iterativi. Potrebbe essere utile in questa fase dello sviluppo del sistema, per monitorare i tempi che richiedono i vari processi.
 



In [None]:
import torch # framework di ML
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler #manipolazione dei dati
from transformers import RobertaTokenizer, RobertaForSequenceClassification

from sklearn.model_selection import train_test_split # funzione in sklearn per dividere il dataset in train, test, validation

import pandas as pd
import numpy as np

from tabulate import tabulate
from tqdm import trange
import random # generazione di numeri random

import warnings
warnings.filterwarnings(action='once')

In [None]:
file_path = "/content/drive/Shareddrives/se4ai/ideal-dataset.xlsx - ideal-dataset_1 (1).CSV (9).csv"; # Run this cell if you mounted the Google Drive folder in the first cell. (Replace the path with the path of your .csv file)

In [None]:
file_path = "ideal-dataset.csv"; # Run this cell if you uploaded the dataset manually into the runtime (it reassigns file_path, so run only one of the two file_path cells)

In [None]:
df = pd.read_csv(file_path) # pandas read_csv parses the CSV file into a DataFrame

# Build the list of integer class ids and the list of code snippets.
# Class ids used throughout the notebook: mop = 0, aop = 1, clr = 2, nic = 3.
LABEL_TO_ID = {'mop': 0, 'aop': 1, 'clr': 2, 'nic': 3}

# Fail loudly on labels outside the mapping: silently skipping those rows would
# leave `labels` shorter than `code` and break the label/snippet alignment.
unknown_labels = sorted(set(df['label']) - set(LABEL_TO_ID), key = str)
if unknown_labels:
  raise ValueError('Unexpected label(s) in dataset: {}'.format(unknown_labels))

labels = [LABEL_TO_ID[l] for l in df['label']]

code = list(df['code'])
print(labels)



[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 

In [None]:
# Split the dataset into train / test / validation sets, stratified by label.
# test_size parameters (default values: TRAIN_TEST_SIZE = 0.33, VAL_TEST_SIZE = 0.3):
#   - TRAIN_TEST_SIZE: fraction of the whole dataset held out from training (test + validation together);
#   - VAL_TEST_SIZE: fraction of that held-out part used for validation; the remainder is the test set.
# With the defaults: train = 67% of the data; of the remaining 33%, 70% is test and 30% is validation.
TRAIN_TEST_SIZE = 0.33;
VAL_TEST_SIZE = 0.3;
# Larger held-out fractions give more examples for test/validation, at the cost of fewer training examples.
# Fixed seed so that every run produces the same split and the reported metrics are comparable between runs.
SPLIT_SEED = 42;

train_codes, temp_codes, train_labels, temp_labels = train_test_split(code, labels, test_size = TRAIN_TEST_SIZE, shuffle = True, stratify = labels, random_state = SPLIT_SEED);
test_codes, val_codes, test_labels, val_labels = train_test_split(temp_codes, temp_labels, test_size = VAL_TEST_SIZE, shuffle = True, stratify = temp_labels, random_state = SPLIT_SEED);



inizializziamo il tokenizer ed il modello

In [None]:
# The first time this runs (in a given runtime) the tokenizer and model files have to be downloaded.
# After running, the following warning is printed:
# ---
# Some weights of the model checkpoint at microsoft/codebert-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
# This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# ---
# That is exactly what we are doing here, so the warning is fine.
# NOTE(review): do_lower_case looks like a BERT-style tokenizer option; confirm whether it has any effect on RobertaTokenizer (lower-casing source code is probably not desired anyway).
tokenizer = RobertaTokenizer.from_pretrained('microsoft/codebert-base', do_lower_case = True);

# Mind the num_labels parameter: it must match the number of classes to predict (4 = aop, clr, mop, nic)
model = RobertaForSequenceClassification.from_pretrained('microsoft/codebert-base', num_labels = 4)



Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/498 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of the model checkpoint at microsoft/codebert-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be 

In [None]:
def preprocessing(input_text, tokenizer, max_length = 150):
  '''
  Encodes one text sample into fixed-length model inputs.

  Args:
    input_text: the text (here, a code snippet) to encode.
    tokenizer: object exposing a HuggingFace-style `encode_plus` method.
    max_length: length every sample is truncated / padded to
      (default 150, the value that used to be hard-coded).

  Returns:
    Whatever `tokenizer.encode_plus` returns. The rest of this notebook reads
    the following entries from it:
      - input_ids: the token ids
      - attention_mask: 0/1 flags telling the model which positions to attend to
        (requested with return_attention_mask = True).
    NOTE(review): an earlier version of this docstring also listed token_type_ids;
    whether that entry is present depends on the tokenizer class - confirm before relying on it.
  '''
  return tokenizer.encode_plus(
                        input_text,
                        add_special_tokens = True,
                        max_length = max_length,
                        truncation=True,
                        padding='max_length',
                        return_attention_mask = True,
                        return_tensors = 'pt'
                   )

def preprocessing_batch(data_set):
    '''
    Encodes every sample of `data_set` with `preprocessing` (using the notebook-level
    `tokenizer`) and stacks the results.

    Returns the pair (token ids, attention masks), each obtained by concatenating
    the per-sample tensors along dimension 0.
    '''
    encodings = [preprocessing(sample, tokenizer) for sample in data_set]
    stacked_ids = torch.cat([enc['input_ids'] for enc in encodings], dim = 0)
    stacked_masks = torch.cat([enc['attention_mask'] for enc in encodings], dim = 0)
    return stacked_ids, stacked_masks

# Encode the three splits: each call returns the stacked token ids and attention masks of that split.
train_token_id,train_attention_masks = preprocessing_batch(train_codes);
test_token_id,test_attention_masks = preprocessing_batch(test_codes);
val_token_id,val_attention_masks = preprocessing_batch(val_codes);

def print_rand_sentence_encoding(text, token_id):
  '''
  Prints the tokens and token ids of one randomly chosen encoded sample.

  Args:
    text: the collection of raw samples; only its length is used, to bound the random index.
    token_id: the encoded samples (one row of token ids per sample), e.g. the
      first value returned by `preprocessing_batch`.
  '''
  # Bound the index by both collections so a shorter `token_id` cannot be over-indexed.
  index = random.randint(0, min(len(text), len(token_id)) - 1)
  token_ids = token_id[index].tolist()
  # Map every id straight back to its token. Decoding to a string and tokenizing
  # again does not guarantee one token per id, which would break the table below.
  tokens = tokenizer.convert_ids_to_tokens(token_ids)
  print(tokens);
  table = list(zip(tokens, token_ids))
  print(tabulate(table,
                 headers = ['Tokens', 'Token IDs'],
                 tablefmt = 'fancy_grid'))

# For instance, select a random instance of the training set:
# print_rand_sentence_encoding(train_codes, train_token_id)


In [None]:
# Convert the label collections of the three splits into torch tensors (needed to build the TensorDatasets below).
train_labels = torch.tensor(train_labels)
test_labels = torch.tensor(test_labels)
val_labels = torch.tensor(val_labels)

In [None]:
batch_size = 16 # authors' note: batch sizes of roughly 90-120 crashed the GPU

# Each dataset yields (token ids, attention mask, label) triples for one split.
train_set = TensorDataset(train_token_id, 
                          train_attention_masks, 
                          train_labels)

val_set = TensorDataset(val_token_id, 
                        val_attention_masks, 
                        val_labels)

test_set = TensorDataset(test_token_id, 
                        test_attention_masks, 
                        test_labels)

# Training batches are drawn with a RandomSampler; validation and test batches
# use a SequentialSampler.
train_dataloader = DataLoader(
            train_set,
            sampler = RandomSampler(train_set),
            batch_size = batch_size
        )

validation_dataloader = DataLoader(
            val_set,
            sampler = SequentialSampler(val_set),
            batch_size = batch_size
        )


test_dataloader = DataLoader(
            test_set,
            sampler = SequentialSampler(test_set),
            batch_size = batch_size
        )

# Metriche di valutazione del modello
Nella sezione successiva verranno create le funzioni per la valutazione delle performance del modello.
Come sappiamo, per poter 

In [None]:
'''
Arg:
  preds: l'array di predizioni effettuate dal modello
  labels: l'oracolo delle predizioni, le effettive classi alle quali appartengono gli esempi che sono stati valutati
  lb_type: la classe per la quale vogliamo andare ad effettuare la valutazione
'''

def b_tp(preds, labels, lb_type):
  '''
    True positives for class `lb_type`: counts the samples whose prediction
    matches the true label and is `lb_type`.
  '''
  hits = 0
  for pred, label in zip(preds, labels):
    hits += (pred == label and pred == lb_type)
  return hits

def b_fp(preds, labels, lb_type):
  '''
    False positives for class `lb_type`: counts the samples predicted as
    `lb_type` whose true label is a different class.
  '''
  def is_false_positive(pair):
    pred, label = pair
    return pred != label and pred == lb_type

  return sum(map(is_false_positive, zip(preds, labels)))

def b_tn(preds, labels, lb_type):
  '''
    True negatives for class `lb_type` (one-vs-rest): counts the samples that
    neither belong to `lb_type` nor were predicted as `lb_type`.

    The prediction does not have to match the true label: a sample of class 1
    predicted as class 2 is still a true negative for class 0. Requiring
    pred == label (as this function used to) misses those samples, so
    TN + FP no longer adds up to the number of actual negatives and the
    specificity TN / (TN + FP) comes out wrong.
  '''
  return sum([pred != lb_type and label != lb_type for pred, label in zip(preds, labels)])

def b_fn(preds, labels, lb_type):
  '''
    False negatives for class `lb_type`: counts the samples whose true label is
    `lb_type` but that were predicted as a different class.

    The true label must be EQUAL to `lb_type`; the previous condition
    (label != lb_type) counted misclassified samples of the other classes instead.
  '''
  return sum([pred != label and label == lb_type for pred, label in zip(preds, labels)])


'''
  metriche per le singole classi
'''
def b_metrics(preds, labels, lb_type):
  '''
  Computes the metrics of a single class (`lb_type`) on one batch.

  Args:
    preds: 2-D array of per-class scores, one row per sample; the predicted
      class of a sample is the argmax along axis 1.
    labels: array of true class ids (flattened before use).
    lb_type: id of the class the metrics are computed for.

  Returns the tuple (accuracy, precision, recall, specificity), where
    - accuracy    = (TP + TN) / N
    - precision   = TP / (TP + FP)
    - recall      = TP / (TP + FN)
    - specificity = TN / (TN + FP)
  and TP, TN, FP, FN come from b_tp, b_tn, b_fp, b_fn.

  A ratio whose denominator is zero (e.g. precision when the class is never
  predicted in the batch) is returned as float('nan'). A float is used rather
  than the string 'nan' because callers sum these values.
  '''
  preds = np.argmax(preds, axis = 1).flatten()
  labels = labels.flatten()
  tp = b_tp(preds, labels, lb_type)
  tn = b_tn(preds, labels, lb_type)
  fp = b_fp(preds, labels, lb_type)
  fn = b_fn(preds, labels, lb_type)
  nan = float('nan')
  # Explicit guards: an unguarded 0 / 0 raises ZeroDivisionError with Python ints
  # and emits a RuntimeWarning with numpy ints.
  b_accuracy = (tp + tn) / len(labels) if len(labels) > 0 else nan
  b_precision = tp / (tp + fp) if (tp + fp) > 0 else nan
  b_recall = tp / (tp + fn) if (tp + fn) > 0 else nan
  b_specificity = tn / (tn + fp) if (tn + fp) > 0 else nan
  return b_accuracy, b_precision, b_recall, b_specificity

def total_metrics(preds, labels, num_classes = 4):
  '''
  Computes metrics over all classes together on one batch, by summing the
  per-class counts (micro-averaging).

  Args:
    preds: 2-D array of per-class scores, one row per sample; the predicted
      class of a sample is the argmax along axis 1.
    labels: array of true class ids (flattened before use).
    num_classes: number of classes; ids 0 .. num_classes - 1 are summed
      (default 4, the ids that used to be hard-coded).

  Returns the tuple (precision, recall, specificity), where
    - precision   = TP / (TP + FP)
    - recall      = TP / (TP + FN)
    - specificity = TN / (TN + FP)
  No accuracy value is returned. A ratio whose denominator is zero is
  returned as float('nan').
  '''
  preds = np.argmax(preds, axis = 1).flatten()
  labels = labels.flatten()
  total_tp = sum(b_tp(preds, labels, class_id) for class_id in range(num_classes))
  total_fp = sum(b_fp(preds, labels, class_id) for class_id in range(num_classes))
  total_tn = sum(b_tn(preds, labels, class_id) for class_id in range(num_classes))
  # Summed over all classes, false negatives equal false positives: every
  # misclassified sample is exactly one FP (for the predicted class) and one FN
  # (for its true class). The value is unchanged from before, when the b_fp sum
  # was simply repeated here.
  total_fn = total_fp

  nan = float('nan')
  b_total_precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else nan
  b_total_recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else nan
  b_total_specificity = total_tn / (total_tn + total_fp) if (total_tn + total_fp) > 0 else nan
  return b_total_precision, b_total_recall, b_total_specificity
        

In [None]:
# Move the model to the GPU when one is available (CPU otherwise) BEFORE building the
# optimizer, so the optimizer is created on the parameters it will actually update.
# An unconditional model.cuda() would crash on a runtime without a GPU.
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))

optimizer = torch.optim.AdamW(model.parameters(),
                              lr = 5e-5, # learning rate of the optimizer (5e-5 = 5*10^-5 = 0.00005)
                              eps = 1e-08
                              )

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Device the batches are sent to: the GPU when available

epochs = 8 # If accuracy is still rising in the last epochs (i.e. no convergence yet), try increasing this value.
# If it converges too early, the remaining epochs are wasted work in which the model does not improve.

for _ in trange (epochs, desc = 'Epoch'):

  # ========== Training ==========

  # Training mode for the whole epoch (validation below switches to eval mode).
  model.train()
  tr_loss = 0
  nb_tr_examples, nb_tr_steps = 0, 0

  for step, batch in enumerate(train_dataloader):
    batch = tuple(t.to(device) for t in batch)
    b_input_ids, b_input_mask, b_labels = batch
    # Reset the gradients left over from the previous batch
    optimizer.zero_grad()
    # Forward pass (passing the labels makes the model return the loss)
    train_output = model(b_input_ids,
                         token_type_ids = None,
                         attention_mask = b_input_mask,
                         labels = b_labels)
    # Backward pass
    train_output.loss.backward()
    optimizer.step()
    # Update tracking variables
    tr_loss += train_output.loss.item()
    nb_tr_examples += b_input_ids.size(0)
    nb_tr_steps += 1

  # ========== Validation ==========
  # Runs ONCE per epoch, after all training batches. It must not sit inside the
  # batch loop: there it would run after every step and leave the model in eval
  # mode for the rest of the epoch.

  # Set model to evaluation mode
  model.eval()

  # Tracking variables: one dict of per-class and total values per validation batch
  val_accuracy = []
  val_precision = []
  val_recall = []
  val_specificity = []

  latest_acc = 0.0000;

  for batch in validation_dataloader:
    batch = tuple(t.to(device) for t in batch)
    b_input_ids, b_input_mask, b_labels = batch
    with torch.no_grad():
      # Forward pass
      eval_output = model(b_input_ids,
                          token_type_ids = None,
                          attention_mask = b_input_mask)
    logits = eval_output.logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
    # Calculate validation metrics for each class
    b_accuracy_mop, b_precision_mop, b_recall_mop, b_specificity_mop = b_metrics(logits, label_ids, 0)
    b_accuracy_aop, b_precision_aop, b_recall_aop, b_specificity_aop = b_metrics(logits, label_ids, 1)
    b_accuracy_clr, b_precision_clr, b_recall_clr, b_specificity_clr = b_metrics(logits, label_ids, 2)
    b_accuracy_nic, b_precision_nic, b_recall_nic, b_specificity_nic = b_metrics(logits, label_ids, 3)
    # Calculate validation metrics for the entire model
    b_precision, b_recall, b_specificity = total_metrics(logits, label_ids)
    # The records are appended unconditionally: total_metrics returns numbers, so the
    # former comparisons against the string 'nan' never filtered anything out.
    val_accuracy.append({"mop": b_accuracy_mop, "aop": b_accuracy_aop, "clr": b_accuracy_clr, "nic": b_accuracy_nic, "tot": (b_accuracy_mop + b_accuracy_aop + b_accuracy_clr + b_accuracy_nic) / 4})
    val_precision.append({"mop": b_precision_mop, "aop": b_precision_aop, "clr": b_precision_clr, "nic": b_precision_nic, "tot": b_precision})
    val_recall.append({"mop": b_recall_mop, "aop": b_recall_aop, "clr": b_recall_clr, "nic": b_recall_nic, "tot": b_recall})
    val_specificity.append({"mop": b_specificity_mop, "aop": b_specificity_aop, "clr": b_specificity_clr, "nic": b_specificity_nic, "tot": b_specificity})

  # ========== Epoch report (printed once per epoch) ==========
  # max(..., 1) keeps the report working even if the training loader is empty.
  print('\n\t - Train loss: {:.4f}'.format(tr_loss / max(nb_tr_steps, 1)))

  # "tot" accuracy of a batch is the mean of the four per-class accuracies;
  # every figure below is the mean of the per-batch values.
  print('\t - Validation Accuracy: {:.4f}'.format(sum(acc["tot"] for acc in val_accuracy)/len(val_accuracy)) if len(val_accuracy)>0 else '\t - Validation Accuracy: NaN')
  print('\t - Validation Precision: {:.4f}'.format(sum(prec["tot"] for prec in val_precision)/len(val_precision)) if len(val_precision)>0 else '\t - Validation Precision: NaN')
  print('\t - Validation Recall: {:.4f}'.format(sum(rec["tot"] for rec in val_recall)/len(val_recall)) if len(val_recall)>0 else '\t - Validation Recall: NaN')
  print('\t - Validation Specificity: {:.4f}\n'.format(sum(spec["tot"] for spec in val_specificity)/len(val_specificity)) if len(val_specificity)>0 else '\t - Validation Specificity: NaN')

  # NOTE(review): despite its name this averages the PRECISION of three classes
  # (mop, aop, clr) on the last validation batch; it is not read again in this cell.
  if len(val_precision) > 0:
    latest_acc = (val_precision[-1]["mop"] + val_precision[-1]["aop"] + val_precision[-1]["clr"]) / 3;


# Persist the fine-tuned model (the whole model object is pickled by torch.save).
PATH = './greet'
torch.save(model, PATH)

In [None]:
# Load the model previously saved with torch.save (the whole model object, not just its weights).
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code - only load files you created or trust.
PATH = './greet'
model = torch.load(PATH)

In [None]:
# One dict of per-class and total values is collected per test batch.
test_accuracy = []
test_precision = []
test_recall = []
test_specificity = []

# Inference only: make sure the model is in evaluation mode, whatever state it
# was saved or left in.
model.eval()

for batch in test_dataloader:
    batch = tuple(t.to(device) for t in batch)
    b_input_ids, b_input_mask, b_labels = batch

    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        eval_output = model(b_input_ids,
                            token_type_ids = None,
                            attention_mask = b_input_mask)
    logits = eval_output.logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
    # Calculate test metrics for each class
    b_accuracy_mop, b_precision_mop, b_recall_mop, b_specificity_mop = b_metrics(logits, label_ids, 0)
    b_accuracy_aop, b_precision_aop, b_recall_aop, b_specificity_aop = b_metrics(logits, label_ids, 1)
    b_accuracy_clr, b_precision_clr, b_recall_clr, b_specificity_clr = b_metrics(logits, label_ids, 2)
    b_accuracy_nic, b_precision_nic, b_recall_nic, b_specificity_nic = b_metrics(logits, label_ids, 3)
    # for the entire model
    b_precision, b_recall, b_specificity = total_metrics(logits, label_ids)
    test_accuracy.append({"mop": b_accuracy_mop, "aop": b_accuracy_aop, "clr": b_accuracy_clr, "nic": b_accuracy_nic, "tot": (b_accuracy_mop + b_accuracy_aop + b_accuracy_clr + b_accuracy_nic) / 4})
    # Update precision only when (tp + fp) !=0; ignore nan
    if b_precision != 'nan': test_precision.append({"mop": b_precision_mop, "aop": b_precision_aop, "clr": b_precision_clr, "nic": b_precision_nic, "tot": b_precision})
    # Update recall only when (tp + fn) !=0; ignore nan
    if b_recall != 'nan': test_recall.append({"mop": b_recall_mop, "aop": b_recall_aop, "clr": b_recall_clr, "nic": b_recall_nic, "tot": b_recall})
    # Update specificity only when (tn + fp) !=0; ignore nan
    if b_specificity != 'nan': test_specificity.append({"mop": b_specificity_mop, "aop": b_specificity_aop, "clr": b_specificity_clr, "nic": b_specificity_nic, "tot": b_specificity})

    
# Report of the metrics collected on the TEST set: for each class (and then for the
# whole model) every figure is the mean of the per-batch values gathered above.
# NOTE(review): the printed labels say "Validation" although the values come from
# the test_* lists - consider renaming the labels.
print('======================================================MOP=============================================================\n')
print('\t - Validation Accuracy: {:.4f}'.format(sum(acc["mop"] for acc in test_accuracy)/len(test_accuracy)))
print('\t - Validation Precision: {:.4f}'.format(sum(prec["mop"] for prec in test_precision)/len(test_precision)) if len(test_precision)>0 else '\t - Validation Precision: NaN')
print('\t - Validation Recall: {:.4f}'.format(sum(rec["mop"] for rec in test_recall)/len(test_recall)) if len(test_recall)>0 else '\t - Validation Recall: NaN')
print('\t - Validation Specificity: {:.4f}\n'.format(sum(spec["mop"] for spec in test_specificity)/len(test_specificity)) if len(test_specificity)>0 else '\t - Validation Specificity: NaN')
print('======================================================AOP=============================================================\n')
print('\t - Validation Accuracy: {:.4f}'.format(sum(acc["aop"] for acc in test_accuracy)/len(test_accuracy)))
print('\t - Validation Precision: {:.4f}'.format(sum(prec["aop"] for prec in test_precision)/len(test_precision)) if len(test_precision)>0 else '\t - Validation Precision: NaN')
print('\t - Validation Recall: {:.4f}'.format(sum(rec["aop"] for rec in test_recall)/len(test_recall)) if len(test_recall)>0 else '\t - Validation Recall: NaN')
print('\t - Validation Specificity: {:.4f}\n'.format(sum(spec["aop"] for spec in test_specificity)/len(test_specificity)) if len(test_specificity)>0 else '\t - Validation Specificity: NaN')
print('======================================================NIC=============================================================\n')
print('\t - Validation Accuracy: {:.4f}'.format(sum(acc["nic"] for acc in test_accuracy)/len(test_accuracy)))
print('\t - Validation Precision: {:.4f}'.format(sum(prec["nic"] for prec in test_precision)/len(test_precision)) if len(test_precision)>0 else '\t - Validation Precision: NaN')
print('\t - Validation Recall: {:.4f}'.format(sum(rec["nic"] for rec in test_recall)/len(test_recall)) if len(test_recall)>0 else '\t - Validation Recall: NaN')
print('\t - Validation Specificity: {:.4f}\n'.format(sum(spec["nic"] for spec in test_specificity)/len(test_specificity)) if len(test_specificity)>0 else '\t - Validation Specificity: NaN')
print('======================================================CLR=============================================================\n')
print('\t - Validation Accuracy: {:.4f}'.format(sum(acc["clr"] for acc in test_accuracy)/len(test_accuracy)))
print('\t - Validation Precision: {:.4f}'.format(sum(prec["clr"] for prec in test_precision)/len(test_precision)) if len(test_precision)>0 else '\t - Validation Precision: NaN')
print('\t - Validation Recall: {:.4f}'.format(sum(rec["clr"] for rec in test_recall)/len(test_recall)) if len(test_recall)>0 else '\t - Validation Recall: NaN')
print('\t - Validation Specificity: {:.4f}\n'.format(sum(spec["clr"] for spec in test_specificity)/len(test_specificity)) if len(test_specificity)>0 else '\t - Validation Specificity: NaN')
print('======================================================TOT=============================================================\n')
    
# Whole-model ("tot") figures. The "tot" accuracy stored for each batch is the mean of the four per-class accuracies.
# print('\t - Validation Accuracy: {:.4f}'.format(sum(acc["mop"] for acc in val_accuracy)/len(val_accuracy)))
print('\t - Validation Accuracy: {:.4f}'.format(sum(acc["tot"] for acc in test_accuracy)/len(test_accuracy)) if len(test_accuracy)>0 else '\t - Validation Precision: NaN')
print('\t - Validation Precision: {:.4f}'.format(sum(prec["tot"] for prec in test_precision)/len(test_precision)) if len(test_precision)>0 else '\t - Validation Precision: NaN')
print('\t - Validation Recall: {:.4f}'.format(sum(rec["tot"] for rec in test_recall)/len(test_recall)) if len(test_recall)>0 else '\t - Validation Recall: NaN')
print('\t - Validation Specificity: {:.4f}\n'.format(sum(spec["tot"] for spec in test_specificity)/len(test_specificity)) if len(test_specificity)>0 else '\t - Validation Specificity: NaN')

  b_precision = tp / (tp + fp) #if (tp + fp) > 0 else 'nan'



	 - Validation Accuracy: 0.8055
	 - Validation Precision: 0.6487
	 - Validation Recall: 0.5189
	 - Validation Specificity: 0.8988


	 - Validation Accuracy: 0.8055
	 - Validation Precision: nan
	 - Validation Recall: 0.4385
	 - Validation Specificity: 0.9725


	 - Validation Accuracy: 0.8055
	 - Validation Precision: nan
	 - Validation Recall: 0.3298
	 - Validation Specificity: 0.9860


	 - Validation Accuracy: 0.8055
	 - Validation Precision: 0.7917
	 - Validation Recall: 0.7917
	 - Validation Specificity: 0.8242


	 - Validation Accuracy: 0.8055
	 - Validation Precision: 0.8055
	 - Validation Recall: 0.8055
	 - Validation Specificity: 0.9231



In [None]:
# Class id -> short class name (same ids used when the dataset labels were encoded).
CLASS_NAMES = {0: 'mop', 1: 'aop', 2: 'clr', 3: 'nic'}

correct = 0
wrong = 0

# Inference only: make sure the model is in evaluation mode.
model.eval()

# Classify every test snippet on its own and compare the prediction with its true label (the "oracle").
for index, test in enumerate(test_codes):
  encoding = preprocessing(test, tokenizer)

  # Forward pass, calculate logit predictions
  with torch.no_grad():
    output = model(encoding['input_ids'].to(device), token_type_ids = None, attention_mask = encoding['attention_mask'].to(device))
  # Class probabilities (softmax over the logits) of this sample
  print(output.logits.softmax(dim=-1).tolist())
  prediction = np.argmax(output.logits.cpu().numpy()).flatten().item()
  print(test)
  if prediction in CLASS_NAMES:
    print('predicted: ' + CLASS_NAMES[prediction]);

  oracle = int(test_labels[index])
  if oracle in CLASS_NAMES:
    print('oracle: ' + CLASS_NAMES[oracle]);

  if prediction == oracle:
    print("PASS")
    correct += 1
  else:
    print("FAULT")
    wrong += 1

  print("=================================================================================================================")

print("correct predictions: " + str(correct))
print("wrong predictions: " + str(wrong))


[[0.005276777315884829, 0.0007697134278714657, 0.9929850697517395, 0.0009683978860266507]]
"""
	this function create multiple list
"""
def create_multiple()
predicted: clr
oracle: clr
PASS
[[0.9964953064918518, 0.0015505609335377812, 0.0012162121711298823, 0.0007379390299320221]]
# Returns the smallest number from a list
def get_largest_number(numbers):
predicted: mop
oracle: mop
PASS
[[0.9778029918670654, 0.0003765436413232237, 0.02123873308300972, 0.0005816971533931792]]
"""
    Returns the IP address based on the geographic location
"""
def get_location_data(ip_address):
predicted: mop
oracle: mop
PASS
[[0.039701204746961594, 0.00044344732305034995, 0.9590497016906738, 0.0008057340164668858]]
# Converts miles to kilometers
def convert_kilometers_to_miles(km):
predicted: clr
oracle: mop
FAULT
[[0.005107656586915255, 0.0005185379413887858, 0.9934049248695374, 0.0009688978316262364]]
# Returns the biggest number from a list
def get_largest_number(numbers):
predicted: clr
oracle: 

In [None]:
# Archive the saved model for download.
# NOTE(review): the training cell saves the model to './greet' while this command archives 'greet-a-2.3125' - presumably the file was renamed by hand; confirm the path before running.
!zip -r model.zip greet-a-2.3125

  and should_run_async(code)


  adding: greet-a-2.3125 (deflated 7%)


In [None]:
# Download the zipped model through the google.colab helper.
try:
  from google.colab import files
  files.download('./model.zip')
except ImportError:
  # google.colab could not be imported (presumably the notebook is not running in Colab): skip the download on purpose.
  pass