In [None]:
import pandas as pd
import numpy as np
from datasets import Dataset
from sklearn.model_selection import train_test_split
import evaluate
from transformers import TrainingArguments, Trainer, set_seed, AutoModelForSequenceClassification, AutoTokenizer
from scipy.special import softmax
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
# Absolute path of the directory the notebook is executed from
workingDir = os.path.abspath(os.curdir)

# Fine-tune a pretrained model

## Prepare a dataset

In [None]:
# Experiment configuration: which text column (language) and which target (outcome) to use
language = 'german'
outcome = 'topology'

# English text uses the English BERT checkpoint; every other language falls back to German BERT
tokenizer_language = "bert-base-cased" if language == 'english' else "bert-base-german-cased"

# Number of output classes; matches the four topology codes (0-3) assigned in the label-encoding cell
num_labels = 4

In [None]:
# Both splits are read from <workingDir>/data
data_dir = os.path.join(workingDir, 'data')
df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
df_test = pd.read_csv(os.path.join(data_dir, 'test.csv'))

In [None]:
# Integer code for every topology category; these codes are the class ids the model is trained on.
TOPOLOGY_TO_LABEL = {'none': 0, 'Undefined': 1, 'Unilateral': 2, 'Bilateral': 3}


def add_label_columns(df):
    """Add the integer target columns ``label_cp`` and ``label_topology`` to ``df`` in place.

    ``label_cp`` is 1 where ``df.isCP`` is truthy and 0 otherwise.
    ``label_topology`` is the integer code of ``df['Topology']`` according to
    ``TOPOLOGY_TO_LABEL``.

    Args:
        df: DataFrame with an ``isCP`` column and a ``Topology`` column.

    Raises:
        ValueError: if ``Topology`` holds a missing entry or a value that has no
            code in ``TOPOLOGY_TO_LABEL``. The message lists the offending values,
            instead of failing later with an opaque NaN-to-int cast error.
    """
    df['label_cp'] = np.where(df.isCP, 1, 0)

    # Values without a code come back as NaN from ``map``; detect them before casting.
    topology_codes = df['Topology'].map(TOPOLOGY_TO_LABEL)
    unmapped = topology_codes.isna()
    if unmapped.any():
        unknown = df.loc[unmapped, 'Topology'].unique().tolist()
        raise ValueError(f"Unexpected Topology value(s): {unknown}")

    df['label_topology'] = topology_codes.astype('int')


# output train
add_label_columns(df_train)

# output test
add_label_columns(df_test)

In [None]:
# Expose the configured outcome and language under the generic column names used downstream
label_column = f'label_{outcome}'
text_column = f"input_{language}_bert"

for frame in (df_train, df_test):
    frame['label'] = frame[label_column]
    frame['text'] = frame[text_column]

In [None]:
# Hold out 20% of the training data for validation, stratified on the topology label.
# NOTE: `eval` shadows the Python builtin; the name is kept because later cells read it.
train, eval = train_test_split(
    df_train,
    test_size=0.2,
    random_state=1,
    stratify=df_train['label_topology'],
)

# Give both splits a fresh 0..n-1 index
train = train.reset_index(drop=True)
eval = eval.reset_index(drop=True)

In [None]:
# Only the target and the input text are handed to the Hugging Face datasets
model_columns = ['label', 'text']

train_dataset = Dataset.from_pandas(train[model_columns])
eval_dataset = Dataset.from_pandas(eval[model_columns])
test_dataset = Dataset.from_pandas(df_test[model_columns])

In [None]:
# Load the tokenizer that belongs to the checkpoint selected in the configuration cell
tokenizer = AutoTokenizer.from_pretrained(tokenizer_language)

def tokenize_function(examples):
    """Tokenize the ``"text"`` field of a batch with the module-level ``tokenizer``.

    The tokenizer is called with ``padding="max_length"`` and ``truncation=True``;
    whatever it returns is passed straight back to the caller.
    """
    texts = examples["text"]
    return tokenizer(texts, padding="max_length", truncation=True)


# Tokenize every split in batches with the same function
train_dataset, eval_dataset, test_dataset = [
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, eval_dataset, test_dataset)
]

<a id='trainer'></a>

## Train

## Train with PyTorch Trainer

In [None]:
# Seed before the model is instantiated
set_seed(1)

# Sequence-classification model built on the same checkpoint as the tokenizer
model = AutoModelForSequenceClassification.from_pretrained(
    tokenizer_language,
    num_labels=num_labels,
)

### Training hyperparameters

In [None]:
# NOTE(review): newer transformers releases rename `evaluation_strategy` to `eval_strategy`;
# confirm against the installed version before upgrading.
training_args = TrainingArguments(
    output_dir="test_trainer",
    evaluation_strategy="epoch",
    num_train_epochs=10,
    run_name=language,
)

### Evaluate

In [None]:
# F1 metric object; compute_metrics calls it with average="macro"
metric = evaluate.load("f1")

In [None]:
def compute_metrics(eval_pred):
    """Compute the macro-averaged F1 score for one evaluation pass.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted class of each
    row is the argmax over the last axis of the logits, and the result of
    ``metric.compute`` is returned unchanged.
    """
    raw_scores, gold_labels = eval_pred
    predicted_classes = np.argmax(raw_scores, axis=-1)
    return metric.compute(
        predictions=predicted_classes,
        references=gold_labels,
        average="macro",
    )

### Trainer

In [None]:
# Wire model, training arguments and data together.
# eval_dataset is the 20% validation split; compute_metrics reports macro F1 on it.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

In [None]:
# Fine-tune with the settings from training_args (10 epochs, evaluation_strategy="epoch")
trainer.train()

In [None]:
def plot_results_topology(y_test, y_pred):
    """Print a classification summary, plot the confusion matrix and return key metrics.

    Args:
        y_test: true class labels (array-like).
        y_pred: predicted class labels (array-like, same length as ``y_test``).

    Returns:
        Tuple ``(accuracy, precision_micro, recall_micro, f1_micro,
        precision_macro, recall_macro, f1_macro)``.

    Side effects:
        Prints the accuracy and the classification report, and shows a heatmap
        of the confusion matrix.
    """
    # The confusion matrix has one row/column per class that occurs in either the
    # true or the predicted labels, so the tick labels must be built from the same
    # union; using only the classes in y_test mislabels (or breaks) the plot when
    # the model predicts a class that is absent from y_test.
    classes = np.union1d(y_test, y_pred)

    # Calculate and print accuracy and the per-class report
    accuracy = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy:", round(accuracy, 2))
    print("Detail:")
    print(metrics.classification_report(y_test, y_pred))

    # Plot confusion matrix; `labels=classes` pins the row/column order to the tick labels
    cm = metrics.confusion_matrix(y_test, y_pred, labels=classes)
    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt='d', ax=ax, cmap=plt.cm.Blues, cbar=False)
    ax.set(xlabel="Pred", ylabel="True", xticklabels=classes, yticklabels=classes, title="Confusion matrix")
    plt.yticks(rotation=0)
    plt.show()

    # Precision, recall and F1 for micro and macro averages
    precision_micro, recall_micro, f1_micro, _ = metrics.precision_recall_fscore_support(y_test, y_pred, average='micro')
    precision_macro, recall_macro, f1_macro, _ = metrics.precision_recall_fscore_support(y_test, y_pred, average='macro')

    return accuracy, precision_micro, recall_micro, f1_micro, precision_macro, recall_macro, f1_macro

In [None]:
# Run the fine-tuned model on the validation split; predictions[0] is read as the raw scores in the next cell
predictions = trainer.predict(eval_dataset)

In [None]:
# Turn the raw validation scores into class probabilities and pick the most likely class
y_pred_proba = softmax(predictions[0], axis=1)
y_pred = np.argmax(y_pred_proba, axis=-1)

# NOTE: despite its name, y_train holds the true labels of the validation split (`eval`)
y_train = eval['label']

In [None]:
# Report and keep the metrics for the validation split (variables carry the `_train` suffix)
(
    accuracy_train,
    precision_micro_train,
    recall_micro_train,
    f1_micro_train,
    precision_macro_train,
    recall_macro_train,
    f1_macro_train,
) = plot_results_topology(y_train, y_pred)

In [None]:
# Run the fine-tuned model on the test set; this overwrites the validation `predictions`
predictions = trainer.predict(test_dataset)

In [None]:
# Turn the raw test scores into class probabilities and pick the most likely class
y_pred_proba = softmax(predictions[0], axis=1)
y_pred = np.argmax(y_pred_proba, axis=-1)

# True labels of the test set
y_test = df_test['label']

In [None]:
# Report and keep the metrics for the test set
(
    accuracy_test,
    precision_micro_test,
    recall_micro_test,
    f1_micro_test,
    precision_macro_test,
    recall_macro_test,
    f1_macro_test,
) = plot_results_topology(y_test, y_pred)

<a id='pytorch_native'></a>