In [None]:
import torch
import argparse
from sklearn.metrics import accuracy_score
from transformers import (
    DistilBertForSequenceClassification, DistilBertTokenizerFast, Trainer, TrainingArguments,
    AutoTokenizer,PreTrainedTokenizerFast,
    AutoModel,
    BertForSequenceClassification,
    )
from transformers import RobertaTokenizerFast, T5Tokenizer
from transformers import DistilBertForSequenceClassification, RobertaForSequenceClassification, T5ForConditionalGeneration
import numpy as np
from datasets import load_dataset, concatenate_datasets, load_from_disk
import logging
import sys
import copy
import os
from scipy.stats import pearsonr

## 1. Load Model & Tokenizer

In [None]:
from transformers import AutoConfig

# Identifier of the pre-trained checkpoint that the rest of the notebook
# loads (configuration, model weights and tokenizer all use this name).
model_name = 'bert-large-uncased-whole-word-masking'

# Load the checkpoint's configuration and request a two-label classification
# setup. Keyword arguments matching configuration attributes override the
# values loaded from the checkpoint, so this is equivalent to assigning
# `config.num_labels = 2` after loading.
config = AutoConfig.from_pretrained(model_name, num_labels=2)

print(config)


In [None]:
# Instantiate the sequence-classification variant of BERT from the
# pre-trained checkpoint, using the `config` prepared above (num_labels = 2).
model = BertForSequenceClassification.from_pretrained(model_name, config=config)

print(model)

In [None]:
# Load the tokenizer that belongs to `model_name`, requesting the fast
# implementation; the next cell verifies that a fast tokenizer was returned.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
print(tokenizer)

In [None]:
# Abort early unless the loaded tokenizer is a fast tokenizer.
tokenizer_is_fast = isinstance(tokenizer, PreTrainedTokenizerFast)
if not tokenizer_is_fast:
    fast_tokenizer_required_msg = (
        "This example script only works for models that have a fast tokenizer. Checkout the big table of models at"
        " https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet"
        " this requirement"
    )
    raise ValueError(fast_tokenizer_required_msg)

## 2. Processing training data
### 2.1 Define a tokenize function

In [None]:
# Text column(s) handed to the tokenizer for each supported GLUE task:
# one column for single-sentence tasks, two for sentence-pair tasks.
_GLUE_TEXT_COLUMNS = {
    "sst2": ("sentence",),
    "cola": ("sentence",),
    "mnli": ("premise", "hypothesis"),
    "qqp": ("question1", "question2"),
    "qnli": ("question", "sentence"),
    "mrpc": ("sentence1", "sentence2"),
    "stsb": ("sentence1", "sentence2"),
    "rte": ("sentence1", "sentence2"),
}


def tokenize_function(examples, tokenizer, dataset, model=None):
    """Tokenize a batch of GLUE examples for the given task.

    Args:
        examples: Batch of examples indexable by column name (for instance
            the mapping that ``Dataset.map(..., batched=True)`` passes in).
        tokenizer: Callable tokenizer. It is invoked with the task's one or
            two text columns plus ``padding="max_length"``,
            ``truncation=True`` and ``return_tensors="pt"``.
        dataset: Name of the GLUE task; must be a key of
            ``_GLUE_TEXT_COLUMNS``.
        model: Unused. Kept so existing callers that pass it keep working.

    Returns:
        Whatever ``tokenizer`` returns for the selected column(s).

    Raises:
        ValueError: If ``dataset`` is not a supported task name. Previously
            the function fell through and returned ``None``, which a batched
            ``map`` call accepts without complaint, leaving the data
            untokenized.
    """
    columns = _GLUE_TEXT_COLUMNS.get(dataset)
    if columns is None:
        supported = ", ".join(sorted(_GLUE_TEXT_COLUMNS))
        raise ValueError(
            f"Unsupported dataset {dataset!r}; expected one of: {supported}"
        )

    texts = [examples[column] for column in columns]
    return tokenizer(*texts, padding="max_length", truncation=True, return_tensors="pt")


### 2.2 Load the dataset

In [None]:
# GLUE task used throughout the notebook; later cells reuse this name to
# pick the text columns to tokenize and the evaluation metric.
dataset_name = 'sst2'
dataset = load_dataset("glue", dataset_name)

# The "validation" split is what the notebook evaluates on.
train_dataset, test_dataset = dataset["train"], dataset["validation"]

### 2.3 Tokenize the dataset

In [None]:
def _tokenize_batch(examples):
    """Tokenize one batch with the notebook's tokenizer for `dataset_name`."""
    return tokenize_function(examples, tokenizer, dataset_name)


# Apply the same batched tokenization to both splits.
tokenized_train_dataset = train_dataset.map(_tokenize_batch, batched=True)
tokenized_test_dataset = test_dataset.map(_tokenize_batch, batched=True)

train_size = len(tokenized_train_dataset)
print("=====> train_dataset size: {}".format(train_size))

## 3. Start training the original BERT Model
### 3.1 Define training args

In [None]:
# Options handed to the Trainer further down.
training_args = TrainingArguments(
    # Output and logging both go to the same directory.
    output_dir='./log_dir',
    logging_dir='./log_dir',
    # Optimisation schedule.
    num_train_epochs=3,
    warmup_steps=500,
    weight_decay=0.01,
    # Per-device batch sizes for training and evaluation.
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    # Evaluate once per epoch; the save strategy is "no".
    evaluation_strategy="epoch",
    save_strategy="no",
    save_total_limit=1,
)

### 3.2 Define eval metrics

In [None]:
def compute_metrics(eval_pred, task):
    """Compute the evaluation metric for one task.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` must
            offer ``argmax`` and ``squeeze`` (presumably a NumPy array of
            model outputs; confirm against the caller).
        task: Task name. ``"stsb"`` selects Pearson correlation; every other
            value selects accuracy.

    Returns:
        ``{"pearson_corr": ...}`` for ``"stsb"``, otherwise
        ``{"accuracy": ...}``.
    """
    model_outputs, gold_labels = eval_pred

    if task != "stsb":
        # Classification: the predicted class is the highest-scoring entry
        # along the last axis.
        predicted_classes = model_outputs.argmax(-1)
        return {"accuracy": accuracy_score(gold_labels, predicted_classes)}

    # STS-B: correlate the squeezed outputs with the labels.
    correlation, _p_value = pearsonr(model_outputs.squeeze(), gold_labels)
    return {"pearson_corr": correlation}

### 3.3 Start training using the Trainer

In [None]:
def _metrics_for_current_task(eval_pred):
    """Bind `compute_metrics` to the task selected by `dataset_name`."""
    return compute_metrics(eval_pred, dataset_name)


# Train on the tokenized training split and evaluate on the tokenized
# validation split (not the raw, untokenized `test_dataset`).
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_test_dataset,
    compute_metrics=_metrics_for_current_task,
)

trainer.train()