In [None]:
# Attach Google Drive to the Colab runtime's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Copy the helper module bertfunss.py from the mounted Drive location into the
# current working directory so the later `import bertfunss` can resolve it.
# NOTE(review): Colab usually exposes Drive files under /content/gdrive/MyDrive/
# rather than directly under the mount point — confirm this source path.
!cp /content/gdrive/bertfunss.py .

In [None]:
!pip install transformers

In [None]:
import bertfunss
from transformers import BertForSequenceClassification, BertTokenizer
import pandas as pd
import torch
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [None]:
# Ask TensorFlow for the name of the GPU it can see and stop the notebook
# with an error when it is not the first GPU device.
# NOTE(review): this hard stop runs before the PyTorch device check in the
# next cell, so on a full top-to-bottom run that cell's CPU fallback is never
# reached.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

In [None]:
# Choose the PyTorch device used by the rest of the notebook: the CUDA GPU
# when one is available, otherwise the CPU.
if not torch.cuda.is_available():
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    # A GPU is present: select it and report how many there are and which
    # one (index 0) is named.
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

In [None]:
# Read the dataset from the CSV file in the working directory and preview
# ten randomly chosen rows as the cell output.
data_file = 'pred_data.csv'
df = pd.read_csv(data_file)
df.sample(n=10)

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable. Inputs come from the `sen_coding` column and targets from
# the `votes` column.
X_train, X_test, y_train, y_test = train_test_split(
    df['sen_coding'].values,
    df['votes'].values,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Checkpoint name passed to bertfunss.dataprep here and to
# bertfunss.trainingloop later on.
modelpath = "GroNLP/bert-base-dutch-cased"
# Only the training split is handed over; the three values returned by
# dataprep are unpacked as the tokenizer, a training dataloader and a
# validation dataloader.
# NOTE(review): the trailing 16 is presumably the batch size — confirm in
# bertfunss.dataprep.
sentences, labels = X_train, y_train
tokenizer, train_dataloader, validation_dataloader = bertfunss.dataprep(
    modelpath, sentences, labels, 16
)

In [None]:
# Training configuration handed to bertfunss.trainingloop.
num_labels = 3
epochs = 2
# Derive the GPU flag from the device selected earlier instead of hard-coding
# True, so the flag and `device` can never disagree (e.g. on a CPU-only
# runtime, where the previous hard-coded True contradicted device == cpu).
gpu = device.type == "cuda"
# NOTE(review): lrr / epss are presumably the optimizer learning rate and
# epsilon — confirm against bertfunss.trainingloop.
lrr = 5e-5
epss = 1e-8
# Run the training; the two returned values are kept as the trained model and
# the collected training statistics.
model, training_stats = bertfunss.trainingloop(
    modelpath,
    num_labels,
    gpu,
    device,
    epochs,
    lrr,
    epss,
    train_dataloader,
    validation_dataloader,
)

In [None]:
# Pass the statistics returned by the training loop through bertfunss.stats
# and show the result as this cell's output.
# NOTE(review): the df_ prefix suggests the result is a DataFrame — confirm
# against bertfunss.stats.
df_sub_stats = bertfunss.stats(training_stats)
df_sub_stats

In [None]:
# Hand the tokenizer and the trained model to bertfunss.savemodel, targeting
# the 'model_saved' directory (a path relative to the current working
# directory). The next cell reloads both from that same directory name.
output_dir = 'model_saved'
bertfunss.savemodel(tokenizer, model, output_dir)

In [None]:
# Reload the classification model and its tokenizer from the 'model_saved'
# directory, replacing the in-memory `model` and `tokenizer`, then move the
# model onto the selected device.
model = BertForSequenceClassification.from_pretrained('model_saved')
tokenizer = BertTokenizer.from_pretrained('model_saved')
# The trailing semicolon keeps the value returned by .to() (the full module
# repr) from being dumped into the cell output.
model.to(device);

In [None]:
# Run the reloaded model on the held-out test split; the two values returned
# by bertfunss.modelinuse are unpacked as y_true and y_pred.
sentencess, labelss = X_test, y_test
y_true, y_pred = bertfunss.modelinuse(
    sentencess, labelss, tokenizer, model, device
)