In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
df = pd.read_csv('https://github.com/clairett/pytorch-sentiment-classification/raw/master/data/SST2/train.tsv', delimiter='\t', header=None)

In [3]:
labels = df[1].values

In [4]:
labels.shape

(6920,)

In [5]:
texts = df[0].values.tolist()

In [6]:
from transformers import AutoTokenizer, TFGPT2ForSequenceClassification

In [7]:
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialogRPT-updown")
model = TFGPT2ForSequenceClassification.from_pretrained("microsoft/DialogRPT-updown", from_pt=True)

All PyTorch model weights were used when initializing TFGPT2ForSequenceClassification.

All the weights of TFGPT2ForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2ForSequenceClassification for predictions without further training.


In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=0)

In [9]:
X_train_tokenized = tokenizer(X_train, return_tensors="np", max_length=30, padding='max_length', truncation=True)
X_test_tokenized = tokenizer(X_test, return_tensors="np", max_length=30, padding='max_length', truncation=True)

In [10]:
optimizer = tf.keras.optimizers.Adam(2e-5)
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

In [11]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=1)
checkpoint_filepath = "./checkpoints/checkpoint_gpt2"
mc = ModelCheckpoint(checkpoint_filepath, monitor='val_loss', mode='min', 
                     save_best_only=True, save_weights_only=True)

In [12]:
model.fit(dict(X_train_tokenized), y_train, epochs=10, batch_size=128, validation_split=0.1, callbacks=[es, mc])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 5: early stopping


<keras.callbacks.History at 0x1319b02f430>

In [13]:
y_preds = model.predict(dict(X_test_tokenized))



In [14]:
y_preds.logits

array([[-4.298037 ],
       [-5.6617866],
       [ 3.0735478],
       ...,
       [ 6.48752  ],
       [ 6.3204837],
       [ 4.5786104]], dtype=float32)

In [15]:
prediction_probs=tf.nn.sigmoid(y_preds.logits).numpy()

In [16]:
y_predictions = [1 if x > 0.5 else 0 for x in prediction_probs ]

In [17]:
from sklearn.metrics import classification_report
print(classification_report(y_predictions, y_test))

              precision    recall  f1-score   support

           0       0.89      0.87      0.88       655
           1       0.89      0.90      0.89       729

    accuracy                           0.89      1384
   macro avg       0.89      0.89      0.89      1384
weighted avg       0.89      0.89      0.89      1384

