In [1]:
# Colab-only helper module for transferring files into the runtime.
from google.colab import files

# Prompt for a file upload; the result is a dict-like object keyed by the
# uploaded filename(s) (here: the stance CSV read later in the notebook).
uploaded = files.upload()

# Last expression of the cell: display the uploaded filenames as a sanity check.
uploaded.keys()

Saving stance_examples1.csv to stance_examples1.csv


dict_keys(['stance_examples1.csv'])

In [2]:
# Mount Google Drive under /content/drive so that files stored there
# (the model directory loaded later lives under /content/drive/MyDrive)
# are reachable through the normal filesystem.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch, numpy as np, pandas as pd
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report

# Read the uploaded examples (decoded as latin1) and derive the two columns
# the model pipeline works with:
#   label - integer class id for the stance (con -> 0, pro -> 1); any other
#           stance value becomes NaN
#   text  - topic and sentence rendered into one "[TOPIC] ... [SEP] ..." string
df = pd.read_csv('stance_examples1.csv', encoding='latin1')
df = df.assign(
    label=df['stance'].map({'con':0, 'pro':1}),
    text=[f"[TOPIC] {t} [SEP] {s}" for t, s in zip(df['topic'], df['sentence'])],
)

# Location of the saved 'stance-distilbert' model directory in Google Drive;
# change this if your copy lives elsewhere.
# NOTE(review): the directory name says distilbert, but the recorded cell output
# shows a BertForSequenceClassification - confirm which checkpoint is stored here.
model_path = '/content/drive/MyDrive/stance-distilbert'

# local_files_only=True: load strictly from model_path, never from the Hub.
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
# .eval() returns the model itself, so it can be chained onto the load.
model = AutoModelForSequenceClassification.from_pretrained(model_path, local_files_only=True).eval()
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [14]:
# Class id -> stance name; the inverse of the con/pro -> 0/1 label encoding.
id2label = {0:'con', 1:'pro'}

def classify_stance(sentence: str, topic: str) -> dict:
    """Predict whether ``sentence`` is pro or con with respect to ``topic``.

    The pair is rendered with the same "[TOPIC] ... [SEP] ..." template that
    is used for the ``text`` column of the examples frame, then scored with
    the notebook-level ``tokenizer`` and ``model``.

    Args:
        sentence: The statement whose stance should be classified.
        topic: The claim the statement is judged against.

    Returns:
        A dict with keys ``'stance'`` (``'con'`` or ``'pro'``), ``'proba_con'``
        and ``'proba_pro'`` (softmax probabilities rounded to 3 decimals).
    """
    text = f"[TOPIC] {topic} [SEP] {sentence}"
    # A single sequence needs no padding; truncation caps it at 256 tokens.
    enc = tokenizer(text, truncation=True, max_length=256, return_tensors='pt')
    # Put the inputs on the same device as the model; otherwise the forward
    # pass fails with a device mismatch as soon as the model is moved to a GPU.
    enc = {name: tensor.to(model.device) for name, tensor in enc.items()}
    with torch.no_grad():
        logits = model(**enc).logits
    # Index the single batch row explicitly rather than squeezing every size-1 dim.
    row = logits[0]
    probs = torch.softmax(row, dim=-1).tolist()
    pred = int(torch.argmax(row, dim=-1))
    return {'stance': id2label[pred], 'proba_con': round(probs[0],3), 'proba_pro': round(probs[1],3)}

# Example predictions: every entry is a (topic, sentence) pair that is scored
# and printed in order.
# NOTE(review): the recorded outputs for these pairs are all within ~0.03 of
# 0.5 - worth confirming that the loaded checkpoint is the fine-tuned one.
for topic, sentence in (
    ("We should ban gene editing.",
     "Gene editing can bring ethical problems."),
    ("We should ban gene editing.",
     "Gene editing can be exploited by the government."),
    ("We should ban gene editing.",
     "Gene editing can help enhancement of human health."),
    ("Space exploration is more important than sea exploration.",
     "We can find useful resources such as minerals or bacteria while exploring the sea."),
    ("Animal testing shouldn't be banned.",
     "We should appreciate animal rights."),
):
    print(classify_stance(sentence, topic))

{'stance': 'pro', 'proba_con': 0.468, 'proba_pro': 0.532}
{'stance': 'pro', 'proba_con': 0.492, 'proba_pro': 0.508}
{'stance': 'pro', 'proba_con': 0.474, 'proba_pro': 0.526}
{'stance': 'con', 'proba_con': 0.509, 'proba_pro': 0.491}
{'stance': 'pro', 'proba_con': 0.484, 'proba_pro': 0.516}
