# Installs and Imports

In [3]:
!pip install transformers
!pip install torch torchvision torchaudio
!pip install stanza
!pip install negate==1.1.3

Collecting stanza
  Downloading stanza-1.9.2-py3-none-any.whl.metadata (13 kB)
Collecting emoji (from stanza)
  Downloading emoji-2.14.0-py3-none-any.whl.metadata (5.7 kB)
Downloading stanza-1.9.2-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading emoji-2.14.0-py3-none-any.whl (586 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m586.9/586.9 kB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji, stanza
Successfully installed emoji-2.14.0 stanza-1.9.2
Collecting negate==1.1.3
  Downloading negate-1.1.3-py3-none-any.whl.metadata (22 kB)
Collecting lemminflect<0.3.0,>=0.2.3 (from negate==1.1.3)
  Downloading lemminflect-0.2.3-py3-none-any.whl.metadata (7.0 kB)
Downloading negate-1.1.3-py3-none-any.whl (23 kB)
Downloading lemminflect-0.2.3-py3-none-any.whl (769 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m769

In [4]:
import numpy as np
import pandas as pd

from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
import stanza
from negate import Negator

from sklearn.metrics import precision_score, recall_score, f1_score

# Load Data and Models

## Tweet Data

In [9]:
# Read "exp2.csv" (path relative to the working directory) into `df_exp2`.
# `file_name` is a scratch global that the next load cell overwrites.
file_name = "exp2.csv"
df_exp2 = pd.read_csv(file_name)

In [12]:
# Read "exp1_no_subject.csv" (path relative to the working directory) into `df_exp1`.
# This rebinds the scratch global `file_name` set by the previous load cell.
file_name = "exp1_no_subject.csv"
df_exp1 = pd.read_csv(file_name)

In [6]:
# NOTE(review): `df` is not assigned in any cell visible in this notebook (only
# `df_exp1` and `df_exp2` are), so this raises NameError on Restart & Run All.
# TODO confirm which frame was intended and bind it explicitly before first use.
df

Unnamed: 0,ID,original_ID,tweet,sarcastic,rephrase
0,6440,sign_6941,i looove getting 3 hours of sleep because two ...,1,i hate getting 3 hours of sleep because two jobs
1,12874,sign_12479,i hate people who use big words just to make t...,1,i hate people who use big words just to make t...
2,4295,sign_12125,i love that girl who never liked even my dp,1,i love that girl who never liked even my dp
3,4147,train_2222,I hate that I wasted my whole weekend 😠,0,
4,11437,sign_5017,i love being ignored,1,i hate being ignored
...,...,...,...,...,...
126,5211,train_682,i love 6 hour panic attacks,1,I don't like having 6-hour panic attacks.
127,19299,train_2654,I LOVE LORDE GOODNIGHT,0,
128,7159,train_2825,i LOVE grocery shopping,0,
129,13743,train_2657,i love video games,0,


In [13]:
df_exp1

Unnamed: 0,ID,original_ID,tweet,sarcastic,rephrase
0,6440,sign_6941,looove getting 3 hours of sleep because two jobs,1,hate getting 3 hours of sleep because two jobs
1,12874,sign_12479,hate people who use big words just to make the...,1,hate people who use big words just to make the...
2,4295,sign_12125,love that girl who never liked even my dp,1,love that girl who never liked even my dp
3,4147,train_2222,Hate that I wasted my whole weekend 😠,0,
4,11437,sign_5017,love being ignored,1,hate being ignored
...,...,...,...,...,...
119,5211,train_682,love 6 hour panic attacks,1,I don't like having 6-hour panic attacks.
120,19299,train_2654,I LOVE LORDE GOODNIGHT,0,
121,7159,train_2825,i LOVE grocery shopping,0,
122,13743,train_2657,love video games,0,


## Sentiment Classifier

In [14]:
# Hugging Face checkpoint used for tweet-level sentiment scoring.
# (Plain string: the original f-string had no placeholders.)
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
# Tokenizer, config and sequence-classification model all come from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

def classify_sentiment(text) :
    """Score `text` with the sentiment model and return one row of scores.

    The text is tokenized into PyTorch tensors, passed through the global
    `model`, and the first row of the model's first output is returned as a
    NumPy array (detached from the autograd graph).

    Returns:
        NumPy array of scores ordered [ negative, neutral, positive ].
    """
    model_inputs = tokenizer(text, return_tensors='pt')
    first_output = model(**model_inputs)[0]
    # [ negative, neutral, positive ]
    return first_output[0].detach().numpy()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Constituency Parser

In [None]:
# Download the English stanza resources and build the default English pipeline.
# Later cells only read the constituency tree of the first sentence plus each
# word's lemma and character offsets from the parsed document.
stanza.download('en')
nlp = stanza.Pipeline('en')

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.9.0.json:   0%|   …

INFO:stanza:Downloaded file to /root/stanza_resources/resources.json
INFO:stanza:Downloading default packages for language: en (English) ...


Downloading https://huggingface.co/stanfordnlp/stanza-en/resolve/v1.9.0/models/default.zip:   0%|          | 0…

INFO:stanza:Downloaded file to /root/stanza_resources/en/default.zip
INFO:stanza:Finished downloading models and saved to /root/stanza_resources
INFO:stanza:Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.9.0.json:   0%|   …

INFO:stanza:Downloaded file to /root/stanza_resources/resources.json
INFO:stanza:Loading these models for language: en (English):
| Processor    | Package                   |
--------------------------------------------
| tokenize     | combined                  |
| mwt          | combined                  |
| pos          | combined_charlm           |
| lemma        | combined_nocharlm         |
| constituency | ptb3-revised_charlm       |
| depparse     | combined_charlm           |
| sentiment    | sstplus_charlm            |
| ner          | ontonotes-ww-multi_charlm |

INFO:stanza:Using device: cpu
INFO:stanza:Loading: tokenize
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Loading: mwt
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Loading: pos
  checkpoint = torch.load(filename, lambda storage, loc: storage)
  data = torch.load(self.filename, lambda storage, loc: storage)
  state = torch.load(filename, lambda storage,

In [None]:
def width(t) :
  """Count the words covered by `t`, based on its bracketed string form.

  `str(t)` is stripped of closing brackets and split on single spaces; every
  piece that does not contain an opening bracket is counted as one word
  (pieces with "(" are node labels).
  """
  pieces = str(t).replace(")", "").split(" ")
  return sum(1 for piece in pieces if '(' not in piece)

def add_spans(t, tks) :
  """Annotate every node of tree `t` with a `.span` of character offsets.

  `tks` is the list of word objects covered by `t`, in order. A preterminal
  takes its span from the first word in `tks` and also stores that word as
  `.tk_info`. Any other node hands each child its own slice of `tks` (sized
  with `width`) and then spans from its first child's start to its last
  child's end. The tree is modified in place; nothing is returned.
  """
  if t.is_preterminal() :
    word = tks[0]
    t.span = ( word.start_char , word.end_char )
    t.tk_info = word
    return

  offset = 0
  for child in t.children :
    # Each child consumes as many words as it has leaves.
    next_offset = offset + width(child)
    add_spans(child, tks[offset:next_offset])
    offset = next_offset

  first_child, last_child = t.children[0], t.children[-1]
  t.span = ( first_child.span[0] , last_child.span[-1] )


def parse_tree(t):
  """Parse text `t` and return the span-annotated constituency tree.

  Only the first sentence of the stanza document is used. Its tokens are
  flattened into their words, and `add_spans` attaches character spans to
  the tree nodes before the tree is returned.
  """
  first_sentence = nlp(t).sentences[0]
  root = first_sentence.constituency
  words = [word for token in first_sentence.tokens for word in token.words]
  add_spans(root, words)
  return root

In [None]:
def membership_check(tree, label):
  """Return True if `tree` or any node below it carries `label`.

  The search stops at preterminals: their own label is compared, but their
  children are not visited. `extract_VP` uses this to bail out early when a
  tree contains no VP at all.
  """
  if tree.label == label:
    return True

  if tree.is_preterminal():
    return False

  return any(membership_check(child, label) for child in tree.children)

def extract_VP(row):
  """Find the shallowest, left-most VP and the adverbial node just before it.

  Args:
    row: either a single tree node or a list of nodes (e.g. `tree.children`).

  Returns:
    {"VP": node, "specifier": node_or_None} for the first node labelled 'VP'
    found in a level-by-level search, or None when there is no VP. The
    specifier is the node immediately to the left of the VP in the flattened
    level, kept only if its label contains 'ADV'.

  The original implementation recursed on the flattened children without a
  stopping condition, so a list input without any VP (including `[]`) never
  terminated; the search is now an explicit loop that ends once a level is
  empty.
  """
  if not isinstance(row, list):
    # Cheap early exit for a single tree that contains no VP at all.
    if not membership_check(row, 'VP'):
      return None
    row = [row]

  level = row
  while level:
    for idx, el in enumerate(level):
      if el.label == 'VP':
        spc = None
        # The left neighbour is taken from the flattened level, so it is not
        # necessarily a sibling of the VP under the same parent.
        if idx > 0 and 'ADV' in level[idx - 1].label:
          spc = level[idx - 1]
        return {"VP" : el, "specifier": spc}

    # No VP on this level: descend one level across all nodes at once.
    level = [child for el in level for child in el.children]

  return None



In [None]:
def parse_VP(tree):
  """Split a VP subtree into verbal preterminals and other constituents.

  Returns a pair `(verbs, constituents)`:
    * a preterminal whose label contains 'VB', 'MD' or 'RB' is a verb element;
    * any other preterminal, and any node whose label does not start with
      'V', is returned whole as a constituent;
    * nodes whose label starts with 'V' are descended into, and the results
      of their children are concatenated in order.
  """
  if tree.is_preterminal():
    # NOTE(review): this is a substring test, so tags such as WRB, -LRB- and
    # -RRB- also count as verbal -- confirm that is intended.
    is_verbal = any(tag in tree.label for tag in ('VB', 'MD', 'RB'))
    return ([tree], []) if is_verbal else ([], [tree])

  if not tree.label.startswith('V'):
    return [], [tree]

  verbs, constituents = [], []
  for child in tree.children:
    child_verbs, child_constituents = parse_VP(child)
    verbs.extend(child_verbs)
    constituents.extend(child_constituents)

  return verbs, constituents

In [None]:
def get_text(full, span) :
  """Return `full[span[0]:span[1]]`, or None when `span` is falsy (None, empty)."""
  return full[span[0] : span[1]] if span else None

def parse_tweet(tw, display=False) :
  """Split a tweet into specifier, verb group and remaining constituents.

  The tweet is parsed with `parse_tree`, the first VP is located with
  `extract_VP` and decomposed with `parse_VP`. If one of the verb elements
  has the lemma "be", constituents whose label contains 'ADJ' are moved into
  the verb group.

  Args:
    tw: tweet text.
    display: when True, print the tweet, its tree and the extracted pieces.

  Returns:
    dict with keys
      "text"         -- the input text,
      "specifier"    -- span of the adverbial node before the VP, or None,
      "constituents" -- list of spans of the non-verbal constituents,
      "verb"         -- span from the start of the first verb element to the
                        end of the last one.
    Spans are (start, end) character offsets into `tw`. When no VP or no verb
    element is found, "specifier", "constituents" and "verb" are all None.

  The original relied on a bare `except:` to turn the NameError/IndexError of
  the "nothing found" cases into the empty result, and crashed in display
  mode when the verb list was empty. Those cases are now handled explicitly.
  """
  no_parse = {"text" : tw, "specifier" : None, "constituents" : None, "verb" : None}

  tree = parse_tree(tw)

  if display :
    print(tw)
    print(tree)

  res = extract_VP(tree)
  if not res :
    return no_parse

  spc = res['specifier']
  vbs, consts = parse_VP(res["VP"])
  if not vbs :
    return no_parse

  # A missing lemma is treated as "not be" instead of crashing on .lower().
  lemmas = [(vb.tk_info.lemma or "").lower() for vb in vbs]
  if 'be' in lemmas :
    if display :
      print("Special case : be")
    # Copula: adjectival constituents belong to the predicate, so they join the verb group.
    adjectival = [c for c in consts if 'ADJ' in c.label]
    consts = [c for c in consts if 'ADJ' not in c.label]
    vbs = vbs + adjectival

  spc_span = spc.span if spc else None
  verb_span = ( vbs[0].span[0], vbs[-1].span[-1] )
  const_spans = [c.span for c in consts]

  if display :
    print(f"specifier : {get_text(tw, spc_span)}")
    print(f"verbs : {get_text(tw, verb_span)}")
    print("constituents : ")
    for span in const_spans :
      print( get_text(tw, span) )

  return {"text" : tw, "specifier" : spc_span, "constituents" : const_spans, "verb" : verb_span}


## Sentence Negator

In [None]:
# Shared sentence negator; `interpret_sarcasm` calls `negator.negate_sentence`
# to rewrite tweets it classifies as sarcastic.
negator = Negator()

# Brute-Force Algorithm for Threshold

In [None]:
def sentiment_distance(tweet, verb, noun_phrase):
  """Return the Euclidean distance between the sentiment effects of V and NP.

  The effect of a fragment is estimated by ablation: the tweet's sentiment
  scores minus the scores of the tweet with every occurrence of that fragment
  removed via `str.replace`. The result is the norm of the difference between
  the verb's effect and the noun phrase's effect.
  """
  full_scores = classify_sentiment(tweet)

  def ablation_effect(fragment):
    # Score shift caused by deleting `fragment` from the tweet.
    return full_scores - classify_sentiment(tweet.replace(fragment, ""))  ##TO DO: change (ask Samba)

  verb_effect = ablation_effect(verb)
  np_effect = ablation_effect(noun_phrase)

  return np.linalg.norm(verb_effect - np_effect)

def is_sarc(sentiment_dist, threshold):
  """Tell whether a sentiment distance counts as sarcastic.

  The comparison is strict: a distance exactly equal to `threshold` is
  classified as not sarcastic.
  """
  exceeds_threshold = sentiment_dist > threshold
  return exceeds_threshold

In [None]:
import re

def _split_verb_and_object(tweet):
  """Split `tweet` around its first love/hate word.

  Returns `(verb, noun_phrase)`: the first whitespace-separated word starting
  with "lov" or "hat" (case-insensitive), prefixed with "gotta" when that is
  the word right before it, and everything after the verb joined by spaces.

  Raises:
    ValueError: if no word starts with "lov" or "hat".
  """
  words = tweet.split()
  verb_idx = next(
      (i for i, word in enumerate(words) if word.lower().startswith(("lov", "hat"))),
      None,
  )
  if verb_idx is None:
    raise ValueError(f"No love/hate verb found in tweet: {tweet!r}")

  # `verb_idx > 0` matters: for a tweet-initial verb, index -1 would wrap
  # around and test the LAST word of the tweet.
  if verb_idx > 0 and words[verb_idx - 1] == "gotta":
    verb = " ".join(words[verb_idx - 1:verb_idx + 1])
  else:
    verb = words[verb_idx]
  noun_phrase = " ".join(words[verb_idx + 1:])
  return verb, noun_phrase


def define_threshold(tweets, gold_annotations, threshold_list):
  """
  Scores every tweet and evaluates each candidate threshold against the gold labels.

  For each tweet a tuple is built containing 1) the tweet body, 2) the
  sarc/non-sarc gold annotation, 3) the sentiment distance between V and NP.
  Distances are then divided by their maximum (left untouched if the maximum
  is 0), and for each threshold the accuracy between the gold annotations and
  the `is_sarc` predictions is computed.

  The verb and noun phrase are located with a keyword heuristic
  (`_split_verb_and_object`); a parser-based split via `parse_tweet` had been
  drafted here but was disabled.

  Args:
    tweets: iterable of tweet strings.
    gold_annotations: iterable of truthy/falsy gold labels, aligned with `tweets`.
    threshold_list: iterable of thresholds on the scaled distance.

  Returns:
    `(threshold_accuracy_dict, tweet_annotation_sentiment_distance)`: a dict
    mapping each threshold to its accuracy, and the list of
    `(tweet, annotation, scaled_distance)` tuples.

  Raises:
    ValueError: if there are no tweets, or a tweet has no love/hate word.
  """
  tweet_annotation_sentiment_distance = []
  for tweet, annotation in zip(tweets, gold_annotations):
    verb, noun_phrase = _split_verb_and_object(tweet)
    tweet_annotation_sentiment_distance.append(
        (tweet, annotation, sentiment_distance(tweet, verb, noun_phrase)))

  if not tweet_annotation_sentiment_distance:
    raise ValueError("define_threshold needs at least one tweet")

  # scaling of the data
  sentiment_max = max(dist for _, _, dist in tweet_annotation_sentiment_distance)
  print(sentiment_max)
  if sentiment_max > 0:
    # Skipped when every distance is 0, which would otherwise divide by zero.
    tweet_annotation_sentiment_distance = [
        (t, a, sentiment_dist / sentiment_max)
        for t, a, sentiment_dist in tweet_annotation_sentiment_distance
    ]
  print(tweet_annotation_sentiment_distance)

  n_tweets = len(tweet_annotation_sentiment_distance)
  threshold_accuracy_dict = {}
  for threshold in threshold_list:
    correct = sum(
        1
        for _, annotation, sentiment_dist in tweet_annotation_sentiment_distance
        if is_sarc(sentiment_dist, threshold) == bool(annotation)
    )
    threshold_accuracy_dict[threshold] = correct / n_tweets

  return threshold_accuracy_dict, tweet_annotation_sentiment_distance

In [None]:
# Candidate thresholds 0, gap, 2*gap, ..., 1 on the max-scaled distance.
# linspace + rounding gives an exact number of points and clean keys; np.arange
# with a float step produced keys such as 0.009000000000000001 and has an
# endpoint that depends on floating-point rounding.
gap = 0.001
threshold_list = np.linspace(0.0, 1.0, int(round(1 / gap)) + 1).round(6).tolist()

# NOTE(review): `df` is not assigned in any cell visible in this notebook --
# TODO confirm which frame (df_exp1 / df_exp2) is meant and bind it explicitly.
threshold_accuracy_dict, tweet_annotation_sentiment_distance = define_threshold(df["tweet"], df["sarcastic"], threshold_list)
# max() returns the first key with the highest accuracy, i.e. the SMALLEST
# threshold when several thresholds tie.
best_threshold = max(threshold_accuracy_dict, key=threshold_accuracy_dict.get)
print("Best threshold:", best_threshold)
print("Accuracy:", threshold_accuracy_dict[best_threshold])

5.84252
[('Loving the representation from South Yorkshire 🥰🥰 #GodsOwn #England #ENG #bbcfootball #bbc', 0, 0.1288492), ('Hate this site [CHIRPBIRDICON]', 1, 0.592664), ('Loving the fact Carlton Cole knows longelos mum 🤣🤣🤣', 0, 0.66059244), ('loving late night twittering immediately being thrown into online political conversation by people who entered my life by jokes', 1, 0.7066286), ('Hate people who moan about EVERY SINGLE THING possible 😴👊🏼', 0, 0.23807698), ('love it when the hawks choke', 1, 0.6604515), ('Love it wen people try and stop my friends seeing me! Looooolll.', 1, 0.6722754), ('love to see and hear people wildly speculate about crashes on cable news', 1, 0.62219924), ('Love a good cry 👌🏻', 1, 0.2540908), ('love my fans xoxo', 1, 0.3239348), ("um love how the bernie vs hillary snap story's commercial is the purge makeamericapureagain 1 stworldproblems", 1, 0.80019104), ('love how a lot of ppl quit clash for agar io', 1, 0.69160795), ('gotta love yahoo and their dumb adver

In [None]:
threshold_accuracy_dict

{0.0: 0.8679245283018868,
 0.001: 0.8679245283018868,
 0.002: 0.8679245283018868,
 0.003: 0.8679245283018868,
 0.004: 0.8679245283018868,
 0.005: 0.8679245283018868,
 0.006: 0.8679245283018868,
 0.007: 0.8679245283018868,
 0.008: 0.8679245283018868,
 0.009000000000000001: 0.8679245283018868,
 0.01: 0.8679245283018868,
 0.011: 0.8679245283018868,
 0.012: 0.8679245283018868,
 0.013000000000000001: 0.8679245283018868,
 0.014: 0.8679245283018868,
 0.015: 0.8679245283018868,
 0.016: 0.8679245283018868,
 0.017: 0.8679245283018868,
 0.018000000000000002: 0.8679245283018868,
 0.019: 0.8679245283018868,
 0.02: 0.8679245283018868,
 0.021: 0.8679245283018868,
 0.022: 0.8679245283018868,
 0.023: 0.8679245283018868,
 0.024: 0.8679245283018868,
 0.025: 0.8679245283018868,
 0.026000000000000002: 0.8679245283018868,
 0.027: 0.8679245283018868,
 0.028: 0.8679245283018868,
 0.029: 0.8679245283018868,
 0.03: 0.8679245283018868,
 0.031: 0.8679245283018868,
 0.032: 0.8679245283018868,
 0.033: 0.86792452830

In [None]:
# Predict with the selected threshold and mark which predictions match the gold label.
# The list is assigned positionally, so this no longer assumes the index labels are
# 0..n-1 (the old `df.at[i, ...]` loop used positions as labels); pandas raises if
# the number of scored tweets differs from the number of rows.
df['prediction'] = [
    int(is_sarc(sentiment_dist, best_threshold))
    for _, _, sentiment_dist in tweet_annotation_sentiment_distance
]
df['correct'] = (df['prediction'] == df['sarcastic']).astype(int)

In [None]:
df

Unnamed: 0,ID,original_ID,tweet,sarcastic,rephrase,prediction,correct
0,12607,train_1710,Loving the representation from South Yorkshire...,0,,1,0
1,4790,train_255,Hate this site [CHIRPBIRDICON],1,I love this site,1,1
2,11657,test_a_93,Loving the fact Carlton Cole knows longelos mu...,0,,1,0
3,3635,sign_10841,loving late night twittering immediately being...,1,i really shouldn't tweet late at night cause m...,1,1
4,12455,train_2607,Hate people who moan about EVERY SINGLE THING ...,0,,1,0
5,14973,sign_2261,love it when the hawks choke,1,hate it when the hawks choke,1,1
6,9609,train_404,Love it wen people try and stop my friends see...,1,"Non sarcastically I would say ""I hate it when ...",1,1
7,15449,sign_5263,love to see and hear people wildly speculate a...,1,love to see and hear people wildly speculate a...,1,1
8,16512,train_364,Love a good cry 👌🏻,1,"I would say ""I hate when someone makes me cry""",1,1
9,15442,sign_10897,love my fans xoxo,1,literally hate my fans xoxo,1,1


In [None]:
from scipy.stats import binomtest

# One-sided exact binomial test: is the number of correct predictions higher than
# what a trivial majority-class classifier would achieve?
# The null proportion is the majority-class rate max(p, 1 - p). Using the sarcastic
# rate p on its own is only the right baseline when sarcastic tweets are the
# majority; for a minority-sarcastic sample it would make the test far too lenient.
n_correct = int(df['correct'].sum())
sarcastic_rate = float(np.mean(df['sarcastic']))
majority_baseline = max(sarcastic_rate, 1 - sarcastic_rate)

result = binomtest(n_correct, df.shape[0], majority_baseline, alternative='greater')

print(f"p-value: {result.pvalue}")
print(f"Test statistic: {result.statistic}")
print(result)

p-value: 0.5989630788216558
Test statistic: 0.8679245283018868
BinomTestResult(k=46, n=53, alternative='greater', statistic=0.8679245283018868, pvalue=0.5989630788216558)


In [None]:
# Append two summary rows to the results frame:
#   row 1 -- mean of 'sarcastic' and mean of 'correct' (accuracy),
#   row 2 -- the binomial-test p-value, stored as text in 'correct'.
# The statistics are computed BEFORE any row is added and both rows are appended in
# one concat; the previous `df.at[df.shape[0]+1, ...]` arithmetic skipped an index
# label and only hit the intended rows because the frame grew between statements.
# NOTE: not idempotent -- re-running this cell appends the summary rows again.
summary_rows = pd.DataFrame({
    'sarcastic': [np.mean(df['sarcastic']), np.nan],
    'correct': [np.mean(df['correct']), f"P-value: {round(result.pvalue, 3)}"],
})
df = pd.concat([df, summary_rows], ignore_index=True)

In [None]:
df

Unnamed: 0,ID,original_ID,tweet,sarcastic,rephrase,prediction,correct
0,12607.0,train_1710,Loving the representation from South Yorkshire...,0.0,,1.0,0
1,4790.0,train_255,Hate this site [CHIRPBIRDICON],1.0,I love this site,1.0,1
2,11657.0,test_a_93,Loving the fact Carlton Cole knows longelos mu...,0.0,,1.0,0
3,3635.0,sign_10841,loving late night twittering immediately being...,1.0,i really shouldn't tweet late at night cause m...,1.0,1
4,12455.0,train_2607,Hate people who moan about EVERY SINGLE THING ...,0.0,,1.0,0
5,14973.0,sign_2261,love it when the hawks choke,1.0,hate it when the hawks choke,1.0,1
6,9609.0,train_404,Love it wen people try and stop my friends see...,1.0,"Non sarcastically I would say ""I hate it when ...",1.0,1
7,15449.0,sign_5263,love to see and hear people wildly speculate a...,1.0,love to see and hear people wildly speculate a...,1.0,1
8,16512.0,train_364,Love a good cry 👌🏻,1.0,"I would say ""I hate when someone makes me cry""",1.0,1
9,15442.0,sign_10897,love my fans xoxo,1.0,literally hate my fans xoxo,1.0,1


In [None]:
# Write the current state of `df` to CSV in the working directory, without the index column.
df.to_csv('exp2_results_27112024.csv', index=False)

# Rule-based Sarcasm Interpreter

In [None]:
#Final sarcasm classification function for when we have established the best threshold
# NOTE: a second `interpret_sarcasm` is defined in the following cell; once that
# cell has run, it replaces this definition.
def interpret_sarcasm(tweet, verb, noun_phrase, threshold=0.528, max_distance=6.3885164):
    """Negate the whole tweet if it is classified as sarcastic.

    The V/NP sentiment distance is computed with `sentiment_distance` (the
    same ablation logic that used to be duplicated here), divided by
    `max_distance`, and compared with `threshold` via `is_sarc`.

    Args:
        tweet: full tweet text.
        verb: verb substring of the tweet.
        noun_phrase: noun-phrase substring of the tweet.
        threshold: cut-off on the scaled distance.
        max_distance: divisor used to scale the raw distance. The default is
            the previously hard-coded constant -- presumably the maximum
            distance from an earlier `define_threshold` run; TODO confirm it
            matches the run the threshold was tuned on.

    Returns:
        The negated tweet when sarcastic, otherwise the string
        "The tweet is not sarcastic."
    """
    scaled_distance = sentiment_distance(tweet, verb, noun_phrase) / max_distance

    if is_sarc(scaled_distance, threshold):
        return negator.negate_sentence(tweet)
    return "The tweet is not sarcastic."

In [None]:
#TEST!!!!!!!!!!!!!!!!!!!!!!!!!!
#Final sarcasm classification function for when we have established the best threshold
# NOTE: this redefines `interpret_sarcasm` from the previous cell. This variant
# negates only the part of the tweet outside the noun phrase.
def interpret_sarcasm(tweet, verb, noun_phrase, threshold=0.528, max_distance=6.3885164):
    """Negate the tweet minus its noun phrase if it is classified as sarcastic.

    The V/NP sentiment distance is computed with `sentiment_distance` (the
    same ablation logic that used to be duplicated here), divided by
    `max_distance`, and compared with `threshold` via `is_sarc`.

    Args:
        tweet: full tweet text.
        verb: verb substring of the tweet.
        noun_phrase: noun-phrase substring of the tweet.
        threshold: cut-off on the scaled distance.
        max_distance: divisor used to scale the raw distance. The default is
            the previously hard-coded constant -- presumably the maximum
            distance from an earlier `define_threshold` run; TODO confirm it
            matches the run the threshold was tuned on.

    Returns:
        When sarcastic: the negation of the tweet with every occurrence of
        `noun_phrase` removed, followed by a space and `noun_phrase`.
        Otherwise the string "The tweet is not sarcastic."
    """
    scaled_distance = sentiment_distance(tweet, verb, noun_phrase) / max_distance

    if not is_sarc(scaled_distance, threshold):
        return "The tweet is not sarcastic."

    # Negate only the remainder of the tweet, then re-attach the noun phrase at the end.
    tweet_no_np = tweet.replace(noun_phrase, "")
    negated_part = negator.negate_sentence(tweet_no_np)
    return negated_part + " " + noun_phrase