In [None]:
!pip install simpletransformers
!pip install openai==0.28
!pip install -q google-generativeai
import json
import logging
import os
import re
import time

import numpy as np
import pandas as pd
import sklearn
import torch
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

import google.generativeai as genai
import openai
from google.api_core.exceptions import TooManyRequests
from simpletransformers.classification import ClassificationModel


# 1. Configuration

In [54]:
class Config:
    """Experiment settings shared by the fine-tuning baselines and the LLM calls."""

    def __init__(self):
        # Arguments forwarded to simpletransformers' ClassificationModel.
        self.num_train_epochs = 4
        self.train_batch_size = 2
        self.max_seq_length = 400
        self.overwrite_output_dir = True
        self.use_multiprocessing = False
        self.use_multiprocessing_for_evaluation = False

        # Dataset split and label space.
        self.test_size = 0.2
        self.num_class = 2

        # Parallel lists: modelname[i] is the simpletransformers model type and
        # modelpath[i] the matching pretrained checkpoint.
        self.modelname = ["roberta", "mpnet", "bert"]
        self.modelpath = ["roberta-base", "microsoft/mpnet-base", "bert-base-cased"]
        # Derived instead of hard-coded so it cannot drift out of sync with the
        # lists above when a model is added or removed.
        self.modelnum = len(self.modelname)

        # Generation settings for the LLM calls.
        self.max_tokens = 300
        self.temperature = 0.3


config = Config()

# 2. Load models

In [12]:
# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# saved inside the notebook; the placeholder stays as a fallback for in-place edits.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your-gpt-api-key")


def get_openAI_response(prompt, model="gpt-3.5-turbo"):
    """Send `prompt` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the one user message of the chat request.
        model: Chat model identifier passed to `openai.ChatCompletion.create`.

    Returns:
        The `content` of the first choice's message.

    `max_tokens` and `temperature` are read from the module-level `config`.
    """
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        max_tokens=config.max_tokens,
        temperature=config.temperature,
    )
    return response.choices[0].message["content"]

In [55]:
# Prefer the GEMINI_API_KEY environment variable so the secret never has to be
# saved inside the notebook; the placeholder stays as a fallback for in-place edits.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY", "your-gemini-api-key"))


def get_gemini_response(prompt, model_name="models/gemini-1.5-flash-latest",max_out_tokens=config.max_tokens, temperature=config.temperature):
    """Generate a reply to `prompt` with a Gemini model and return its stripped text.

    Args:
        prompt: Text passed to `generate_content`.
        model_name: Model identifier used to build the `GenerativeModel`.
        max_out_tokens: Upper bound on generated tokens (defaults to
            `config.max_tokens` as evaluated when this function is defined).
        temperature: Sampling temperature (defaults to `config.temperature`
            as evaluated when this function is defined).

    Returns:
        `response.text` with surrounding whitespace removed.
    """
    model = genai.GenerativeModel(model_name=model_name)
    # The generation limits were previously accepted but never sent, so Gemini ran
    # with its own defaults while GPT honoured config.max_tokens / temperature.
    response = model.generate_content(
        prompt,
        generation_config=genai.types.GenerationConfig(
            max_output_tokens=max_out_tokens,
            temperature=temperature,
        ),
    )
    return response.text.strip()

# 3. Key Functions

In [14]:
def run_irony_detection_dataset(df: pd.DataFrame, text_column: str, prompt_fn,get_reponse,max_rows: int = None):
    """Query an LLM for every row of `df` and collect the parsed answers.

    Args:
        df: Frame holding the statements to classify.
        text_column: Name of the column that contains the statement text.
        prompt_fn: Callable that turns one statement into a prompt string.
        get_reponse: Callable that sends a prompt and returns the raw reply text.
        max_rows: If truthy, only the first `max_rows` rows (by position) are
            processed; `None` or 0 processes the whole frame.

    Returns:
        A DataFrame with the columns `original`, `reasoning`, `rephrased` and
        `irony_class`, one row per processed statement.

    A failing `get_reponse` call is reported on stdout and recorded as the
    literal reply "Error" so that one bad request does not abort the run.
    """
    # Limit by position, not by index label: frames coming out of
    # train_test_split keep their shuffled original labels, so comparing the
    # label with max_rows would stop at an arbitrary row.
    subset = df.head(max_rows) if max_rows else df

    results = []
    for i, row in subset.iterrows():
        text = row[text_column]
        prompt = prompt_fn(text)
        try:
            output = get_reponse(prompt)
        except Exception as e:
            print(f"Error on row {i}: {e}")
            output = "Error"

        reasoning, rephrased, irony = parse_irony_response(output)

        results.append({
            "original": text,
            "reasoning": reasoning.strip(),
            "rephrased": rephrased.strip(),
            "irony_class": irony
        })

    # Explicit columns keep the schema stable even when no row was processed.
    return pd.DataFrame(results, columns=["original", "reasoning", "rephrased", "irony_class"])

In [15]:
def parse_irony_response(output: str):
    """Split a raw LLM reply into reasoning, rephrased statement and irony label.

    The reply is scanned line by line for a JSON object with an "irony" key.

    Args:
        output: Raw text returned by the model.

    Returns:
        A tuple `(reasoning, rephrased_line, irony)`:
        - `irony` is the value of the "irony" key, or `None` if no parsable
          verdict was found.
        - `rephrased_line` is the first non-empty line after the verdict. When
          nothing follows the verdict (the prompts ask for the rephrasing
          *before* the JSON), the last non-empty line before it is used instead,
          provided at least one other line remains as reasoning.
        - `reasoning` is the remaining text before the verdict, newline-joined.
    """
    before_verdict = []
    after_verdict = []
    irony = None

    for line in output.strip().split('\n'):
        stripped = line.strip()

        if "irony" in stripped and "{" in stripped:
            # Try the whole tail first, then every flat {...} group so trailing
            # punctuation or prose around the object does not break parsing.
            candidates = [stripped[stripped.index("{"):]]
            candidates.extend(re.findall(r'\{[^{}]*\}', stripped))
            verdict = None
            for candidate in candidates:
                try:
                    parsed = json.loads(candidate)
                except ValueError:
                    continue
                if isinstance(parsed, dict) and "irony" in parsed:
                    verdict = parsed
                    break
            if verdict is not None:
                irony = verdict["irony"]
                continue
            # Not a verdict after all: fall through and keep the line as text
            # instead of silently dropping it.

        # Markdown code fences around the JSON carry no content.
        if stripped.startswith("```"):
            continue

        if irony is None:
            before_verdict.append(stripped)
        else:
            after_verdict.append(stripped)

    rephrased_line = next((text for text in after_verdict if text), "")

    if irony is not None and not rephrased_line:
        filled = [idx for idx, text in enumerate(before_verdict) if text]
        # Require a second non-empty line so a lone reasoning sentence is not
        # mistaken for the rephrasing.
        if len(filled) >= 2:
            rephrased_line = before_verdict.pop(filled[-1])

    reasoning = "\n".join(before_verdict).strip()
    return reasoning, rephrased_line, irony

In [16]:
def count_syllables(word):
    """Estimate the number of syllables in `word` with a vowel-group heuristic.

    Every run of consecutive vowels (a, e, i, o, u, y) counts as one syllable,
    a trailing "e" removes one, and any non-empty word counts at least one.

    Args:
        word: A single word; case is ignored.

    Returns:
        The estimated syllable count, or 0 for an empty string (which
        previously raised IndexError).
    """
    if not word:
        return 0

    word = word.lower()
    vowels = "aeiouy"
    syllable_count = 0
    previous_is_vowel = False
    for char in word:
        is_vowel = char in vowels
        # Only the first vowel of a vowel run opens a new syllable.
        if is_vowel and not previous_is_vowel:
            syllable_count += 1
        previous_is_vowel = is_vowel
    if word.endswith("e"):
        syllable_count -= 1
    return max(syllable_count, 1)

def count_words(text):
    """Return how many `\\w+` runs (word-character sequences) occur in `text`."""
    return sum(1 for _ in re.finditer(r'\w+', text))

def count_sentences(text):
    """Return the number of non-blank pieces left after splitting `text` on '.', '!' or '?'."""
    pieces = re.split(r'[.!?]', text)
    # Consecutive or trailing terminators produce empty pieces; skip those.
    return sum(1 for piece in pieces if piece.strip())

def count_total_syllables(text):
    """Return the sum of `count_syllables` over every `\\w+` run found in `text`."""
    return sum(count_syllables(token) for token in re.findall(r'\w+', text))

def flesch_reading_ease(text):
    """Compute the Flesch Reading Ease score of `text`.

    Uses `count_words`, `count_sentences` and `count_total_syllables` and
    evaluates 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words).

    Returns:
        The score as a float, or `None` when the text has no words or no
        sentences (the ratios would divide by zero).
    """
    word_total = count_words(text)
    sentence_total = count_sentences(text)
    syllable_total = count_total_syllables(text)

    if not (sentence_total and word_total):
        return None

    words_per_sentence = word_total / sentence_total
    syllables_per_word = syllable_total / word_total
    return 206.835 - 1.015 * words_per_sentence - 84.6 * syllables_per_word

In [17]:
def get_sentence_embedding(sentence):
    """Embed `sentence` by averaging the encoder's last hidden state over dim 1.

    Relies on the module-level `tokenizer` and `model` objects.
    NOTE(review): neither is defined in the visible part of the notebook; they
    must be created before this function is called.

    Returns:
        The tensor produced by `last_hidden_state.mean(dim=1)`.
    """
    # Tokenise with truncation and padding, returning PyTorch tensors.
    encoded = tokenizer(sentence, return_tensors='pt', truncation=True, padding=True)

    # Inference only: skip gradient tracking for the forward pass.
    with torch.no_grad():
        model_output = model(**encoded)

    return model_output.last_hidden_state.mean(dim=1)

def cosine_similarity(embedding1, embedding2):
    """Return the cosine similarity of two embeddings as a Python float.

    Both tensors are flattened to 1-D first. `get_sentence_embedding` yields a
    tensor with a leading dimension from `mean(dim=1)`; comparing such tensors
    along `dim=0` without flattening produces one value per feature and makes
    `.item()` raise. 1-D inputs behave exactly as before.

    Args:
        embedding1: First embedding tensor.
        embedding2: Second embedding tensor with the same number of elements.

    Returns:
        The similarity as a float.
    """
    flat1 = embedding1.reshape(-1)
    flat2 = embedding2.reshape(-1)
    similarity = torch.nn.functional.cosine_similarity(flat1, flat2, dim=0)
    return similarity.item()


In [None]:
def voter(resutls_def,resutls_process,resutls_probability,threshold):
    """Combine three per-sample irony verdicts into one label per sample.

    For each position the three votes are summed and compared with `threshold`.

    NOTE(review): the previous body did not parse (missing colon), iterated over
    an undefined name and called `sum` with three positional arguments, so the
    intended return shape could not be confirmed. Returning one label per sample
    is the interpretation chosen here.

    Args:
        resutls_def: Verdicts (0/1) from the definition prompt.
        resutls_process: Verdicts (0/1) from the process prompt.
        resutls_probability: Verdicts (0/1) from the probability prompt.
        threshold: Minimum number of positive votes needed for a final 1.

    Returns:
        A list of 0/1 labels, one per sample.

    Raises:
        ValueError: If the three inputs differ in length.
    """
    if not (len(resutls_def) == len(resutls_process) == len(resutls_probability)):
        raise ValueError("All three result sequences must have the same length.")

    def _as_vote(value):
        # An unparsed verdict is None (NaN after a CSV round trip): no vote.
        if value is None or value != value:
            return 0
        return int(value)

    final_labels = []
    for votes in zip(resutls_def, resutls_process, resutls_probability):
        total = sum(_as_vote(vote) for vote in votes)
        final_labels.append(1 if total >= threshold else 0)
    return final_labels

# 4. Load Datasets

In [None]:
# Every corpus is read as a tab-separated UTF-8 file from ./data.
irony_iSarcasm = pd.read_csv('./data/irony_iSarcasm.csv', sep='\t', encoding='utf-8')
irony_SemEval = pd.read_csv('./data/irony_SemEval.csv', sep='\t', encoding='utf-8')
# The path used to read "irony_Reddit=.csv"; the stray "=" does not match the
# naming of the other five files and looks like a typo.
# NOTE(review): confirm the file on disk is named irony_Reddit.csv.
irony_Reddit = pd.read_csv("./data/irony_Reddit.csv", sep='\t', encoding='utf-8')
irony_Gen = pd.read_csv("./data/irony_Gen.csv", sep='\t', encoding='utf-8')
irony_HYP = pd.read_csv('./data/irony_HYP.csv', sep='\t', encoding='utf-8')
irony_RQ = pd.read_csv("./data/irony_RQ.csv", sep='\t', encoding='utf-8')

In [29]:
# A fixed seed makes Restart & Run All reproduce the same train/test partition,
# so the supervised baselines and the LLM runs are scored on the same test rows
# every time.
SPLIT_SEED = 42

train_irony_iSarcasm, test_irony_iSarcasm = train_test_split(irony_iSarcasm, test_size=config.test_size, random_state=SPLIT_SEED)
train_irony_SemEval, test_irony_SemEval = train_test_split(irony_SemEval, test_size=config.test_size, random_state=SPLIT_SEED)
train_irony_Reddit, test_irony_Reddit = train_test_split(irony_Reddit, test_size=config.test_size, random_state=SPLIT_SEED)
train_irony_Gen, test_irony_Gen = train_test_split(irony_Gen, test_size=config.test_size, random_state=SPLIT_SEED)
train_irony_HYP, test_irony_HYP = train_test_split(irony_HYP, test_size=config.test_size, random_state=SPLIT_SEED)
train_irony_RQ, test_irony_RQ = train_test_split(irony_RQ, test_size=config.test_size, random_state=SPLIT_SEED)

# 5. Baseline

### 5.1 Supervised Learning

In [30]:
# Quieten the "transformers" logger to WARNING while the root logger is
# configured at INFO.
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)
logging.basicConfig(level=logging.INFO)

In [None]:
def train_and_evaluate(model_name, model_path, train_data, test_data, config):
    """Fine-tune one classifier on `train_data` and print its test report.

    Args:
        model_name: Model type passed to `ClassificationModel` and shown in the
            printed header.
        model_path: Pretrained checkpoint passed to `ClassificationModel`.
        train_data: Data handed to `train_model` unchanged.
        test_data: Frame whose `Text` column is predicted and whose `Labels`
            column is used as ground truth.
        config: Object providing the training hyper-parameters read below.

    Returns:
        None; the classification report is printed.
    """
    training_args = {
        'num_train_epochs': config.num_train_epochs,
        'train_batch_size': config.train_batch_size,
        'max_seq_length': config.max_seq_length,
        'overwrite_output_dir': config.overwrite_output_dir,
        "use_multiprocessing": config.use_multiprocessing,
        "use_multiprocessing_for_evaluation": config.use_multiprocessing_for_evaluation,
    }
    classifier = ClassificationModel(
        model_name,
        model_path,
        num_labels=config.num_class,
        args=training_args,
    )
    classifier.train_model(train_data)

    test_texts = list(test_data.Text)
    predicted_labels, _raw_outputs = classifier.predict(test_texts)

    print(f"-------------{model_name}-------------")
    print(classification_report(test_data['Labels'], predicted_labels))

# Map each dataset name to its (train, test) pair for the supervised baselines.
datasets_supervised = {
    "iSarcasm": (train_irony_iSarcasm, test_irony_iSarcasm),
    "SemEval": (train_irony_SemEval, test_irony_SemEval),
    "Reddit": (train_irony_Reddit, test_irony_Reddit),
    "Gen": (train_irony_Gen, test_irony_Gen),
    "HYP": (train_irony_HYP, test_irony_HYP),
    "RQ": (train_irony_RQ, test_irony_RQ)
}

# Train and evaluate every configured model on every dataset.
# .items() yields (name, (train, test)); the earlier two-name unpacking bound the
# dataset *name* to train_data and the whole tuple to test_data.
for dataset_name, (train_data, test_data) in datasets_supervised.items():
    print(f"=============Dataset: {dataset_name}=============")
    for i in range(config.modelnum):
        train_and_evaluate(
            config.modelname[i],
            config.modelpath[i],
            train_data,
            test_data,
            config
        )

### 5.2 Zero-shot

In [34]:
# From here on `datasets_supervised` maps each dataset name to its *test* split
# only (it replaces the earlier name -> (train, test) mapping); the prompt-based
# runs below iterate over it.
datasets_supervised = dict(
    iSarcasm=test_irony_iSarcasm,
    SemEval=test_irony_SemEval,
    Reddit=test_irony_Reddit,
    Gen=test_irony_Gen,
    HYP=test_irony_HYP,
    RQ=test_irony_RQ,
)

In [None]:
def COT_irony_prompt(text: str) -> str:
    """Build the chain-of-thought irony prompt for one statement.

    The statement is quoted under a fixed instruction line and followed by four
    numbered steps that end with a request for a JSON verdict.
    """
    prompt_lines = [
        "Determine whether the following statement includes irony:",
        f'  "{text}"',
        "  Steps to follow:",
        "  1. Let’s think step by step.",
        "  2. Please write the reason why you think this statement has irony.",
        "  3. Please rephrase this statement without the irony on a new line.",
        '  4. Return the result only in JSON format: { "irony": 1 } or { "irony": 0 }.',
    ]
    return "\n".join(prompt_lines)


# Run the CoT prompt with both backends and write one CSV per dataset and backend.
# Two fixes: the GPT run used to write to the "_Gemini.csv" name (and was then
# overwritten by the Gemini run), and the Gemini callable was misspelled.
for backend_label, respond in (("GPT", get_openAI_response), ("Gemini", get_gemini_response)):
    for key, value in datasets_supervised.items():
        results = run_irony_detection_dataset(df=value, text_column="Text", prompt_fn=COT_irony_prompt, get_reponse=respond)
        results.to_csv(f"{key}_results_COT_{backend_label}.csv", index=False)

In [None]:
def AUTO_COT_irony_prompt(text: str, examples: list[str] = None) -> str:
    """Build the Auto-CoT irony prompt: non-ironic samples, steps, then the statement.

    Args:
        text: Statement to classify; inserted between triple backticks.
        examples: Optional non-ironic sample sentences. When omitted or empty,
            the three built-in samples are used and the prompt text is the same
            as before. `run_irony_detection_dataset` calls `prompt_fn(text)`
            with a single argument, so this parameter needs a default.

    Returns:
        The complete prompt string.
    """
    if not examples:
        # Kept verbatim (including the missing space in "sample3:") so the
        # default prompt is unchanged.
        sample_lines = (
            "sample1 :Simply having a wonderful christmas time :D",
            "sample2 :Corny jokes are my absolute favorite",
            "sample3:Thank god she doesn't get me to do this anymore!!!",
        )
    else:
        sample_lines = [f"sample{i} :{ex}" for i, ex in enumerate(examples, start=1)]

    example_text = "\n".join(f"    {line}" for line in sample_lines)
    return f"""Study the following samples without irony:
{example_text}
    1.Let’s think step by step.
    2.Please write the reason why you think this statement has irony.
    3.Please rephrase this statement without the irony on a new line.
    4. Return the result only in JSON format: {{ "irony": 1 }} or {{ "irony": 0 }}.

    Statement:
    ```{text}```
    """
# Run the Auto-CoT prompt with both backends and write one CSV per dataset and backend.
# The Gemini callable was misspelled (get_genmini_response) and raised NameError.
for backend_label, respond in (("GPT", get_openAI_response), ("Gemini", get_gemini_response)):
    for key, value in datasets_supervised.items():
        results = run_irony_detection_dataset(df=value, text_column="Text", prompt_fn=AUTO_COT_irony_prompt, get_reponse=respond)
        results.to_csv(f"{key}_results_AUTO_{backend_label}.csv", index=False)

In [None]:
def APE_irony_prompt(text: str) -> str:
    """Build the APE-style irony prompt for one statement.

    Same layout as the other prompt builders; the first step uses the
    "work this out in a step-by-step way" wording.
    """
    prompt_lines = [
        "Determine whether the following statement includes irony:",
        f'  "{text}"',
        "  Steps to follow:",
        "  1. Let’s work this out in a step-by-step way to be sure we have the right answer.",
        "  2. Please write the reason why you think this statement has irony.",
        "  3. Please rephrase this statement without the irony on a new line.",
        '  4. Return the result only in JSON format: { "irony": 1 } or { "irony": 0 }.',
    ]
    return "\n".join(prompt_lines)
# Run the APE prompt with both backends and write one CSV per dataset and backend.
# The Gemini callable was misspelled (get_genmini_response) and raised NameError.
for backend_label, respond in (("GPT", get_openAI_response), ("Gemini", get_gemini_response)):
    for key, value in datasets_supervised.items():
        results = run_irony_detection_dataset(df=value, text_column="Text", prompt_fn=APE_irony_prompt, get_reponse=respond)
        results.to_csv(f"{key}_results_APE_{backend_label}.csv", index=False)

In [None]:
def PS_irony_prompt(text: str) -> str:
    """Build the plan-and-solve irony prompt for one statement.

    The first two steps ask the model to devise a plan and then carry it out;
    the remaining steps request a reason, a rephrasing and a JSON verdict.
    """
    prompt_lines = [
        "Determine whether the following statement includes irony:",
        f'  "{text}"',
        "  Steps to follow:",
        "  1. Let’s first understand the problem and devise a plan to solve the problem.",
        "  2. Let’s carry out the plan and solve the problem step by step.",
        "  3. Please write the reason why you think this statement has irony.",
        "  4. Please rephrase this statement without the irony on a new line.",
        '  5. Return the result only in JSON format: { "irony": 1 } or { "irony": 0 }.',
    ]
    return "\n".join(prompt_lines)

# Run the plan-and-solve prompt with both backends and write one CSV per dataset and backend.
# The Gemini callable was misspelled (get_genmini_response) and raised NameError.
for backend_label, respond in (("GPT", get_openAI_response), ("Gemini", get_gemini_response)):
    for key, value in datasets_supervised.items():
        results = run_irony_detection_dataset(df=value, text_column="Text", prompt_fn=PS_irony_prompt, get_reponse=respond)
        results.to_csv(f"{key}_results_PS_{backend_label}.csv", index=False)

In [None]:
def PS_plus_irony_prompt(text: str) -> str:
    """Build the PS+ irony prompt for one statement.

    Six numbered steps: look for a said/meant discrepancy, look for ironic
    wording, solve step by step, give a reason, rephrase, return a JSON verdict.
    """
    prompt_lines = [
        "Determine whether the following statement includes irony:",
        f'  "{text}"',
        "  Steps to follow:",
        "  1.Let’s first understand the problem and check if contains a discrepancy between what is said and what is meant.",
        "  2.let’s carry out the plan and pay attention to finding ironic words or phases.",
        "  3.solve the problem step by step",
        "  4.Please write the reason why you think this statement has irony.",
        "  5.Please rephrase this statement without the irony on a new line.",
        '  6.Return the result only in JSON format: { "irony": 1 } or { "irony": 0 }.',
    ]
    return "\n".join(prompt_lines)

# Run the PS+ prompt with both backends and write one CSV per dataset and backend.
# The Gemini callable was misspelled (get_genmini_response) and raised NameError.
for backend_label, respond in (("GPT", get_openAI_response), ("Gemini", get_gemini_response)):
    for key, value in datasets_supervised.items():
        results = run_irony_detection_dataset(df=value, text_column="Text", prompt_fn=PS_plus_irony_prompt, get_reponse=respond)
        results.to_csv(f"{key}_results_PLUS_{backend_label}.csv", index=False)

# 6. IDADP

In [None]:
def IDADP_irony_prompt_process(text: str) -> str:
    """Build the IDADP "process" prompt for one statement.

    Steps 2 and 3 state two conditions under which the text is not ironic; the
    remaining steps request a reason, a rephrasing and a JSON verdict.
    """
    prompt_lines = [
        "Determine whether the following statement includes irony:",
        f'  "{text}"',
        "  Steps to follow:",
        "  1. Let’s think step by step.",
        "  2. The text is not ironic if the statement does not contain a discrepancy between what is said and what is meant.",
        "  3. The text is not ironic if There is no unexpected outcome or contrast between expectation and reality presented in the statement.",
        "  4. Please write the reason why you think this statement has irony.",
        "  5. Please rephrase this statement without the irony on a new line.",
        '  6. Return the result only in JSON format: { "irony": 1 } or { "irony": 0 }.',
    ]
    return "\n".join(prompt_lines)

# Run the IDADP process prompt with both backends and write one CSV per dataset and backend.
# The Gemini callable was misspelled (get_genmini_response) and raised NameError.
for backend_label, respond in (("GPT", get_openAI_response), ("Gemini", get_gemini_response)):
    for key, value in datasets_supervised.items():
        results = run_irony_detection_dataset(df=value, text_column="Text", prompt_fn=IDADP_irony_prompt_process, get_reponse=respond)
        results.to_csv(f"{key}_IDADP_process_{backend_label}.csv", index=False)

In [None]:
def IDADP_irony_prompt_definition(text: str) -> str:
    """Build the IDADP "definition" prompt for one statement.

    Steps 2 and 3 ask the model to identify the part that conveys the opposite
    of what is meant and to state the actual meaning; the remaining steps
    request a reason, a rephrasing and a JSON verdict.
    """
    prompt_lines = [
        "Determine whether the following statement includes irony:",
        f'  "{text}"',
        "  Steps to follow:",
        "  1. Let’s think step by step.",
        "  2. Identify the irony: Determine which part of the sentence conveys the opposite of what is meant.",
        "  3. Clarify the intent: Express the actual meaning directly.",
        "  4. Please write the reason why you think this statement has irony.",
        "  5. Please rephrase this statement without the irony on a new line.",
        '  6. Return the result only in JSON format: { "irony": 1 } or { "irony": 0 }.',
    ]
    return "\n".join(prompt_lines)

# Run the IDADP definition prompt with both backends and write one CSV per dataset and backend.
# The response callable is now passed by keyword: a positional argument after
# keyword arguments is a SyntaxError. The Gemini callable was also misspelled.
for backend_label, respond in (("GPT", get_openAI_response), ("Gemini", get_gemini_response)):
    for key, value in datasets_supervised.items():
        results = run_irony_detection_dataset(df=value, text_column="Text", prompt_fn=IDADP_irony_prompt_definition, get_reponse=respond)
        results.to_csv(f"{key}_IDADP_definition_{backend_label}.csv", index=False)

In [None]:
def IDADP_irony_prompt_probability(text: str) -> str:
    """Build the IDADP "probability" prompt for one statement.

    Steps 2 and 3 ask for a 0-to-1 likelihood score and state a 0.7 threshold;
    the remaining steps request a reason, a rephrasing and a JSON verdict.
    """
    prompt_lines = [
        "Determine whether the following statement includes irony:",
        f'  "{text}"',
        "  Steps to follow:",
        "  1. Let’s think step by step.",
        "  2. Please provide a probabilistic score ranging from 0 to 1,representing the likelihood that the text is ironic.",
        "  3. The threshold for irony detection is set to 0.7.",
        "  4. Please write the reason why you think this statement has irony.",
        "  5. Please rephrase this statement without the irony on a new line.",
        '  6. Return the result only in JSON format: { "irony": 1 } or { "irony": 0 }.',
    ]
    return "\n".join(prompt_lines)

# Run the IDADP probability prompt with both backends and write one CSV per dataset and backend.
# The Gemini callable was misspelled (get_genmini_response) and raised NameError.
for backend_label, respond in (("GPT", get_openAI_response), ("Gemini", get_gemini_response)):
    for key, value in datasets_supervised.items():
        results = run_irony_detection_dataset(df=value, text_column="Text", prompt_fn=IDADP_irony_prompt_probability, get_reponse=respond)
        results.to_csv(f"{key}_IDADP_probability_{backend_label}.csv", index=False)