In [None]:
!pip install afinn
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from imblearn.under_sampling import RandomUnderSampler
from afinn import Afinn
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import spacy


Collecting afinn
  Downloading afinn-0.1.tar.gz (52 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/52.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: afinn
  Building wheel for afinn (setup.py) ... [?25l[?25hdone
  Created wheel for afinn: filename=afinn-0.1-py3-none-any.whl size=53429 sha256=289a89eb716e613972771b539a364886c9bede229e3215df22f86195669b32ec
  Stored in directory: /root/.cache/pip/wheels/b0/05/90/43f79196199a138fb486902fceca30a2d1b5228e6d2db8eb90
Successfully built afinn
Installing collected packages: afinn
Successfully installed afinn-0.1


In [None]:
# Read the headline dataset from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer="deadpan_sarcasm.csv")

In [None]:
# The VADER lexicon must be downloaded before SentimentIntensityAnalyzer is built.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

# AFINN lexicon scorer used by the word-count features.
afinn = Afinn()

# spaCy small English pipeline.
nlp = spacy.load("en_core_web_sm")


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


In [None]:
# Helpers that count AFINN-positive and AFINN-negative words in a text
def count_pos_words(text):
    """Return how many whitespace-separated tokens of ``text`` AFINN scores above zero.

    Each token is scored on its own with the module-level ``afinn`` scorer.
    """
    positive_total = 0
    for token in text.split():
        if afinn.score(token) > 0:
            positive_total += 1
    return positive_total

def count_neg_words(text):
    """Return how many whitespace-separated tokens of ``text`` AFINN scores below zero.

    Each token is scored on its own with the module-level ``afinn`` scorer.
    """
    negative_total = 0
    for token in text.split():
        if afinn.score(token) < 0:
            negative_total += 1
    return negative_total

# Derive per-headline counts of AFINN-positive and AFINN-negative words
headlines = data['headline']
data['positive_word_count'] = headlines.map(count_pos_words)
data['negative_word_count'] = headlines.map(count_neg_words)

In [None]:
# Helper that scores a text with the VADER analyzer
def get_sentiment_score(text):
    """Return the ``"compound"`` entry of ``sia.polarity_scores(text)``."""
    return sia.polarity_scores(text)["compound"]

# Attach the VADER compound score of every headline as a feature column
data['sentiment_score'] = data['headline'].map(get_sentiment_score)

In [None]:
# Feature frame: the raw headline plus the three engineered columns.
X = data.loc[
    :,
    ['headline', 'positive_word_count', 'negative_word_count', 'sentiment_score'],
]
# Raw sarcasm-type labels (binarised in a later cell).
y = data.loc[:, 'ChatGPT_Alltypes']

In [None]:
# Map the sarcasm types to a binary label (1 for deadpan, 0 for others).
# Derived from the raw column rather than from `y` itself, so re-running this
# cell is idempotent (mapping an already-binarised `y` would turn every label into 0).
y = data['ChatGPT_Alltypes'].eq('Deadpan').astype('int64')


In [None]:
# Hold out 20% of the rows for testing; the seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Turn headlines into TF-IDF vectors.
# The vocabulary and IDF weights are learned from the training headlines only;
# the test headlines are transformed with that fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train.loc[:, 'headline'])
X_test_vec = vectorizer.transform(X_test.loc[:, 'headline'])

In [None]:
# Densify the TF-IDF matrices and wrap them in DataFrames.
# No index is passed, so both frames get a default 0..n-1 row index and
# integer column labels.
X_train_array = pd.DataFrame(data=X_train_vec.toarray())
X_test_array = pd.DataFrame(data=X_test_vec.toarray())

In [None]:
# Concatenate the additional features with the TF-IDF features.
# pd.concat(axis=1) aligns rows by index label, not by position. The TF-IDF
# frames carry a 0..n-1 index while X_train / X_test keep the shuffled original
# row labels from train_test_split, so the TF-IDF frames are relabelled first.
# Without this the result has mismatched rows, NaN padding and extra rows.
X_train_features = pd.concat(
    [
        X_train_array.set_axis(X_train.index, axis=0),
        X_train.drop('headline', axis=1),
    ],
    axis=1,
)
X_test_features = pd.concat(
    [
        X_test_array.set_axis(X_test.index, axis=0),
        X_test.drop('headline', axis=1),
    ],
    axis=1,
)

# Sanity checks: one output row per input row, in the same order as the labels.
assert len(X_train_features) == len(y_train)
assert len(X_test_features) == len(y_test)

In [None]:
# The TF-IDF columns have integer labels and the engineered ones have string
# labels; make every column label a string so the frame has uniform names.
X_train_features.columns = X_train_features.columns.map(str)
X_test_features.columns = X_test_features.columns.map(str)

In [None]:
# Balance the classes by randomly under-sampling the training split only.
# Resampling the held-out split would change the class distribution the model
# is evaluated on, so the reported metrics would not reflect the real data.
rus = RandomUnderSampler(random_state=42)
X_train_resampled, y_train_resampled = rus.fit_resample(X_train_features, y_train)

# The *_resampled test names are kept so the evaluation cells keep working,
# but they now refer to the untouched held-out split.
X_test_resampled, y_test_resampled = X_test_features, y_test


In [None]:
# Logistic-regression model: mean-impute any missing feature values, then classify
pipeline = make_pipeline(
    SimpleImputer(strategy='mean'),
    LogisticRegression(),
)

# Train on the balanced training data
pipeline.fit(X=X_train_resampled, y=y_train_resampled)

In [None]:
# Predict labels for the test features with the fitted pipeline
y_pred = pipeline.predict(X=X_test_resampled)

# Per-class precision / recall / F1 against the true test labels
print(classification_report(y_true=y_test_resampled, y_pred=y_pred))

In [None]:
# Create a pipeline with an imputer and a Support Vector Classifier (SVC).
# probability=True fits the probability estimates with an internal randomised
# cross-validation, so the seed is fixed (42, as elsewhere in this notebook)
# to keep results reproducible across runs.
# NOTE: this rebinds `pipeline`, replacing the previously fitted model.
pipeline = make_pipeline(
    SimpleImputer(strategy='mean'),
    SVC(probability=True, random_state=42),
)

# Fit the model using the pipeline
pipeline.fit(X_train_resampled, y_train_resampled)


In [None]:
# Create a pipeline with an imputer and a Random Forest classifier.
# Random forests are stochastic (bootstrap samples and random feature subsets),
# so the seed is fixed (42, as elsewhere in this notebook) for reproducibility.
# NOTE: this rebinds `pipeline`, replacing the previously fitted model.
pipeline = make_pipeline(
    SimpleImputer(strategy='mean'),
    RandomForestClassifier(random_state=42),
)

# Fit the model using the pipeline
pipeline.fit(X_train_resampled, y_train_resampled)