In [1]:
import pandas as pd
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Seed handed to StratifiedKFold so the shuffled cross-validation splits are reproducible.
RANDOM_STATE = 50

In [27]:
# Selects which precomputed text-embedding file is loaded later in the notebook.
TEXT_EMBEDDINGS = 'BERT' # {'BERT', 'GLOVE'}

In [28]:
# Map the configured embedding name to the .npy file holding the precomputed vectors.
if TEXT_EMBEDDINGS == 'BERT':
    textEmbeddingFile = '../bert embeddings/sarcasm_data_embeddings.npy'
elif TEXT_EMBEDDINGS == 'GLOVE':
    textEmbeddingFile = '../glove embeddings/glove_mustard.npy'
else:
    # ValueError is a subclass of Exception, so anything that caught the old
    # generic Exception still works; the message now names the bad value and
    # the accepted options so a typo in the config cell is obvious.
    raise ValueError(
        "Invalid TEXT_EMBEDDINGS: {!r} (expected 'BERT' or 'GLOVE')".format(TEXT_EMBEDDINGS)
    )
    

In [19]:
import pickle
import numpy as np

# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only run this against a feature file from a trusted source.
with open('../video_features/resnet_features.pkl', 'rb') as feature_file:
    video_features_dict = pickle.load(feature_file)

# Replace every entry with its mean along axis 0
# (presumably per-frame features -> one vector per clip; TODO confirm).
# Only values are reassigned, so iterating over items() while updating is safe.
for clip_key, clip_array in video_features_dict.items():
    video_features_dict[clip_key] = np.mean(clip_array, axis=0)

# Sanity check: shape of the first averaged entry.
first_key = list(video_features_dict)[0]
video_features_dict[first_key].shape

(2048,)

In [20]:
data = pd.read_csv('../datasets/mustard_dataset/sarcasm_with_id.csv')
ids = list(data['id'])

# Keep only the ids ending in "_1" and look each one up in the feature dict
# under the id with that two-character suffix stripped. Row order follows the CSV.
video_features = np.array(
    [video_features_dict[row_id[:-2]] for row_id in ids if row_id[-2:] == "_1"]
)
video_features.shape

(690, 2048)

In [29]:
# Rows with any missing value are dropped before the text and labels are extracted.
data = pd.read_csv('../datasets/mustard_dataset/sarcasm_data.csv').dropna(axis=0,how='any')

features = data['text'].to_list()
labels = data['sarcasm'].to_list()

# Precomputed embeddings; the file is chosen by the TEXT_EMBEDDINGS setting.
text_features = np.load(textEmbeddingFile)

# Fail fast if dropna() removed rows that the embedding file still contains
# (or vice versa); otherwise the mismatch only surfaces later inside
# cross-validation, far from its cause.
assert len(labels) == len(text_features), (
    "labels ({}) and text embeddings ({}) have different row counts".format(
        len(labels), len(text_features)
    )
)

text_features.shape

(690, 768)

In [30]:
# Fuse the two modalities column-wise: video columns first, then text columns.
_modalities = (video_features, text_features)
final_features = np.concatenate(_modalities, axis=1)
final_features.shape

(690, 2816)

In [23]:
def svm_train(features,labels):
    """Fit a standardise-then-SVM pipeline and return the fitted pipeline.

    Args:
        features: Training samples, passed straight to ``Pipeline.fit``.
        labels: Training targets, one per sample.

    Returns:
        The fitted pipeline (``StandardScaler`` followed by an RBF-kernel
        ``SVC`` with ``C=15.0`` and ``gamma="scale"``).
    """
    scaler = StandardScaler()
    classifier = svm.SVC(kernel="rbf", C=15.0, gamma="scale")
    model = make_pipeline(scaler, classifier)
    return model.fit(features, labels)


def svm_test(clf,features,labels):
    """Predict with a fitted classifier and score the predictions.

    Args:
        clf: Fitted estimator exposing ``predict``.
        features: Samples to classify.
        labels: Ground-truth targets for ``features``.

    Returns:
        The dict produced by ``classification_report`` with
        ``output_dict=True`` (despite the historical "string" naming).
    """
    predictions = clf.predict(features)
    return classification_report(labels, predictions, digits=3, output_dict=True)

In [24]:
from sklearn.model_selection import StratifiedKFold

def kFoldResults(features, n_splits=5, targets=None):
    """Run stratified k-fold cross-validation with the SVM pipeline and print mean metrics.

    For every fold a fresh model is trained with ``svm_train`` and scored with
    ``svm_test``; macro-averaged precision, recall and F1 plus accuracy are
    averaged over the folds and printed.

    Args:
        features: Indexable collection of samples (one row per example).
        n_splits: Number of stratified folds.
        targets: Class labels aligned with ``features``. When omitted, the
            notebook-level ``labels`` variable is used, which keeps existing
            ``kFoldResults(features)`` calls working while letting callers
            avoid the hidden global.

    Returns:
        None. The averaged metrics are printed.
    """
    # Resolve the labels once so the split and the per-fold slicing agree.
    y = labels if targets is None else targets

    kf = StratifiedKFold(n_splits, shuffle=True, random_state=RANDOM_STATE)

    results = []

    for train_index, test_index in kf.split(features, y):
        # Element-wise indexing works for both lists and ndarrays.
        train_x = [features[index] for index in train_index]
        train_y = [y[index] for index in train_index]
        test_x = [features[index] for index in test_index]
        test_y = [y[index] for index in test_index]

        clf = svm_train(train_x, train_y)
        report = svm_test(clf, test_x, test_y)
        results.append([
            report['macro avg']['precision'],
            report['macro avg']['recall'],
            report['macro avg']['f1-score'],
            report['accuracy']
        ])

    # Column order matches the list built above: precision, recall, f1, accuracy.
    res = np.mean(np.array(results), axis=0)
    print("Precision:",round(res[0], 4))
    print("recall:",round(res[1], 4))
    print("accuracy:",round(res[3], 4))
    print("f1-score:",round(res[2], 4))

In [25]:
# Cross-validate on the concatenated video + text features.
# NOTE(review): this cell's execution count (25) is lower than the cells that
# select/load the embeddings and build final_features (27-30), so the output
# below may come from an earlier final_features -- re-run after Restart & Run All.
print("Text + Video")
kFoldResults(final_features)

Text + Video
Precision: 0.6766
recall: 0.671
accuracy: 0.671
f1-score: 0.6677


In [31]:
# Cross-validate on the text embeddings alone.
print("Text Only")
kFoldResults(text_features)

Text Only
Precision: 0.635
recall: 0.6348
accuracy: 0.6348
f1-score: 0.6347


In [14]:
# Cross-validate on the averaged video features alone.
# NOTE(review): this cell's execution count (14) predates the cells that load
# video_features (19, 20), so the output below may reflect an earlier kernel
# state -- re-run after Restart & Run All.
print("Video Only")
kFoldResults(video_features)

Video Only
Precision: 0.6582
recall: 0.6551
accuracy: 0.6551
f1-score: 0.653
