In [218]:
import json
import numpy as np

json_file_path = '/Users/jamessmith/Desktop/Desktop/SemEval_Task/data/subtask1/train.json'

# Swap the file opening and data loading statements
with open(json_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

labels = [sample.get("labels", []) for sample in data]

# lists to get all labels
all_labels = [label for sublist in labels for label in sublist]

num_unique_labels = len(set(all_labels))
print(f"Number of Unique Labels: {num_unique_labels}")

print("Label Names:", set(all_labels))


Number of Unique Labels: 20
Label Names: {'Slogans', "Misrepresentation of Someone's Position (Straw Man)", 'Exaggeration/Minimisation', 'Whataboutism', 'Name calling/Labeling', 'Causal Oversimplification', 'Doubt', 'Black-and-white Fallacy/Dictatorship', 'Appeal to fear/prejudice', 'Reductio ad hitlerum', 'Flag-waving', 'Bandwagon', 'Presenting Irrelevant Data (Red Herring)', 'Glittering generalities (Virtue)', 'Thought-terminating cliché', 'Appeal to authority', 'Smears', 'Loaded Language', 'Obfuscation, Intentional vagueness, Confusion', 'Repetition'}


In [221]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from transformers import BertTokenizer, TFBertModel
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling1D
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from collections import deque
from collections.abc import Iterable
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import precision_recall_fscore_support,classification_report

class MemeClassification:
    def __init__(self, label_tree):
        self.label_tree = label_tree
        self.label_binarizer = None
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.bert_model = TFBertModel.from_pretrained('bert-base-uncased')
        self.model = None

    def load_data(self, json_file_path):
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        return data

    def preprocess_data(self, data):
        texts = [sample["text"] for sample in data]
        labels = [sample.get("labels", []) for sample in data]

        self.label_binarizer = MultiLabelBinarizer()
        y = self.label_binarizer.fit_transform(labels)

        return texts, y

    def tokenize_and_pad(self, texts):
        input_ids = []
        attention_masks = []

        for text in texts:
            encoded_text = self.tokenizer(text, max_length=128, truncation=True, padding='max_length', return_tensors='tf')
            input_ids.append(encoded_text['input_ids'])
            attention_masks.append(encoded_text['attention_mask'])

        input_ids = np.concatenate(input_ids, axis=0)
        attention_masks = np.concatenate(attention_masks, axis=0)

        return input_ids, attention_masks

    def build_model(self, num_classes):
        input_ids_input = Input(shape=(128,), dtype=tf.int32, name="input_ids")
        attention_masks_input = Input(shape=(128,), dtype=tf.int32, name="attention_masks")

        bert_output = self.bert_model(input_ids_input, attention_mask=attention_masks_input)[0]
        pooled_output = GlobalAveragePooling1D()(bert_output)
        output_layer = Dense(num_classes, activation='sigmoid')(pooled_output)

        self.model = Model(inputs=[input_ids_input, attention_masks_input], outputs=output_layer)
        self.model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    def train_model(self, X_train, y_train, X_val, y_val, attention_masks_train, attention_masks_val, epochs=3, batch_size=8):
        
        history = self.model.fit(
            [X_train, attention_masks_train],
            y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=([X_val, attention_masks_val], y_val),
            class_weight='auto' # automatic class weights calculation
        )
       
    def calculate_hierarchy_distance(self, node1, node2):
        # Check if the nodes are leaf nodes
        if node1 not in self.label_tree or node2 not in self.label_tree:
            return None

        # Find ancestors of each leaf node
        ancestors1 = set()
        current_node = node1
        while current_node:
            ancestors1.add(current_node)
            current_node = next((parent for parent, children in self.label_tree.items() if current_node in children), None)

        ancestors2 = set()
        current_node = node2
        while current_node:
            ancestors2.add(current_node)
            current_node = next((parent for parent, children in self.label_tree.items() if current_node in children), None)

        # Find common ancestors
        common_ancestors = ancestors1.intersection(ancestors2)

        # distance based on levels
        distance = len(ancestors1) + len(ancestors2) - 2 * len(common_ancestors)

        return distance
    
    # from the cited article on the task page, i came up with this evaulation calculation
    def evaluate_with_hierarchy(self, X_test, attention_masks_test, y_test_one_hot):
        predictions = self.model.predict([X_test, attention_masks_test])

        total_precision = 0.0
        total_recall = 0.0
        total_f1 = 0.0
        total_samples = len(y_test_one_hot)

        for i in range(total_samples):
            predicted_probabilities = predictions[i]

            # Decoding predicted labels
            predicted_labels = [self.label_binarizer.classes_[j] for j in range(len(self.label_binarizer.classes_)) if
                                predicted_probabilities[j] > 0.5]
            gold_labels = [self.label_binarizer.classes_[j] for j in range(len(self.label_binarizer.classes_)) if
                           y_test_one_hot[i][j] == 1]

            # Checks if the prediction is a leaf node and is the correct label
            if set(predicted_labels).issubset(set(gold_labels)):
                reward = 1.0  # Full reward
            elif any(
                    self.calculate_hierarchy_distance(predicted_label, gold_label) > 0 for predicted_label in
                    predicted_labels for gold_label in gold_labels):
                reward = 0.5  # Partial reward
            else:
                reward = 0.0  # Null reward

            # total_precision, total_recall on reward
            total_precision += reward
            total_recall += reward

        # average metrics
        average_precision = total_precision / total_samples
        average_recall = total_recall / total_samples

        # hierarchical F1
        hierarchical_f1 = 2 * (average_precision * average_recall) / (average_precision + average_recall) if (
                average_precision + average_recall) != 0 else 0.0

        return hierarchical_f1

    
    def run_experiment(self, json_file_path, num_classes):
        data = self.load_data(json_file_path)
        texts, y = self.preprocess_data(data)
        input_ids, attention_masks = self.tokenize_and_pad(texts)
        
       # split the data
        X_train, X_temp, y_train, y_temp, attention_masks_train, attention_masks_temp = train_test_split(
            input_ids, y, attention_masks, test_size=0.2, random_state=42
        )

        X_val, X_test, y_val, y_test, attention_masks_val, attention_masks_test = train_test_split(
            X_temp, y_temp, attention_masks_temp, test_size=0.5, random_state=42
        )
        
        # Convert integer labels to one-hot encoded labels
        y_train_one_hot = to_categorical(y_train, num_classes)
        y_val_one_hot = to_categorical(y_val, num_classes)
        y_test_one_hot = to_categorical(y_test, num_classes)
        
        print(X_train.shape, X_val.shape, X_test.shape)
        print(attention_masks_train.shape, attention_masks_val.shape, attention_masks_test.shape)
        print(y_train_one_hot.shape, y_val_one_hot.shape, y_test_one_hot.shape)

        # build model
        self.build_model(num_classes)
        # train the model
        self.train_model(X_train, attention_masks_train, y_train, X_val, attention_masks_val, y_val)

        # Evaluate the model
        hierarchical_f1 = self.evaluate_with_hierarchy(X_test, attention_masks_test, y_test_one_hot)

        print(f"Average Hierarchical F1: {hierarchical_f1}")

        # model.predict for the test set
        predictions = self.model.predict([X_test, attention_masks_test])

        # Decoding the encoded labels
        decoded_predictions = np.argmax(predictions, axis=1)
        decoded_y_test = np.argmax(y_test, axis=1)

        # classification report
        print(classification_report(decoded_y_test, decoded_predictions))
        
# hierarchical tree
label_tree = {
    'Persuasion': {
        'Pathos': {
            'Appeal to Emotion(visual)': 1,
            'Exaggeration/Minimisation': 2,
            'Loaded Language': 3,
            'Flag waving': 4,
            'Appeal to fear/prejudice': 5,
            'Transfer': 6
        },
        'Ethos': {
            'Transfer': 6,
            'Glittering generalities': 7,
            'Appeal to authority': 8,
            'Bandwagon': 9,
            'Ad Hominem': {
                'Name calling/Labelling': 10,
                'Doubt': 11,
                'Smears': 12,
                'Reduction and Hitlerium': 13,
                'Whataboutism': 14
            }
        },
        'Logos': {
            'Repetition': 15,
            'Obfuscation, Intentional vagueness, Confusion': 16,
            'Justification': {
                'Flag waving': 4,
                'Appeal to fear/prejudice': 5,
                'Appeal to Authority': 8,
                'Bandwagon': 9,
                'Slogans': 17
            },
            'Reasoning': {
                'Distraction': {
                    'Whataboutism': 14,
                    'Presenting Irrelevant Data (Red Herring)': 18,
                    'Straw Man': 19
                },
                'Simplification': {
                    'Black-and-white Fallacy/Dictatorship': 20,
                    'Casual Oversimplification': 21,
                    'Thought-terminating cliché': 22
                }
            }
        }
    }
}


meme_classifier = MemeClassification(label_tree)
num_classes = 20
meme_classifier.run_experiment('/Users/jamessmith/Desktop/Desktop/SemEval_Task/data/subtask1/train.json', num_classes)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

(5600, 128) (700, 128) (700, 128)
(5600, 128) (700, 128) (700, 128)
(5600, 20, 20) (700, 20, 20) (700, 20, 20)


ValueError: Data cardinality is ambiguous:
  x sizes: 5600, 700
  y sizes: 5600
Make sure all arrays contain the same number of samples.