In [58]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import json
import os
from PIL import Image
import numpy as np

In [79]:
def load_data(json_file, image_folder):
    with open(json_file, 'r') as f:
        data = json.load(f)
    
    images, labels = [], []
    n = 0
    
    for item in data:
        n += 1
        if n % 1000 == 0:
            print(f"Processed {n} images")
        
        image_path = os.path.join(image_folder, item['image'])
        
        try:
            image = Image.open(image_path).convert('RGB').resize((128, 128))  # Ensure all images are RGB and resized
            images.append(np.array(image))
            labels.append(item['labels'])
        except Exception as e:
            print(f"Error processing image {image_path}: {e}")
    
    return np.array(images), labels

train_images, train_labels = load_data('semeval2024_dev_release/subtask2a/train.json', 'train_images')
val_images, val_labels = load_data('semeval2024_dev_release/subtask2a/validation.json', 'validation_images')


In [111]:
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, hamming_loss, f1_score

# Preprocess labels
mlb = MultiLabelBinarizer()
train_labels_encoded = mlb.fit_transform(train_labels)
val_labels_encoded = mlb.transform(val_labels)

# Verify the shape of images and labels
print(f"Train images shape: {train_images.shape}")
print(f"Train labels shape: {train_labels_encoded.shape}")
print(f"Validation images shape: {val_images.shape}")
print(f"Validation labels shape: {val_labels_encoded.shape}")

Train images shape: (7000, 128, 128, 3)
Train labels shape: (7000, 22)
Validation images shape: (500, 128, 128, 3)
Validation labels shape: (500, 22)


In [86]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [87]:
# from sklearn.preprocessing import LabelEncoder
# label_encoder = LabelEncoder()
# train_labels_encoded = [label_encoder.fit_transform(label) for label in train_labels]
# val_labels_encoded = [label_encoder.transform(label) for label in val_labels]

In [88]:
# Data augmentation and normalization
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow(train_images, train_labels_encoded, batch_size=32)
val_generator = val_datagen.flow(val_images, val_labels_encoded, batch_size=32)

In [89]:
# Build model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(mlb.classes_), activation='sigmoid')  # Use sigmoid for multi-label classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [90]:
# Train model
model.fit(train_generator, epochs=10, validation_data=val_generator)

Epoch 1/10


  self._warn_if_super_not_called()


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 125ms/step - accuracy: 0.1938 - loss: 0.3030 - val_accuracy: 0.1740 - val_loss: 0.2838
Epoch 2/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 120ms/step - accuracy: 0.1964 - loss: 0.2747 - val_accuracy: 0.1720 - val_loss: 0.2814
Epoch 3/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 119ms/step - accuracy: 0.2016 - loss: 0.2727 - val_accuracy: 0.1820 - val_loss: 0.2811
Epoch 4/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 120ms/step - accuracy: 0.2031 - loss: 0.2692 - val_accuracy: 0.1840 - val_loss: 0.2801
Epoch 5/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 122ms/step - accuracy: 0.1982 - loss: 0.2709 - val_accuracy: 0.1900 - val_loss: 0.2820
Epoch 6/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 122ms/step - accuracy: 0.1985 - loss: 0.2701 - val_accuracy: 0.1940 - val_loss: 0.2804
Epoch 7/10
[1m219/21

<keras.src.callbacks.history.History at 0x2c7d7f7d0>

In [91]:
def load_test_images(image_folder):
    images = []
    image_ids = []
    for image_name in os.listdir(image_folder):
        image_path = os.path.join(image_folder, image_name)
        try:
            image = Image.open(image_path).convert('RGB').resize((128, 128))
            images.append(np.array(image))
            image_ids.append(image_name)
        except Exception as e:
            print(f"Error processing image {image_path}: {e}")
    return np.array(images), image_ids

test_images, test_image_ids = load_test_images('dev_images')
test_images = test_images / 255.0  # Normalize images

In [92]:
def load_gold_labels(json_file):
    with open(json_file, 'r') as f:
        data = json.load(f)
    labels = {}
    for item in data:
        labels[item['image']] = item['labels']
    return labels

gold_labels = load_gold_labels('dev_gold_labels/dev_subtask2a_en.json')

In [93]:
# Assuming `model` is your trained model
predictions = model.predict(test_images)

# Convert predictions to binary labels
predicted_labels = (predictions > 0.5).astype(int)

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step


In [95]:
# Convert predictions to label names using the MultiLabelBinarizer
predicted_labels_names = mlb.inverse_transform(predicted_labels)

In [96]:
# Create a dictionary to map image IDs to predicted labels
predicted_labels_dict = dict(zip(test_image_ids, predicted_labels_names))

In [108]:
# Evaluate the predictions
from sklearn.metrics import classification_report

# Prepare ground truth and predicted lists for evaluation
ground_truth_list = []
predicted_list = []

for image_id in test_image_ids:
    if image_id in gold_labels and image_id in predicted_labels_dict:
        ground_truth_list.append(gold_labels[image_id])
        predicted_list.append(predicted_labels_dict.get(image_id, []))

In [109]:
# Binarize the ground truth labels
ground_truth_binarized = mlb.transform(ground_truth_list)

# Binarize the predicted labels for evaluation
predicted_binarized = mlb.transform(predicted_list)

# Calculate evaluation metrics
print(classification_report(ground_truth_binarized, predicted_binarized, target_names=mlb.classes_))

                                                     precision    recall  f1-score   support

                        Appeal to (Strong) Emotions       0.00      0.00      0.00        56
                                Appeal to authority       0.61      0.20      0.30       143
                           Appeal to fear/prejudice       0.00      0.00      0.00        78
                                          Bandwagon       0.00      0.00      0.00        18
               Black-and-white Fallacy/Dictatorship       0.00      0.00      0.00       103
                          Causal Oversimplification       0.00      0.00      0.00        56
                                              Doubt       0.00      0.00      0.00        52
                          Exaggeration/Minimisation       0.00      0.00      0.00        68
                                        Flag-waving       0.45      0.04      0.07       123
                   Glittering generalities (Virtue)       0.00      0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [112]:

# Calculate subset accuracy
subset_accuracy = np.mean(np.all(ground_truth_binarized == predicted_binarized, axis=1))
print(f"Subset Accuracy: {subset_accuracy:.4f}")

# Calculate hamming loss
hamming = hamming_loss(ground_truth_binarized, predicted_binarized)
print(f"Hamming Loss: {hamming:.4f}")

# Calculate average precision
average_precision = np.mean([
    accuracy_score(ground_truth_binarized[:, i], predicted_binarized[:, i])
    for i in range(ground_truth_binarized.shape[1])
])
print(f"Average Precision: {average_precision:.4f}")

# Calculate F1 score (micro, macro)
f1_micro = f1_score(ground_truth_binarized, predicted_binarized, average='micro')
f1_macro = f1_score(ground_truth_binarized, predicted_binarized, average='macro')
print(f"F1 Score (Micro): {f1_micro:.4f}")
print(f"F1 Score (Macro): {f1_macro:.4f}")

Subset Accuracy: 0.0140
Hamming Loss: 0.1119
Average Precision: 0.8881
F1 Score (Micro): 0.1245
F1 Score (Macro): 0.0396


Subset Accuracy: 0.0140: The subset accuracy is very low, meaning the model rarely predicts all labels for an instance correctly. This is the strictest measure and shows that exact matches are rare.

Hamming Loss: 0.1119: The hamming loss indicates that around 11.19% of labels are incorrectly predicted on average. This relatively low error rate per label suggests that the model does reasonably well on individual labels but might not capture all labels for an instance.

Average Precision: 0.8881: This high value suggests that when the model predicts a label, it is correct most of the time, showing high precision.

F1 Score (Micro): 0.1245: This score shows the overall balance of precision and recall across all labels, indicating the model has a modest performance in retrieving relevant labels.

F1 Score (Macro): 0.0396: This very low score suggests that the model performs poorly on less frequent labels, as it gives equal weight to all labels.

In [113]:
# Calculate relaxed accuracy
correct_count = 0
for ground_truth, predicted in zip(ground_truth_list, predicted_list):
    if any(label in ground_truth for label in predicted):
        correct_count += 1

relaxed_accuracy = correct_count / len(ground_truth_list)
print(f"Relaxed Accuracy: {relaxed_accuracy:.4f}")

Relaxed Accuracy: 0.1610


##### Interpretation: A relaxed accuracy of 0.1610 means that 16.10% of the instances had at least one of their predicted labels match the true labels.
##### This metric indicates that the model is able to predict at least one correct label for approximately 16.10% of the instances.

In [103]:
f=open('dev_gold_labels/dev_subtask2a_en.json','r')
data=json.load(f)
len(data)

1000

In [104]:
len(os.listdir('dev_images'))

1500

In [107]:
gold_labels

{'prop_meme_3736.png': ['Glittering generalities (Virtue)'],
 'prop_meme_22788.png': ['Loaded Language',
  'Slogans',
  'Flag-waving',
  'Smears',
  'Name calling/Labeling'],
 'prop_meme_13442.png': ['Loaded Language', 'Smears'],
 'prop_meme_17567.png': ['Glittering generalities (Virtue)', 'Smears'],
 'prop_meme_7757.png': ['Black-and-white Fallacy/Dictatorship'],
 'prop_meme_13573.png': ['Smears'],
 'prop_meme_12170.png': ['Loaded Language',
  'Flag-waving',
  'Name calling/Labeling'],
 'prop_meme_1821.png': ['Smears'],
 'prop_meme_18847.png': ['Flag-waving', 'Slogans'],
 'prop_meme_11744.png': ['Smears'],
 'prop_meme_7785.png': ['Loaded Language', 'Thought-terminating cliché'],
 'prop_meme_19804.png': ['Transfer'],
 'prop_meme_12160.png': ['Causal Oversimplification'],
 'prop_meme_10144.png': ['Loaded Language',
  'Transfer',
  'Thought-terminating cliché'],
 'prop_meme_13528.png': ['Causal Oversimplification',
  'Glittering generalities (Virtue)',
  'Flag-waving'],
 'prop_meme_3738.