## Predicting Whether a Meme Is Hateful Based on Its Image Caption and Embedded Text

### Step 1: Importing the libraries

In [9]:
import functools
import os
import string

import cv2
import pytesseract
import torch
from PIL import Image
from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer, AutoModelForSequenceClassification

# Tell pytesseract where the Tesseract executable lives; adjust this path if
# Tesseract is installed elsewhere. The string is raw, so backslashes are
# already literal and must not be doubled.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

### Step 2: Function to preprocess the image before OCR so the text is easier to extract

In [10]:
def preprocess_image(image):
    """Prepare a colour image for OCR by smoothing, grey-scaling and binarising it.

    Args:
        image: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2GRAY``).

    Returns:
        A single-channel image in which pixels brighter than 240 are set to 255
        and every other pixel is set to 0.
    """
    # Bilateral filter: diameter 5, sigmaColor 55, sigmaSpace 60.
    smoothed = cv2.bilateralFilter(image, 5, 55, 60)
    grayscale = cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)
    # Keep only near-white pixels (presumably the meme's text - TODO confirm).
    binary = cv2.threshold(grayscale, 240, 255, cv2.THRESH_BINARY)[1]
    return binary

### Step 3: Function to perform OCR on the Meme Image

In [11]:
def extract_text_from_image(image_path):
    """Run OCR on an image and return its text reduced to letters, digits and spaces.

    Args:
        image_path: Path of the image file to load with OpenCV.

    Returns:
        The OCR output as one string in which every character that is not an
        ASCII letter, a digit or a space (newlines included) has been replaced
        by a space.

    Raises:
        ValueError: If OpenCV cannot load an image from ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, which would otherwise surface later as an opaque
        # OpenCV error inside the preprocessing step.
        raise ValueError(f"Could not read image file: {image_path}")

    text = pytesseract.image_to_string(preprocess_image(image))

    # Newlines are not in the allowed set, so they become spaces here as well.
    allowed_chars = frozenset(string.ascii_letters + string.digits + " ")
    return "".join(char if char in allowed_chars else " " for char in text)

### Step 4: Using bipin/image-caption-generator model for generating a caption for the meme image

In [12]:
@functools.lru_cache(maxsize=None)
def _load_caption_resources(model_name):
    """Load the captioning model, feature extractor, tokenizer and device once per model name."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = VisionEncoderDecoderModel.from_pretrained(model_name)
    model.to(device)
    feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    return model, feature_extractor, tokenizer, device


def generate_image_caption(image_path):
    """Generate a text caption for the image stored at ``image_path``.

    The pretrained components are loaded on the first call only and reused
    afterwards, so captioning many images does not reload the model each time.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The decoded caption string, with special tokens removed.
    """
    model, feature_extractor, tokenizer, device = _load_caption_resources(
        "bipin/image-caption-generator"
    )

    # Open the file in a context manager so its handle is released as soon as
    # the pixel data has been extracted.
    with Image.open(image_path) as img:
        rgb_img = img if img.mode == 'RGB' else img.convert(mode="RGB")
        pixel_values = feature_extractor(images=[rgb_img], return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)

    max_length = 128
    num_beams = 4

    # get model prediction
    output_ids = model.generate(pixel_values, num_beams=num_beams, max_length=max_length)

    # decode the generated prediction
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

### Step 5: Using facebook/roberta-hate-speech-dynabench-r4-target Model to Classify Text as hateful or not

In [13]:
@functools.lru_cache(maxsize=None)
def _load_hate_speech_resources(model_name):
    """Load the hate-speech tokenizer and classifier once per model name."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model


def detect_hate_speech(text):
    """Classify ``text`` as hateful or not hateful.

    The tokenizer and model are loaded on the first call only and reused on
    later calls.

    Args:
        text: The string to classify; it is truncated to 512 tokens.

    Returns:
        A ``(classification, confidence)`` tuple where ``classification`` is
        ``"hateful"`` or ``"not hateful"`` and ``confidence`` is the softmax
        probability of the chosen class.
    """
    tokenizer, model = _load_hate_speech_resources(
        "facebook/roberta-hate-speech-dynabench-r4-target"
    )

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # NOTE(review): assumes index 1 of the logits is the "hateful" label -
    # confirm against the model's id2label mapping.
    hate_confidence = probabilities[0][1].item()
    if hate_confidence > 0.5:
        return "hateful", hate_confidence
    return "not hateful", 1 - hate_confidence

### Step 6: Wrapper function to call the helper functions and generate an output file

In [14]:
def analyze_meme_for_toxicity(image_path, output_file):
    """Classify one meme and append the verdicts to a report file.

    The OCR text, the generated caption and their concatenation are each
    classified separately.

    Args:
        image_path: Path of the meme image to analyse.
        output_file: Path of the text report; results are appended to it.

    Returns:
        None. The results are written to ``output_file``.
    """
    extracted_text = extract_text_from_image(image_path)
    image_caption = generate_image_caption(image_path)
    combined_text = extracted_text + " " + image_caption

    # Analyze individually and combined
    results = {
        "extracted_text": detect_hate_speech(extracted_text),
        "image_caption": detect_hate_speech(image_caption),
        "combined_text": detect_hate_speech(combined_text),
    }

    report_lines = [f"Image File Name: {os.path.basename(image_path)}\n"]
    report_lines.extend(
        f"{text_type.capitalize()} - Classification: {classification}, Confidence: {confidence:.4f}\n"
        for text_type, (classification, confidence) in results.items()
    )
    report_lines.append("\n")

    # Explicit UTF-8 so a non-ASCII file name cannot fail under the platform's
    # default encoding.
    with open(output_file, "a", encoding="utf-8") as file:
        file.writelines(report_lines)

In [15]:
def analyze_folder_for_toxicity(folder_path, output_file="detection_results.txt"):
    """Analyse every PNG/JPG/JPEG image in a folder and write one report.

    Args:
        folder_path: Directory whose image files are analysed (not recursive).
        output_file: Path of the report file. It is emptied before the run
            starts. Defaults to ``"detection_results.txt"``.

    Returns:
        None. Results are written to ``output_file``.
    """
    # Ensure the output file is empty before starting
    with open(output_file, "w", encoding="utf-8"):
        pass

    # os.listdir returns entries in arbitrary order; sort them so the report
    # is reproducible between runs.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        analyze_meme_for_toxicity(os.path.join(folder_path, filename), output_file)

### Step 7: Running the Code for Classification

In [16]:
# Enter the path of the folder containing the images you wish to classify.
folder_path = "C:\\Users\\soumi\\Desktop\\precog\\img\\validation_data\\hateful_memes"
analyze_folder_for_toxicity(folder_path)