In [None]:
!pip install openai==0.28
import openai
import json
from tqdm import tqdm
from PIL import Image
import base64
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# Attach Google Drive to the Colab runtime at the given mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import pandas as pd

# Translation table from the capitalised labels stored in the CSV to the
# lowercase names used by the rest of the notebook. Any value that is not a
# key of this table becomes NaN after `.map`.
label_mapping = dict(Harmless='harmless', Harmful='harmful')

# Read the dataset and rewrite its `label` column through the table above.
data = pd.read_csv('data.csv').assign(
    label=lambda frame: frame['label'].map(label_mapping)
)

# Show the class balance as the cell output.
data['label'].value_counts()

In [None]:
# Configure the OpenAI client without writing the secret into the notebook:
# use the OPENAI_API_KEY environment variable when it is set, otherwise ask
# for the key interactively (getpass does not echo the typed value).
import os
from getpass import getpass

openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

In [None]:
def encode_image(image_path):
    """Return the contents of the file at `image_path` as a base64 string.

    The file is read in binary mode, base64-encoded, and the resulting bytes
    are decoded as UTF-8 so the value can be embedded in a data URL.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

In [None]:
# Optional demonstration examples that are sent ahead of every query.
# Each entry is a dict with a "Title" string and an "image" file path
# (the path is passed to encode_image).
demo_images = []

# Flatten the demos into alternating text / image content parts.
demo_messages = []
for demo in demo_images:
    demo_messages.extend([
        {
            "type": "text",
            "text": demo["Title"]
        },
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{encode_image(demo['image'])}",
            }
        }
    ])

# Shared request prefix: the model name and the messages that precede each
# per-image query.
completion_payload = {
    "model": "gpt-4o-mini",
    "messages": [
        {"role": "system", "content": "You are a helpful image classification assistant. You must classify given image analyzing the image and Title whether it is harmful or harmless."},
    ]
}

# Only add the demonstration turn when there is something in it: a user
# message whose content is an empty list carries no information and is
# expected to be rejected by the API as an invalid (empty) content array.
if demo_messages:
    completion_payload["messages"].append({"role": "user", "content": demo_messages})

In [None]:
# Classify every row of `data` with one chat-completion request per image.
# `labels` collects the ground-truth label of each row and `predictions` the
# model's lower-cased reply (or 'error' when the row could not be processed),
# so the two lists stay index-aligned.
labels = []
predictions = []
# Uncommenting the next line clears the shared prefix so that only the
# per-image prompt below is sent.
# completion_payload["messages"]=[]

# Snapshot the shared prefix once. Each request is built as prefix + one
# query; the shared payload is never mutated, so earlier images (including
# ones that failed) are not resent with later requests.
base_messages = list(completion_payload["messages"])

for index, row in tqdm(data.iterrows(), total=data.shape[0]):
    image_path = row['IMAGE_PATH']
    Title = row['Title']
    label = row['label'].strip().lower()
    labels.append(label)

    try:
        # Fully decode the image first so unreadable or corrupt files are
        # caught here instead of being uploaded.
        with Image.open(image_path) as query_image:
            query_image.load()
        encoded_image = encode_image(image_path)

        query_message = {
            "role": "user",
            "content": [
                {"type": "text", "text": f"You are a helpful image classification assistant. You must classify given image whether it is harmful or harmless analyzing the image and Title:{Title}. **Harmful**: This category includes images depicting sexuality, gambling, violence, weapons, click bait, spam, harmful, or inappropriate content. Examples might be scenes of physical violence, display of sexual affection, weapons, sharp objects, explicit content, spam content, gambling, click bait content or dangerous situations. **Harmless**: This category includes images that are non-violent, safe, and appropriate. Examples might be everyday scenes, landscapes, or social gatherings without harmful context. If you are not sure, pick whatever label you think is more probable based on your analysis. Give the output in one word if is is harmful or harmless. Do not explain anything"},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}}
            ]
        }

        response = openai.ChatCompletion.create(
            model=completion_payload["model"],
            messages=base_messages + [query_message]
        )

        result = response.choices[0].message["content"].strip().lower()
        predictions.append(result)

    except Exception as e:
        # Covers both image decoding failures and API errors; record a
        # placeholder so `predictions` stays aligned with `labels`.
        print(f"Error processing image {image_path}: {e}")
        predictions.append('error')

In [None]:
# classification_report is used below but was never imported in this notebook.
from sklearn.metrics import classification_report

# The two class names used for the ground-truth labels.
VALID_LABELS = ('harmful', 'harmless')


def _normalize_prediction(raw_prediction):
    """Map a raw model reply onto one of VALID_LABELS when it is unambiguous.

    The reply is stripped and lower-cased; if exactly one label name occurs
    in it (e.g. 'harmful.' or '**harmless**') that label is returned,
    otherwise the cleaned text is returned unchanged (so 'error' and
    ambiguous replies remain visible in the report as their own class).
    """
    text = str(raw_prediction).strip().lower()
    matches = [name for name in VALID_LABELS if name in text]
    return matches[0] if len(matches) == 1 else text


# NOTE(review): `corrected_predictions` was referenced but never defined in
# this notebook; the cell that originally produced it is missing. This
# rebuilds it from `predictions` with a conservative clean-up -- confirm it
# matches the intended correction rules.
corrected_predictions = [_normalize_prediction(p) for p in predictions]

report = classification_report(labels, corrected_predictions, zero_division=0)
print("\nClassification Report:\n", report)

In [None]:
# Batch-processing only: merge this run's labels and predictions into the
# running totals stored on disk, then write the totals back.
import os
import numpy as np

labels_file = 'accumulated_labels.npy'
predictions_file = 'accumulated_predictions.npy'

# Earlier results are reused only when BOTH files are present; otherwise the
# accumulation starts from empty lists.
have_history = os.path.exists(labels_file) and os.path.exists(predictions_file)
all_labels = np.load(labels_file).tolist() if have_history else []
all_predictions = np.load(predictions_file).tolist() if have_history else []

# Append the current batch to the running totals.
current_batch_labels = np.array(labels)
current_batch_predictions = np.array(corrected_predictions)
all_labels.extend(current_batch_labels)
all_predictions.extend(current_batch_predictions)

# Persist the updated totals for the next batch.
for target_path, accumulated in (
    (labels_file, all_labels),
    (predictions_file, all_predictions),
):
    np.save(target_path, np.array(accumulated))