In [1]:
import base64
from PIL import Image
import vertexai
from vertexai.preview.generative_models import GenerativeModel, Part
from vertexai.generative_models._generative_models import HarmCategory, HarmBlockThreshold, ResponseBlockedError
from pathlib import Path

gcp_project_name = "crypto-resolver-346012" # get this from your google cloud platform account
vertexai.init(project=gcp_project_name, location="us-central1")

# Prompt for the Y/N detection task: asks for a reply that ends in "Y" or "N",
# which is the last-character format the scoring loops parse.
DETECTION_PROMPT = '''You are an AI expert in detecting humour or satire. User gives you an image, and you have to make a choice "Y" or "N".
###Instructions: Users image has 2 halves called yes and but, and the combination of those might make no sense at all, or be funny. Even though yesbut is a meme format, users image is edited and might not be a meme. Your job is to find out which one it is and output Y ONLY if its funny and N otherwise.
###Output format: This image is <funny/not funny> because <reason>. Thus, my answer is <Y/N>'''

# Prompt for the free-text "why is this funny" annotation task.
WHY_FUNNY_PROMPT = "Why is this image funny/satirical? Explain in about 50-70 words"

# `prompt` is the module-level value generate() sends with every image.
# Previously both prompts were assigned to `prompt` back to back, so the first
# one was silently discarded; keep both under their own names and pick one here.
prompt = WHY_FUNNY_PROMPT
def generate(image, prompt_text=None):
    """Send one image plus a text prompt to the "gemini-pro-vision" model.

    Args:
        image: The image part to send (the notebook builds it with
            `Part.from_data(..., mime_type="image/jpeg")`).
        prompt_text: Instruction sent alongside the image. When None (the
            default) the module-level `prompt` is used, looked up at call
            time, so existing `generate(image)` calls behave as before.

    Returns:
        The `.text` attribute of the object returned by
        `model.generate_content`.

    Exceptions raised by the API call (or by reading `.text`) are not caught
    here; callers are expected to handle them.
    """
    model = GenerativeModel("gemini-pro-vision")

    # Only block content rated as high-probability harm in each category.
    safety_settings = {
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    }

    # Alternative prompt idea kept for reference:
    # Yes,But memes are funny because the Yes picture depicts a normal situation, and the But picture reveals something about the Yes picture that makes humans laugh. What is funny about the given Yes,But meme

    text_part = prompt if prompt_text is None else prompt_text

    responses = model.generate_content(
        [text_part, image],
        generation_config={
            "max_output_tokens": 256,
            "temperature": 1,
            "top_p": 1,
            "top_k": 32,
            "candidate_count": 1
        },
        safety_settings=safety_settings,
    )
    return responses.text


# Smoke test: push one sample screenshot through generate() and display the reply.
data = Path("images_real/Screenshot_2024-05-23-20-22-16-966_com.instagram.android.jpg").read_bytes()
image1 = Part.from_data(data=data, mime_type="image/jpeg")

generate(image1)

' The image is trying to highlight how people often search for things, but aren\'t usually typing in the search bar. The contrast with "YES" followed by things people do type in the search bar versus "BUT" followed by things that are typed with voice search is funny because it\'s unexpected.'

In [None]:
'''
You are an AI expert in creating humour or satire. User gives you an image, and you have to make a choice "A" or "B".

###Structure of image: The image is a 2x2 table with the labels "yes", "but", "A", and "B". Either the "yes" cell or the "but" cell will have a question mark in it. Your job is to replace the question mark with either cell "A" or cell "B" so that the resulting <yes,but> pair is funny or satirical.

###Output format: "Option <answer> is more funny because <reason>" where <answer> must be either "A" or "B"

'''

###Output format: " Thus, option <answer> is more funny because <reason>" where <answer> must be either "A" or "B"

"When the question mark is replaced by image A, the resulting [yes,but] pair is <describe it here>.
When the question mark is replaced by image B, the resulting [yes,but] pair is <describe it here>.

In [16]:
import os,json
from tqdm import tqdm
import random
from datetime import datetime
import time

# Where model replies are cached (JSON mapping file path -> reply text) and
# which folders of images to process.
outpath = "outputs/detection/gemini-cot-real.json"
inpaths = ["images_real"]

# Resume from a previous run when the cache file exists; otherwise start empty.
try:
    f = open(outpath, "r")
except FileNotFoundError:
    print("starting from zero")
    outputs = {}
else:
    with f:
        outputs = json.load(f)

def get_pred(output):
    """Extract the one-character prediction from the end of a model reply.

    Leading/trailing spaces and periods are stripped and the last remaining
    character is returned (expected to be "Y" or "N" when the model followed
    the requested format).

    Args:
        output: The reply text, or a falsy value when no reply is available.

    Returns:
        The final character of the trimmed reply, or "" when `output` is
        falsy or contains nothing but spaces and periods.
    """
    if not output:
        return ""
    trimmed = output.strip(' .')
    # A reply made only of spaces/periods trims to "", which used to raise
    # IndexError on the [-1] lookup.
    return trimmed[-1] if trimmed else ""

def is_correct(pred, folder):
    """Check a Y/N prediction against the label implied by the folder name.

    A folder whose name contains "negative" expects "N"; any other folder
    expects "Y". An empty prediction, or one that is neither "Y" nor "N",
    is never correct.
    """
    if not pred:
        return False
    if pred == "Y":
        return "negative" not in folder
    if pred == "N":
        return "negative" in folder
    return False

# Running tallies shown in the progress bar: files seen, correct predictions,
# replies ending in Y/N ("adhering"), and how many of those were "Y".
total, correct, adhering, y = 0,0,0,0
# Gather every file ending in "jpg" from each input folder, then shuffle with a
# fixed seed so the processing order is the same on every run.
files = sum(([os.path.join(folder, file) for file in os.listdir(folder) if file[-3:]=="jpg"] for folder in inpaths),[])
random.Random(42).shuffle(files)
pbar = tqdm(files)
t_last = None  # when the most recent rate-limit sleep ended
for filepath in pbar:
    # os.path.split handles nested folders and platform separators, where
    # str.split('/') would fail to unpack into exactly two parts.
    folder, file = os.path.split(filepath)

    # Reuse a cached non-empty reply; otherwise query the model and cache it.
    output = outputs.get(filepath)
    if not output:
        with open(filepath, "rb") as f:
            data = f.read()
            image = Part.from_data(data=data, mime_type="image/jpeg")
        try:
            output = generate(image)
        except Exception as e:
            print("Caught exception: ",str(e))
            output = ""
            if "generate_content_requests_per_minute_per_project_per_base_model" in str(e):
                # Per-minute quota hit: wait out the remainder of a 60s window
                # measured from the last sleep. Clamp at zero, because
                # time.sleep() raises ValueError for a negative duration once
                # more than 65s have passed since t_last.
                timediff = max(0, int(60-(datetime.now()-t_last).total_seconds() if t_last else 60))
                print(f"sleeping for ~{timediff}+5s")
                time.sleep(timediff+5)
                print("sleep over")
                t_last = datetime.now()
                try:
                    output = generate(image)
                except Exception as retry_error:
                    # A failed retry is recorded as "" (and therefore retried
                    # on the next run) instead of aborting the whole loop.
                    print("Caught exception: ",str(retry_error))
                    print("Could not do file: ",filepath)
            else:
                print("Could not do file: ",filepath)

        # Persist after every file so an interrupted run can resume.
        outputs[filepath]= output
        with open(outpath, "w") as f:
            json.dump(outputs, f, indent=4)

    total+=1
    pred = get_pred(output)
    correct += 1 if is_correct(pred,folder) else 0
    adhering += 1 if pred in ["Y", "N"] else 0
    y += 1 if pred=="Y" else 0
    pbar.set_postfix({"folder": folder, "total": total, "accuracy": correct/total, "adherance": adhering/total, "y%": y/adhering if adhering>0 else 0})

with open(outpath, "w") as f:
    json.dump(outputs, f, indent=4)

 87%|██████████████████████████▊    | 103/119 [01:10<00:45,  2.86s/it, folder=images_real, total=103, accuracy=0.689, adherance=0.854, y%=0.807]

Caught exception:  429 Quota exceeded for aiplatform.googleapis.com/generate_content_requests_per_minute_per_project_per_base_model with base model: gemini-pro-vision. Please submit a quota increase request. https://cloud.google.com/vertex-ai/docs/generative-ai/quotas-genai.
sleeping for ~60+5s
sleep over


 92%|██████████████████████████████▌  | 110/119 [03:02<01:13,  8.17s/it, folder=images_real, total=110, accuracy=0.691, adherance=0.864, y%=0.8]

Caught exception:  429 Quota exceeded for aiplatform.googleapis.com/generate_content_requests_per_minute_per_project_per_base_model with base model: gemini-pro-vision. Please submit a quota increase request. https://cloud.google.com/vertex-ai/docs/generative-ai/quotas-genai.
sleeping for ~13+5s
sleep over


 93%|████████████████████████████▉  | 111/119 [03:28<01:45, 13.21s/it, folder=images_real, total=111, accuracy=0.694, adherance=0.865, y%=0.802]

Caught exception:  429 Quota exceeded for aiplatform.googleapis.com/generate_content_requests_per_minute_per_project_per_base_model with base model: gemini-pro-vision. Please submit a quota increase request. https://cloud.google.com/vertex-ai/docs/generative-ai/quotas-genai.
sleeping for ~51+5s
sleep over


 98%|██████████████████████████████▍| 117/119 [05:04<00:19,  9.94s/it, folder=images_real, total=117, accuracy=0.692, adherance=0.872, y%=0.794]

Caught exception:  429 Quota exceeded for aiplatform.googleapis.com/generate_content_requests_per_minute_per_project_per_base_model with base model: gemini-pro-vision. Please submit a quota increase request. https://cloud.google.com/vertex-ai/docs/generative-ai/quotas-genai.
sleeping for ~20+5s
sleep over


100%|███████████████████████████████| 119/119 [05:44<00:00,  2.90s/it, folder=images_real, total=119, accuracy=0.697, adherance=0.874, y%=0.798]


In [2]:
import os,json
from tqdm import tqdm
import random
from datetime import datetime
import time

# Where model replies are cached (JSON mapping file path -> reply text) and
# which folders of images to process.
outpath = "outputs/annotations/images_real/whyfunny/gemini-whyfunny.json"
inpaths = ["images_real"]

# Resume from a previous run when the cache file exists; otherwise start empty.
try:
    f = open(outpath, "r")
except FileNotFoundError:
    print("starting from zero")
    outputs = {}
else:
    with f:
        outputs = json.load(f)

def get_pred(output):
    """Extract the one-character prediction from the end of a model reply.

    Leading/trailing spaces and periods are stripped and the last remaining
    character is returned.

    Args:
        output: The reply text, or a falsy value when no reply is available.

    Returns:
        The final character of the trimmed reply, or "" when `output` is
        falsy or contains nothing but spaces and periods.
    """
    if not output:
        return ""
    trimmed = output.strip(' .')
    # Guard the [-1] lookup: a reply of only spaces/periods trims to "" and
    # would otherwise raise IndexError.
    return trimmed[-1] if trimmed else ""

def is_correct(pred, folder):
    """Check a Y/N prediction against the label implied by the folder name.

    A folder whose name contains "negative" expects "N"; any other folder
    expects "Y". An empty prediction, or one that is neither "Y" nor "N",
    is never correct.
    """
    if not pred:
        return False
    if pred == "Y":
        return "negative" not in folder
    if pred == "N":
        return "negative" in folder
    return False

# Running tallies shown in the progress bar: files seen, correct predictions,
# replies ending in Y/N ("adhering"), and how many of those were "Y".
total, correct, adhering, y = 0,0,0,0
# Gather every file ending in "jpg" from each input folder, then shuffle with a
# fixed seed so the processing order is the same on every run.
files = sum(([os.path.join(folder, file) for file in os.listdir(folder) if file[-3:]=="jpg"] for folder in inpaths),[])
random.Random(42).shuffle(files)
pbar = tqdm(files)
t_last = None  # when the most recent rate-limit sleep ended
for filepath in pbar:
    # os.path.split handles nested folders and platform separators, where
    # str.split('/') would fail to unpack into exactly two parts.
    folder, file = os.path.split(filepath)

    # Reuse a cached non-empty reply; otherwise query the model and cache it.
    output = outputs.get(filepath)
    if not output:
        with open(filepath, "rb") as f:
            data = f.read()
            image = Part.from_data(data=data, mime_type="image/jpeg")
        try:
            output = generate(image)
        except Exception as e:
            print("Caught exception: ",str(e))
            output = ""
            if "generate_content_requests_per_minute_per_project_per_base_model" in str(e):
                # Per-minute quota hit: wait out the remainder of a 120s window
                # measured from the last sleep. Clamp at zero, because
                # time.sleep() raises ValueError for a negative duration once
                # more than 125s have passed since t_last.
                timediff = max(0, int(120-(datetime.now()-t_last).total_seconds() if t_last else 120))
                print(f"sleeping for ~{timediff}+5s")
                time.sleep(timediff+5)
                print("sleep over")
                t_last = datetime.now()
                try:
                    output = generate(image)
                except Exception as retry_error:
                    # A failed retry is recorded as "" (and therefore retried
                    # on the next run) instead of aborting the whole loop.
                    print("Caught exception: ",str(retry_error))
                    print("Could not do file: ",filepath)
            else:
                print("Could not do file: ",filepath)

        # Persist after every file so an interrupted run can resume.
        outputs[filepath]= output
        with open(outpath, "w") as f:
            json.dump(outputs, f, indent=4)

    total+=1
    pred = get_pred(output)
    correct += 1 if is_correct(pred,folder) else 0
    adhering += 1 if pred in ["Y", "N"] else 0
    y += 1 if pred=="Y" else 0
    pbar.set_postfix({"folder": folder, "total": total, "accuracy": correct/total, "adherance": adhering/total, "y%": y/adhering if adhering>0 else 0})

with open(outpath, "w") as f:
    json.dump(outputs, f, indent=4)

 92%|███████████████████████████████████████▍   | 109/119 [00:54<00:17,  1.73s/it, folder=images_real, total=109, accuracy=0, adherance=0, y%=0]

Caught exception:  429 Quota exceeded for aiplatform.googleapis.com/generate_content_requests_per_minute_per_project_per_base_model with base model: gemini-pro-vision. Please submit a quota increase request. https://cloud.google.com/vertex-ai/docs/generative-ai/quotas-genai.
sleeping for ~120+5s
sleep over


100%|███████████████████████████████████████████| 119/119 [04:03<00:00,  2.05s/it, folder=images_real, total=119, accuracy=0, adherance=0, y%=0]


In [None]:
"""
Caught exception:  500 Internal error encountered.
Could not do file:  20240101_172932.jpg

Caught exception:  500 Internal error encountered.
Could not do file:  20240101_182249.jpg
"""

In [None]:
"""
manually redid:
["20240101_173205_BUT.jpg", "20240101_173205_YES.jpg", "20240101_174949_BUT.jpg", "20240101_174949_YES.jpg", "20240101_181310_YES.jpg"]
"""