In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os


def find_image_files(root, extension=".jpeg"):
    """Return the sorted paths of every file under ``root`` whose name ends with ``extension``.

    Args:
        root: Directory to walk recursively. A missing directory yields an empty list,
            because ``os.walk`` ignores listing errors by default.
        extension: Case-sensitive file-name suffix to keep (default ``".jpeg"``).

    Returns:
        A lexicographically sorted list of full file paths.
    """
    matches = []
    for dirname, _, filenames in os.walk(root):
        for filename in filenames:
            # Test the file name's suffix rather than searching the whole path, so
            # "x.jpeg.bak" or files inside a directory named "*.jpeg*" are not picked up.
            if filename.endswith(extension):
                matches.append(os.path.join(dirname, filename))
    # os.walk order depends on the filesystem; sorting keeps the row order reproducible.
    return sorted(matches)


all_image_files = find_image_files('/kaggle/input')

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Install dependencies
Source (install instructions): https://huggingface.co/llava-hf/vip-llava-7b-hf. Note that the model loaded below is `llava-hf/llava-v1.6-mistral-7b-hf`.

In [None]:
!pip install -q git+https://github.com/huggingface/transformers.git
!pip install -q bitsandbytes==0.41.3 accelerate==0.25.0

## Load quantised model

In [2]:
import torch
from transformers import BitsAndBytesConfig

# Quantisation settings handed to the model loader: 4-bit loading enabled,
# with float16 as the bitsandbytes 4-bit compute dtype.
bnb_options = dict(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
quantization_config = BitsAndBytesConfig(**bnb_options)

In [3]:
from transformers import pipeline

# Hugging Face Hub identifier of the checkpoint to load.
model_id = "llava-hf/llava-v1.6-mistral-7b-hf"

# Build the image-to-text pipeline; the quantisation config is forwarded to the
# model loader through model_kwargs.
pipe = pipeline(
    task="image-to-text",
    model=model_id,
    model_kwargs=dict(quantization_config=quantization_config),
)

2024-03-29 19:41:19.773100: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-29 19:41:19.773160: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-29 19:41:19.774593: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Load Prompt

In [59]:
from PIL import Image

# Prompt template; the "<THEME>" placeholder is substituted per image before inference.
base_prompt = (
    "Score the creativity between 0 to 100 of this album cover created for the theme of <THEME>. "
    "For this task, you have the ability to perceive visual content. "
    "Please output only a single number between 0 and 100 without any further comments about the image or the theme."
)

In [60]:
from tqdm import tqdm

# Raw generated text per image, in the same order as all_image_files.
all_llava_outputs = []

# Maps an image's file name (without extension) to the theme text inserted into the prompt.
THEME_DICT = {
    'car' : 'fast car',
    'dog' : 'dog in space',
    'matrix' : 'the matrix',
    'robots' : 'robots',
    'space' : 'space explorer'
}

for _f in tqdm(all_image_files):
    # The theme key is the file name without its extension, e.g. ".../car.jpeg" -> "car".
    selected_theme = os.path.splitext(os.path.basename(_f))[0]
    if selected_theme not in THEME_DICT:
        # Same exception type as the plain dict lookup, but naming the offending file.
        raise KeyError(
            f"No theme configured for image {_f!r} (derived key {selected_theme!r}); "
            f"known keys: {sorted(THEME_DICT)}"
        )
    selected_prompt = base_prompt.replace("<THEME>", THEME_DICT[selected_theme])

    llava_prompt = f"[INST] <image>\n{selected_prompt} [/INST]"

    # Open the image in a context manager so its file handle is released after inference
    # instead of leaking one handle per image.
    with Image.open(_f) as img:
        outputs = pipe(img, prompt=llava_prompt, generate_kwargs={"max_new_tokens": 200})

    # Keep the full generated text; the numeric score is parsed out of it later.
    all_llava_outputs.append(outputs[0]["generated_text"])

100%|██████████| 10/10 [01:09<00:00,  6.97s/it]


In [61]:
import pandas as pd

# One row per image: the image path next to the raw text the model generated for it.
survey_columns = dict(
    file_name=all_image_files,
    llava_outputs=all_llava_outputs,
)
df = pd.DataFrame(survey_columns)

In [69]:
import re

# First integer or decimal number appearing in a piece of text.
_SCORE_NUMBER_RE = re.compile(r"[-+]?\d+(?:\.\d+)?")


def parse_llava_score(generated_text, marker='[/INST]'):
    """Extract the model's 0-100 score from the generated text and rescale it by 1/100.

    The text after ``marker`` is parsed as a float when possible. Otherwise the first
    number found in that text is used (e.g. "85/100" or "Score: 85").

    Args:
        generated_text: Full pipeline output, i.e. the prompt followed by the answer.
        marker: Token that separates the prompt from the model's answer.

    Returns:
        The score divided by 100.0, or NaN when the marker or a number is missing.
    """
    parts = generated_text.split(marker)
    if len(parts) < 2:
        # No answer section at all - report as missing instead of raising IndexError.
        return float("nan")
    answer = parts[1].strip()
    try:
        return float(answer) / 100.0
    except ValueError:
        # The model added extra words/symbols around the number; fall back to the first number.
        match = _SCORE_NUMBER_RE.search(answer)
        return float(match.group()) / 100.0 if match else float("nan")


df['llava_score'] = df['llava_outputs'].apply(parse_llava_score)

In [70]:
# Preview the first five rows, including the parsed score column.
df.head(n=5)

Unnamed: 0,file_name,llava_outputs,llava_score
0,/kaggle/input/spectrogrand-survey-images/spect...,[INST] \nScore the creativity between 0 to 10...,0.9
1,/kaggle/input/spectrogrand-survey-images/spect...,[INST] \nScore the creativity between 0 to 10...,0.9
2,/kaggle/input/spectrogrand-survey-images/spect...,[INST] \nScore the creativity between 0 to 10...,0.9
3,/kaggle/input/spectrogrand-survey-images/spect...,[INST] \nScore the creativity between 0 to 10...,0.9
4,/kaggle/input/spectrogrand-survey-images/spect...,[INST] \nScore the creativity between 0 to 10...,0.8


In [71]:
# Write the results table to the working directory, omitting the index column.
output_csv = "spectrogrand_survey_llava_outputs.csv"
df.to_csv(output_csv, index=False)