In [17]:
import surprisal
import pandas as pd
import numpy as np
import warnings
from surprisal import AutoHuggingFaceModel
from tqdm import tqdm
import time
import os

from collections import Counter
import string
warnings.filterwarnings('ignore')

In [18]:
ls

credential.txt       pic-1.png            pytorch.py
df_1030.pkl          project_torch.ipynb


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


## Replicate API

Minigpt-4 is hosted on [replicate](https://replicate.com/daanelson/minigpt-4/versions/b96a2f33cc8e4b0aa23eacfce731b9c41a7d9466d9ed4e167375587b54db9423), with an API available for deployment.

| Parameter          | Interpretation                                                                                                                        | Default | Min  | Max |
|--------------------|---------------------------------------------------------------------------------------------------------------------------------------|---------|------|-----|
| num_beams          | number of beams for beam search decoding                                                                                              | 5       | 1    | 10  |
| temperature        | temperature for generating tokens, lower = more predictable results                                                                   | 1.32    | 0.01 | 2   |
| top_p              | sample from the top p percent most likely tokens                                                                                      | 0.9     | 0    | 1   |
| repetition_penalty | penalty for repeated words in generated text; 1 is no penalty, values greater than 1 discourage repetition, less than 1 encourage it. | 1       | 0.01 | 5   |
| max_new_tokens     | maximum number of new tokens to generate                                                                                              | 3000    | 1    |     |
| max_length         | total length of prompt and output in tokens                                                                                           | 4000    | 1    |     |                                                                                             

In [6]:
with open('credential.txt', encoding = 'utf-8') as f:
  key = f.readline().strip()

os.environ["REPLICATE_API_TOKEN"] = key

In [None]:
def get_description(iteration, beams, temp, top_p, repetition):
  col = []
  val = []

  prompt = "Please tell me everything you see going on in this picture."

  for i in np.arange(0, iteration, 1):
    col.append(i)
    output = replicate.run("daanelson/minigpt-4:b96a2f33cc8e4b0aa23eacfce731b9c41a7d9466d9ed4e167375587b54db9423",
                           input={"image": open("pic-1.png", "rb"),
                                  "prompt": prompt,
                                  "num_beams": beams,
                                  "temperature": temp,
                                  "repetition_penalty": repetition})
    val.append(output)

  data = {"iteration" : col,
          "num_beam" : [beams] * len(col),
          "temperature" : [temp] * len(col),
          "top_p" : [top_p] * len(col),
          "description": val
      }

  df = pd.DataFrame(data)

  return df

In [None]:
def get_description_df(param_list, iteration, beams, top_p, repetition):
  df = pd.DataFrame()
  for i in tqdm(param_list):
    df = df.append(get_description(iteration, beams, i, top_p, repetition))
    print(i)
  df = df.reset_index(drop=True)

  return df

In [None]:
df = get_description_df(param_list = np.arange(0.01, 1.01, 0.01),
                   iteration = 1,
                   beams = 1,
                   top_p = 1,
                   repetition = 1)

## Text analysis

In [None]:
def tokenize(s):
    return [w.strip(string.punctuation) for w in s.lower().split()]

In [None]:
def word_counts(s):
    """Tokenize the str `s` and accumulate counts in a dictionary for each
    word that appears.

    Parameters
    ----------
    s : str
        The input string.

    Returns
    -------
    dict of { str : int }
        Word counts in the string, with words as keys and their corresponding
        counts as values.
    """
    counts = Counter()
    tokens = tokenize(s)
    for token in tokens:
        counts[token] += 1

    #return counts, len(counts), sum(counts.values()), len(counts)/sum(counts.values())
    return len(counts)/sum(counts.values())

In [None]:
df['ttr'] = df['description'].apply(word_counts)

## Test surprisal

In [20]:
m = AutoHuggingFaceModel.from_pretrained('gpt2')

In [63]:
def get_word_surp(sent):
    # words = []
    # surps = []
    words = [word for word, surp in m.surprise(sent)[0]]
    surps = [surp for word, surp in m.surprise(sent)[0]]
    
    return pd.Series([words, surps])


### Mini-GPT4

In [56]:
df = pd.read_pickle("df_1030.pkl")

In [57]:
df['id'] = df.index + 1
df

Unnamed: 0,iteration,num_beam,temperature,top_p,description,id
0,0,1,0.01,1,The image shows a woman and two children stand...,1
1,0,1,0.02,1,The image shows a woman and two children stand...,2
2,0,1,0.03,1,The image shows a woman and two children stand...,3
3,0,1,0.04,1,The image shows a woman and two children stand...,4
4,0,1,0.05,1,The image shows a woman and two children stand...,5
...,...,...,...,...,...,...
95,0,1,0.96,1,The image shows two women and a child in a kit...,96
96,0,1,0.97,1,The image shows a woman and two children in a ...,97
97,0,1,0.98,1,"A mother is standing at a sink, washing her ha...",98
98,0,1,0.99,1,This is a black and white image of a woman and...,99


In [58]:
df['sentences'] = df['description'].str.split('.')
df

Unnamed: 0,iteration,num_beam,temperature,top_p,description,id,sentences
0,0,1,0.01,1,The image shows a woman and two children stand...,1,[The image shows a woman and two children stan...
1,0,1,0.02,1,The image shows a woman and two children stand...,2,[The image shows a woman and two children stan...
2,0,1,0.03,1,The image shows a woman and two children stand...,3,[The image shows a woman and two children stan...
3,0,1,0.04,1,The image shows a woman and two children stand...,4,[The image shows a woman and two children stan...
4,0,1,0.05,1,The image shows a woman and two children stand...,5,[The image shows a woman and two children stan...
...,...,...,...,...,...,...,...
95,0,1,0.96,1,The image shows two women and a child in a kit...,96,[The image shows two women and a child in a ki...
96,0,1,0.97,1,The image shows a woman and two children in a ...,97,[The image shows a woman and two children in a...
97,0,1,0.98,1,"A mother is standing at a sink, washing her ha...",98,"[A mother is standing at a sink, washing her h..."
98,0,1,0.99,1,This is a black and white image of a woman and...,99,[This is a black and white image of a woman an...


In [59]:
df.iloc[99,:]['sentences']

['This is a black and white image of a woman and two children standing in a kitchen',
 ' The woman is wearing a black dress and apron, and is holding a mop and bucket in her hands',
 ' The children are wearing overalls and are standing next to the woman',
 ' One of the children is reaching up to touch a cabinet above the sink, while the other is standing on a stool next to the sink, looking into the cabinet',
 ' There is a window behind the woman and children, and the room appears to be brightly lit',
 ' The walls and floor are made of white tile, and there is a counter with a sink on one side of the room',
 '']

In [60]:
fulldf = df.explode('sentences').reset_index(drop=True)
fulldf

Unnamed: 0,iteration,num_beam,temperature,top_p,description,id,sentences
0,0,1,0.01,1,The image shows a woman and two children stand...,1,The image shows a woman and two children stand...
1,0,1,0.01,1,The image shows a woman and two children stand...,1,"The woman is holding a mop and bucket, and th..."
2,0,1,0.01,1,The image shows a woman and two children stand...,1,The sink is full of dirty dishes and there is...
3,0,1,0.01,1,The image shows a woman and two children stand...,1,The walls and countertops are dirty and there...
4,0,1,0.01,1,The image shows a woman and two children stand...,1,The floor is also dirty and there is a trash ...
...,...,...,...,...,...,...,...
541,0,1,1.00,1,This is a black and white image of a woman and...,100,The children are wearing overalls and are sta...
542,0,1,1.00,1,This is a black and white image of a woman and...,100,One of the children is reaching up to touch a...
543,0,1,1.00,1,This is a black and white image of a woman and...,100,There is a window behind the woman and childr...
544,0,1,1.00,1,This is a black and white image of a woman and...,100,"The walls and floor are made of white tile, a..."


In [61]:
fulldf.iloc[545,:]

iteration                                                      0
num_beam                                                       1
temperature                                                  1.0
top_p                                                          1
description    This is a black and white image of a woman and...
id                                                           100
sentences                                                       
Name: 545, dtype: object

In [62]:
fulldf = fulldf.loc[fulldf['sentences'] != ''].reset_index(drop=True)
fulldf

Unnamed: 0,iteration,num_beam,temperature,top_p,description,id,sentences
0,0,1,0.01,1,The image shows a woman and two children stand...,1,The image shows a woman and two children stand...
1,0,1,0.01,1,The image shows a woman and two children stand...,1,"The woman is holding a mop and bucket, and th..."
2,0,1,0.01,1,The image shows a woman and two children stand...,1,The sink is full of dirty dishes and there is...
3,0,1,0.01,1,The image shows a woman and two children stand...,1,The walls and countertops are dirty and there...
4,0,1,0.01,1,The image shows a woman and two children stand...,1,The floor is also dirty and there is a trash ...
...,...,...,...,...,...,...,...
441,0,1,1.00,1,This is a black and white image of a woman and...,100,"The woman is wearing a black dress and apron,..."
442,0,1,1.00,1,This is a black and white image of a woman and...,100,The children are wearing overalls and are sta...
443,0,1,1.00,1,This is a black and white image of a woman and...,100,One of the children is reaching up to touch a...
444,0,1,1.00,1,This is a black and white image of a woman and...,100,There is a window behind the woman and childr...


In [65]:
get_word_surp(fulldf['sentences'][0])

0    [The, Ġimage, Ġshows, Ġa, Ġwoman, Ġand, Ġtwo, ...
1    [3.276172161102295, 7.5474724769592285, 4.2324...
dtype: object

In [67]:
fulldf[['words', 'surps']] = fulldf['sentences'].apply(get_word_surp)

In [16]:
fulldf

Unnamed: 0,iteration,num_beam,temperature,top_p,description,words,surps
0,0,1,0.01,1,The image shows a woman and two children stand...,"[The, Ġimage, Ġshows, Ġa, Ġwoman, Ġand, Ġtwo, ...","[3.276172161102295, 7.5474724769592285, 4.2324..."
1,0,1,0.02,1,The image shows a woman and two children stand...,"[The, Ġimage, Ġshows, Ġa, Ġwoman, Ġand, Ġtwo, ...","[3.276172161102295, 7.5474724769592285, 4.2324..."
2,0,1,0.03,1,The image shows a woman and two children stand...,"[The, Ġimage, Ġshows, Ġa, Ġwoman, Ġand, Ġtwo, ...","[3.276172161102295, 7.5474724769592285, 4.2324..."
3,0,1,0.04,1,The image shows a woman and two children stand...,"[The, Ġimage, Ġshows, Ġa, Ġwoman, Ġand, Ġtwo, ...","[3.276172161102295, 7.5474724769592285, 4.2324..."
4,0,1,0.05,1,The image shows a woman and two children stand...,"[The, Ġimage, Ġshows, Ġa, Ġwoman, Ġand, Ġtwo, ...","[3.276172161102295, 7.5474724769592285, 4.2324..."
...,...,...,...,...,...,...,...
95,0,1,0.96,1,The image shows two women and a child in a kit...,"[The, Ġimage, Ġshows, Ġtwo, Ġwomen, Ġand, Ġa, ...","[3.276172161102295, 7.5474724769592285, 4.2324..."
96,0,1,0.97,1,The image shows a woman and two children in a ...,"[The, Ġimage, Ġshows, Ġa, Ġwoman, Ġand, Ġtwo, ...","[3.276172161102295, 7.5474724769592285, 4.2324..."
97,0,1,0.98,1,"A mother is standing at a sink, washing her ha...","[A, Ġmother, Ġis, Ġstanding, Ġat, Ġa, Ġsink, ,...","[3.940398693084717, 6.570438385009766, 3.22463..."
98,0,1,0.99,1,This is a black and white image of a woman and...,"[This, Ġis, Ġa, Ġblack, Ġand, Ġwhite, Ġimage, ...","[4.856124401092529, 1.694906234741211, 1.10080..."


### GPT-4V

In [80]:
# List of descriptions
descriptions_list = [
    "In a bright kitchen setting, a boy stands on a stool, eagerly reaching for a cookie jar located on a high shelf. A girl with outstretched hands appears excited and looks towards the boy. A woman, likely their mother, observes the children while holding a dish near a sink. Various kitchen elements, including open cabinets, dishes, a window, and a puddle of water, complement the scene.",
    "In a kitchen, a girl excitedly outstretches her hands towards a boy who stands on a stool attempting to get a cookie jar from a high shelf. Observing them is a woman standing near a sink, holding a dish. The room has open cabinets, a window showing a garden, and a puddle of water on the floor.",
    "A kitchen scene unfolds with a woman, presumably the mother, standing near a sink with a dish in hand, watching over two children. The boy, standing on a stool, reaches for a cookie jar, while a girl looks on with anticipation. Other elements like open cabinets, dishes, and a window backdrop add depth to the scene.",
    "Within a kitchen, a woman stands by a window that offers a view of a garden. She observes two children: a boy on a stool reaching for a cookie jar and an excited girl with hands outstretched. Nearby, there's a sink, open cabinets, and dishes.",
    "The kitchen atmosphere is evident with a counter that holds various dishes, while in the forefront, a boy on a stool tries accessing a cookie jar. An eager girl and a woman holding a dish complete the trio. Background details include open cabinets and a window with a curtain.",
    "Amidst the bright kitchen, the open cabinets reveal dishes inside. In the scene, a boy on a stool aims for a cookie jar, a girl anticipates his move, and a woman stands by the sink, observing them and holding a dish.",
    "The focal point of this kitchen setting is the sink area where a woman stands, holding a dish and observing two children. The boy, using a stool, stretches for a cookie jar, and a girl eagerly watches. The surrounding environment showcases open cabinets and various kitchen items.",
    "A distinct feature of the kitchen scene is a puddle of water on the floor, close to where a woman stands by the sink. In the same space, a boy on a stool reaches out for a cookie jar, with an enthusiastic girl nearby. Other elements include open cabinets and a window overlooking a garden.",
    "Emphasizing the interaction between the characters, the boy on the stool smiles as he tries accessing a cookie jar, with the girl's eager eyes on him. Meanwhile, a woman, possibly their mother, observes this near the sink, holding a dish. The kitchen scene is enriched with elements like open cabinets, a window, and a puddle on the floor.",
    "The primary setting is a luminous kitchen where the interactions of three characters stand out. A boy, elevated on a stool, goes for a cookie jar, an eager girl stands close, and a woman observes the scene, holding a dish. Surrounding details include a window, open cabinets, dishes, and a water puddle."
]

# Create dataframe
gptdf = pd.DataFrame(descriptions_list, columns=["Description"])


In [81]:
gptdf['id'] = gptdf.index

In [82]:
gptdf['sentences'] = gptdf['Description'].str.split('.')
gptdf

Unnamed: 0,Description,id,sentences
0,"In a bright kitchen setting, a boy stands on a...",0,"[In a bright kitchen setting, a boy stands on ..."
1,"In a kitchen, a girl excitedly outstretches he...",1,"[In a kitchen, a girl excitedly outstretches h..."
2,"A kitchen scene unfolds with a woman, presumab...",2,"[A kitchen scene unfolds with a woman, presuma..."
3,"Within a kitchen, a woman stands by a window t...",3,"[Within a kitchen, a woman stands by a window ..."
4,The kitchen atmosphere is evident with a count...,4,[The kitchen atmosphere is evident with a coun...
5,"Amidst the bright kitchen, the open cabinets r...",5,"[Amidst the bright kitchen, the open cabinets ..."
6,The focal point of this kitchen setting is the...,6,[The focal point of this kitchen setting is th...
7,A distinct feature of the kitchen scene is a p...,7,[A distinct feature of the kitchen scene is a ...
8,Emphasizing the interaction between the charac...,8,[Emphasizing the interaction between the chara...
9,The primary setting is a luminous kitchen wher...,9,[The primary setting is a luminous kitchen whe...


In [83]:
gptdf.iloc[9,:]['sentences']

['The primary setting is a luminous kitchen where the interactions of three characters stand out',
 ' A boy, elevated on a stool, goes for a cookie jar, an eager girl stands close, and a woman observes the scene, holding a dish',
 ' Surrounding details include a window, open cabinets, dishes, and a water puddle',
 '']

In [84]:
gptdf = gptdf.explode('sentences').reset_index(drop=True)
gptdf

Unnamed: 0,Description,id,sentences
0,"In a bright kitchen setting, a boy stands on a...",0,"In a bright kitchen setting, a boy stands on a..."
1,"In a bright kitchen setting, a boy stands on a...",0,A girl with outstretched hands appears excite...
2,"In a bright kitchen setting, a boy stands on a...",0,"A woman, likely their mother, observes the ch..."
3,"In a bright kitchen setting, a boy stands on a...",0,"Various kitchen elements, including open cabi..."
4,"In a bright kitchen setting, a boy stands on a...",0,
5,"In a kitchen, a girl excitedly outstretches he...",1,"In a kitchen, a girl excitedly outstretches he..."
6,"In a kitchen, a girl excitedly outstretches he...",1,Observing them is a woman standing near a sin...
7,"In a kitchen, a girl excitedly outstretches he...",1,"The room has open cabinets, a window showing ..."
8,"In a kitchen, a girl excitedly outstretches he...",1,
9,"A kitchen scene unfolds with a woman, presumab...",2,"A kitchen scene unfolds with a woman, presumab..."


In [85]:
gptdf = gptdf.loc[gptdf['sentences'] != ''].reset_index(drop=True)
gptdf

Unnamed: 0,Description,id,sentences
0,"In a bright kitchen setting, a boy stands on a...",0,"In a bright kitchen setting, a boy stands on a..."
1,"In a bright kitchen setting, a boy stands on a...",0,A girl with outstretched hands appears excite...
2,"In a bright kitchen setting, a boy stands on a...",0,"A woman, likely their mother, observes the ch..."
3,"In a bright kitchen setting, a boy stands on a...",0,"Various kitchen elements, including open cabi..."
4,"In a kitchen, a girl excitedly outstretches he...",1,"In a kitchen, a girl excitedly outstretches he..."
5,"In a kitchen, a girl excitedly outstretches he...",1,Observing them is a woman standing near a sin...
6,"In a kitchen, a girl excitedly outstretches he...",1,"The room has open cabinets, a window showing ..."
7,"A kitchen scene unfolds with a woman, presumab...",2,"A kitchen scene unfolds with a woman, presumab..."
8,"A kitchen scene unfolds with a woman, presumab...",2,"The boy, standing on a stool, reaches for a c..."
9,"A kitchen scene unfolds with a woman, presumab...",2,"Other elements like open cabinets, dishes, an..."


In [86]:
gptdf[['words', 'surps']] = gptdf['sentences'].apply(get_word_surp)

In [87]:
gptdf

Unnamed: 0,Description,id,sentences,words,surps
0,"In a bright kitchen setting, a boy stands on a...",0,"In a bright kitchen setting, a boy stands on a...","[In, Ġa, Ġbright, Ġkitchen, Ġsetting, ,, Ġa, Ġ...","[4.456691265106201, 2.480856418609619, 8.28542..."
1,"In a bright kitchen setting, a boy stands on a...",0,A girl with outstretched hands appears excite...,"[ĠA, Ġgirl, Ġwith, Ġout, stretched, Ġhands, Ġa...","[8.909259796142578, 6.602831840515137, 3.38330..."
2,"In a bright kitchen setting, a boy stands on a...",0,"A woman, likely their mother, observes the ch...","[ĠA, Ġwoman, ,, Ġlikely, Ġtheir, Ġmother, ,, Ġ...","[8.909259796142578, 3.8164854049682617, 4.7108..."
3,"In a bright kitchen setting, a boy stands on a...",0,"Various kitchen elements, including open cabi...","[ĠVarious, Ġkitchen, Ġelements, ,, Ġincluding,...","[15.0089111328125, 11.841216087341309, 7.16316..."
4,"In a kitchen, a girl excitedly outstretches he...",1,"In a kitchen, a girl excitedly outstretches he...","[In, Ġa, Ġkitchen, ,, Ġa, Ġgirl, Ġexcited, ly,...","[4.456691265106201, 2.480856418609619, 10.6579..."
5,"In a kitchen, a girl excitedly outstretches he...",1,Observing them is a woman standing near a sin...,"[ĠObs, erving, Ġthem, Ġis, Ġa, Ġwoman, Ġstandi...","[15.353221893310547, 2.225095272064209, 6.1387..."
6,"In a kitchen, a girl excitedly outstretches he...",1,"The room has open cabinets, a window showing ...","[ĠThe, Ġroom, Ġhas, Ġopen, Ġcabinets, ,, Ġa, Ġ...","[7.687983989715576, 9.02103042602539, 3.302321..."
7,"A kitchen scene unfolds with a woman, presumab...",2,"A kitchen scene unfolds with a woman, presumab...","[A, Ġkitchen, Ġscene, Ġunfolds, Ġwith, Ġa, Ġwo...","[3.940398693084717, 10.463443756103516, 7.2994..."
8,"A kitchen scene unfolds with a woman, presumab...",2,"The boy, standing on a stool, reaches for a c...","[ĠThe, Ġboy, ,, Ġstanding, Ġon, Ġa, Ġstool, ,,...","[7.687983989715576, 8.478435516357422, 2.71497..."
9,"A kitchen scene unfolds with a woman, presumab...",2,"Other elements like open cabinets, dishes, an...","[ĠOther, Ġelements, Ġlike, Ġopen, Ġcabinets, ,...","[12.121082305908203, 8.39760684967041, 6.03748..."


# MISC

In [29]:
sentences = [
    "The cat is on the mat.",
    "The cat is on the hat",
    "The cat is on the pizza",
    "The pizza is on the mat",
    "I told you that the cat is on the mat",
    "I told you the cat is on the mat",
]

df = pd.DataFrame(sentences, columns=['sent'])

In [30]:
df

Unnamed: 0,sent
0,The cat is on the mat.
1,The cat is on the hat
2,The cat is on the pizza
3,The pizza is on the mat
4,I told you that the cat is on the mat
5,I told you the cat is on the mat


In [35]:
get_word_surp(sentences[0])

0                 [The, Ġcat, Ġis, Ġon, Ġthe, Ġmat, .]
1    [3.276172161102295, 9.222101211547852, 2.46293...
dtype: object

In [36]:
df[['words', 'surps']] = df['sent'].apply(get_word_surp)

In [20]:
for word, surp in m.surprise(sentences[0])[0]:
    print(surp)

3.276172161102295
9.222101211547852
2.46293306350708
4.144979476928711
0.9611892700195312
7.2371673583984375
1.3514349460601807


In [37]:
df

Unnamed: 0,sent,words,surps
0,The cat is on the mat.,"[The, Ġcat, Ġis, Ġon, Ġthe, Ġmat, .]","[3.276172161102295, 9.222101211547852, 2.46293..."
1,The cat is on the hat,"[The, Ġcat, Ġis, Ġon, Ġthe, Ġhat]","[3.276172161102295, 9.222100257873535, 2.46292..."
2,The cat is on the pizza,"[The, Ġcat, Ġis, Ġon, Ġthe, Ġpizza]","[3.276172161102295, 9.222100257873535, 2.46292..."
3,The pizza is on the mat,"[The, Ġpizza, Ġis, Ġon, Ġthe, Ġmat]","[3.276172161102295, 10.859604835510254, 3.2117..."
4,I told you that the cat is on the mat,"[I, Ġtold, Ġyou, Ġthat, Ġthe, Ġcat, Ġis, Ġon, ...","[3.9977831840515137, 6.8563690185546875, 0.618..."
5,I told you the cat is on the mat,"[I, Ġtold, Ġyou, Ġthe, Ġcat, Ġis, Ġon, Ġthe, Ġ...","[3.9977831840515137, 6.8563690185546875, 0.618..."
