In [None]:
# Uncomment these lines if you have not installed the required packages yet.
# The %pip magic (rather than !pip) installs into the environment of the running kernel.
# %pip install vislearnlabpy
# %pip install git+https://github.com/openai/CLIP.git
# %pip install torch torchvision torchaudio

To open this notebook in Google Colab: https://colab.research.google.com/github/vislearnlab/vllpy/blob/main/examples/drawings/embedding_analysis.ipynb

In [None]:
from vislearnlabpy.embeddings.generate_embeddings import EmbeddingGenerator
from vislearnlabpy.embeddings.embedding_store import EmbeddingStore
from vislearnlabpy.embeddings.utils import display_search_results, zscore_embeddings 
import numpy as np
import pandas as pd
import os
from pathlib import Path

In [None]:
# Folder containing the exported drawing data, resolved relative to the current
# working directory (often set to /Volumes/vislearnlab/experiments/drawings/data..).
SAVE_DIR = Path.cwd() / "mongo_output"

# Load the drawings metadata table from that folder.
drawings_df = pd.read_csv(SAVE_DIR / "AllDescriptives_images_final_birch_run_v1.csv")

In [None]:
# Inspect the first rows and make sure the `filename` column points to where the
# image files actually live on this machine. This is very important: these paths
# are later renamed to `image1` and fed to the embedding generator. If they are
# wrong, rewrite them with some string manipulation (see remap_path) before continuing.
drawings_df.head() 

In [8]:
def remap_path(full_path, new_base):
    """
    Re-root a file path under a new base directory.

    Only the file name and the name of its immediate parent directory are kept;
    every directory above the parent is discarded and replaced by ``new_base``.

    Args:
        full_path: Path (string or path-like) to the original file.
        new_base: Directory under which the file should be re-rooted.

    Returns:
        str: ``new_base/<parent directory name>/<file name>``.

    Example:
        Input:
            full_path = ".../mongo_output/sketches_full_dataset/square/image.png"
            new_base = "/mnt/data/output"

        Output:
            "/mnt/data/output/square/image.png"
    """
    original = Path(full_path)
    parent_name = original.parent.name  # e.g. "a_shark"

    return str(Path(new_base) / parent_name / original.name)

In [37]:
# Rename columns to the format our embedding generator expects them to be in.
drawings_df = drawings_df.rename(columns={
    'filename': 'image1',
    'category': 'text1'
})
# Get rid of articles by keeping only the text after the last underscore
# (e.g. "a_shark" -> "shark"). The vectorized .str accessor leaves missing
# labels as NaN instead of raising like a plain str.split would.
# NOTE(review): a multi-word category such as "ice_cream" would be reduced to
# "cream" -- confirm that no such category exists in this dataset.
drawings_df['text1'] = drawings_df['text1'].str.split('_').str[-1]

#remap path if needed: new_base="/file/storage/path"
#drawings_df["image1"] = drawings_df["image1"].apply(lambda x: remap_path(x, new_base))

# Filter to just our actual participants: IDs containing "bd" (case-insensitive).
# na=False treats a missing participantID as "no match"; without it the mask
# contains NaN and boolean indexing raises.
is_participant = drawings_df['participantID'].str.lower().str.contains('bd', na=False)
filtered_df = drawings_df[is_participant]
# Persist the filtered table; it is passed to the generator as input_csv below.
filtered_df.to_csv("tmp_draw_df.csv")

Our embedding generator

In [35]:
# The sub-package is `vislearnlabpy.embeddings` (plural), as in every other
# import of this notebook; the singular spelling does not match them.
from vislearnlabpy.embeddings.stimuli_loader import ImgExtractionSettings, ImageExtractor

# Image preprocessing settings used to build the transform handed to the CLIP generator.
clip_extraction_settings = ImgExtractionSettings(
        resize_dim=224,
        apply_content_crop=True,
        apply_center_crop=False,
        use_thumbnail=False,
        background_threshold=1, # remove any small background artifacts that are size 1 or less
        filter_edge_artifacts=False,
        normalize_stroke_thickness=False
)
clip_transforms = ImageExtractor.get_transformations(clip_extraction_settings)
# change device="cuda" or "cuda:2" etc if you are using with GPU
clip_generator = EmbeddingGenerator(model_type="clip", device="cpu", output_type="doc", transform=clip_transforms)
# setting text prompt to "a drawing of a xx" will make sure CLIP knows it's looking at drawings
clip_generator.model.text_prompt = "a drawing of a"

We're storing our embeddings in DocArray lists so that they are accessible and loadable from a single file and are directly linked to their file paths and category names

In [None]:
# Run the generator over the filtered CSV, using the image path column as the ID.
clip_generator.generate_image_embeddings(
    input_csv="tmp_draw_df.csv",
    id_column="image1",
    output_path="sketch_embeddings",
    batch_size=100,
    overwrite=True,
)

In [None]:
# Switch the same generator to CSV output and run it again with identical arguments.
# NOTE(review): this mutates clip_generator, so re-running the previous cell
# afterwards presumably also produces CSV output -- confirm.
clip_generator.output_type = "csv"
clip_generator.generate_image_embeddings(
    input_csv="tmp_draw_df.csv",
    id_column="image1",
    output_path="sketch_embeddings",
    batch_size=100,
    overwrite=True,
)

Loading our image embeddings and text embeddings

In [None]:
# Build one store per modality from the .docs files under the
# "sketch_embeddings" output folder used by the generator cells above.
image_embedding_store = EmbeddingStore.from_doc(
    "sketch_embeddings/image_embeddings/clip_image_embeddings_doc.docs"
)
text_embedding_store = EmbeddingStore.from_doc(
    "sketch_embeddings/text_embeddings/clip_text_embeddings_doc.docs"
)

In [None]:
# Display a single entry (index 5) of the text embedding list to see what is stored in it.
text_embedding_store.EmbeddingList[5]

Compute a representational dissimilarity matrix (RDM) at the category level.

In [None]:
# Compute the category-level RDM from the image store, with "rdm" as the output path.
image_embedding_store.compute_text_rdm(output_path="rdm")

Just sanity checking that our text embeddings look like we expect them to.

In [None]:
# Display the `text` field of the text embedding list as a sanity check.
text_embedding_store.EmbeddingList.text

Search our image embedding store with the text query "shark" and display the top 10 results.

In [None]:
# Query the image store with the text "shark", limited to 10 results, and display them.
docs, scores = image_embedding_store.search_store(
    text_query="shark",
    limit=10,
)
display_search_results(docs, scores)

Now restrict the search to drawings that were originally labeled with the queried category (here, "penguin").

In [None]:
# Same kind of search, but for "penguin" and with the `categories` argument set to ["penguin"].
docs, scores = image_embedding_store.search_store(
    text_query="penguin",
    categories=["penguin"],
    limit=10,
)
display_search_results(docs, scores)

In [44]:
# Pick one drawing (index 120) from the image store; the next cell scores it
# against the text embeddings.
a = image_embedding_store.EmbeddingList[120]

In [None]:
from vislearnlabpy.embeddings.similarity_utils import calculate_accuracy

# Score the single drawing `a` against the full list of text embeddings,
# passing its own label as the third argument.
calculate_accuracy(a.embedding, text_embedding_store.EmbeddingList, a.text)

Now see how recognizable different images are

In [None]:
from collections import defaultdict

import matplotlib.pyplot as plt
import pandas as pd

from vislearnlabpy.embeddings.similarity_utils import calculate_accuracy

# One record per drawing: its label, its score from calculate_accuracy and its URL.
records = [
    {
        "category": doc.text,
        "accuracy": calculate_accuracy(
            doc.embedding,
            text_embedding_store.EmbeddingList,
            doc.text,
        ),
        "url": doc.url,  # can calculate age etc. here from ID extracted from URL
    }
    for doc in image_embedding_store.EmbeddingList
]
df = pd.DataFrame(records)

# Mean score per category; keep positives only and sort ascending for barh order.
mean_df = df.groupby("category", as_index=False)["accuracy"].mean()
mean_df = mean_df[mean_df["accuracy"] > 0].sort_values("accuracy")

# Horizontal bars keep the category labels readable; the figure height grows
# with the number of categories.
fig, ax = plt.subplots(figsize=(8, max(4, len(mean_df) * 0.25)))
ax.barh(mean_df["category"], mean_df["accuracy"])
ax.set_xlabel("Mean recognizability (probability)")
ax.set_title("Average recognizability per category")
fig.tight_layout()
plt.show()

Save embeddings and recognizability values as CSV files for processing in R etc.

In [42]:
# Export both embedding stores next to their .docs counterparts under sketch_embeddings/.
text_embedding_store.to_base_csv("sketch_embeddings/text_embeddings/clip_text_embeddings.csv")
image_embedding_store.to_base_csv("sketch_embeddings/image_embeddings/clip_image_embeddings.csv")
# Per-drawing recognizability table (`df`) built in the previous cell.
df.to_csv("recognizability.csv")

Z-score embeddings before saving if needed

In [43]:
# Stack the image embeddings into one array, z-score them, and store the result
# in the `normalized_embedding` field of the image embedding list.
image_embedding_store.EmbeddingList.normalized_embedding = zscore_embeddings(
    np.stack(image_embedding_store.EmbeddingList.embedding)
)
# Uncomment to replace the raw embeddings with the z-scored ones and re-export the CSV:
# image_embedding_store.EmbeddingList.embedding = image_embedding_store.EmbeddingList.normalized_embedding
# image_embedding_store.to_base_csv("sketch_embeddings/image_embeddings/clip_image_embeddings.csv")

In [3]:
from vislearnlabpy.embeddings.similarity_generator import SimilarityGenerator

# Cosine-similarity generator built on the model attached to the image embedding store.
sim_generator = SimilarityGenerator(
    model=image_embedding_store.FeatureGenerator.model,
    similarity_type="cosine",
)


In [None]:
# Cross similarities between the image embeddings and the text embeddings,
# with "similarity_matrix.csv" as the output CSV.
sim_generator.cross_sims(
    image_embedding_store.EmbeddingList,
    text_embedding_store.EmbeddingList,
    output_csv="similarity_matrix.csv",
)