In [7]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [8]:
# Notebook parameter: stimulus label that is interpolated into the names of
# the similarity-matrix and RDM files written by the save cells below.
STIMULUS_LABEL_SAVE_STRING: str = "pieman"

In [9]:


# Location of the CSV that holds the continuous embeddings, loaded as-is.
input_csv = "/Volumes/Passport/fmriprep/trait_ratings_data/filled_1pieman.csv"
df = pd.read_csv(input_csv)

# Pick out the embedding columns by name (dim_0 ... dim_3071). Nothing is
# dropped from `df` itself: any other column in the file (the original note
# mentions a leading "bin_CJ" column) is simply not selected.
N_EMBEDDING_DIMS = 3072
emb_cols = [f"dim_{i}" for i in range(N_EMBEDDING_DIMS)]
embeddings = df.loc[:, emb_cols].to_numpy()  # one row per CSV row, one column per dim_*

# Sanity check; the recorded run of this cell printed (160, 3072).
print("Embeddings shape:", embeddings.shape)

Embeddings shape: (160, 3072)


In [10]:
# Pairwise cosine similarity between every pair of embedding rows.
# For an (n_items, n_dims) input this yields an (n_items, n_items) matrix;
# the recorded run had n_items == 160.
cosine_sim_matrix = cosine_similarity(embeddings, embeddings)

# Validate against the actual number of rows instead of a hard-coded 160, so
# the check stays meaningful when the notebook is re-run for another stimulus
# whose embedding file has a different number of rows.
n_items = embeddings.shape[0]
assert cosine_sim_matrix.shape == (n_items, n_items), (
    f"Cosine similarity matrix should be of shape ({n_items}, {n_items}), "
    f"got {cosine_sim_matrix.shape}"
)
print("Cosine‐similarity matrix shape:", cosine_sim_matrix.shape)

# Optional: If you prefer a “dissimilarity” RDM, do:
# cosine_dist_matrix = 1.0 - cosine_sim_matrix

Cosine‐similarity matrix shape: (160, 160)


In [11]:
# Persist the similarity matrix in two formats under the same base name:
# a binary .npy file, and a CSV copy for human inspection.
cos_sim_npy_path = f"/Volumes/Passport/fmriprep/derivatives/personality_cos_sim_matrix/{STIMULUS_LABEL_SAVE_STRING}_open_ended_cos_sim.npy"
np.save(cos_sim_npy_path, cosine_sim_matrix)

# The CSV is written without the DataFrame's row index.
cos_sim_csv_path = f"/Volumes/Passport/fmriprep/derivatives/personality_cos_sim_matrix/{STIMULUS_LABEL_SAVE_STRING}_open_ended_cos_sim.csv"
cos_sim_frame = pd.DataFrame(cosine_sim_matrix)
cos_sim_frame.to_csv(cos_sim_csv_path, index=False)

In [13]:
# Turn similarity into dissimilarity: every entry becomes 1 - cosine similarity,
# giving the representational dissimilarity matrix (RDM).
cos_open_ended_rdm = np.subtract(1.0, cosine_sim_matrix)

# Write the RDM to disk as a binary .npy file named after the stimulus label.
rdm_npy_path = f"/Volumes/Passport/fmriprep/derivatives/RDMs_behavior/{STIMULUS_LABEL_SAVE_STRING}_cos_open_ended_RDM.npy"
np.save(rdm_npy_path, cos_open_ended_rdm)