In [44]:
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import tidytcells as tt
from scipy.spatial.distance import pdist, squareform

# Resolve the project root only once per kernel session. The cell changes the
# working directory below, so recomputing from Path.cwd() on a re-run would
# climb two more levels and point at the wrong directory.
if "__project_dir__" not in globals():
    __project_dir__ = Path.cwd().parents[1].resolve()

# sys.path entries must be plain strings; a Path object is ignored by the
# import machinery. The membership check keeps re-runs from adding duplicates.
if str(__project_dir__) not in sys.path:
    sys.path.append(str(__project_dir__))
os.chdir(__project_dir__)

# Project-local import: deliberately placed after the path/cwd bootstrap above
# (presumably `src` lives in the project root -- confirm), so that it resolves
# on a freshly started kernel.
from src.model_loader import ModelLoader  # noqa: E402

In [40]:
# Ask tidytcells for every gene-level TCR symbol that is marked functional
# ("F") and contains "TRBV".
trbv_gene_symbols = tt.tcr.query(
    precision="gene",
    functionality="F",
    contains="TRBV",
)

# One-column table of the symbols, sorted by name with a fresh 0..n-1 index so
# that row order is reproducible between runs.
functional_trbvs = pd.DataFrame(trbv_gene_symbols, columns=["TRBV"])
functional_trbvs = functional_trbvs.sort_values(by="TRBV", ignore_index=True)

In [41]:
# Output directory (relative to the current working directory) for everything
# this notebook writes.
savedir = Path("v_gene_representations")

# Create it if it is missing; an existing directory is left as-is, while an
# existing non-directory at this path still raises FileExistsError.
savedir.mkdir(exist_ok=True)

In [45]:
# Write the gene names, one per line, in the same order as the rows of
# `functional_trbvs`, so the file can serve as a row key for the arrays saved
# alongside it.
key_path = savedir / "key.txt"
with key_path.open("w") as key_file:
    key_file.writelines(
        gene_name + "\n" for gene_name in functional_trbvs["TRBV"]
    )

In [50]:
# Names of the saved models to embed with; each name is used both as the
# sub-directory to load the model from and as the output sub-directory.
models_of_interest = [
    "BCDRBERT_+ACL_1",
    "BVCDR3BERT",
]

In [51]:
# Root directory holding one sub-directory per saved model. Defaults to the
# original author's local path; set the TCR_MODEL_SAVES_DIR environment
# variable to point elsewhere without editing the notebook.
MODEL_SAVES_DIR = Path(
    os.environ.get(
        "TCR_MODEL_SAVES_DIR",
        "/home/yutanagano/UCLOneDrive/MBPhD/projects/tcr_embedder/model_saves",
    )
)

# For each model: embed the functional TRBV genes, compute all pairwise
# distances between the embeddings, and save both arrays under
# <savedir>/<model name>/.
for model_name in models_of_interest:
    modeldir = savedir / model_name
    modeldir.mkdir(exist_ok=True)

    # A separate `model_name` keeps the string name distinct from the loaded
    # model object, instead of rebinding one variable to two different types.
    model = ModelLoader(MODEL_SAVES_DIR / model_name)

    # Assumes `embed` returns a 2-D array with one row per input row, in the
    # same order as `functional_trbvs` -- TODO confirm against ModelLoader.
    embeddings = model.embed(functional_trbvs)

    # pdist yields the condensed pairwise distances (Euclidean by default);
    # squareform expands them into a symmetric square matrix.
    cdist = squareform(pdist(embeddings))

    np.save(modeldir / "embeddings.npy", embeddings)
    np.save(modeldir / "cdist.npy", cdist)