# PCDistModelWrapper demo

In [1]:
import os
import sys

# Location of the tcr_embedder checkout. Set the TCR_EMBEDDER_ROOT environment
# variable to point somewhere else; when it is unset the original hard-coded
# path is used, so existing behaviour is unchanged.
PROJECT_ROOT = os.environ.get(
    'TCR_EMBEDDER_ROOT', '/home/yutanagano/Projects/tcr_embedder'
)

# Make the project's `src` package importable from this notebook. The guard
# keeps sys.path free of duplicate entries when the cell is re-run.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Also switch the working directory to the project root
# (presumably so project-relative file paths resolve -- confirm).
os.chdir(PROJECT_ROOT)

#### Demo prep (this can be ignored)

In [2]:
from src.modules import RandomEmbedder
# Instantiate a RandomEmbedder only to obtain a state_dict for the demo below;
# it stands in for a state_dict that would normally be loaded from a file.
dummy_model = RandomEmbedder()
state_dict = dummy_model.state_dict()

## 1. Instantiating model

What you need:
- Access to the source code for the appropriate model class (`RandomEmbedder` in this demo)
- A copy of the trained model's `state_dict`. In this notebook, `state_dict` is just a fake one generated in the demo prep above; in practice it will need to be loaded from a file via `torch.load` (see the `benchmark.ipynb` notebook for an example).

In [3]:
# Import the class that defines the model.
from src.modules import RandomEmbedder

# Create a model instance, then load the saved parameters into it.
model = RandomEmbedder()
# Kept as the cell's last expression so the result of the load is displayed.
model.load_state_dict(state_dict)

<All keys matched successfully>

## 2. Wrap model

What you need:
- Access to the source code for the wrapper class (`PCDistModelWrapper`)
- Access to the source code for the appropriate `Tokeniser` class (`CDR3Tokeniser` in this notebook)

In [4]:
from src.datahandling.tokenisers import CDR3Tokeniser
from src.utils import PCDistModelWrapper

# Pair the loaded model with a tokeniser instance; the resulting wrapper is
# the object on which pdist and cdist are called in the cells below.
wrapped_model = PCDistModelWrapper(model=model, tokeniser=CDR3Tokeniser())

## 3. `pdist` and `cdist`

In [5]:
import numpy as np

# Three entries, each with a 'CDR3A' and a 'CDR3B' string.
data_1 = dict(
    CDR3A=np.array(['AAA', 'AAC', 'ACC']),
    CDR3B=np.array(['AAA', 'AAA', 'AAA']),
)

# Two entries that only carry a 'CDR3A' string.
data_2 = dict(CDR3A=np.array(['AAA', 'AAC']))

In [6]:
# Distances among the entries of data_1. For its three entries the displayed
# result holds three values -- presumably one per unordered pair (condensed
# form); confirm against PCDistModelWrapper.pdist.
wrapped_model.pdist(sequence_elements=data_1)

array([0.4630851 , 1.0275284 , 0.60230976], dtype=float32)

In [7]:
# Distances between data_1 (three entries) and data_2 (two entries). The
# displayed result is a 3x2 array -- presumably rows follow data_1 and columns
# follow data_2; confirm against PCDistModelWrapper.cdist.
wrapped_model.cdist(sequence_elements_1=data_1, sequence_elements_2=data_2)

array([[0.76536685, 0.87089807],
       [1.1478796 , 1.1057673 ],
       [1.5708961 , 1.435915  ]], dtype=float32)