# OLGA

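OLGA is used to simulate the immune repertoires. The confounder is simulated through the choice of OLGA generation model: depending on the confounder value, a repertoire is generated either with the default model or with a custom model.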

OLGA repository: https://github.com/statbiophys/OLGA/tree/master/olga

Running OLGA from the command line with the default model to simulate TCRbeta:

```
olga-generate_sequences --humanTRB -n 1000 -o rep_1.tsv --seed=1
```

Running OLGA from the command line with the custom model:

```
olga-generate_sequences -n 1000 -o rep_1.tsv --seed=1 --set_custom_model_VDJ ./path_to_model_folder/
```

The custom model is located under ./olga_model_removed_TRBV5_1/.

# immuneML

immuneML is used to load the simulated repertoires and to implant signals into them in order to simulate the immune state.


In [None]:
import os
import dagsim.baseDS as ds
import numpy as np
import uuid
from pathlib import Path
from util.load_repertoire import load_olga_repertoire, load_iml_repertoire
from util.implanting import make_immune_signal, make_repertoire_with_signal
from immuneML.util.PathBuilder import PathBuilder
from util.implanting import make_repertoire_without_signal
from util.implanting import make_AIRR_dataset

# os.chdir("./simData")
# Output location handed to the implanting helpers in generate_repertoire
# (presumably PathBuilder.build also creates the directory -- confirm).
result_path = PathBuilder.build("./data/implanted/")
# Single immune signal shared by all repertoires; its `id` is used as the
# signal name when writing repertoires and when building the dataset.
signal = make_immune_signal()

In [None]:
def get_state(confounder):
    """Draw a binary immune state whose probability depends on the confounder.

    A single Bernoulli trial is drawn from NumPy's global random state with
    success probability 0.5 when ``confounder`` equals 1 and 0.2 otherwise.
    """
    if confounder == 1:
        success_prob = 0.5
    else:
        success_prob = 0.2
    return np.random.binomial(n=1, p=success_prob)

def select(location):
    """Placeholder for a selection function; not implemented yet.

    ``location`` is accepted but ignored, and ``None`` is always returned.
    """
    return None

def generate_repertoire(state, confounder, seed):
    """Generate one repertoire with OLGA and write it with or without the signal.

    OLGA is run from the command line to produce 1000 sequences in
    ``./data/olga/<uid>.tsv``. The file is then passed to
    ``load_olga_repertoire`` and the resulting repertoire is handed to one of
    the implanting helpers, depending on ``state``.

    Args:
        state: Immune state of the repertoire. ``0`` calls
            ``make_repertoire_without_signal``; any other value calls
            ``make_repertoire_with_signal`` with the module-level ``signal``.
        confounder: Confounder value. ``1`` selects OLGA's default human TRB
            model (``--humanTRB``); any other value selects the custom model
            in ``./olga_model_removed_TRBV5_1``.
        seed: Value passed to OLGA as ``--seed``.

    Returns:
        The UUID string used as the OLGA output file name (without extension).

    Raises:
        FileNotFoundError: If ``olga-generate_sequences`` is not on the PATH.
        subprocess.CalledProcessError: If OLGA exits with a non-zero status.
    """
    # Local import keeps this block self-contained within the notebook cell.
    import subprocess

    uid = str(uuid.uuid4())

    PathBuilder.build('./data/olga/')
    olga_file = "./data/olga/" + uid + ".tsv"

    # Argument lists avoid shell parsing of the concatenated command string.
    if confounder == 1:
        command = ["olga-generate_sequences", "--humanTRB", "-n", "1000",
                   "-o", olga_file, "--seed=" + str(seed)]
    else:
        command = ["olga-generate_sequences", "-n", "1000",
                   "-o", olga_file, "--seed=" + str(seed),
                   "--set_custom_model_VDJ", "./olga_model_removed_TRBV5_1"]

    # check=True: fail here if OLGA fails, instead of later when the missing
    # or incomplete output file is loaded.
    subprocess.run(command, check=True)

    rep = load_olga_repertoire(filepath=Path(olga_file), result_path=Path("./data/Exp1/immuneML_format/"))

    if state == 0:
        make_repertoire_without_signal(repertoire=rep, signal_name=signal.id, path=result_path)
    else:
        make_repertoire_with_signal(rep, signal, result_path)

    return uid

In [None]:
# Node definitions for the simulation graph.
# Index: np.arange output, used as the per-sample OLGA seed
# (size_field="stop" presumably makes dagsim pass the sample count as `stop` -- confirm).
Index = ds.Generic(name="index", function=np.arange, size_field="stop")
# Confounder ("location"): categorical draw from {1, 2, 3} with probabilities 0.5 / 0.2 / 0.3.
# get_state and generate_repertoire only distinguish the value 1 from the others.
Confounder = ds.Generic(name="location", function=np.random.choice, arguments={"a": [1,2,3], "p":[0.5, 0.2, 0.3]})
# Immune state: binary outcome drawn by get_state from the confounder.
ImmuneState = ds.Generic(name="state", function=get_state, arguments={"confounder": Confounder})
# Repertoire: generated from state and confounder, seeded by the sample index.
Repertoire = ds.Generic(name="repertoire", function=generate_repertoire, arguments={"state": ImmuneState, "confounder": Confounder, "seed": Index})
# Selection node: the original call had an empty `function=` (syntax error).
# Wired to the `select` stub, whose only parameter is `location`.
# NOTE(review): `select` is not implemented yet; confirm the intended wiring.
Selection = ds.Selection(name="S", function=select, arguments={"location": Confounder})

In [None]:
# Index is included in the graph because Repertoire takes it as its "seed" parent;
# a parent node that is not part of the graph cannot be simulated.
CAIRR_graph = ds.Graph(name="CAIRR_graph_1", list_nodes=[Index, Confounder, ImmuneState, Repertoire])
# Simulate 10 samples; the result is indexed by node name further below.
# NOTE(review): "CAIRR_data_!" may be a typo for "CAIRR_data_1" -- left unchanged, confirm.
CAIRR_data = CAIRR_graph.simulate(num_samples=10, csv_name="CAIRR_data_!")

In [None]:
# Load one repertoire per identifier produced by the "repertoire" node of the
# simulation, then bundle them into a dataset labelled with the signal id.
iml_repertoire_dir = Path("./data/Exp1/immuneML_format/repertoires/")
repertoires = [
    load_iml_repertoire(filepath=iml_repertoire_dir, identifier=repertoire_id)
    for repertoire_id in CAIRR_data["repertoire"]
]
dataset = make_AIRR_dataset(
    repertoires=repertoires,
    path=Path("./data/exported/"),
    dataset_name="Exp1",
    signal_names=[signal.id],
)

# Short summary of the dataset that was built.
print(f"Dataset id: {dataset.identifier}, dataset name: {dataset.name}, number of repertoires: {dataset.get_example_count()}")