# OLGA

OLGA is used to simulate repertoires and confounders.

OLGA repository: https://github.com/statbiophys/OLGA/tree/master/olga

Running OLGA from the command line with the default model to simulate TCRbeta:

```
olga-generate_sequences --humanTRB -n 1000 -o rep_1.tsv --seed=1
```

Running OLGA from the command line with a custom model (the model folder is passed via `--set_custom_model_VDJ`):

```
olga-generate_sequences -n 1000 -o rep_1.tsv --seed=1 --set_custom_model_VDJ ./path_to_model_folder/
```

The custom model is located under ./olga_model_removed_TRBV5_1/.

# immuneML

immuneML is used to load the repertoires and to implant signals into them in order to simulate the immune state.


In [1]:
import os
import dagsim.baseDS as ds
import numpy as np
import uuid
from pathlib import Path
from util.load_repertoire import load_olga_repertoire, load_iml_repertoire
from util.implanting import make_immune_signal, make_repertoire_with_signal
from immuneML.util.PathBuilder import PathBuilder
from util.implanting import make_repertoire_without_signal
from util.implanting import make_AIRR_dataset

# All paths in this notebook are relative to the current working directory.
# Uncomment the next line if the notebook has to be run from ./simData.
# os.chdir("./simData")

# Output location handed to the implanting helpers in generate_repertoire
# (presumably PathBuilder.build also creates the directory -- confirm).
result_path = PathBuilder.build("./data/implanted/")
# Signal shared by every simulated repertoire; its `.id` is used as the
# signal name when writing repertoires and when exporting the dataset.
signal = make_immune_signal()

In [2]:
def get_state(location, age):
    """Draw the immune state of one individual as a single Bernoulli sample.

    The success probability is chosen from the (location, age) pair:

    * (0, 0) -> 0.8
    * (0, 1) -> 0.2
    * (1, 0) -> 0.3
    * any other combination -> 0.9

    Returns:
        bool: result of one ``np.random.binomial(1, p)`` draw.
    """
    if location == 0:
        # location 0: the probability is decided by age alone.
        prob = 0.8 if age == 0 else (0.2 if age == 1 else 0.9)
    elif location == 1 and age == 0:
        prob = 0.3
    else:
        prob = 0.9
    return bool(np.random.binomial(1, prob))


def select(location, age):
    """Draw the selection indicator for one individual.

    Used as the function of the ``ds.Selection`` node. A single Bernoulli
    sample is drawn whose probability depends on (location, age):
    (0, 0) -> 0.8, (0, 1) -> 0.2, (1, 0) -> 0.3, otherwise 0.9.

    Returns:
        bool: result of one ``np.random.binomial(1, p)`` draw.
    """
    # (location, age, probability) rules, checked in order.
    rules = (
        (0, 0, 0.8),
        (0, 1, 0.2),
        (1, 0, 0.3),
    )
    chosen = 0.9  # fallback for every combination not listed above
    for rule_location, rule_age, rule_prob in rules:
        if location == rule_location and age == rule_age:
            chosen = rule_prob
            break
    return bool(np.random.binomial(1, chosen))


def generate_repertoire(state, location, age, seed):
    """Simulate one repertoire with OLGA and store it with or without the signal.

    The OLGA command-line tool writes ``./data/olga/<uid>.tsv``; the file is
    then loaded via ``load_olga_repertoire`` and passed to one of the
    implanting helpers together with the module-level ``signal`` and
    ``result_path``.

    Args:
        state: immune state; ``state == 0`` (which includes ``False``) stores
            the repertoire without the signal, anything else with the signal.
        location: ``1`` uses OLGA's default human TRB model; any other value
            uses the custom model in ``./olga_model_removed_TRBV5_1``.
        age: ``0`` generates 1000 sequences, any other value 800.
        seed: value forwarded to OLGA's ``--seed`` option.

    Returns:
        str: the freshly generated UUID naming the OLGA output file.

    Raises:
        subprocess.CalledProcessError: if OLGA exits with a non-zero status.
        FileNotFoundError: if ``olga-generate_sequences`` is not on PATH.
    """
    # Local import so the block does not depend on a change to the import cell.
    import subprocess

    uid = str(uuid.uuid4())

    PathBuilder.build('./data/olga/')
    olga_file = "./data/olga/" + uid + ".tsv"

    num_seq = "1000" if age == 0 else "800"

    # Build the command as an argument list (no shell involved), keeping the
    # same option order as the command lines documented at the top.
    common_args = ["-n", num_seq, "-o", olga_file, "--seed=" + str(seed)]
    if location == 1:
        command = ["olga-generate_sequences", "--humanTRB"] + common_args
    else:
        command = (["olga-generate_sequences"] + common_args
                   + ["--set_custom_model_VDJ", "./olga_model_removed_TRBV5_1"])

    # check=True: fail here, with OLGA's exit status, rather than later when
    # the missing output file is loaded.
    subprocess.run(command, check=True)

    rep = load_olga_repertoire(filepath=Path(olga_file), result_path=Path("./data/Exp3/immuneML_format/"))

    if state == 0:
        make_repertoire_without_signal(repertoire=rep, signal_name=signal.id, path=result_path)
    else:
        make_repertoire_with_signal(rep, signal, result_path)

    return uid

In [3]:
# Running index of the samples. size_field="stop" presumably tells dagsim to
# pass the sample count as np.arange's `stop` argument -- confirm in the
# dagsim documentation. The index is reused as the OLGA seed below.
Index = ds.Generic(
    name="index",
    function=np.arange,
    size_field="stop",
)

# Two binary variables, each a Bernoulli(0.5) draw.
Location = ds.Generic(
    name="location",
    function=np.random.binomial,
    arguments={"n": 1, "p": 0.5},
)
Age = ds.Generic(
    name="age",
    function=np.random.binomial,
    arguments={"n": 1, "p": 0.5},
)

# Immune state depends on location and age (see get_state).
ImmuneState = ds.Generic(
    name="state",
    function=get_state,
    arguments={"location": Location, "age": Age},
)

# Repertoire node: runs OLGA and the implanting step; its value is the
# repertoire identifier returned by generate_repertoire.
Repertoire = ds.Generic(
    name="repertoire",
    function=generate_repertoire,
    arguments={
        "state": ImmuneState,
        "location": Location,
        "age": Age,
        "seed": Index,
    },
)

# Selection node driven by location and age (see select).
Selection = ds.Selection(
    name="S",
    function=select,
    arguments={"location": Location, "age": Age},
)

In [1]:
# NOTE: this cell needs the import and node-definition cells to have been run
# in the same kernel first; otherwise `ds` is undefined (NameError).

# Assemble the simulation graph from the previously declared nodes.
CAIRR_graph = ds.Graph(name="CAIRR_graph_3", list_nodes=[Location, Age, ImmuneState, Repertoire, Index, Selection])
# Draw 10 samples. csv_name presumably names the CSV file dagsim writes --
# confirm in the dagsim docs. The result is later indexed by node name
# (CAIRR_data["repertoire"]).
CAIRR_data = CAIRR_graph.simulate(num_samples=10, csv_name="CAIRR_data_Exp3_tr")
# Render the graph structure (output format and location are decided by dagsim).
CAIRR_graph.draw()

NameError: name 'ds' is not defined

In [None]:
# Collect the repertoires produced by the simulation (one identifier per
# sample in CAIRR_data["repertoire"]) and export them as a single dataset.
dataset = make_AIRR_dataset(
    repertoires=[
        load_iml_repertoire(
            filepath=Path("./data/Exp3/immuneML_format/repertoires/"),
            identifier=repertoire_id,
        )
        for repertoire_id in CAIRR_data["repertoire"]
    ],
    path=Path("./data/exported/"),
    dataset_name="Exp3_tr",
    signal_names=[signal.id],
)

# Short summary of what was exported.
print(
    f"Dataset id: {dataset.identifier}, "
    f"dataset name: {dataset.name}, "
    f"number of repertoires: {dataset.get_example_count()}"
)