# OLGA

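OLGA is used to simulate repertoires. The confounder is simulated through the choice of OLGA generation model: a repertoire is generated either with the default human TRB model or with the custom model described below.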

OLGA repository: https://github.com/statbiophys/OLGA/tree/master/olga

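Running OLGA from the command line with the default model to simulate TCRbeta (note: the first code cell below loads its input from `./data/rep1.tsv`, so write or move the generated file there):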

```
olga-generate_sequences --humanTRB -n 1000 -o rep_1.tsv --seed=1
```

Running OLGA from the command line with the custom model:

```
olga-generate_sequences -n 1000 -o rep_1.tsv --seed=1 --set_custom_model_VDJ ./path_to_model_folder/
```

The custom model is located under ./olga_model_removed_TRBV5_1/.

# immuneML 

immuneML is used to load the repertoires and to implant signals into them in order to simulate the immune state.



In [None]:
from pathlib import Path
from util.load_repertoire import load_olga_repertoire

# Read the OLGA-generated TSV file and store it in immuneML's format under ./data/immuneML_format/.
olga_file = Path("./data/rep1.tsv")
immuneml_dir = Path("./data/immuneML_format/")
repertoire = load_olga_repertoire(filepath=olga_file, result_path=immuneml_dir)

# Show the identifier and metadata of the loaded repertoire.
print(f"Repertoire id: {repertoire.identifier}\nMetadata: {repertoire.metadata}")

## A new repertoire with a signal

In [None]:
from util.implanting import make_default_signal, make_repertoire_with_signal
from immuneML.util.PathBuilder import PathBuilder

# Signal object used for implanting; its `id` attribute is reused below as the signal/label name.
signal = make_default_signal()

# Path handed to the implanting helpers.
# NOTE(review): PathBuilder.build presumably creates the directory if missing and returns it — confirm.
result_path = PathBuilder.build("./data/implanted/")

# Derive a new repertoire from `repertoire` with `signal` implanted.
new_repertoire = make_repertoire_with_signal(repertoire, signal, result_path)

# Show the identifier and metadata of the implanted repertoire.
print(f"Repertoire id: {new_repertoire.identifier}\nMetadata: {new_repertoire.metadata}")

## A new repertoire with no signal

In [None]:
from util.implanting import make_repertoire_without_signal

# Derive the signal-free counterpart of `repertoire`; only the signal's id is passed (as `signal_name`),
# together with the same `result_path` used for the implanted repertoire.
new_repertoire_no_signal = make_repertoire_without_signal(repertoire=repertoire, signal_name=signal.id, path=result_path)

# Show the identifier and metadata of the signal-free repertoire.
print(f"Repertoire id: {new_repertoire_no_signal.identifier}\nMetadata: {new_repertoire_no_signal.metadata}")

## Making a dataset

In [None]:
from util.implanting import make_AIRR_dataset

# Combine the implanted and the signal-free repertoire into a single dataset.
example_repertoires = [new_repertoire, new_repertoire_no_signal]
dataset = make_AIRR_dataset(
    repertoires=example_repertoires,
    path=Path("./data/exporting"),
    dataset_name="new_dataset",
    signal_name=signal.id,
)

# Show the labels attached to the dataset.
print(dataset.labels)

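The dataset above is exported under `./data/exporting` (the `path` passed to `make_AIRR_dataset`). The remaining cells simulate a larger dataset with DagSim (`dagsim.baseDS`).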

In [None]:
import os
import subprocess
import uuid
from pathlib import Path

import dagsim.baseDS as ds
import numpy as np
from immuneML.util.PathBuilder import PathBuilder

from util.load_repertoire import load_olga_repertoire, load_iml_repertoire
from util.implanting import make_default_signal, make_repertoire_with_signal
from util.implanting import make_repertoire_without_signal
from util.implanting import make_AIRR_dataset

# NOTE(review): every path in this notebook is relative to the current working directory. The
# disabled chdir below suggests the notebook was at some point run from "./simData" — confirm.
# os.chdir("./simData")

# Seed NumPy's global RNG so that the np.random draws made during the simulation (confounder,
# immune state, and the seed passed to OLGA) are reproducible after Restart & Run All.
# Repertoire ids are generated with uuid4 and are therefore still different on every run.
SEED = 1
np.random.seed(SEED)

# Path handed to the implanting helpers and the signal object that is implanted.
result_path = PathBuilder.build("./data/implanted/")
signal = make_default_signal()

In [None]:
def get_state(confounder):
    """Draw a binary immune state whose probability depends on the confounder.

    Args:
        confounder: value of the confounder node; compared against 1.

    Returns:
        One draw of ``np.random.binomial`` with ``n=1``: success probability 0.5 when
        ``confounder == 1`` and 0.2 for any other value.
    """
    if confounder == 1:
        success_probability = 0.5
    else:
        success_probability = 0.2
    return np.random.binomial(n=1, p=success_probability)

def generate_repertoire(state, confounder):
    """Generate one OLGA repertoire, apply the implanting step chosen by `state`, return its id.

    The confounder selects the OLGA generation model: the default human TRB model when
    ``confounder == 1``, otherwise the custom model in ``./olga_model_removed_TRBV5_1``.
    The generated file is written to ``./data/olga/<uid>.tsv`` and loaded with
    ``load_olga_repertoire``. When ``state == 0`` the repertoire is passed to
    ``make_repertoire_without_signal``; for any other value it is passed to
    ``make_repertoire_with_signal``.

    Relies on the notebook-level globals ``signal`` and ``result_path``.

    Args:
        state: immune state of the sample; 0 means no signal is implanted.
        confounder: confounder value; 1 selects the default OLGA model.

    Returns:
        str: the UUID used as the file stem of the generated OLGA file.

    Raises:
        subprocess.CalledProcessError: if ``olga-generate_sequences`` exits with a non-zero status.
        FileNotFoundError: if the ``olga-generate_sequences`` executable cannot be found.
    """
    uid = str(uuid.uuid4())
    # Exclusive integer upper bound; same value as the float literal 10e5 (== 1_000_000).
    seed = int(np.random.randint(0, 1_000_000))

    PathBuilder.build('./data/olga/')
    olga_file = Path("./data/olga/") / f"{uid}.tsv"

    # Build the command as an argument list (no shell involved, no string concatenation).
    command = ["olga-generate_sequences"]
    if confounder == 1:
        command.append("--humanTRB")
    command += ["-n", "1000", "-o", str(olga_file), f"--seed={seed}"]
    if confounder != 1:
        command += ["--set_custom_model_VDJ", "./olga_model_removed_TRBV5_1"]

    # check=True makes a failed OLGA run raise here instead of surfacing later as a
    # missing-file error while loading the repertoire.
    subprocess.run(command, check=True)

    rep = load_olga_repertoire(filepath=olga_file, result_path=Path("./data/immuneML_format/"))

    # NOTE(review): the repertoire objects returned by the implanting helpers are discarded and
    # only `uid` is returned — confirm that downstream code can locate the implanted repertoire
    # by this id.
    if state == 0:
        make_repertoire_without_signal(repertoire=rep, signal_name=signal.id, path=result_path)
    else:
        make_repertoire_with_signal(rep, signal, result_path)

    return uid

In [None]:
# DagSim nodes of the simulation graph.
# "sex": binary confounder drawn with np.random.binomial(n=1, p=0.5).
Confounder = ds.Generic(name="sex", function=np.random.binomial, arguments={"n": 1, "p":0.5})
# "state": immune state computed by get_state from the confounder node.
ImmuneState = ds.Generic(name="state", function=get_state, arguments={"confounder": Confounder})
# "repertoire": generate_repertoire receives both the state and the confounder node; its return
# value (the repertoire uid string) is what this node holds.
Repertoire = ds.Generic(name="repertoire", function=generate_repertoire, arguments={"state": ImmuneState, "confounder": Confounder})

In [None]:
# Assemble the three nodes into one graph and draw 10 samples from it.
CAIRR_graph = ds.Graph(name="CAIRR_graph", list_nodes=[Confounder, ImmuneState, Repertoire])
# The result is indexed by node name further down (CAIRR_data["repertoire"]).
# NOTE(review): csv_name presumably also makes DagSim write the samples to a CSV file — confirm.
CAIRR_data = CAIRR_graph.simulate(num_samples=10, csv_name="CAIRR_data")

In [None]:
# Re-load each simulated repertoire using the ids stored under "repertoire" in the simulation output.
# NOTE(review): repertoires are loaded from ./data/immuneML_format/repertoires/, while the implanting
# helpers were given ./data/implanted/ — confirm that the loaded repertoires are the implanted ones.
repertoire_dir = Path("./data/immuneML_format/repertoires/")
simulated_repertoires = [
    load_iml_repertoire(filepath=repertoire_dir, identifier=repertoire_id)
    for repertoire_id in CAIRR_data["repertoire"]
]

dataset = make_AIRR_dataset(
    repertoires=simulated_repertoires,
    path=Path("./data/exported/"),
    dataset_name="new_dataset",
    signal_name=signal.id,
)

# Summarise the resulting dataset.
print(f"Dataset id: {dataset.identifier}, dataset name: {dataset.name}, number of repertoires: {dataset.get_example_count()}")