# Experiment 2: influence of an experimental protocol on the immune state prediction

In [1]:
from util.implanting import make_immune_signal, make_exp_protocol_signal

# Signal representing the immune state.
immune_state_signal = make_immune_signal()

# One signal per experimental protocol (protocol ids 1 and 2), created in that order.
exp_protocol1_signal, exp_protocol2_signal = [
    make_exp_protocol_signal(protocol_id=protocol_id) for protocol_id in (1, 2)
]


In [2]:
import os
import subprocess
import uuid
from pathlib import Path

import dagsim.baseDS as ds
import numpy as np
from immuneML.util.PathBuilder import PathBuilder

from util.implanting import make_AIRR_dataset
from util.implanting import make_immune_signal, make_repertoire_with_signal
from util.implanting import make_repertoire_without_signal
from util.load_repertoire import load_olga_repertoire, load_iml_repertoire

# os.chdir("./simData")  # disabled: every path in this notebook is relative to the current working directory
# Location passed as the output path to the signal-implanting helpers in generate_repertoire.
# NOTE(review): PathBuilder.build presumably creates the directory and returns its path - confirm.
result_path = PathBuilder.build("./data/implanted/")

In [3]:
def get_state():
    """Draw the immune state: a single Bernoulli(0.5) sample (0 or 1)."""
    sampled_state = np.random.binomial(1, 0.5)
    return sampled_state

def get_protocol(state):
    """Draw the experimental protocol (0 or 1) conditioned on the immune state.

    The draw is a single Bernoulli sample with success probability 0.5 when
    ``state == 1`` and 0.2 for any other value of ``state`` (including None).
    """
    if state == 1:
        success_prob = 0.5
    else:
        success_prob = 0.2
    return np.random.binomial(n=1, p=success_prob)

def generate_repertoire(state, protocol, seed, exp: str):
    """Generate one OLGA repertoire, implant the signals, and return its id.

    Steps:
      1. Run ``olga-generate_sequences`` to write 1000 human TRB sequences to
         ``./data/olga/<uid>.tsv`` using ``seed``.
      2. Load that file with ``load_olga_repertoire`` into
         ``./data/<exp>/immuneML_format/``.
      3. Process the repertoire with or without the immune state signal,
         depending on ``state``.
      4. Implant one of the two experimental protocol signals, depending on
         ``protocol``.

    Args:
        state: Immune state; 0 means the immune state signal is not implanted,
            any other value means it is.
        protocol: Protocol indicator; 0 selects ``exp_protocol1_signal``, any
            other value selects ``exp_protocol2_signal``.
        seed: Seed forwarded to OLGA via ``--seed``.
        exp: Experiment name used as a sub-directory of ``./data/``.

    Returns:
        The random UUID string used as the OLGA file name.

    Raises:
        subprocess.CalledProcessError: If OLGA exits with a non-zero status.
        FileNotFoundError: If the ``olga-generate_sequences`` executable
            cannot be found.
    """
    uid = str(uuid.uuid4())

    PathBuilder.build('./data/olga/')
    olga_file = Path("./data/olga/") / f"{uid}.tsv"

    # Argument list instead of a concatenated shell string: no shell parsing is
    # involved, and check=True turns a failed OLGA run into an immediate error
    # instead of a confusing failure when the missing file is loaded below.
    subprocess.run(
        [
            "olga-generate_sequences",
            "--humanTRB",
            "-n", "1000",
            "-o", str(olga_file),
            f"--seed={seed}",
        ],
        check=True,
    )

    rep = load_olga_repertoire(filepath=olga_file, result_path=Path(f"./data/{exp}/immuneML_format/"))

    if state == 0:
        rep = make_repertoire_without_signal(repertoire=rep, signal_name=immune_state_signal.id, path=result_path)
    else:
        rep = make_repertoire_with_signal(rep, immune_state_signal, result_path)

    protocol_signal = exp_protocol1_signal if protocol == 0 else exp_protocol2_signal
    # NOTE(review): the repertoire returned by this call is discarded and the
    # OLGA uid is returned instead - confirm that loading by uid downstream
    # picks up the repertoire with the implanted signals.
    make_repertoire_with_signal(rep, protocol_signal, result_path)

    return uid

## Exp2a

In [4]:
exp = "Exp2a"

# Exp2a: the protocol node takes the immune state node as its parent.
# NOTE(review): size_field="stop" presumably makes dagsim pass the number of
# samples as np.arange's `stop` argument - confirm against the dagsim docs.
Index = ds.Generic(
    name="index",
    function=np.arange,
    size_field="stop",
)
ImmuneState = ds.Generic(
    name="state",
    function=get_state,
)
Protocol = ds.Generic(
    name="protocol",
    function=get_protocol,
    arguments={"state": ImmuneState},
)
# The index node doubles as the OLGA seed of each generated repertoire.
Repertoire = ds.Generic(
    name="repertoire",
    function=generate_repertoire,
    arguments={
        "state": ImmuneState,
        "protocol": Protocol,
        "seed": Index,
        "exp": exp,
    },
)

In [5]:
# Assemble the Exp2a nodes into a graph and draw 10 samples from it.
CAIRR_graph_a = ds.Graph(
    name="CAIRR_graph_2a",
    list_nodes=[Index, Protocol, ImmuneState, Repertoire],
)
CAIRR_data_a = CAIRR_graph_a.simulate(
    num_samples=10,
    csv_name="CAIRR_data_2a",
)


Starting sequence generation... 
Completed generating all 1000 sequences in 0.10 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.09 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.10 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.10 seconds.


In [6]:
# Load every repertoire listed in the "repertoire" column of the Exp2a
# simulation output and bundle them into a single dataset under ./data/exported/.
dataset = make_AIRR_dataset(
    repertoires=[
        load_iml_repertoire(
            filepath=Path("./data/" + exp + "/immuneML_format/repertoires/"),
            identifier=rep_id,
        )
        for rep_id in CAIRR_data_a["repertoire"]
    ],
    path=Path("./data/exported/"),
    dataset_name=exp,
    signal_names=[immune_state_signal.id],
)

print(f"Dataset id: {dataset.identifier}, dataset name: {dataset.name}, number of repertoires: {dataset.get_example_count()}")


Dataset id: e05deba6b9d74cde8445bca244f45c2a, dataset name: Exp2a, number of repertoires: 10


## Exp2b

In [7]:
exp = "Exp2b"

# Exp2b: the protocol node has no parent. The constant None is passed as
# "state"; because None != 1, get_protocol always draws with p=0.2.
Protocol = ds.Generic(
    name="protocol",
    function=get_protocol,
    arguments={"state": None},
)
# Re-created so that it refers to the new Protocol node and the new exp value.
Repertoire = ds.Generic(
    name="repertoire",
    function=generate_repertoire,
    arguments={
        "state": ImmuneState,
        "protocol": Protocol,
        "seed": Index,
        "exp": exp,
    },
)

In [8]:
# Assemble the Exp2b nodes into a graph and draw 10 samples from it.
CAIRR_graph_b = ds.Graph(
    name="CAIRR_graph_2b",
    list_nodes=[Index, Protocol, ImmuneState, Repertoire],
)
CAIRR_data_b = CAIRR_graph_b.simulate(
    num_samples=10,
    csv_name="CAIRR_data_2b",
)


Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.07 seconds.
Starting sequence generation... 
Completed generating all 1000 sequences in 0.08 seconds.


In [9]:
# Load every repertoire listed in the "repertoire" column of the Exp2b
# simulation output and bundle them into a single dataset under ./data/exported/.
dataset = make_AIRR_dataset(
    repertoires=[
        load_iml_repertoire(
            filepath=Path("./data/" + exp + "/immuneML_format/repertoires/"),
            identifier=rep_id,
        )
        for rep_id in CAIRR_data_b["repertoire"]
    ],
    path=Path("./data/exported/"),
    dataset_name=exp,
    signal_names=[immune_state_signal.id],
)

print(f"Dataset id: {dataset.identifier}, dataset name: {dataset.name}, number of repertoires: {dataset.get_example_count()}")



Dataset id: 37904b13104e4d7981f53c3b6c3e042a, dataset name: Exp2b, number of repertoires: 10
