<a href="https://colab.research.google.com/github/zephyris/tryptag/blob/main/examples/analyse_list.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Install tryptag module

#@markdown Install the `tryptag` module using `pip`.

!pip install git+https://github.com/zephyris/tryptag

In [None]:
#@title Setup tryptag

#@markdown The `tryptag` module handles all data retrieval.
#@markdown `tryptools` provides tools for image analysis of trypanosome cells.

# import and set up TrypTag instance
from tryptag import TrypTag
from tryptag import tryptools
tryptag = TrypTag()

# define a function to print worklists nicely
def prettyprint_worklist(worklist, max_shown=15):
    """Print a short summary of a worklist.

    Prints the total number of entries, then one line per entry giving its
    `gene_id`, `terminus` and `localisation` attributes, for at most
    `max_shown` entries. If entries were left out, a final line reports how
    many.

    Args:
        worklist: Sliceable sequence of entries exposing `gene_id`,
            `terminus` and `localisation` attributes.
        max_shown: Maximum number of entries to print in full.
    """
    total = len(worklist)
    print(total, "worklist entries")
    for cell_line in worklist[:max_shown]:
        print(f"{cell_line.gene_id} {cell_line.terminus}: {cell_line.localisation}")
    # Number of entries that were not listed above.
    hidden = total - max_shown
    if hidden > 0:
        print(" ... and", hidden, "more")

# define a function to print results nicely
def prettyprint_results(results, max_shown=15):
    """Print a short summary of analysis results.

    Prints the total number of results, then one line per result giving the
    `gene_id` and `terminus` of its `"cell_line"` entry followed by its
    `"result"` entry, for at most `max_shown` results. If results were left
    out, a final line reports how many.

    Args:
        results: Sliceable sequence of dicts, each with a `"cell_line"` key
            (object exposing `gene_id` and `terminus`) and a `"result"` key.
        max_shown: Maximum number of results to print in full.
    """
    total = len(results)
    print(total, "results")
    for item in results[:max_shown]:
        line = item["cell_line"]
        print(f"{line.gene_id} {line.terminus}:", item["result"])
    # Number of results that were not listed above.
    hidden = total - max_shown
    if hidden > 0:
        print(" ... and", hidden, "more")

In [None]:
#@title Set up a worklist

#@markdown A worklist is a list of `CellLine` objects to analyse, recording `life_stage`, `gene_id` and `terminus` for each.

#@markdown `tryptag` provides some useful built-in worklists. `worklist_parental`, four replicates of the untagged parental cell line, and `worklist_all`, all `gene_id`/`terminus` combinations with image data.

#@markdown The `localisation_search` function also returns a list of `CellLine` objects in the format for use as a worklist.

worklist_type = "worklist_parental" #@param ["localisation_search", "worklist_all", "worklist_parental"]

#@markdown If you're trying out this notebook, you probably want to use the `worklist_parental` worklist to avoid downloading too much data.

# Build the worklist matching the `worklist_type` form selection.
if worklist_type == "worklist_parental":
    print("Using parental worklist")
    worklist = tryptag.worklist_parental()
elif worklist_type == "worklist_all":
    print("Using all worklist")
    worklist = tryptag.worklist_all()
elif worklist_type == "localisation_search":
    print("Using example localisation search worklist")
    print("Searching for `lipid droplets`")
    worklist = tryptag.localisation_search("lipid droplet")
else:
    # Without this branch an unrecognised value would either leave `worklist`
    # undefined (NameError below) or silently reuse a worklist left over from
    # an earlier run of this cell.
    raise ValueError(f"Unknown worklist_type: {worklist_type!r}")

prettyprint_worklist(worklist)

In [None]:
#@title A simple multithreaded analysis

#@markdown A large scale analysis can use the `tryptag` built-in parallel processes or thread handling in the `analyse_list` function.
#@markdown It requires a user-defined analysis function that takes `tryptag` (`TrypTag` object) and `cell_line` (`CellLine` object) as arguments.

#@markdown You can do anything you want to analyse the `cell_line` in this function, though you probably want to loop through fields or cells and analyse them using `tryptools` or custom image analysis.
#@markdown The analysis function should return the result data you want linked with that `cell_line`.

# user-defined analysis function, taking the arguments tryptag and cell_line
# tryptag is an instance of TrypTag, passed from the parallel processing code
# cell_line is passed from your worklist, the parallel processing code has already fetched the data for that cell line
def analyse(tryptag, cell_line):
    """Tally the cells of one cell line by their `count_kn` value.

    Every cell listed by `tryptag.cell_list(cell_line)` is opened with
    `tryptag.open_cell` and passed to `tryptools.cell_kn_analysis`; the
    `"count_kn"` entry of each analysis result is used as the tally key.

    Args:
        tryptag: `TrypTag` instance used to list and open cells.
        cell_line: Worklist entry (`CellLine`) to analyse.

    Returns:
        dict mapping each observed `count_kn` value to the number of cells
        that had that value.
    """
    tally = {}
    for cell in tryptag.cell_list(cell_line):
        image = tryptag.open_cell(cell_line, cell.field.index, cell.index)
        kn_class = tryptools.cell_kn_analysis(image)["count_kn"]
        # First sighting of a value starts from 0, then counts this cell.
        tally[kn_class] = tally.get(kn_class, 0) + 1
    return tally

# do the analysis
results = tryptag.analyse_list(worklist, analyse)

#@markdown The result is a list of dicts where every dict has a `result` and a `cell_line` entry.
#@markdown These may be in a different order to the original worklist. 

# print the results
prettyprint_results(results)


In [None]:
#@title Controlling multiprocessing parameters

#@markdown You can control how the parallel processing is handled using `multiprocess_mode`.

#@markdown `process` uses multiple python processes, `thread` uses multiple python threads.
#@markdown `None` uses no parallel processing.
#@markdown Default behaviour (if you do not set `multiprocess_mode`) is to use `process`, which is probably the best solution unless you really know what you're doing.
#@markdown Selecting `None` can help simplify error messages for debugging.

multiprocess_mode = "thread" #@param ["None", "process", "thread"]
# The form offers the choice "None" as a string; convert it to the real None object.
multiprocess_mode = None if multiprocess_mode == "None" else multiprocess_mode

#@markdown You can control the number of threads/processes using `workers`. Default (if you do not set `workers`) is to use one worker per CPU core.

workers = 4 #@param integer

results = tryptag.analyse_list(worklist, analyse, multiprocess_mode=multiprocess_mode, workers=workers)

# print the results
prettyprint_results(results)