<a href="https://colab.research.google.com/github/zephyris/discoba_alphafold/blob/main/examples/analyse_list.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Install tryptag module

#@markdown Install the `tryptag` module using `pip`.

!pip install git+https://github.com/zephyris/tryptag

In [None]:
#@title Setup tryptag

#@markdown The `tryptag` module handles all data retrieval.
#@markdown `tryptools` provides tools for image analysis of trypanosome cells.

# import the tryptag package and create the TrypTag instance used by the cells below
from tryptag import TrypTag
from tryptag import tryptools
tryptag = TrypTag()

# define a function to print results nicely
def prettyprint_hits(hits, max_shown=15):
  """Print the number of hits, then one summary line per hit.

  At most `max_shown` hits are listed; if there are more, a final line
  reports how many were not shown. A hit that has a "result" entry is
  printed with that result; otherwise its "loc" annotation is read from
  `tryptag.gene_list` and formatted with `loc_to_str`.
  """
  total = len(hits)
  print(total, "hits")
  for index in range(min(total, max_shown)):
    hit = hits[index]
    gene_id, terminus = hit["gene_id"], hit["terminus"]
    if "result" in hit:
      summary = hit["result"]
    else:
      # no analysis result attached: fall back to the localisation annotation
      summary = loc_to_str(tryptag.gene_list[gene_id][terminus]["loc"])
    print(gene_id, terminus, ":", summary)
  hidden = total - max_shown
  if hidden > 0:
    print(" ... and", hidden, "more")

# define a function to print localisation objects as nice strings
def loc_to_str(locs):
  """Format a list of localisation dicts as one comma-separated string.

  Each entry contributes its "term". If the entry has a "modifiers" key, the
  modifiers are appended in square brackets, e.g. "cytoplasm[weak, points]".
  An empty list gives an empty string.
  """
  loc_strs = []
  for loc in locs:
    # named `text` so that the builtin `str` is not shadowed
    text = loc["term"]
    if "modifiers" in loc:
      text += "[" + ", ".join(loc["modifiers"]) + "]"
    loc_strs.append(text)
  return ", ".join(loc_strs)

In [None]:
#@title Set up a worklist

#@markdown A worklist is a list of dicts indicating the `gene_id` and `terminus` to analyse, in the form: `{"gene_id": "Tb927.7.1920", "terminus": "n"}`.

#@markdown `tryptag` provides some useful built-in worklists. `worklist_parental`, four replicates of the untagged parental cell line, and `worklist_all`, all `gene_id`/`terminus` combinations with image data.

#@markdown The `localisation_search` function also returns a list in the correct format for use as a worklist.

worklist_type = "parental" #@param ["localisation_search", "all", "parental"]

#@markdown If you're trying out this notebook, you probably want to use the `parental` worklist to avoid downloading too much data.

# map the selected worklist type onto an actual worklist
if worklist_type == "parental":
  print("Using parental worklist")
  worklist = tryptag.worklist_parental
elif worklist_type == "all":
  print("Using all worklist")
  worklist = tryptag.worklist_all
elif worklist_type == "localisation_search":
  # keep the query in one place so the printed message always matches the search
  search_term = "lipid droplet"
  print("Using example localisation search worklist")
  print("Searching for `" + search_term + "`")
  worklist = tryptag.localisation_search(search_term)
else:
  # fail here rather than with a NameError (or a stale worklist from an earlier run) below
  raise ValueError("Unknown worklist_type: " + repr(worklist_type))

prettyprint_hits(worklist)

In [None]:
#@title A simple multithreaded analysis

#@markdown A large scale analysis can use the `tryptag` built-in parallel processes or thread handling in the `analyse_list` function.
#@markdown It requires a user-defined analysis function that takes `tryptag`, `gene_id` and `terminus` as arguments.

#@markdown You can do anything you want to analyse the `gene_id` and `terminus` in this function, though you probably want to loop through fields or cells and analyse them using `tryptools` or custom image analysis.
#@markdown The analysis function should return the result data you want linked with that `gene_id` and `terminus`.

# user-defined analysis function, taking the arguments tryptag, gene_id and terminus
# tryptag is an instance of TrypTag, passed from the parallel processing code
# gene_id and terminus are passed from your worklist, the parallel processing code has already fetched the data for that gene_id/terminus
def analyse(tryptag, gene_id, terminus):
  """Run `tryptools.cell_morphology_analysis` on every cell of one gene_id/terminus.

  Cells are visited field by field, using the "cells" entry of
  `tryptag.gene_list[gene_id][terminus]` to determine how many fields and
  cells there are. Returns a list with one analysis result per cell, in
  field then cell order.
  """
  def cells_by_field():
    return tryptag.gene_list[gene_id][terminus]["cells"]

  def measure(field, cell):
    # open_cell must yield exactly five items; only the last three are used here
    _pha, _mng, dna, pth, dth = tryptag.open_cell(gene_id, terminus, field, cell)
    return tryptools.cell_morphology_analysis(pth, dth, dna)

  return [
    measure(field, cell)
    for field in range(len(cells_by_field()))
    for cell in range(len(cells_by_field()[field]))
  ]

multiprocess_mode = "process" #@param ["None", "process", "thread"]
# the form supplies a string, so translate "None" into the Python value None
if multiprocess_mode == "None":
  multiprocess_mode = None

workers = 2 #@param integer

# do the analysis, passing on the multiprocess_mode and workers selected above
# NOTE(review): this analyses the built-in parental worklist, not the `worklist` chosen in the worklist cell - confirm that is intended
results = tryptag.analyse_list(tryptag.worklist_parental, analyse, multiprocess_mode=multiprocess_mode, workers=workers)

#@markdown The result is a list of dicts where every dict now has a `result` entry in addition to `gene_id` and `terminus`, in the form: `{"gene_id": "Tb927.7.1920", "terminus": "n", "result": "analysis_return_value"}`.
#@markdown These may be in a different order to the original worklist.

# print the results
prettyprint_hits(results)


In [None]:
#@title Controlling multiprocessing parameters

#@markdown You can control how the parallel processing is handled using `multiprocess_mode`.

#@markdown `process` uses multiple python processes, `thread` uses multiple python threads.
#@markdown `None` uses no parallel processing.
#@markdown Default behaviour (if you do not set `multiprocess_mode`) is to use `process`, which is probably the best solution unless you really know what you're doing.

multiprocess_mode = "process" #@param ["None", "process", "thread"]
# the form supplies a string, so translate "None" into the Python value None
if multiprocess_mode == "None":
  multiprocess_mode = None

#@markdown You can control the number of threads/processes using `workers`. Default (if you do not set `workers`) is to use one worker per CPU core.

workers = 2 #@param integer

# pass the selected mode through; a hard-coded None here would make the form setting above have no effect
# NOTE(review): this analyses the built-in parental worklist, not the `worklist` chosen in the worklist cell - confirm that is intended
results = tryptag.analyse_list(tryptag.worklist_parental, analyse, multiprocess_mode=multiprocess_mode, workers=workers)

# print the results
prettyprint_hits(results)