<a href="https://colab.research.google.com/github/zephyris/discoba_alphafold/blob/main/examples/localisation_search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Install tryptag module

#@markdown Install the `tryptag` module using `pip`.

!pip install git+https://github.com/zephyris/tryptag

In [None]:
#@title Setup tryptag

#@markdown The `tryptag` module handles all data retrieval and provides functions for searching based on localisation.

# import the tryptag module and set up a TrypTag instance
from tryptag import TrypTag
# non-verbose output, only small resources are requested so should be pretty responsive
# note: the instance is bound to the name `tryptag`, which the helper functions and later cells use as a global
tryptag = TrypTag(verbose=False)

# define a function to print results nicely
def prettyprint_hits(hits, max_shown=15):
  """Print the number of hits followed by one summary line per hit.

  hits: list of hits, each indexable by "gene_id" and "terminus".
  max_shown: maximum number of hits to list individually; any remainder is
    reported as a single " ... and N more" line.

  Each line shows the gene id, the terminus and the localisation looked up in
  the global `tryptag.gene_list`, formatted with `loc_to_str`.
  """
  total = len(hits)
  print(total, "hits")
  # clamp to zero so that a non-positive max_shown lists nothing
  n_listed = max(0, min(total, max_shown))
  for hit in hits[:n_listed]:
    gene_id = hit["gene_id"]
    terminus = hit["terminus"]
    locs = tryptag.gene_list[gene_id][terminus]["loc"]
    print(gene_id, terminus, ":", loc_to_str(locs))
  # report how many hits were left out of the listing
  if total > max_shown:
    print(" ... and", total - max_shown, "more")

# define a function to print localisation objects as nice strings
def loc_to_str(locs):
  """Format a list of localisation annotations as a single string.

  locs: iterable of dicts, each with a "term" entry and optionally a
    "modifiers" list of strings.

  Returns the terms joined by ", "; a term with a "modifiers" entry is
  followed by its modifiers in square brackets, eg. "nucleoplasm[weak, 25%]".
  """
  def describe(loc):
    # term name, plus the bracketed modifiers when the key is present
    label = loc["term"]
    if "modifiers" in loc:
      label += "[" + ", ".join(loc["modifiers"]) + "]"
    return label

  return ", ".join(describe(loc) for loc in locs)

In [None]:
#@title Simple search

#@markdown Search for localisations matching the query localisation using the `localisation_search` function.

# a simple search: proteins localising to the nucleus
# `query` is a Colab form field; the search is called with the query only, so all other options keep their defaults
query = "nucleus" #@param {type:"string"}
print("Query:", query)
print("")
hits = tryptag.localisation_search(query)

# result is a list of hits, in the form {"gene_id": gene_id, "terminus": terminus}
# summarise them with the helper defined in the setup cell (lists at most 15 hits by default)
prettyprint_hits(hits)

In [None]:
#@title Advanced search

#@markdown By default, a search matches all localisations which are a child of the query (eg. `nucleoplasm` is a child of `nucleus`) and excludes localisations with a modifier term of `weak` or `<10%`. This can be customised.


# term from the localisation ontology
query = "nucleoplasm" #@param {type:"string"}
# true or false, whether to recurse matches to child structures
match_subterms = False #@param {type:"boolean"}

#@markdown Comma delimited lists of modifiers
# list of modifiers which preclude a hit
# (entered as one comma delimited string; parsed into a list further down in this cell)
exclude_modifiers = "weak, <10%, 25%" #@param {type:"string"}
# list of modifiers which must be present for a hit
# (entered as one comma delimited string; parsed into a list further down in this cell)
required_modifiers = "strong" #@param {type:"string"}

#@markdown If a localisation matching the query has _any_ modifier in the `exclude_modifiers` list then it will not be accepted as a hit.
#@markdown A localisation matching the query must have _all_ of the modifiers in the `required_modifiers` list to be accepted as a hit.

# echo the settings so the printed results are self-describing
print("Query:", query)
print("Match subterms:", match_subterms)

def split_to_list(str):
  """Parse a comma delimited string into a list of entries.

  str: the raw text, eg. "weak, <10%, 25%".

  Returns a list of the entries with surrounding whitespace removed. Blank
  entries (from an empty field, whitespace only, or stray/trailing commas)
  are dropped, and None is returned when no entries remain.
  """
  # NOTE: the parameter name shadows the builtin `str`; it is kept unchanged
  # so that existing keyword callers keep working
  entries = [entry.strip() for entry in str.split(",")]
  # drop blanks so that an empty string is never passed on as a modifier
  entries = [entry for entry in entries if entry]
  return entries or None

# parse the comma delimited modifier strings from the form into lists (None when a field is left empty)
# note: each name is rebound from the raw form string to the parsed value
exclude_modifiers = split_to_list(exclude_modifiers)
print("Exclude modifiers:", exclude_modifiers)
required_modifiers = split_to_list(required_modifiers)
print("Include modifiers:", required_modifiers)
print("")

# run the search with every option passed explicitly, then summarise the hits
hits = tryptag.localisation_search(query, match_subterms=match_subterms, exclude_modifiers=exclude_modifiers, required_modifiers=required_modifiers)
prettyprint_hits(hits)

In [None]:
#@title Combining search results

#@markdown Complex search strategies can be achieved by combining results from multiple searches.

# the two queries to combine and the way to combine them (Colab form fields)
query_1 = "nucleoplasm" #@param {type:"string"}
query_2 = "cytoplasm" #@param {type:"string"}
# dropdown: the value is one of the option strings listed in the #@param annotation
combine_strategy = "intersection" #@param ["union", "intersection", "1 minus 2"]

# do the searches
# each search is called with the query only, and at most 5 hits are listed per query
print("Query 1")
hits_1 = tryptag.localisation_search(query_1)
prettyprint_hits(hits_1, max_shown=5)
print("")

print("Query 2")
hits_2 = tryptag.localisation_search(query_2)
prettyprint_hits(hits_2, max_shown=5)
print("")

# define union, intersection and 1 minus 2 functions
def union(hits_1, hits_2):
  """Return the hits of hits_1 followed by the hits of hits_2 not already present.

  Works on a copy, so neither input list is modified. Hits are compared by
  equality; the order of hits_1, then of hits_2, is preserved.
  """
  combined = hits_1.copy()
  for candidate in hits_2:
    if candidate in combined:
      # already present (from hits_1 or earlier in hits_2)
      continue
    combined.append(candidate)
  return combined

def intersection(hits_1, hits_2):
  """Return the hits of hits_1 which also occur in hits_2.

  Hits are compared by equality; the order (and any repeats) of hits_1 is kept.
  """
  return [candidate for candidate in hits_1 if candidate in hits_2]

def minus(hits_1, hits_2):
  """Return the hits of hits_1 which do not occur in hits_2.

  Hits are compared by equality; the order (and any repeats) of hits_1 is kept.
  """
  return [candidate for candidate in hits_1 if candidate not in hits_2]

# print the combined hits
# the strings compared against must match the options of the `combine_strategy` dropdown exactly
if combine_strategy == "union":
  print("Union")
  prettyprint_hits(union(hits_1, hits_2), max_shown=5)
elif combine_strategy == "intersection":
  print("Intersection")
  prettyprint_hits(intersection(hits_1, hits_2), max_shown=5)
elif combine_strategy in ("1 minus 2", "1minus2"):
  # the dropdown offers "1 minus 2"; the "1minus2" spelling is still accepted for backward compatibility
  print("Query 1 minus query 2")
  prettyprint_hits(minus(hits_1, hits_2), max_shown=5)
else:
  # report an unrecognised strategy rather than silently printing nothing
  print("Unknown combine strategy:", combine_strategy)

In [None]:
#@title Ontology

#@markdown To construct an intelligent query, ensure that you check the ontology.

#@markdown In particular, consider the parent terms when carrying out a search where `match_subterms` is true, including the default behaviour.
#@markdown Most hierarchies are obvious; however, some may not be. In particular, small cytoplasmic organelles like `glycosome` and `acidocalcisome` are children of `cytoplasm`.

def prettyprint_ontology(ontology):
  """Print a human readable summary of every term in an ontology.

  ontology: dict mapping a term name to a dict of properties. Every property
    is optional and is only printed when present:
      "synonyms": list of strings
      "comment": description text
      "ident": identification text
      "go": GO term id, printed as an AmiGO URL
      "parent": list of term names, printed joined by " -> "
      "children": list of term names
      "examples": list of dicts with an "id", printed as tryptag.org URLs

  Each term is printed as its name, an underline, the available properties
  and a trailing blank line.
  """
  for name, term in ontology.items():
    print(name)
    print("-" * len(name))
    if "synonyms" in term:
      print("Synonyms:", ", ".join(term["synonyms"]))
    if "comment" in term:
      print("Description:", term["comment"])
    if "ident" in term:
      print("Identification:", term["ident"])
    if "go" in term:
      print("GO term:", "http://amigo.geneontology.org/amigo/term/" + term["go"])
    # guarded like every other field, so a term without a parent does not raise KeyError
    if "parent" in term:
      print("Parent hierarchy:", " -> ".join(term["parent"]))
    if "children" in term:
      print("Children:", ", ".join(term["children"]))
    if "examples" in term:
      urls = ["http://tryptag.org/?id=" + example["id"] for example in term["examples"]]
      print("Examples:", ", ".join(urls))
    print("")

# call fetch_ontologies() before reading tryptag.localisation_ontology
# (presumably this is what loads the ontology data - TODO confirm against the tryptag module)
tryptag.fetch_ontologies()
# print a summary of every term in the localisation ontology
prettyprint_ontology(tryptag.localisation_ontology)