#### TODO
- ~~Generate list of marginal components for N-locus haplotype~~
- ~~Generate list of precedence levels for N-locus haplotype~~
- ~~For each precedence level, generate marginal component groupings which cover all N loci, and where the number of loci in each component is at most _Max Loci_~~
- Define a data structure for caching Cypher query results from lookups of marginal components
- Define Cypher queries for batch lookup of marginal components relevant/related to the input HLA

#### combi

A Pythonic package for combinatorics.  See also:

- https://pypi.python.org/pypi/combi/1.1.2
- https://combi.readthedocs.io/en/stable/

In [None]:
from combi import CombSpace, Comb

#### Combinatorics

A module to supplement Python's itertools.  See also:

- https://pypi.python.org/pypi/Combinatorics/
- http://phillipmfeldman.org/Python/combinatorics.html
- [https://en.wikipedia.org/wiki/Partition_(number_theory)](https://en.wikipedia.org/wiki/Partition_(number_theory%29)

To install, download the zip file from one of the links above and install it with pip (e.g. `pip install <downloaded-file>.zip`).

In [None]:
from Combinatorics import m_way_unordered_combinations, partitions2

#### frozendict

An immutable wrapper around dictionaries.  See also:

- https://pypi.python.org/pypi/frozendict/

In [None]:
from frozendict import frozendict

In [None]:
from collections import OrderedDict

In [None]:
# Single-letter codes of all loci under consideration, one character per locus
# (the same letters appear as the single-letter codes in the Locus class).
FULL_LOCI = 'ABCQR'

In [None]:
# Number of loci, i.e. the number of characters in FULL_LOCI.
NUM_LOCI = len(FULL_LOCI)

In [None]:
# For each partition of NUM_LOCI (visited in the reverse of the order in which
# partitions2 yields them), print all marginal component combinations.
# Each component is coerced to a Comb and applied to FULL_LOCI -- presumably this
# maps the component's positions onto the locus letters; confirm against the combi docs.
for partition in list(partitions2(NUM_LOCI))[::-1]:
    print("\n", list(reversed(partition)), "partition combinations:")
    for combination in m_way_unordered_combinations(NUM_LOCI, partition):
        print(' ', [
            Comb.coerce(component, CombSpace(NUM_LOCI, len(component))).apply(FULL_LOCI)
            for component in combination
        ])

In [None]:
# example CombSpace usage: join each 2-element combination drawn from FULL_LOCI
# into a string, keeping only the combinations that contain 'B'
print([
    ''.join(pair)
    for pair in CombSpace(FULL_LOCI, 2)
    if 'B' in pair
])

In [None]:
class Locus(object):
    """
    Lookup table between locus short names (e.g. 'DRB1') and their
    single-letter codes (e.g. 'R').

    All methods are classmethods; the class is not meant to be instantiated.

    (originally imagined as an Enum, but those are reportedly very slow in Python)
    """

    # class variables

    # short name -> single letter.  Built from a sequence of pairs rather than
    # from a dict literal so that the declared order is preserved on every
    # Python version (a dict literal only guarantees insertion order from 3.7).
    _locus_shortname_dict = OrderedDict([
        ('A', 'A'),
        ('B', 'B'),
        ('C', 'C'),
        ('DQB1', 'Q'),
        ('DRB1', 'R'),
    ])
    # single letter -> short name (inverse of the mapping above, same order).
    _locus_singleletter_dict = OrderedDict(
        (singleletter, shortname)
        for shortname, singleletter in _locus_shortname_dict.items()
    )

    @classmethod
    def all_shortnames(cls):
        """Return a view of all locus short names, in declaration order."""
        return cls._locus_shortname_dict.keys()

    @classmethod
    def all_singleletters(cls):
        """Return a view of all single-letter locus codes, in declaration order."""
        return cls._locus_singleletter_dict.keys()

    @classmethod
    def shortname_to_singleletter(cls, locus_shortname: str) -> str:
        """
        Return the single-letter code for a locus short name.

        Returns None (rather than raising) when the short name is unknown.
        """
        return cls._locus_shortname_dict.get(locus_shortname)

    @classmethod
    def singleletter_to_shortname(cls, locus_singleletter: str) -> str:
        """
        Return the locus short name for a single-letter code.

        Returns None (rather than raising) when the letter is unknown.
        """
        return cls._locus_singleletter_dict.get(locus_singleletter)


In [None]:
# Show both key views of Locus and the result of translating each key through
# the corresponding lookup method.
shortnames = list(Locus.all_shortnames())
singleletters = list(Locus.all_singleletters())
print("all_shortnames:             ", shortnames)
print("all_singleletters:          ", singleletters)
print("shortname_to_singleletter:  ", list(map(Locus.shortname_to_singleletter, shortnames)))
print("singleletter_to_shortname:  ", list(map(Locus.singleletter_to_shortname, singleletters)))

In [None]:
class HaplotypeType(object):
    """
    The set of loci a haplotype covers, identified by single-letter locus codes.

    Attributes:
        locus_set: frozenset of the single-letter codes (handy for set
            operations such as isdisjoint).
        locus_singleletters: the same codes as one sorted, duplicate-free
            string, usable as a canonical identifier.
    """

    def __init__(self, locus_singleletters: str):
        """
        Constructor, e.g. ht = HaplotypeType("ABR")

        The letters may be given in any order; repeated letters are collapsed.
        """
        self.locus_set = frozenset(locus_singleletters)
        # Derive the canonical string from the set (not from the raw input) so
        # that the two attributes always describe exactly the same loci, even
        # if the input repeats a letter.
        self.locus_singleletters = ''.join(sorted(self.locus_set))

    def __repr__(self):
        """Return a constructor-style representation, e.g. HaplotypeType('ABR')."""
        return "{}({!r})".format(type(self).__name__, self.locus_singleletters)


In [None]:
# Example instance covering loci A, B and R, used by the demonstration prints below.
haplotype_type = HaplotypeType("ABR")

In [None]:
# Show both attributes of the example HaplotypeType, then check whether its
# locus set shares any locus with two other haplotype types.
for label, value in (
    ("locus_set:       ", haplotype_type.locus_set),
    ("singleletters:   ", haplotype_type.locus_singleletters),
    ("isdisjoint(CQ):  ", haplotype_type.locus_set.isdisjoint(HaplotypeType("CQ").locus_set)),
    ("isdisjoint(QR):  ", haplotype_type.locus_set.isdisjoint(HaplotypeType("QR").locus_set)),
):
    print(label, value)

In [None]:
class Haplotype(object):
    """
    A haplotype parsed from a "~"-delimited string of "locus*allele" entries.

    Attributes:
        locus_shortname_dict: frozendict mapping locus short name -> allele
            (the text after the "*").  If a locus appears more than once in
            the input, the last entry wins.
        haplotype_type: HaplotypeType built from the single-letter codes of
            the loci present.
        canonical_tilde_delimited_haplotype: the haplotype re-serialized with
            its entries sorted by locus short name.
    """

    def __init__(self, tilde_delimited_haplotype: str):
        """
        Constructor, e.g. hk = Haplotype("A*01:01~B*08:01~DRB1*03:01")

        Raises:
            ValueError: if an entry does not consist of exactly one locus
                short name and one allele separated by "*", or if the locus
                short name is not known to Locus.
        """
        alleles_by_shortname = {}
        for allele in tilde_delimited_haplotype.split("~"):
            fields = allele.split("*")
            if len(fields) != 2:
                raise ValueError(
                    "malformed allele {!r} in haplotype {!r}: expected 'locus*allele'".format(
                        allele, tilde_delimited_haplotype))
            locus_shortname, allele_name = fields
            # Reject unknown loci here with a clear message; otherwise the
            # None returned by the lookup would only surface later as an
            # opaque TypeError from str.join.
            if Locus.shortname_to_singleletter(locus_shortname) is None:
                raise ValueError(
                    "unknown locus {!r} in haplotype {!r}".format(
                        locus_shortname, tilde_delimited_haplotype))
            alleles_by_shortname[locus_shortname] = allele_name

        self.locus_shortname_dict = frozendict(alleles_by_shortname)
        self.haplotype_type = HaplotypeType(
            ''.join(Locus.shortname_to_singleletter(sn) for sn in self.locus_shortname_dict.keys()))
        self.canonical_tilde_delimited_haplotype = '~'.join(
            '*'.join([sn, self.locus_shortname_dict[sn]])
            for sn in sorted(self.locus_shortname_dict.keys()))


In [None]:
# Example three-locus haplotype (A, B, DRB1), used by the demonstration prints below.
haplotype = Haplotype("A*01:01~B*08:01~DRB1*03:01")

In [None]:
# Show the parsed allele mapping, the derived single-letter locus string and
# the canonical (sorted) serialization of the example haplotype.
for label, value in (
    ("dict:           ", haplotype.locus_shortname_dict),
    ("singleletters:  ", haplotype.haplotype_type.locus_singleletters),
    ("haplotype:      ", haplotype.canonical_tilde_delimited_haplotype),
):
    print(label, value)

#### To Be Continued ??

For additional thoughts and notes about plan b, see also:

https://github.com/nmdp-bioinformatics/graph-imputation-match/blob/master/neo4j_guide/guide/imputation-plan-b.adoc

https://github.com/nmdp-bioinformatics/graph-imputation-match/blob/master/neo4j_guide/guide/imputation-plan-b-redux.adoc