In [1]:
import itertools
import re
import logging
import math
logging.basicConfig(format='%(message)s', level=logging.ERROR)

In [2]:
class Normalization012_rules_checker:
    """Finds the next rewriting rule applicable to an interleaved bi-sequence.

    The bi-sequence is handled as one string whose even positions hold the
    symbols of delta and whose odd positions hold the symbols of theta.
    Two families of rules are kept:

    * "bad prefix" rules, matched from the very start of the string;
    * "factor" rules, matched at any even offset of the string.
    """

    # Each entry is (regular expression of a bad prefix, correction string,
    # running number of the rule). The trailing comments keep the authors'
    # own rule numbering.
    _bad_prefixes_and_correction = (
        ("(00)*02", "0012", 1),                    # 1.
        ("0010", "122100", 2),                     # new added rule 29.
        ("00(120R)+10", "1220", 3),                # 2.
        ("0012(0R12)*01", "0R21", 4),              # 3.
        ("001221(1R11)*12", "1R22", 5),            # 7. fixed rule and rewrite
        ("0012211R(111R)*10", "1100", 6),          # 8.
        ("001222", "210012", 7),                   # 10.
        ("0012(0R12)*00", "0R20", 8),              # 12. fixed error
        ("00(120R)*11", "1221", 9),                # 13. -- 9
        ("(001221)*00122R", "211R", 10),           # 14. -- 10
        ("(001221)*001R", "120R", 11),             # 15. * because of rule 9 -- 11
        ("(001221)+0R", "002R", 12),               # 16. -- 12
        ("(001221)+1R2R", "222R", 13),             # 17. -- 13
        ("(001221)+002R2R", "210R", 14),           # 19. -- 14
        ("(001221)*00120R2R", "201R", 15),         # 18. -- 15
        ("00(120R)*122111", "1R11", 16),           # 20. -- 16
        ("0012(0R12)*0R2022", "2112", 17),         # 21. -- 17
        ("(00)+1212", "1R02", 18),                 # 22. -- 18
        ("0012(0R12)+2020", "2R10", 19),           # 23. (can be also + and then rule3)
        ("(00)+1210", "1120", 20),                 # 24. -- 20
        ("0012(0R12)*0R2112", "1022", 21),         # 25. -- 21
        ("001221(1R11)*0020", "2R10", 22),         # rewritten rule 26. -- 22
        ("001221(1R11)*1R2201", "0021", 23),       # 27. -- 23
        ("(00)+1202", "0R12", 24),                 # new added rule 28. -- 24
        ("(00)+001111", "1R11", 25),               # from (011, R11)
        ("(00)+001020", "2R10", 26),               # from (012, R00)
        ("00(120R)+202111", "120021", 27),         # 1st added rule for 2 pseudopal
        ("(00)+121121", "200211", 28),             # 2nd added rule for 2 pseudopal
        ("00(120R)+211020", "220110", 29),         # 3rd added rule for 2 pseudopal
        ("001221(1R11)*1R220020", "211200", 30),   # 4th rule added for 2 pseudopal
    )

    def __init__(self):
        """Build the E_i lookup tables, then generate and compile all rules."""
        self._ei = {letter: Ei(letter) for letter in ("0", "1", "2")}
        self._generate_factor_rules()
        self._compile_rules()

    def find_applicable_rule(self, biseq):
        """Return the next correction to apply to `biseq`, if there is one.

        Prefix rules are tried first; factor rules are consulted only when no
        prefix rule matches.

        Returns:
            A two-element list `[index, correction]`, where `index` is the
            position of the last two characters of the matched pattern and
            `correction` is the string associated with the rule; `None` when
            no rule applies.
        """
        logging.info("Checking for an applicable rule in" + str((biseq[0::2], biseq[1::2])))

        found = self._find_next_prefix_rule(biseq)
        if not found:
            found = self._find_next_factor_rule(biseq)
        return found if found else None

    def _find_next_prefix_rule(self, biseq):
        """Return `[index, correction]` for the first prefix rule (in table
        order) that matches the beginning of `biseq`, or `None`."""
        for entry in self._prefix_rules:
            hit = entry[0].match(biseq)
            if hit is None:
                continue
            logging.debug("prefix rule: " + str(entry))
            # The correction targets the last pair of the matched prefix.
            return [hit.end() - 2, entry[1]]
        return None

    def _find_next_factor_rule(self, biseq):
        """Collect factor-rule matches and return the one with the smallest
        position (the first such one on ties), or `None` when nothing matches.

        After every hit the working string is cut just behind that hit, so
        the patterns tried later can only match at the same place or further
        to the left.
        """
        candidates = []
        for family, patterns in self._factor_rules.items():
            for pattern in patterns:
                # NOTE(review): the leading group of the pattern is greedy, so
                # a pattern occurring several times is reported at its
                # rightmost occurrence - confirm this is intended.
                hit = pattern.match(biseq)
                if not hit:
                    continue
                logging.debug("rule" + str(family) + ": "
                              + str(self._print_factor_rule(pattern))
                              + " in biseq " + str((biseq[0::2], biseq[1::2])))
                where = hit.end() - 2  # last pair of the matched factor
                fix = self._factor_rules_replacement(family, hit.group(2))
                candidates.append([where, fix])
                biseq = biseq[:where + 3]

        logging.info("all non-prefix matches: " + str(candidates))
        if not candidates:
            return None
        # min() keeps the earliest recorded candidate among equal positions.
        chosen = min(candidates, key=lambda candidate: candidate[0])
        logging.debug("Final change:" + str(chosen))
        return chosen

    def _factor_rules_replacement(self, index, rule):
        """Build the correction string for the matched factor `rule`, which
        belongs to the factor-rule family number `index` (1 to 4).

        Logs an error and returns `None` for any other `index`.
        """
        ei = self._ei
        if index == 1:
            return "".join((rule[4], "R", rule[2], rule[3]))
        if index == 2:
            return "".join((rule[4], rule[1], rule[2], "R"))
        if index == 3:
            table = ei[rule[1]]
            inner = ei[rule[3]][int(rule[2])]
            return rule[4] + table[int(rule[3])] + table[int(inner)] + rule[1]
        if index == 4:
            return rule[6] + rule[1:6]
        logging.error("No replacement rule found")
        return None

    def _generate_factor_rules(self):
        """Create the four families of factor rules as lazy generators of
        plain strings; `_compile_rules` turns them into regular expressions.
        """
        ei = self._ei
        letters = ("0", "1", "2")
        pairs = ["".join(pair) for pair in itertools.product("012", repeat=2)]

        # we consider here b as b_2
        self._rules1 = (
            a + "R" + ei[i][int(b)] + i + b + i
            for (a, b), i in itertools.product(pairs, letters)
        )
        self._rules2 = (
            a + i + ei[i][int(b)] + "R" + b + "R"
            for (a, b), i in itertools.product(pairs, letters)
        )

        # we consider here b as b_1
        self._rules3 = (
            a + i + b + j + ei[j][int(ei[i][int(b)])] + i
            for (a, b), (i, j) in itertools.product(
                pairs, itertools.permutations("012", 2))
        )

        self._rules4 = (
            a + i + b + j + ei[j][int(ei[i][int(b)])]
            + k + ei[k][int(ei[i][int(b)])] + k
            for (a, b), (i, j, k) in itertools.product(
                pairs, itertools.permutations("012", 3))
        )

    def _compile_rules(self):
        """Compile the prefix table and the four factor-rule families."""
        self._prefix_rules = tuple(
            (re.compile(pattern), correction, number)
            for pattern, correction, number in self._bad_prefixes_and_correction
        )

        families = (
            (1, self._rules1),
            (2, self._rules2),
            (3, self._rules3),
            (4, self._rules4),
        )
        self._factor_rules = {}
        for number, sources in families:
            # A factor may start at any even offset: skip whole pairs first.
            self._factor_rules[number] = tuple(
                re.compile('^([012R]{2})*(' + source + ')') for source in sources
            )

    def print_all_factor_rules_readable(self):
        """Print every family of factor rules as (delta, theta) pairs."""
        for index in self._factor_rules:
            self._print_factor_rules(index)

    def _print_factor_rules(self, index):
        """Print the factor rules of family `index` as (delta, theta) pairs."""
        print("Factor rules " + str(index))
        readable_rules = tuple(
            self._print_factor_rule(rule) for rule in self._factor_rules[index]
        )
        print(readable_rules)

    def _print_factor_rule(self, rule):
        """Return the factor encoded in a compiled rule as `(delta, theta)`."""
        # The pattern looks like '^([012R]{2})*(<factor>)': take what follows
        # the '*' and drop the surrounding parentheses.
        factor = rule.pattern.split("*")[1][1:-1]
        return (factor[0::2], factor[1::2])

In [3]:
class Normalizer012:
    """Rule-based normalizer of directive bi-sequences over {0, 1, 2}.

    The input letters are first renamed so that they appear in the order
    0, 1, 2; the rules supplied by `Normalization012_rules_checker` are then
    applied until none matches, and finally the renaming is undone.
    """

    def __init__(self):
        self._rules_checker = Normalization012_rules_checker()

    def normalize(self, delta1, theta1):
        """Returns the normalized directive bi-sequence giving the same GPS word
        as (delta1, theta1).

        Returns:
            A tuple `(delta, theta, notchanged)`; `notchanged` is True when
            the result equals the input, i.e. nothing had to be rewritten.
        """
        # Changing the letters to be in order 0,1,2
        delta, theta, substitution = self._change_letters_order(delta1, theta1)

        # Interleaving delta and theta to get only one sequence from two
        biseq = "".join(d + t for d, t in zip(delta, theta))

        # Normalization of the prefix
        biseq = self._initial_normalization(biseq)

        # The main algorithm: rewrite until no rule is applicable any more.
        applicable_rule = self._rules_checker.find_applicable_rule(biseq)
        while applicable_rule:
            biseq = self._apply_rule(biseq, applicable_rule)
            applicable_rule = self._rules_checker.find_applicable_rule(biseq)

        # Post-processing: split the interleaved string again.
        delta, theta = biseq[0::2], biseq[1::2]
        logging.info("bi-sequence before changing the letters back: (" +
                     delta + ", " + theta + ")")

        delta, theta = self._change_letters_order_back(delta, theta, substitution)

        notchanged = (delta1 == delta) and (theta1 == theta)
        return (delta, theta, notchanged)

    # Preprocessing
    @staticmethod
    def _substitute(dic, seq):
        """Substitutes letters in a word according to rules in dic, if there is
        no rule for the letter, keeps the letter."""
        return "".join(dic.get(letter, letter) for letter in seq)

    @staticmethod
    def _compose_substitutions(subs1, subs2):
        """Composes two substitutions of letters: `subs1` is applied first,
        `subs2` second. Letters mapped by neither are left out of the result."""
        csub = {}
        for letter in ("0", "1", "2"):
            if letter in subs1:
                image = subs1[letter]
                csub[letter] = subs2.get(image, image)
            elif letter in subs2:
                csub[letter] = subs2[letter]
        return csub

    def _change_letters_order(self, delta, theta):
        """ Change (delta, theta) so that the word obtained is the same as the
        original one, but the first symbol is 0, the second 1 and the third 2.

        Returns:
            A list `[delta, theta, subs]` where `subs` is the letter
            substitution that was applied (an empty dict when none was).
        """
        subs = {}
        swap_12 = {"2": "1", "1": "2"}
        if not delta:
            # Nothing to rename in an empty bi-sequence (delta[0] would fail).
            return [delta, theta, subs]

        if delta[0] != "0":
            # Exchange 0 with the first letter of delta.
            subs = {delta[0]: "0", "0": delta[0]}
            delta = self._substitute(subs, delta)
            theta = self._substitute(subs, theta)

        i = 0
        length = len(delta)
        while i < length and delta[i] == "0":
            if theta[i] == "2":
                return [delta, theta, subs]
            if theta[i] == "1":
                delta = self._substitute(swap_12, delta)
                theta = self._substitute(swap_12, theta)
                # The helper is a static method of this class; the original
                # bare-name call raised NameError.
                return [delta, theta, self._compose_substitutions(subs, swap_12)]
            # otherwise we have to continue
            i += 1
        if i < length and delta[i] == "2":
            delta = self._substitute(swap_12, delta)
            theta = self._substitute(swap_12, theta)
            return [delta, theta, self._compose_substitutions(subs, swap_12)]
        return [delta, theta, subs]

    def _change_letters_order_back(self, delta, theta, subs):
        """ Give back the original delta and theta that were transformed with
        the substitution subs"""
        backsubs = {image: letter for letter, image in subs.items()}
        return [self._substitute(backsubs, delta),
                self._substitute(backsubs, theta)]

    @staticmethod
    def _initial_normalization(biseq):
        """Rewrite a leading run of the pairs "0R"/"00" as the same number of
        "00" pairs."""
        m = re.match("(0(R|0))+", biseq)
        if m:
            biseq = "00" * ((m.end() - m.start()) // 2) + biseq[m.end():]
        return biseq

    @staticmethod
    def _apply_rule(biseq, rule):
        """ Function that applies the correction 'rule' in the biseq: the two
        characters at position rule[0] are replaced by the string rule[1]."""
        return biseq[:rule[0]] + rule[1] + biseq[rule[0] + 2:]


In [4]:
def is_eipal(seq, i):
    """Tell whether the string seq is an E_i-palindrome, i.e. whether reading
    it backwards and mapping every letter through E_i gives seq again."""
    images = Ei(i)
    length = len(seq)
    if length == 1:
        # A single letter qualifies only when it is the letter i itself.
        return seq == str(i)
    # For odd lengths the middle letter is compared with its own image.
    return all(
        seq[pos] == images[int(seq[length - 1 - pos])]
        for pos in range(math.ceil(length / 2))
    )
def is_pal(seq):
    """Checks if a string is a palindrome."""
    l = len(seq)
    if l == 1:
        return(True)
    for x in range(0, l // 2):
        if seq[x] != seq[l - 1 - x]:
            return(False)
    return(True)
def make_pal_closure(seq):
    """Return the palindromic closure of seq: seq followed by the reversal
    of whatever precedes its longest palindromic suffix."""
    if is_pal(seq):
        return seq
    cut = 1
    # Advance the cut until the remaining suffix is a palindrome.
    while not is_pal(seq[cut:]):
        cut += 1
    logging.debug("{0} longest palindromic suffix: {1}"
                  .format(seq, seq[cut:]))
    return seq + seq[:cut][::-1]
def make_eipal_closure (seq, i):
    """Return the E_i-palindromic closure of seq: seq followed by the
    E_i-image of the reversal of whatever precedes its longest
    E_i-palindromic suffix."""
    images = Ei(i)
    if is_eipal(seq, i):
        return seq
    cut = 1
    # Advance the cut until the remaining suffix is an E_i-palindrome.
    while not is_eipal(seq[cut:], i):
        cut += 1
    logging.debug("{0} longest ei-palindromic suffix : {1}"
                  .format(seq,seq[cut:]))
    mirrored = "".join(images[int(letter)] for letter in reversed(seq[:cut]))
    return seq + mirrored
def make012Word(delta, theta, steps, seed = ""):
    """Makes a GPS word over {0,1,2} from sequences delta and theta.

    Starting from `seed`, each of the first `steps` steps appends the next
    letter of delta and closes the word: a palindromic closure when the
    matching symbol of theta is "R", an E_i-palindromic closure when it is
    "0", "1" or "2".

    Returns:
        The word obtained after `steps` steps, or None (after logging an
        error) when theta contains any other symbol.
    """
    w = seed
    for step in range(steps):
        w = w + delta[step]
        closure_type = theta[step]
        if closure_type == "R":
            # Use the helper defined in this notebook; the former
            # `gpc.makePalClosure` is not defined or imported anywhere.
            w = make_pal_closure(w)
        elif closure_type in ("0", "1", "2"):
            # Likewise `makeEipalClosure` did not exist under that name.
            w = make_eipal_closure(w, closure_type)
        else:
            logging.error("wrong symbol")
            return
        logging.info("w{0} = {1}".format(step+1,w))
    return w

In [5]:
class NaiveNormalizer012:
    """Brute-force normalizer: builds the word generated by (delta, theta)
    and reads the bi-sequence back from the prefixes of that word which are
    palindromes or E_i-palindromes."""

    def normalize(self, delta, theta):
        """Checks if delta and theta are normalized and if not,
        returns the beginning of the normalized sequence.

        Returns:
            A tuple `(newdelta, newtheta, unchanged)` where `unchanged` is
            True when the input was already normalized; None (after logging
            an error) when the lengths differ or theta holds a symbol other
            than "R", "0", "1", "2".
        """
        if len(delta) != len(theta):
            logging.error("The length of delta and theta must be equal.")
            return
        if not delta:
            # Empty bi-sequence: nothing to build (delta[0] below would
            # fail), and it is unchanged by definition.
            return (delta, theta, True)

        # Build the word step by step, remembering every closed prefix.
        w = ""
        prefixes = []
        for letter, closure_type in zip(delta, theta):
            w = w + letter
            if closure_type == "R":
                w = make_pal_closure(w)
            elif closure_type in ("0", "1", "2"):
                w = make_eipal_closure(w, closure_type)
            else:
                logging.error("wrong symbol")
                return
            prefixes.append(w)
        logging.info("Prefixes from (delta, theta): " + str(prefixes))
        logging.info("Obtained word: " + w)

        # Every (pseudo)palindromic prefix contributes one symbol to the new
        # theta and, unless it is the whole word, the letter following it to
        # the new delta.
        newdelta = delta[0]
        newtheta = ""
        for end in range(1, len(w) + 1):
            prefix = w[:end]
            res = self._test_palindromicity(prefix)
            if res[0]:
                logging.info(prefix)
                if end < len(w):
                    newdelta = newdelta + w[end]
                newtheta = newtheta + res[1]
        unchanged = newdelta == delta and newtheta == theta
        return (newdelta, newtheta, unchanged)

    @staticmethod
    def _test_palindromicity(seq):
        """Checks if seq is a palindrome or an E-palindrome and
        returns its nature.

        E_0, E_1, E_2 are tried in this order before the ordinary
        palindrome test. Returns `[True, symbol]` on success and `[False]`
        otherwise.
        """
        for symbol in ("0", "1", "2"):
            if is_eipal(seq, int(symbol)):
                return [True, symbol]
        if is_pal(seq):
            return [True, "R"]
        return [False]

In [6]:
def set_logging(logging_level):
    """Set the level of the root logger to logging_level (a level name such
    as "INFO" or the matching numeric constant)."""
    root_logger = logging.getLogger()
    root_logger.setLevel(logging_level)

def Ei(i):
    """Return the letter map E_i as a tuple of three one-character strings:
    entry x is the image of letter x. E_i keeps the letter i and exchanges
    the two remaining letters of {0, 1, 2}."""
    fixed = int(i)
    table = [0, 0, 0]
    table[fixed] = str(fixed)
    # The letters i+1 and i+2 (mod 3) are swapped with each other.
    for step in (1, 2):
        table[(fixed + step) % 3] = str((fixed - step) % 3)
    return tuple(table)

In [7]:
# Example bi-sequence: delta holds the letters, theta the closure type used
# at each step ("R" or one of "0", "1", "2").
delta = "21022101"
theta = "RR021210"
# At INFO level the normalizer logs every bi-sequence it checks.
set_logging("INFO")
normalizer = Normalizer012()
# Displays (normalized delta, normalized theta, True if nothing was changed).
normalizer.normalize(delta, theta)

Checking for an applicable rule in('01200121', '0R201012')
Checking for an applicable rule in('010200121', '02R201012')
all non-prefix matches: [[14, '2201']]
Checking for an applicable rule in('0102001201', '02R2010212')
all non-prefix matches: [[18, '1022']]
Checking for an applicable rule in('01020012012', '02R20102102')
all non-prefix matches: []
bi-sequence before changing the letters back: (01020012012, 02R20102102)


('21202210210', '20R02120120', False)

In [8]:
# Brute-force normalizer that works on the generated word itself.
naive_normalizer = NaiveNormalizer012()

In [9]:
# Same input as for the rule-based normalizer; the tuple displayed below is
# identical to the one that normalizer produced.
naive_normalizer.normalize(delta, theta)

Prefixes from (delta, theta): ['2', '212', '2120121', '212012120201202', '21201212020120220020120201012010', '212012120201202200201202010120101212012112212012120201202', '21201212020120220020120201012010121201211221201212020120201012010011012010121201212020120220020120201012010', '2120121202012022002012020101201012120121122120121202012020101201001101201012120121202012022002012020101201012120121122120121202012020101201001101201012120121']
Obtained word: 2120121202012022002012020101201012120121122120121202012020101201001101201012120121202012022002012020101201012120121122120121202012020101201001101201012120121
2
21
212
2120121
212012120201202
21201212020120220020120201012010
212012120201202200201202010120101212012112212012120201202
2120121202012022002012020101201012120121122120121202012020101201001101201012120121
21201212020120220020120201012010121201211221201212020120201012010011012010121201212020120220020120201012010
2120121202012022002012020101201012120121122120121202012020101201001101

('21202210210', '20R02120120', False)

In [10]:
# List every generated factor rule as a (delta, theta) pair, family by family.
nrc = Normalization012_rules_checker()
nrc.print_all_factor_rules_readable()

Factor rules 1
(('000', 'R00'), ('020', 'R11'), ('010', 'R22'), ('021', 'R00'), ('011', 'R11'), ('001', 'R22'), ('012', 'R00'), ('002', 'R11'), ('022', 'R22'), ('100', 'R00'), ('120', 'R11'), ('110', 'R22'), ('121', 'R00'), ('111', 'R11'), ('101', 'R22'), ('112', 'R00'), ('102', 'R11'), ('122', 'R22'), ('200', 'R00'), ('220', 'R11'), ('210', 'R22'), ('221', 'R00'), ('211', 'R11'), ('201', 'R22'), ('212', 'R00'), ('202', 'R11'), ('222', 'R22'))
Factor rules 2
(('000', '0RR'), ('020', '1RR'), ('010', '2RR'), ('021', '0RR'), ('011', '1RR'), ('001', '2RR'), ('012', '0RR'), ('002', '1RR'), ('022', '2RR'), ('100', '0RR'), ('120', '1RR'), ('110', '2RR'), ('121', '0RR'), ('111', '1RR'), ('101', '2RR'), ('112', '0RR'), ('102', '1RR'), ('122', '2RR'), ('200', '0RR'), ('220', '1RR'), ('210', '2RR'), ('221', '0RR'), ('211', '1RR'), ('201', '2RR'), ('212', '0RR'), ('202', '1RR'), ('222', '2RR'))
Factor rules 3
(('002', '010'), ('001', '020'), ('001', '101'), ('002', '121'), ('002', '202'), ('001', 