In [38]:
from lark import Lark, Transformer, v_args

import re
import sys

def split_floors(text):
    """Break a route description into its individual segments.

    Segments are separated by the connector words " to " and " via "
    (the surrounding spaces are part of the separator, so words that merely
    contain "to" or "via" are left intact).

    Args:
        text: Route description string.

    Returns:
        List of segment strings in their original order.
    """
    connectors = ' to | via '
    return re.split(connectors, text)
    
def parse(s):
    """Parse a route description into a flat list of platforms.

    The text is split into segments with ``split_floors``. Each segment is
    offered to the strict parser ``l1`` first and, only if that fails, to the
    more permissive parser ``l2``. Segments that neither parser accepts are
    skipped.

    Args:
        s: Route description, e.g. "mezzanine to uptown a/b/c service".

    Returns:
        A ``(platforms, err)`` tuple. ``platforms`` is the concatenation of
        the lists produced by ``TreeToObj`` for every segment that parsed.
        ``err`` is a text log with one entry per parser failure; it includes
        ``l1`` failures for segments that ``l2`` subsequently parsed.
    """
    platforms = []
    err = ""
    for floor in split_floors(s):
        tree = None
        # Strict grammar first, permissive grammar as the fallback.
        for label, parser in (("l1", l1), ("l2", l2)):
            try:
                tree = parser.parse(floor)
            except Exception:
                # Deliberately best-effort, but only for ordinary errors:
                # a bare ``except:`` would also swallow KeyboardInterrupt and
                # SystemExit, making the cell impossible to interrupt.
                err += f"{label} failed to parse: {floor}\n{sys.exc_info()}\n"
            if tree is not None:
                break
        if tree is None:
            continue
        platforms.extend(TreeToObj().transform(tree))
    return platforms, err

In [42]:
class Platform:
    """A platform described by the lines serving it and its direction.

    Attributes are stored as given, except that a falsy ``direction`` is
    replaced by ``Direction("both directions")``.
    """

    def __init__(self, lines, direction):
        self.lines = lines
        # Fall back to "both directions" when no direction was supplied.
        self.direction = direction or Direction("both directions")

    def __str__(self):
        heading, served = self.direction, self.lines
        return f"<dir {heading}, lines {served}>"

    def __repr__(self):
        # Same text as str() so platforms read well inside lists.
        return f"{self}"
    
class Direction:
    """Direction of travel, stored as a list of direction names.

    Text containing "both directions" becomes ``["north", "south"]``; any
    other text becomes a one-element list holding that text with every
    "-bound" occurrence removed (so "Manhattan-bound" -> ``["Manhattan"]``
    and "uptown" -> ``["uptown"]``).
    """

    BOTH = ["north", "south"]

    def __init__(self, direction):
        if "both directions" in direction:
            # Copy the class-level list so that mutating one instance's
            # ``direction`` cannot silently change every other instance.
            self.direction = list(self.BOTH)
        else:
            self.direction = [re.sub("-bound", "", str(direction))]

    def __str__(self):
        return str(self.direction)

    def __repr__(self):
        # __repr__ must return a str; returning the list itself made
        # repr(Direction(...)) raise TypeError.
        return str(self)

class Lines:
    """A group of line identifiers, each stored as a plain string.

    Args:
        lines: Iterable of line identifiers; every item is converted with
            ``str`` and kept in the order given.
    """

    def __init__(self, lines):
        self.lines = [str(line) for line in lines]

    def __str__(self):
        return str(self.lines)

    def __repr__(self):
        # __repr__ must return a str; returning the list itself made
        # repr(Lines(...)) raise TypeError.
        return str(self)

In [49]:
#(?<=and ).*-(?=bound)
# uptown A/B/C service -> direction line "service"
# A/B/C service in both directions -> line "service" direction
# uptown 6 and E/M service in both directions
#         | /(\w+(?<!and) )*\w+-bound/

# Grammar fragment with the top-level rules (combined with a `direction`
# fragment and `base` below to form a complete grammar).
#   services              - one or more `service` clauses joined by "and".
#   service               - either "[floor for] [lines] service in both
#                           directions", or "[floor for] <direction>" followed
#                           by one or more `directionless_service` groups
#                           joined by "and".
#   directionless_service - a `lines` group optionally followed by a floor
#                           word, or a floor word on its own.
service = r"""
    services : (service "and")* service
        
    service : [floor "for"] [lines] "service in both directions"
        | [floor "for"] direction (directionless_service "and")* directionless_service
        
    directionless_service : lines [floor]
        | floor
"""

simple_dir = r"""
    direction : /(uptown|downtown)/i
        | /\w+-bound/
        | /terminal/i
        | /(\w+(?<!and) )*\w+-bound/
"""

complex_dir = simple_dir + r"""
        | /\w.*-bound/
"""

# Grammar fragment shared by both parsers.
#   floor - one of the keywords terminal / service / platform / mezzanine
#           (the trailing `i` makes each literal case-insensitive).
#   lines - wrapper around `_line`; the leading underscore makes Lark inline
#           `_line` into `lines`, so the matched characters become direct
#           children of `lines`.
#   _line - a single letter or digit, or several of them joined by "/" or
#           "and".
# Whitespace between tokens is ignored.
base = r"""
    floor : "terminal"i | "service"i | "platform"i | "mezzanine"i
    
    lines : _line
    
    _line : /([a-zA-Z0-9])/
        | _line "and" _line
        | (_line "/")* _line
    
    %import common.WS
    %ignore WS
"""

# Strict parser: top-level rules + the restricted `direction` rule + shared
# terminals. parse() tries this one first.
l1 = Lark("".join((service, simple_dir, base)), start='services')

# Permissive parser: identical except for the catch-all "-bound" direction
# alternative. parse() falls back to it when l1 rejects a segment.
l2 = Lark("".join((service, complex_dir, base)), start='services')

@v_args(inline=True)
class TreeToObj(Transformer):
    """Converts a parse tree rooted at ``services`` into Platform objects.

    Because of ``v_args(inline=True)`` every callback receives the children
    of its rule as positional arguments.
    """

    def direction(self, x):
        """Wrap the matched direction text in a Direction."""
        return Direction(str(x))

    def line(self, x):
        """Return the matched item as a plain string."""
        return str(x)

    def lines(self, *x):
        """Bundle all children of a ``lines`` rule into one Lines object."""
        return Lines(x)

    def directionless_service(self, *x):
        """Keep only the Lines children, dropping everything else."""
        return [child for child in x if isinstance(child, Lines)]

    def service(self, *args):
        """Build one Platform per Lines group found among the children.

        Children may be a Direction, a Lines object, or a list (whose Lines
        members are collected); anything else is ignored. If several
        Direction children are present the last one wins. When no Lines are
        found a single Platform with ``lines=None`` is returned.
        """
        heading = None
        groups = []
        for child in args:
            if isinstance(child, list):
                groups.extend(item for item in child if isinstance(item, Lines))
            elif isinstance(child, Direction):
                heading = child
            elif isinstance(child, Lines):
                groups.append(child)

        if not groups:
            return [Platform(None, heading)]
        return [Platform(group, heading) for group in groups]

    def services(self, *x):
        """Flatten the per-service lists into one list of Platform objects."""
        return [
            candidate
            for group in x
            for candidate in group
            if isinstance(candidate, Platform)
        ]


In [51]:
class TestCase:
    """An input string paired with the results it is expected to produce."""

    def __init__(self, case, expected):
        self.case = case
        self.expected = expected

    def check(self, actual, err):
        """Compare ``actual`` against ``self.expected`` by string form.

        Every expected item must appear (as ``str``) among the actual items,
        and both collections must have the same length; order is irrelevant.
        On success a "SUCCESS" line is printed. On failure ``err`` is printed
        and an ``Exception`` describing the mismatch is raised.
        """
        rendered = [str(item) for item in actual]

        for wanted in self.expected:
            if str(wanted) in rendered:
                continue
            print(err)
            raise Exception(f"expected: {wanted}, to be in {actual}")

        if len(self.expected) != len(actual):
            print(err)
            raise Exception(f"expected: {self.expected}, actual: {actual}")

        print(f"SUCCESS: {actual}")

def plat(direction=None, lines=None):
    """Shorthand for building an expected Platform in the test table.

    Args:
        direction: Direction text, or a falsy value to pass ``None`` through
            to Platform.
        lines: Iterable of line identifiers, or a falsy value to pass
            ``None`` through to Platform.

    Returns:
        A Platform built from the wrapped arguments.
    """
    served = Lines(lines) if lines else None
    heading = Direction(direction) if direction else None
    return Platform(served, heading)
            
# Regression table: each TestCase pairs a raw route description with the
# platforms parse() should return for it. TestCase.check compares items by
# their string form and by count, so the order of the expected platforms does
# not matter.
tests = [
    TestCase(
        "uptown 6 and E/M service in both directions",
        [plat(direction="uptown", lines=["6"]), plat(direction="both directions", lines=["E", "M"])]
    ),
    TestCase(
        "mezzanine to Manhattan-bound platform",
        [plat(direction="Manhattan")]
    ),
    TestCase(
        "mezzanine to Pelham Bay Parkway-bound platform",
        [plat(direction="Pelham Bay Parkway")]
    ),
    TestCase(
        "mezzanine to uptown a/b/c service",
        [plat(direction="uptown", lines=["a", "b", "c"])]
    ),
    TestCase(
        "Manhattan-bound Platform via mezzanine",
        [plat(direction="Manhattan")]
    ),
    TestCase(
        "mezzanine to service in both directions",
        [plat(direction="both directions")]
    ),
    TestCase(
        "mezzanine to a/b service in both directions",
        [plat(direction="both directions", lines=["a", "b"])]
    ),
    TestCase(
        "mezzanine to a/b and 4 service in both directions",
        [plat(direction="both directions", lines=["a", "b", "4"])]
    ),
    TestCase(
        "mezzanine to terminal platform",
        [plat(direction="terminal")]
    ),
    TestCase(
        "downtown 6 platform to underpass for access to uptown 6 and E/M service in both directions",
        [plat(direction="downtown", lines=["6"]), plat(direction="uptown", lines=["6"]), plat(direction="both directions", lines=["E", "M"])]
    ),
    TestCase(
        "2/3 service in both directions and Manhattan-bound 4 service",
        [plat(direction="Manhattan", lines=["4"]), plat(direction="both directions", lines=["2", "3"])]
    ),
    TestCase(
        "platform for 2/3 service in both directions and test-bound 4 service",
        [plat(direction="test", lines=["4"]), plat(direction="both directions", lines=["2", "3"])]
    ),
    TestCase(
        "mezzanine to platform for Far Rockaway - Mott Av and Rockaway Park - Beach 116 St-bound service",
        [plat(direction="Far Rockaway - Mott Av and Rockaway Park - Beach 116 St")]
    ),
    TestCase(
        "25 St & Lexington Ave (NE corner) to mezzanine for service in both directions",
        [plat(direction="both directions")]
    ),
    TestCase(
        "125 St & St Nicholas Ave (SW corner) to mezzanine for service in both directions",
        [plat(direction="both directions")]
    ),
]

# Run every case; check() raises on the first mismatch, which stops the cell.
for test in tests:
    print("===========")
    print(test.case)
    rez, err = parse(test.case)
    test.check(rez, err)

uptown 6 and E/M service in both directions
SUCCESS: [<dir ['uptown'], lines ['6']>, <dir ['north', 'south'], lines ['E', 'M']>]
mezzanine to Manhattan-bound platform
SUCCESS: [<dir ['Manhattan'], lines None>]
mezzanine to Pelham Bay Parkway-bound platform
SUCCESS: [<dir ['Pelham Bay Parkway'], lines None>]
mezzanine to uptown a/b/c service
SUCCESS: [<dir ['uptown'], lines ['a', 'b', 'c']>]
Manhattan-bound Platform via mezzanine
SUCCESS: [<dir ['Manhattan'], lines None>]
mezzanine to service in both directions
SUCCESS: [<dir ['north', 'south'], lines None>]
mezzanine to a/b service in both directions
SUCCESS: [<dir ['north', 'south'], lines ['a', 'b']>]
mezzanine to a/b and 4 service in both directions
SUCCESS: [<dir ['north', 'south'], lines ['a', 'b', '4']>]
mezzanine to terminal platform
SUCCESS: [<dir ['terminal'], lines None>]
downtown 6 platform to underpass for access to uptown 6 and E/M service in both directions
SUCCESS: [<dir ['downtown'], lines ['6']>, <dir ['uptown'], lines