### **IMPORTS**

In [8]:
# %pip install igraph
# %pip install graphviz

In [9]:
import numpy as np
import re
import json
import igraph as ig
from graphviz import Digraph

### **Check Regex Validity**

In [10]:

def is_regex_valid(regex):

	# 1- Check that the characters in regex are within the valid set of characters
	# 2- check that all brackets are closed
	regex_operations = ['|', '(', ')', '[', ']', '.', '?', '*', '+', '-', '\\\\']
	bracket,parenthesis = 0, 0

	for char in regex:
		if not char.isalnum() and char != ' ' and char not in regex_operations:
			return False
		
		if char == '(':
			bracket += 1
		elif char == ')':
			bracket -= 1
		elif char == '[':
			parenthesis += 1
		elif char == ']':
			parenthesis -= 1
	if bracket != 0 or parenthesis != 0:
		return False
	
	return True

def is_regex_validdd(regex):
    try:
        re.compile(regex)
        return True
    except:
        return False
# Test
validity_check = is_regex_valid("[A-Zl]/ko;]")
print(validity_check)

False


### **Change regex to PostFix (Shunt_Yard)**

In [43]:
def regex_to_postfix(regex):
    '''
    Turns regex to Postfix notation using Shunt Yard Algorithm
    Returns: postfix regex
    '''
    # Operators and precedance level: * (kleene star), + (one or more), ? (zero or one), . (concatenation), and | (ORing).
    operators = {'*': 5, '+': 4, '?': 3, '.': 2, '|': 1}
    # Initialize the postfix and stack (temp) strings to empty strings.
    postfix, stack = "", ""
    
    # Insert a concatenation (.) between any two adjacent symbols if there is none exists/bracket
    dotsIndex = []
    i = 0
    while i < len(regex)-1:
        startOps = [')', "*", "+",']']
        endOps = ["*", "+", ".", "|", ")",']']
        if regex[i] =='[':
            while regex[i] !=']':
                i+=1
            dotsIndex.append(i)
        elif regex[i] in startOps and regex[i+1] not in endOps:
            dotsIndex.append(i)
        elif regex[i].isalnum() and (regex[i+1].isalnum() or regex[i+1] == '(' or regex[i+1] == '['):
            dotsIndex.append(i)
        i+=1
    for i in range(len(dotsIndex)):
        regex = regex[:dotsIndex[i] + 1 + i] + \
            '.' + regex[dotsIndex[i] + 1 + i:]


    # Shunt_Yard Algorithm
    for i in range(len(regex)):
        c = regex[i]
        # If we have a parenthesis push till closing parenthesis, pop operators from stack and append them to the output postfix string til the opening parenthesis 
        if c == '(':
            stack = stack + c
        elif c == ')':
            while stack[-1] != '(':
                # place the character at the end of the stack
                postfix = postfix + stack[-1]
                stack = stack[:-1]
            stack = stack[:-1]  # remove the parenthesis 
        elif c =='?':
                postfix = postfix + c
        # If the character is an operator append if higher precedence and push the other one to stack.
        elif c in operators:
            while stack and operators.get(c, 0) <= operators.get(stack[-1], 0):
                postfix, stack = postfix + stack[-1], stack[:-1]
            stack = stack + c

        # Appendf the character is a operand not operator/parenthesis)
        else:
            postfix = postfix + c
    # Pop remaining operators 
    while stack:
        postfix, stack = postfix + stack[-1], stack[:-1]

    return postfix

print(regex_to_postfix("c[a-z]?"))

c[a-z]?..


### **Make a Class for NFA**

In [12]:
# Each state has a list of transitions and epsilon transitions
# We have 2 types of states accepting and non accepting
class State:
	# id = 0 # Smarter than using index in NFA
	def __init__(self, name, start=False, accepting=True, transitions=[], parents=[]):
		# self.id = State.id
		# State.id += 1
		self.name = name
		self.transitions = []
		self.parents = []
		self.accepting = accepting
		self.start = start
	
	def add_transition(self, transition, state):
		self.transitions.append((transition,state))
		state.parents.append(self)
		self.accepting = False

	# For future use -> may delete later
	def get_transitions(self):
		return self.transitions.copy()
	
	def get_parents(self):
		return self.parents.copy()
	
	# Print states override  -> may delete later
	# def __str__(self):
	# 	output = "State: " + str(id(self)) + "\n"
	# 	output += "  Transitions: " + str(self.transitions) + "\n"
	# 	output += ("  Accepting? " + str(self.accepting) + "\n")
	# 	output += ("  Start? " + str(self.start) + "\n")
	# 	return output

In [13]:
# NFA class
	# Consists of
		# 1. Start State
		# 2. Final State			 

	# Operations that can be done on them
		# 1. Concatenation
		# 2. Union
		# 3. Kleene Star
		# 4. Positive Closure

class NFA:
	def __init__(self, start_state=None, accept_state=None, regex=None):
		self.start_state = start_state
		self.accept_state = accept_state
		if not start_state and not accept_state and regex:
			nfa = self.create_nfa(regex)
			self.start_state = nfa.start_state
			self.accept_state = nfa.accept_state

	def create_nfa(self, regex):
		"""
		Converts a regular expression in postfix notation to an NFA.
		"""
		NFAStack, index = [], 0
		regex = iter(enumerate(regex))
		for i,symbol in regex:
			if symbol == '.':  # Concatenation
				# Pop the operands
				nfa2 = NFAStack.pop()
				nfa1 = NFAStack.pop()
				#Concatenate with an epsilon transition
				nfa1.accept_state.add_transition('ϵ', nfa2.start_state)
				# Add to NFA Stack output
				NFAStack.append(NFA(nfa1.start_state, nfa2.accept_state))

			elif symbol == '|':  # Union
				# Pop the operands
				nfa2 = NFAStack.pop()
				nfa1 = NFAStack.pop()
				# we have a start and an end (between which we put the symbols)
				start = State("S"+str(index))
				accepting = State("S"+str(index+1))
				# Add the 2 paths with epsilon transition from start
				start.add_transition('ϵ',nfa1.start_state)
				start.add_transition('ϵ',nfa2.start_state)
				# Add the 2 paths with epsilon transition to the end
				nfa1.accept_state.add_transition('ϵ', accepting)
				nfa2.accept_state.add_transition('ϵ', accepting)
				# Add to NFA Stack output
				NFAStack.append(NFA(start,accepting))
				index+=2

			elif symbol == '*':  # Kleene star
				# Pop the operand
				nfa = NFAStack.pop()
				# we have a start and an end 
				start = State("S"+str(index))
				accepting = State("S"+str(index+1))
				# Add the 2 paths one with symbol and one empty to the end
				start.add_transition('ϵ',nfa1.start_state)
				start.add_transition('ϵ',accepting)
				# Add the 2 paths with epsilon transition to the end and back again to start
				nfa.accept_state.add_transition('ϵ', start)
				nfa.accept_state.add_transition('ϵ', accepting)
				# Add to NFA Stack output
				NFAStack.append(NFA(start,accepting))
				index+=2

			elif symbol == '+':  # Positive closure (A+)
				nfa = NFAStack.pop()  
				# we have a start and an end 
				start = State("S"+str(index))
				accepting = State("S"+str(index+1))
				# Add the path one with symbol 
				start.add_transition('ϵ',nfa.start_state)
				# Add the 2 paths with epsilon transition to the end  and back again to start
				nfa.accept_state.add_transition('ϵ', start)
				nfa.accept_state.add_transition('ϵ', accepting)
				# Add to NFA Stack output
				NFAStack.append(NFA(start,accepting))
				index+=2

			elif symbol == '?':  # Kleene star
				nfa = NFAStack.pop()  
				# we have a start and an end 
				start = State("S"+str(index))
				accepting = State("S"+str(index+1))
				# Add the 2 paths one with symbol and one empty to the end
				start.add_transition('ϵ',nfa1.start_state)
				start.add_transition('ϵ',accepting)
				# Add the path with epsilon transition to the end
				nfa.accept_state.add_transition('ϵ', accepting)
				# Add to NFA Stack output
				NFAStack.append(NFA(start,accepting))
				index+=2

			else:  # character/symbol/number
				if symbol == '[':
					temp ='['
					while symbol!=']':
						i,symbol = next(regex,None)
						temp+=symbol
					start = State("S"+str(index))
					accepting = State("S"+str(index+1))
					# Add an epsilon transition
					start.add_transition(temp,accepting)
					# Add to NFA Stack output
					NFAStack.append(NFA(start,accepting))
					index+=2
				else:
					start = State("S"+str(index))
					accepting = State("S"+str(index+1))
					# Add an epsilon transition
					start.add_transition(symbol,accepting)
					# Add to NFA Stack output
					NFAStack.append(NFA(start,accepting))
					index+=2
		return NFAStack.pop()  # Final NFA
	
	def get_states(self):
		'''
		Return states in the NFA as a list
		'''
		possibleStates, statesList, queue = set(), [], [self.start_state]
		possibleStates.add(self.start_state)
		while queue:
			state = queue.pop(0)
			statesList.append(state)
			for (transition) in state.transitions:
				if transition[1] not in possibleStates:
					possibleStates.add(transition[1])
					queue.append(transition[1])
		return statesList
	
	def get_states_by_sumbols(self, symbol):
		statesList = []
		for state in self.getStates():
			if state.name == symbol:
				statesList.append(state)
		return statesList
	
	def get_symbols(self):
		'''
		Returns the symbols used in states' transitions
		'''
		states = self.getStates()
		symbols = set()
		for state in states:
			for symbol, __ in state.transitions:
				if symbol != 'ϵ':
					symbols.add(symbol)
		return list(symbols)
	

	def to_json(self):
		states={}
		for state in self.get_states:
			stateDictionary = {
                'isTerminatingState': state.accepting,
            }
			for symbol, nextState in state.transitions:
				if symbol not in stateDictionary:
					stateDictionary[symbol] = nextState.name
				else:
					stateDictionary[symbol] += ',' + nextState.name
			states[state.name] = stateDictionary
		
		return {'startingState': self.start_state.name,**states,}

	def get_graph(self, name="fsm", view=False):
		'''
		Return the NFA as a graph
		'''
		nfa = self.get_states()
		g = Digraph(engine='dot')
		for state, transitions in nfa.items():
			if state == 'startingState':
				continue
			if transitions['isTerminatingState']:
				g.node(state, shape='doublecircle')
			else:
				g.node(state, shape='circle')
			
			for symbol, nextState in transitions.items():
				if symbol == 'isTerminatingState':
					continue
				childStates = nextState.split(',')
				for child in childStates:
					g.edge(state, child, label=symbol)
		g.render(name, view=view)
		return g
	
	# # Override print
	# def __str__(self):
	# 	return self.get_states()
	




### **Write Result to JSON**

In [14]:
def write_json(nfa, filename = "fsm.json"):
    json_object = json.dumps(nfa, indent = 4) 
    with open(filename, "w") as f:
        json.dump(json_object, f)
    

### **Create Graph**

In [15]:
def display_graph(nfa, filename="fsm.gv"):
    nfa.get_graph(name=filename)
    pass

## **MAIN**

In [16]:
# 1. Get user Input
# regex = input("Enter a regex: ")
regex_1 ='a|b'

# # 2. Check if the regex is valid
if not is_regex_valid(regex_1):
	print("Invalid regex")

# # 3. Turn regex to postfix
postfixRegex = regex_to_postfix(regex_1)
print("postfix regex:", postfixRegex)

# # 4. Turn postfix to NFA
nfa = NFA(regex= postfixRegex)
print("NFA:", nfa.get_states())

# # 5. Write the FSM to a file
write_json(nfa.to_json())

# # 6. Display the NFA as a graph
display_graph(nfa)



postfix regex: ab|
NFA: [<__main__.State object at 0x000001FF9EF98AD0>, <__main__.State object at 0x000001FF9EF1BED0>, <__main__.State object at 0x000001FF9EF98B50>, <__main__.State object at 0x000001FF9ED6CD90>, <__main__.State object at 0x000001FF9EF98BD0>, <__main__.State object at 0x000001FF9EF98E50>]


TypeError: 'method' object is not iterable

In [None]:
# # # 1. Get user Input
# # regex = input("Enter a regex: ")
# regex_2 ='ab(b|c)*d+'

# # # 2. Check if the regex is valid
# if not is_regex_valid(regex_2):
# 	print("Invalid regex")

# # # 3. Turn regex to postfix
# postfixRegex = regex_to_postfix(regex_2)
# print("postfix regex:", postfixRegex)

# # # 4. Turn postfix to NFA
# nfa = NFA(regex= postfixRegex)
# print("NFA:", nfa.get_states())

# # # 5. Write the FSM to a file
# write_json(nfa.get_states())

# # # 6. Display the NFA as a graph
# display_graph(nfa,"fsm2.gv")



In [None]:
# # # 1. Get user Input
# # regex = input("Enter a regex: ")
# regex_2 ='2[a-zdfdc]'

# # # 2. Check if the regex is valid
# if not is_regex_valid(regex_2):
# 	print("Invalid regex")

# # # 3. Turn regex to postfix
# postfixRegex = regex_to_postfix(regex_2)
# print("postfix regex:", postfixRegex)

# # # 4. Turn postfix to NFA
# nfa = NFA(regex= postfixRegex)
# print("NFA:", nfa.get_states())

# # # 5. Write the FSM to a file
# write_json(nfa.get_states())

# # # 6. Display the NFA as a graph
# display_graph(nfa,"fsm2.gv")



postfix regex: 2[a-zdfdc].
NFA: {'startingState': 'S0', 'S0': {'isTerminatingState': False, '2': 'S1'}, 'S1': {'isTerminatingState': False, 'ϵ': 'S2'}, 'S2': {'isTerminatingState': False, '[a-zdfdc]': 'S3'}, 'S3': {'isTerminatingState': True}}
