### **IMPORTS**

In [9]:
%pip install igraph
%pip install graphviz

Note: you may need to restart the kernel to use updated packages.
Collecting graphviz
  Obtaining dependency information for graphviz from https://files.pythonhosted.org/packages/00/be/d59db2d1d52697c6adc9eacaf50e8965b6345cc143f671e1ed068818d5cf/graphviz-0.20.3-py3-none-any.whl.metadata
  Downloading graphviz-0.20.3-py3-none-any.whl.metadata (12 kB)
Downloading graphviz-0.20.3-py3-none-any.whl (47 kB)
   ---------------------------------------- 0.0/47.1 kB ? eta -:--:--
   ----------------- ---------------------- 20.5/47.1 kB 682.7 kB/s eta 0:00:01
   ---------------------------------------- 47.1/47.1 kB 594.9 kB/s eta 0:00:00
Installing collected packages: graphviz
Successfully installed graphviz-0.20.3
Note: you may need to restart the kernel to use updated packages.


In [10]:
import numpy as np
import re
import json
import igraph as ig
from graphviz import Digraph

### **Helper Functions**

In [11]:
def isOperator(c):
    """Report whether ``c`` should be treated as an operator.

    A string counts as an operator when it is neither alphabetic nor made of
    digits, as judged by ``str.isalpha`` and ``str.isdigit``. Both of those
    return False for the empty string, so ``""`` is reported as an operator.
    """
    is_operand = c.isalpha() or c.isdigit()
    return not is_operand

# Function to get the priority of operators
def getPriority(c):
    """Return the binding strength of an arithmetic operator.

    ``+`` and ``-`` rank 1, ``*`` and ``/`` rank 2, ``^`` ranks 3.
    Anything else ranks 0.
    """
    if c in ('+', '-'):
        return 1
    if c in ('*', '/'):
        return 2
    return 3 if c == '^' else 0

### **Check Regex Validity**

In [19]:
# TODO: unsure which validator is correct — what if the regex needs a special character, for example?

def is_regex_valid(regex):
    """Check that ``regex`` only uses supported characters and balanced brackets.

    A regex is accepted when:
      1. every character is alphanumeric, a space, or one of the supported
         symbols ``| ( ) [ ] . ? * + - \\``;
      2. every ``(`` / ``[`` is closed, and no ``)`` / ``]`` appears before its
         matching opener.

    Returns:
        bool: True when both conditions hold, False otherwise.
    """
    # A single backslash must be listed as '\\' (one character); a
    # two-character string could never equal one character of the input.
    allowed_symbols = {'|', '(', ')', '[', ']', '.', '?', '*', '+', '-', '\\', ' '}
    open_parens = 0
    open_brackets = 0

    for char in regex:
        if not char.isalnum() and char not in allowed_symbols:
            return False

        if char == '(':
            open_parens += 1
        elif char == ')':
            open_parens -= 1
        elif char == '[':
            open_brackets += 1
        elif char == ']':
            open_brackets -= 1

        # A closer seen before its opener (e.g. ")(") can never be repaired
        # later, even though the final counts would be zero.
        if open_parens < 0 or open_brackets < 0:
            return False

    return open_parens == 0 and open_brackets == 0

def is_regex_validdd(regex):
    """Check validity by asking Python's own ``re`` engine to compile ``regex``.

    Returns:
        bool: True if ``re.compile`` accepts the pattern, False if it rejects it
        (bad syntax, a non-pattern argument, or a pattern too deep/large to compile).
    """
    try:
        re.compile(regex)
    except (re.error, TypeError, RecursionError, OverflowError):
        # Only compilation failures mean "invalid"; anything else, such as
        # KeyboardInterrupt, must propagate instead of being reported as False.
        return False
    return True
# Test: '/' and ';' are not among the characters is_regex_valid accepts,
# so this sample is expected to print False.
validity_check = is_regex_valid("[A-Zl]/ko;]")
print(validity_check)

False


### **Change regex to PostFix (Shunting-Yard)**

In [29]:
def _separate_class_members(regex):
    """Insert '|' between adjacent alphanumerics inside every ``[...]`` class."""
    pieces = []
    inside_class = False
    for idx, ch in enumerate(regex):
        pieces.append(ch)
        if ch == '[':
            inside_class = True
        elif ch == ']':
            inside_class = False
        elif (inside_class and ch.isalnum()
              and idx + 1 < len(regex) and regex[idx + 1].isalnum()):
            pieces.append('|')
    if inside_class:
        raise ValueError("unterminated character class: missing ']'")
    return ''.join(pieces)


def _expand_ranges(regex):
    """Rewrite every ``x-y`` as ``x|...|y`` (each code point from x to y)."""
    pieces = []
    idx = 0
    while idx < len(regex):
        ch = regex[idx]
        if ch != '-':
            pieces.append(ch)
            idx += 1
            continue
        if idx == 0 or idx + 1 >= len(regex):
            raise ValueError("'-' needs a character on both sides")
        first, last = regex[idx - 1], regex[idx + 1]
        if ord(first) > ord(last):
            raise ValueError("bad character range %s-%s" % (first, last))
        # `first` is already in the output; append the rest of the range.
        pieces.extend('|' + chr(code) for code in range(ord(first) + 1, ord(last) + 1))
        idx += 2  # skip the hyphen and the range's upper bound
    return ''.join(pieces)


def _insert_concatenation(regex):
    """Make concatenation explicit by inserting '.' between adjacent operands."""
    ends_operand = {')', '*', '+', '?'}
    cannot_start_operand = {'*', '+', '?', '.', '|', ')'}
    pieces = []
    for idx, ch in enumerate(regex):
        pieces.append(ch)
        if idx + 1 == len(regex):
            break
        following = regex[idx + 1]
        if ch in ends_operand:
            needs_dot = following not in cannot_start_operand
        else:
            needs_dot = ch.isalnum() and (following.isalnum() or following == '(')
        if needs_dot:
            pieces.append('.')
    return ''.join(pieces)


def _shunting_yard(regex):
    """Convert an infix regex with explicit '.' concatenation to postfix."""
    binary_precedence = {'.': 2, '|': 1}
    postfix_unary = {'*', '+', '?'}
    output, stack = [], []
    for ch in regex:
        if ch == '(':
            stack.append(ch)
        elif ch == ')':
            while stack and stack[-1] != '(':
                output.append(stack.pop())
            if not stack:
                raise ValueError("unbalanced ')' in regex")
            stack.pop()  # discard the matching '('
        elif ch in postfix_unary:
            # A postfix operator applies to the operand just emitted, so it
            # goes straight to the output; this keeps stacked operators such
            # as 'a+*' in their written order.
            output.append(ch)
        elif ch in binary_precedence:
            while (stack and stack[-1] != '('
                   and binary_precedence[ch] <= binary_precedence[stack[-1]]):
                output.append(stack.pop())
            stack.append(ch)
        else:
            output.append(ch)
    while stack:
        top = stack.pop()
        if top == '(':
            raise ValueError("unbalanced '(' in regex")
        output.append(top)
    return ''.join(output)


def regex_to_postfix(regex):
    """Convert an infix regular expression to postfix (shunting-yard).

    Steps:
      1. Character classes become alternations: ``[abc]`` -> ``(a|b|c)``.
      2. Ranges are expanded: ``a-c`` -> ``a|b|c``.
      3. Implicit concatenation is made explicit with ``.``.
      4. The shunting-yard algorithm emits postfix. ``*``, ``+`` and ``?`` bind
         tighter than ``.``, which binds tighter than ``|``.

    Note that ``.`` always means concatenation here, never "any character".

    Args:
        regex (str): the infix regular expression.

    Returns:
        str: the postfix form, e.g. ``"[a-c]"`` -> ``"ab|c|"``.

    Raises:
        ValueError: on an unterminated ``[``, unbalanced parentheses, a ``-``
            without a character on both sides, or a reversed range.
    """
    with_alternations = _separate_class_members(regex)
    # Once the members are separated by '|', a class is just a group.
    grouped = with_alternations.replace('[', '(').replace(']', ')')
    expanded = _expand_ranges(grouped)
    explicit = _insert_concatenation(expanded)
    return _shunting_yard(explicit)

# Demo: the converter defined above is named regex_to_postfix; the old
# shunt_yard name no longer exists and raises NameError on a fresh kernel.
print(regex_to_postfix("[a-c]"))

ab|c|


### **Make a Class for NFA**

In [None]:
# Each state has a list of transitions and epsilon transitions
# We have 2 types of states accepting and non accepting
class State:
    """A single NFA state.

    Attributes:
        id: sequential number taken from the class-wide ``State.id`` counter.
        transitions: list of ``(target_state, symbol)`` tuples.
        epsilon_transitions: list of target states reachable without input.
        accepting: True when this state is an accepting state.
        start: True when this state is a start state.
    """

    # Class-wide counter; each new State takes the current value as its own
    # id and then bumps the counter.
    id = 0

    def __init__(self):
        self.id = State.id
        State.id += 1
        self.transitions = []
        self.accepting = False
        self.start = False

        # Open question from the original author: should epsilon moves be
        # stored as ordinary transitions instead of a separate list?
        self.epsilon_transitions = []

    def __str__(self):
        """Describe the state using state ids rather than object addresses."""
        labelled = [(target.id, symbol) for target, symbol in self.transitions]
        silent = [target.id for target in self.epsilon_transitions]
        output = "State: " + str(self.id) + "\n"
        output += "  Transitions: " + str(labelled) + "\n"
        output += "  Epsilon transitions: " + str(silent) + "\n"
        output += "  Accepting? " + str(self.accepting) + "\n"
        output += "  Start? " + str(self.start) + "\n"
        return output

In [None]:
# NFA class
	# Consists of
		# 1. States (each has its transitions and epsilon transitions)
		# 2. Start State
		# 3. Final State			 

	# Operations that can be done on them
		# 1. Concatenation
		# 2. Union
		# 3. Kleene Star
		# 4. Positive Closure

class NFA:
    """An NFA fragment with one start state and one final state.

    Fragments are combined with Thompson's construction. ``concatenate``,
    ``union``, ``kleene_star``, ``positive_closure`` and ``optional`` turn the
    NFA they are called on into the combined automaton and return it;
    ``base_nfa`` returns a brand-new NFA and leaves ``self`` untouched.
    """

    def __init__(self, start_state=None, final_state=None, states=None) -> None:
        # A fresh list per instance: a `[]` default would be one list shared by
        # every NFA created without an explicit `states` argument.
        self.states = [] if states is None else states
        self.start_state = start_state
        self.final_state = final_state

    # Low-level helpers; the original author was unsure whether they are needed.
    def add_state(self, state):
        """Append ``state`` to this NFA's state list."""
        self.states.append(state)

    def add_transition(self, from_state, to_state, transition):
        """Add a transition labelled ``transition`` from ``from_state`` to ``to_state``."""
        from_state.transitions.append((to_state, transition))

    def add_epsilon_transition(self, from_state, to_state):
        """Add an epsilon transition from ``from_state`` to ``to_state``."""
        from_state.epsilon_transitions.append(to_state)

    def _adopt(self, start_state, final_state, states):
        """Make this NFA the automaton described by the arguments and return it."""
        start_state.start = True
        final_state.accepting = True
        self.start_state = start_state
        self.final_state = final_state
        self.states = states
        return self

    def base_nfa(self, symbol):
        """Return a new two-state NFA for ``symbol`` (``"EPSILON"`` = empty move)."""
        start_state = State()
        accept_state = State()
        if symbol == "EPSILON":
            start_state.epsilon_transitions.append(accept_state)
        else:
            start_state.transitions.append((accept_state, symbol))
        return NFA()._adopt(start_state, accept_state, [start_state, accept_state])

    def concatenate(self, nfa1, nfa2):
        """Become the NFA for ``nfa1`` followed by ``nfa2`` (A.B)."""
        nfa1.final_state.accepting = False
        nfa2.start_state.start = False
        nfa1.final_state.epsilon_transitions.append(nfa2.start_state)
        return self._adopt(nfa1.start_state, nfa2.final_state,
                           nfa1.states + nfa2.states)

    def union(self, nfa1, nfa2):
        """Become the NFA for ``nfa1`` or ``nfa2`` (A|B)."""
        start_state = State()
        accept_state = State()
        for operand in (nfa1, nfa2):
            # The operands' own start/final states become interior states.
            operand.start_state.start = False
            operand.final_state.accepting = False
            start_state.epsilon_transitions.append(operand.start_state)
            operand.final_state.epsilon_transitions.append(accept_state)
        return self._adopt(start_state, accept_state,
                           [start_state] + nfa1.states + nfa2.states + [accept_state])

    def _closure(self, nfa, skippable, repeatable):
        """Wrap ``nfa`` in a new start/accept pair; shared by ``*``, ``+`` and ``?``."""
        start_state = State()
        accept_state = State()
        nfa.start_state.start = False
        nfa.final_state.accepting = False
        start_state.epsilon_transitions.append(nfa.start_state)
        if repeatable:
            # Loop back so the operand can match again.
            nfa.final_state.epsilon_transitions.append(nfa.start_state)
        nfa.final_state.epsilon_transitions.append(accept_state)
        if skippable:
            # Bypass the operand entirely (zero occurrences).
            start_state.epsilon_transitions.append(accept_state)
        return self._adopt(start_state, accept_state,
                           [start_state] + nfa.states + [accept_state])

    def kleene_star(self, nfa):
        """Become the NFA for zero or more repetitions of ``nfa`` (A*)."""
        return self._closure(nfa, skippable=True, repeatable=True)

    def positive_closure(self, nfa):
        """Become the NFA for one or more repetitions of ``nfa`` (A+)."""
        return self._closure(nfa, skippable=False, repeatable=True)

    def optional(self, nfa):
        """Become the NFA for zero or one occurrence of ``nfa`` (A?)."""
        return self._closure(nfa, skippable=True, repeatable=False)

    def thompson(self, regex):
        """Convert a postfix regular expression to an NFA.

        Every operator result is built on a fresh ``NFA`` object, so fragments
        waiting on the stack are never overwritten by a later operation.
        ``self`` is not modified.

        Args:
            regex: postfix regex using ``.`` (concatenation), ``|``, ``*``,
                ``+`` and ``?``; any other character is a literal symbol.

        Returns:
            NFA: the automaton for the whole expression.

        Raises:
            ValueError: if an operator lacks operands or operands are left over.
        """
        nfa_stack = []

        def pop_operand(operator):
            if not nfa_stack:
                raise ValueError("operator %r is missing an operand" % operator)
            return nfa_stack.pop()

        for symbol in regex:
            if symbol == '.':  # Concatenation
                nfa2 = pop_operand(symbol)
                nfa1 = pop_operand(symbol)
                nfa_stack.append(NFA().concatenate(nfa1, nfa2))
            elif symbol == '|':  # Union
                nfa2 = pop_operand(symbol)
                nfa1 = pop_operand(symbol)
                nfa_stack.append(NFA().union(nfa1, nfa2))
            elif symbol == '*':  # Kleene star
                nfa_stack.append(NFA().kleene_star(pop_operand(symbol)))
            elif symbol == '+':  # Positive closure
                nfa_stack.append(NFA().positive_closure(pop_operand(symbol)))
            elif symbol == '?':  # Zero or one
                nfa_stack.append(NFA().optional(pop_operand(symbol)))
            else:  # Literal symbol (letters, digits, ...)
                nfa_stack.append(self.base_nfa(symbol))

        if len(nfa_stack) != 1:
            raise ValueError("postfix regex must reduce to exactly one NFA, got %d"
                             % len(nfa_stack))
        return nfa_stack.pop()

    def __str__(self):
        """List the start/final state ids followed by every state in ``states``."""
        start_id = None if self.start_state is None else self.start_state.id
        final_id = None if self.final_state is None else self.final_state.id
        output = "Start state: " + str(start_id) + "\n"
        output += "Final state: " + str(final_id) + "\n"
        output += "States:\n"
        for state in self.states:
            output += str(state) + "\n"
        return output

# base_nfa returns a new NFA instead of filling in the one it is called on,
# so keep the returned object and print that.
nfa = NFA().base_nfa("a")
print(nfa)

### **Write Result to JSON**

In [None]:
def _nfa_to_dict(nfa):
    """Flatten an NFA-like object into plain dicts by walking from its start state."""
    states = {}
    seen = set()
    pending = [nfa.start_state]
    while pending:
        state = pending.pop()
        if id(state) in seen:
            continue
        seen.add(id(state))
        by_symbol = {}
        for target, symbol in state.transitions:
            by_symbol.setdefault(str(symbol), []).append("S" + str(target.id))
            pending.append(target)
        pending.extend(state.epsilon_transitions)
        states["S" + str(state.id)] = {
            "accepting": state is nfa.final_state,
            "transitions": by_symbol,
            "epsilon": ["S" + str(target.id) for target in state.epsilon_transitions],
        }
    return {"startingState": "S" + str(nfa.start_state.id), "states": states}


def write_json(nfa, filename = "fsm.json"):
    """Write ``nfa`` to ``filename`` as indented JSON.

    Args:
        nfa: either an object with ``start_state`` / ``final_state`` attributes
            (converted to ``{"startingState": ..., "states": {...}}`` with one
            entry per reachable state, named ``"S<id>"``), or any value the
            ``json`` module can already serialize, which is written as-is.
        filename: path of the file to create or overwrite.
    """
    payload = _nfa_to_dict(nfa) if hasattr(nfa, "start_state") else nfa
    with open(filename, "w") as f:
        # Serialize once: dumping the output of json.dumps again would store a
        # quoted, escaped string instead of a JSON object.
        json.dump(payload, f, indent=4)
    

### **Create Graph**

In [None]:
def display_graph(nfa, filename):
    """Placeholder for rendering ``nfa`` to ``filename``; not implemented yet.

    Currently does nothing and returns None.
    """
    return None

## **MAIN**

In [None]:
# 1. Get user input
regex = input("Enter a regex: ")

# 2. Check if the regex is valid; only a valid regex is converted, otherwise
#    the converter would run on input that was just rejected.
if not is_regex_valid(regex):
	print("Invalid regex")
else:
	# 3. Turn regex to postfix
	postfixRegex = regex_to_postfix(regex)

	# 4. TODO: implement Thompson's algorithm step
	# fsm = NFA().thompson(postfixRegex)

	# 5. TODO: write the FSM to a file
	# write_json(fsm)


Start state: 4
Final state: State: 2079234431184
  Transitions: []
  Epsilon transitions: []
  Accepting: True

States:

None
