### Define Types

In [3]:
import random

# Define types for different parts of a Cypher query
class NodeType:
    def __init__(self, label):
        self.label = label

    def __str__(self):
        return f"({self.label})"

class RelationshipType:
    def __init__(self, from_node, to_node, relation, min_hops=None, max_hops=None):
        self.from_node = from_node
        self.to_node = to_node
        self.relation = relation
        self.min_hops = min_hops
        self.max_hops = max_hops

    def __str__(self):
        # Handling different hop scenarios
        if self.min_hops is not None and self.max_hops is not None:
            hops = f"*{self.min_hops}..{self.max_hops}"
        elif self.min_hops is not None:
            hops = f"*{self.min_hops}.."
        elif self.max_hops is not None:
            hops = f"*..{self.max_hops}"
        else:
            hops = ""
        
        return f"({self.from_node.label})-[:{self.relation}{hops}]->({self.to_node.label})"

class MatchType:
    def __init__(self, match_object):
        self.match_object = match_object
    def __str__(self):
        return f"MATCH {self.match_object}"

class ConditionType: #Next: Specify this
    def __init__(self, condition):
        self.condition = condition

    def __str__(self):
        return f"WHERE {self.condition}"

class ReturnType:
    def __init__(self, return_value):
        self.return_value = return_value

    def __str__(self):
        return f"RETURN {self.return_value}"

# Cypher Query class to handle strongly-typed parts
class CypherQuery:
    def __init__(self, parts=None):
        if parts is None:
            parts = [NodeType('n'), ReturnType('n')]
        self.parts = parts
    
    def __str__(self):
        return " ".join(str(part) for part in self.parts)

    def clone(self):
        return CypherQuery(parts=self.parts.copy())

# Mutation that respects types
# def mutate(query):
#     if len(query.parts) > 1 and random.random() < 0.5:
#         # Simple mutation: Modify a condition or add a new type-specific return
#         index = random.randint(0, len(query.parts) - 1)
#         if isinstance(query.parts[index], ConditionType):
#             query.parts[index] = ConditionType("n.age < 25")
#         elif isinstance(query.parts[index], ReturnType):
#             query.parts[index] = ReturnType("count(n)")
#     return query

# Crossover that respects types
def crossover(query1, query2):
    # Find points that have compatible types
    point1 = random.randint(1, len(query1.parts) - 1)
    point2 = random.randint(1, len(query2.parts) - 1)
    if type(query1.parts[point1]) == type(query2.parts[point2]):
        new_parts1 = query1.parts[:point1] + query2.parts[point2:]
        new_parts2 = query2.parts[:point2] + query1.parts[point1:]
        return CypherQuery(new_parts1), CypherQuery(new_parts2)
    return query1, query2



In [4]:
# Example population initialization
def initialize_population(size):
    node = NodeType('Person')
    condition = ConditionType('p.age > 30')
    ret = ReturnType('p')
    templates = [
        [node, condition, ret],
        [NodeType('Book'), ReturnType('b.title')],
        [node, ReturnType('p.name')]
    ]
    population = []
    for _ in range(size):
        parts = random.choice(templates)
        population.append(CypherQuery(parts=parts.copy()))
    return population

# Simulate generations
population = initialize_population(10)
for _ in range(10):  # Number of generations
    new_population = []
    while len(new_population) < len(population):
        parent1, parent2 = random.sample(population, 2)
        child1, child2 = crossover(parent1.clone(), parent2.clone())
        new_population.append(child1)
        new_population.append(child2)
    population = new_population
    for query in population:
        print(query)


(Person) WHERE p.age > 30 RETURN p
(Book) RETURN b.title
(Person) WHERE p.age > 30 RETURN p
(Person) WHERE p.age > 30 RETURN p
(Book) RETURN p.name
(Person) RETURN b.title
(Book) RETURN b.title
(Book) RETURN b.title
(Person) WHERE p.age > 30 RETURN p.name
(Person) RETURN p
(Person) WHERE p.age > 30 RETURN p
(Person) WHERE p.age > 30 RETURN p
(Book) RETURN p
(Person) RETURN b.title
(Book) RETURN p.name
(Person) WHERE p.age > 30 RETURN b.title
(Book) RETURN b.title
(Person) WHERE p.age > 30 RETURN p.name
(Person) WHERE p.age > 30 RETURN p
(Person) WHERE p.age > 30 RETURN p.name
(Book) RETURN p.name
(Person) WHERE p.age > 30 RETURN p
(Person) RETURN p
(Person) WHERE p.age > 30 RETURN b.title
(Person) WHERE p.age > 30 RETURN b.title
(Book) RETURN p.name
(Person) WHERE p.age > 30 RETURN b.title
(Person) RETURN p
(Person) WHERE p.age > 30 RETURN p
(Book) RETURN b.title
(Book) RETURN b.title
(Person) WHERE p.age > 30 RETURN b.title
(Person) RETURN p
(Person) WHERE p.age > 30 RETURN p
(Person)

In [81]:
import re
import random
import json
import pandas as pd

# Open and load the graph schema json file
with open('schema.json', 'r',encoding='utf-8-sig') as file:
    schema = json.load(file)
    
# Extract nodes and edges from the schema
#labels = [node['labels'][0] for node in schema[0]['nodes']]
relationships = [relationship['type'] for relationship in schema[0]['relationships']]


# Get detailed properties from the csv file
common_names = pd.read_csv('memgraph-query-results-export.csv', index_col=False)
common_names.head()

Unnamed: 0,label,commonName
0,Drug,Basiliximab
1,Drug,Muromonab
2,Drug,Trastuzumab
3,Drug,Rituximab
4,Drug,Ibritumomab tiuxetan


In [69]:
def group_labels(df, label_col, name_col):
    grouped = df.groupby(label_col)[name_col].apply(list).to_dict()
    return grouped

# Applying the function
grouped_names = group_labels(common_names, 'label', 'commonName')



In [78]:
list(grouped_names.keys())

['BiologicalProcess',
 'BodyPart',
 'CellularComponent',
 'Disease',
 'Drug',
 'DrugClass',
 'Gene',
 'MolecularFunction',
 'Pathway',
 'Symptom']

In [139]:
import random


class Node:
    _max_depth = 3  # Default maximum depth

    def __init__(self, value, children=None, depth=0):
        self.value = value
        self.children = children if children is not None else []
        self.depth = depth

    @classmethod
    def set_max_depth(cls, depth):
        cls._max_depth = depth
        
    def __str__(self):
        if not self.children:
            return str(self.value)
        # Exclude parentheses for MATCH node
        if self.value == 'MATCH':
            return f"{self.value} {' '.join(str(child) for child in self.children)}"
        if self.value == '-':  # Directly join relationship parts without extra characters
            return ' '.join(str(child) for child in self.children)
        return f"{self.value}({', '.join(str(child) for child in self.children)})"


# Define labels and properties
labels = list(grouped_names.keys())
property_labels= ["commonName"] #will be generalized later


def depth_control(func):
    def wrapper(depth, *args, **kwargs):
        if depth >= Node._max_depth:
            return None
        return func(depth + 1, *args, **kwargs)
    return wrapper

@depth_control
def add_node(depth):
    """ add a basic node with random label """
    label = random.choice(labels)
    return Node(f"({label.lower()}: {label})", depth=depth)

@depth_control
def add_relationship(depth):
    """ Randomly generate a relationship between two nodes """
    rel_type = random.choice(relationships)
    if depth>=3 and random.random() < 0.5:
        direction = "<-"
    else:
        direction = "->" 
    node1 = add_node(depth)  
    node2 = add_node(depth)  
    relationship = Node(f"{direction} [:{rel_type}] {direction}", depth=depth)
    return Node("-", [node1, relationship, node2], depth=depth)

@depth_control
def add_condition(depth):
    """ Generate a random WHERE condition """
    node_label = random.choice(labels)
    label_lower = node_label.lower()
    property_label = random.choice(property_labels)
    possible_properties = grouped_names[node_label]
    sample_prop_type = possible_properties[0]
    
    value = random.randint() if sample_prop_type is int else random.choice(possible_properties)
    operator = random.choice([">", "<", "=", "<=", ">="]) if sample_prop_type is int else '='
 
    return Node("WHERE", [Node(f"{label_lower}.{property_label} {operator} {value}")],depth=depth)

@depth_control
def add_return(depth, k):
    choices = random.sample(labels, k)
    nodes = [Node(f"{choice.lower()}: {choice}", depth=depth) for choice in choices]
    if nodes:  # Check if the list is not empty
        return Node("RETURN", nodes, depth)
    return None


def generate_random_query():
    depth = 0
    parts = []
    while True:
        part = add_relationship()
        depth += 1
        if part is None:
            break
        parts.append(part)
    if random.random() > 0.5:  # Optionally add a WHERE clause
        parts.append(add_condition())
        depth += 1
    parts.append(add_return())
    return Node("MATCH",parts, depth=depth)

# Set maximum depth dynamically
Node.set_max_depth(2)
# Generate and print some random queries
for _ in range(5):
    print(generate_random_query())


TypeError: wrapper() missing 1 required positional argument: 'depth'

### Current one

In [146]:
import random

class Clause:
    def __init__(self, value, children=None, depth=0):
        self.value = value
        self.children = children if children is not None else []
        self.depth = depth

    def __str__(self):
        if not self.children:
            return str(self.value)
        return f"{self.value} {' '.join(str(child) for child in self.children)}"

class Node:
    _max_depth = 3  # Default maximum depth

    def __init__(self, value, children=None, depth=0):
        self.value = value
        self.children = children if children is not None else []
        self.depth = depth

    @classmethod
    def set_max_depth(cls, depth):
        cls._max_depth = depth
        
    def __str__(self):
        if not self.children:
            return str(self.value)
        if self.value == 'MATCH':
            return f"{self.value} {' '.join(str(child) for child in self.children)}"
        if self.value == '-':  
            return ' '.join(str(child) for child in self.children)
        return f"{self.value}({', '.join(str(child) for child in self.children)})"



# Define labels and properties
labels = list(grouped_names.keys())
property_labels= ["commonName"]

def depth_control(func):
    def wrapper(depth, *args, **kwargs):
        if depth >= Node._max_depth:
            return None
        return func(depth + 1, *args, **kwargs)
    return wrapper

@depth_control
def add_node(depth):
    label = random.choice(labels)
    return Node(f"({label.lower()}: {label})", depth=depth)

@depth_control
def add_relationship(depth):
    """ Randomly generate a relationship between two nodes """
    rel_type = random.choice(relationships)
    if depth>=3 and random.random() < 0.5:
        direction1 = "<-"
        direction2 = "-"
    else:
        direction1 = "-" 
        direction2 = "->"
    node1 = add_node(depth)  
    node2 = add_node(depth)  
    relationship = Node(f"{direction1} [:{rel_type}] {direction2}", depth=depth)
    return Node("-", [relationship], depth=depth)
    # return Clause("-", [relationship], depth=depth)

@depth_control
def add_condition(depth):
    node_label = random.choice(labels)
    label_lower = node_label.lower()
    property_label = random.choice(property_labels)
    possible_properties = grouped_names[node_label]
    sample_prop_type = possible_properties[0]
    
    value = random.randint(20, 50) if isinstance(sample_prop_type, int) else random.choice(possible_properties)
    operator = random.choice([">", "<", "=", "<=", ">="]) if isinstance(sample_prop_type, int) else '='
    return Node("WHERE", [Node(f"{label_lower}.{property_label} {operator} {value}", [], depth)], depth=depth)
    # return Clause("WHERE", [Node(f"{label_lower}.{property_label} {operator} {value}", [], depth)], depth=depth)

def add_return(depth, k):
    choices = random.sample(labels, k)
    nodes = [add_node(depth) for choice in choices if add_node(depth)]
    if nodes:  # Check if the list is not empty
        return Node("RETURN", nodes, depth)
    return None

def alternate_functions(depth, flag):
    if flag:
        return add_node(depth), not flag
    else:
        return add_relationship(depth), not flag

def generate_random_query():
    depth = 0
    parts = []
    flag = True  # Ensure that we start with producing nodes
    return_num = 1 #default returns ONE random label

    # Keep adding nodes and relationships while depth is within limit
    while depth < Node._max_depth-2:
        part, flag = alternate_functions(depth, flag)
        if part is None:  # Break loop if no part can be added
            break
        parts.append(part)
        depth += 1  # Increment depth only when a part is successfully added

    # Optionally add a WHERE clause if depth is still under max_depth
    if depth < Node._max_depth and random.random() > 0.5:
        condition = add_condition(depth)
        if condition:
            parts.append(condition)
            depth += 1

    # Add a RETURN clause if depth is still under max_depth
    if depth < Node._max_depth:
        ret = add_return(depth, return_num)
        if ret:
            parts.append(ret)

    # Create the MATCH node only if there are parts to include
    return Node("MATCH", parts, depth=0) if parts else None
    # return Clause("MATCH", parts) if parts else None

# Set maximum depth dynamically
Node.set_max_depth(7)

# Generate and print some random queries
for _ in range(5):
    print(generate_random_query())


MATCH (drug: Drug) - [:DISEASEASSOCIATESWITHDISEASE] -> (cellularcomponent: CellularComponent) - [:GENEREGULATESGENE] -> (drug: Drug) WHERE(drug.commonName = N-Hydroxy-4-[(4-Methoxylphenyl)Sulfonyl]-2,2-Dimethyl-Hexahydro-1,4-Thiazepine-3(S)-Carboxamide) RETURN((cellularcomponent: CellularComponent))
MATCH (gene: Gene) - [:DISEASELOCALIZESTOANATOMY] -> (cellularcomponent: CellularComponent) - [:GENEHASMOLECULARFUNCTION] -> (drug: Drug) RETURN((biologicalprocess: BiologicalProcess))
MATCH (drug: Drug) - [:GENEASSOCIATESWITHDISEASE] -> (biologicalprocess: BiologicalProcess) <- [:GENEPARTICIPATESINBIOLOGICALPROCESS] - (drugclass: DrugClass) RETURN((molecularfunction: MolecularFunction))
MATCH (molecularfunction: MolecularFunction) - [:GENEHASMOLECULARFUNCTION] -> (molecularfunction: MolecularFunction) <- [:CHEMICALBINDSGENE] - (bodypart: BodyPart) WHERE(molecularfunction.commonName = transmitter-gated ion channel activity) RETURN((symptom: Symptom))
MATCH (biologicalprocess: BiologicalPro

### Retry

In [148]:
import random
class Query:
    _max_depth = 3  # Default maximum depth
    def __init__(self, depth=0):
        self.depth = depth
    
    @classmethod
    def set_max_depth(cls, depth):
        cls._max_depth = depth
    
    def depth_control(func):
        def wrapper(depth, *args, **kwargs):
            if depth >= Node._max_depth:
                return None
            return func(depth + 1, *args, **kwargs)
        return wrapper


class Clause(Query):
    def __init__(self, depth, value, children=None):
        super(Query,self).__init__(depth) 
        self.value = value
        self.children = children if children is not None else []

    def __str__(self):
        if not self.children:
            return str(self.value)
        return f"{self.value} {' '.join(str(child) for child in self.children)}"
    

    
class NodeRel(Query): #responsible for nodes and relationships
    def __init__(self, depth, value, children=None):
        super(Query,self).__init__(depth) 
        self.value = value
        self.children = children if children is not None else []
        
    def __str__(self):
        if not self.children:
            return str(self.value)
        # if self.value == 'MATCH':
        #     return f"{self.value} {' '.join(str(child) for child in self.children)}"
        if self.value == '-':  
            return ' '.join(str(child) for child in self.children)
        return f"{self.value}({', '.join(str(child) for child in self.children)})"
    
    @Query.depth_control
    def add_node(depth):
        label = random.choice(labels)
        return Node(f"({label.lower()}: {label})", depth=depth)
    
    @Query.depth_control
    def add_relationship(depth):
        """ Randomly generate a relationship between two nodes """
        rel_type = random.choice(relationships)
        if depth>=3 and random.random() < 0.5:
            direction1 = "<-"
            direction2 = "-"
        else:
            direction1 = "-" 
            direction2 = "->"
        node1 = add_node(depth)  
        node2 = add_node(depth)  
        relationship = Node(f"{direction1} [:{rel_type}] {direction2}", depth=depth)
        return Node("-", [relationship], depth=depth)
        # return Clause("-", [relationship], depth=depth)

In [None]:
# Define labels and properties
labels = list(grouped_names.keys())
property_labels= ["commonName"]


@Query.depth_control
def add_condition(depth):
    node_label = random.choice(labels)
    label_lower = node_label.lower()
    property_label = random.choice(property_labels)
    possible_properties = grouped_names[node_label]
    sample_prop_type = possible_properties[0]
    
    value = random.randint(20, 50) if isinstance(sample_prop_type, int) else random.choice(possible_properties)
    operator = random.choice([">", "<", "=", "<=", ">="]) if isinstance(sample_prop_type, int) else '='
    return Node("WHERE", [Node(f"{label_lower}.{property_label} {operator} {value}", [], depth)], depth=depth)
    # return Clause("WHERE", [Node(f"{label_lower}.{property_label} {operator} {value}", [], depth)], depth=depth)

def add_return(depth, k):
    choices = random.sample(labels, k)
    nodes = [add_node(depth) for choice in choices if add_node(depth)]
    if nodes:  # Check if the list is not empty
        return Node("RETURN", nodes, depth)
    return None

def alternate_functions(depth, flag):
    if flag:
        return add_node(depth), not flag
    else:
        return add_relationship(depth), not flag

### To-Do
- Redesign logics for relationship type

    currently can think of: 

        -switchers: type and direction; within type, node and relationship

        - to be able to use strongly-typed GP, maybe should not make it multi-typed
            - relationship [] creation fn
            - direction_allowed_after_relationship fn: "-" or "->"
            - direction_allowed_before_relationship fn: only "-" I think
            - same for nodes
        
        - After generating working queries, try to connect to memgraph to filter out workable queries+results returned
        
        - After getting this whole workflow, start designing fitness function and hence GP that optimizes the fitness (diversity, complexity coverage, etc)




In [114]:
# try to alternate between node() and relationship() 
flag = True

def alternate_functions():
    global flag
    if flag:
        add_node()
    else:
        add_relationship()
    flag = not flag

# Example usage:
for _ in range(10):
    alternate_functions()



# def add_direction_before_node(symbol, node):
#     return f"{symbol} {node}"

# def add_direction_after_node(node, symbol):
#     return f"{node} {symbol}"

# def add_direction_before_relationship(symbol, relationship):
#     return f"{symbol} {relationship}"

# def add_direction_after_relationship(relationship, symbol):
#     return f"{relationship} {symbol}"


TypeError: wrapper() missing 1 required positional argument: 'depth'

_________

TypeError: depth_control() missing 1 required positional argument: 'func'

In [151]:
# Test 
def func():
	def inner():
		print(123)
	print(inner) #<function func.<locals>.inner at 内存地址 -->意味着inner无括号是一个func内部的局部函数
	return inner #返回的是一个函数，此时把函数当成变量返回

b1=func() #--> b1是func内部的inner; 且其内存地址也一样 => 这样就可以间接把函数变为全局
b1() #-->可以调用inner


SyntaxError: invalid syntax (1787651148.py, line 12)

In [160]:
class DepthManager:
    max_depth = 3  # You can modify this as needed
    
    def __init__(self):
        self.depth = 0  # Starting depth
    
    def depth_control(self, func):
        def wrapper(*args, **kwargs):
            if self.depth > self.max_depth:
                print("Max depth reached")
                return None
            result = func(*args, **kwargs)
            self.depth += 1  # Increment depth after function call
            return result
        return wrapper
    

manager = DepthManager()

# Use the depth_control decorator from the manager instance
@manager.depth_control
def test_func():
    print("function output")


# Calling the decorated function
test_func()

# Accessing the updated depth
print(f"Current depth after call: {manager.depth}")

function output
Current depth after call: 1
