**RI**

This is a subgraph matching algorithm that comes from the field of bioinformatics, and its idea is not very complex.

The authors do not try to reduce the search space by pruning the candidate sets. Instead, they first build an **optimal search path** (a matching order over the query nodes), and then search for the subgraph in the data graph by following it.

At the beginning of generating the search path, they select the node with the **maximum degree** in the query graph as the starting node (this is reasonable, because a node with maximum degree usually has the fewest matching nodes in the data graph, which reduces the search space). Then, to generate the rest of the search path, they use **three conditions** to decide which node should come next. These three conditions are based on **how many hops** separate the current unordered node from the nodes already in the path.

The first one is how many nodes currently in the path are neighbors of the unordered node (**one hop**). Since the nodes are matched one by one in path order, choosing the node with the most already-matched neighbors reduces the next search space the most.

The second one is how many nodes currently in the path share at least one neighbor with the unordered node, where that shared neighbor is itself not yet in the path (**two hops**).

The third one is how many neighbors of the unordered node are themselves unordered and are not neighbors of any node in the path (**three or more hops**).

These three conditions describe the relation between the current unordered node and the nodes in the path. The closer they are, the more the search space can be reduced.

In [1]:
!git clone https://github.com/RapidsAtHKUST/SubgraphMatching.git

Cloning into 'SubgraphMatching'...
remote: Enumerating objects: 278, done.[K
remote: Counting objects: 100% (278/278), done.[K
remote: Compressing objects: 100% (270/270), done.[K
remote: Total 278 (delta 9), reused 260 (delta 3), pack-reused 0[K
Receiving objects: 100% (278/278), 2.51 MiB | 11.95 MiB/s, done.
Resolving deltas: 100% (9/9), done.


In [2]:
from collections import defaultdict

In [3]:
class graph:
  """A labelled graph together with the scratch state used while matching.

  Attributes set from the constructor arguments (stored by reference):
    graphid      identifier of the graph (the loader passes the file name)
    node2label   mapping node -> label
    node2degree  mapping node -> degree
    edges        mapping node -> collection of adjacent nodes

  Derived / scratch attributes:
    label2node    mapping label -> set of nodes carrying that label
    candidateset  defaultdict(set), cleared by reset()
    phi           list holding the matching order, cleared by reset()
    phiparent     dict node -> parent in the matching order, cleared by reset()
  """

  def __init__(self, graphid, node2label, node2degree, edges):
    self.graphid = graphid
    self.node2label = node2label
    self.node2degree = node2degree
    self.edges = edges

    # Inverted label index, built once because labels never change here.
    self.label2node = defaultdict(set)
    for node, label in node2label.items():
      self.label2node[label].add(node)

    # The per-run state starts out exactly as reset() leaves it.
    self.reset()

  def reset(self):
    """Clear the per-run matching state; the graph structure is kept."""
    self.candidateset = defaultdict(set)
    self.phi = []
    self.phiparent = {}

In [4]:
def get_graph(filepath, filename):
  """Parse one graph file and return it as a ``graph`` object.

  The file is read line by line as whitespace-separated fields; the first
  field of every line is ignored:
    * header line:  <tag> <number of nodes> <number of edges>
    * node lines:   <tag> <node id> <label> <degree>
    * edge lines:   <tag> <node 1> <node 2>
  All ids, labels and degrees are converted to ``int``. Every edge is stored
  in both directions, so the adjacency map is symmetric.

  Args:
    filepath: path of the file to read.
    filename: value stored as the ``graphid`` of the returned graph.

  Returns:
    A ``graph`` built from the parsed labels, degrees and adjacency sets.

  Raises:
    OSError: if the file cannot be opened.
    ValueError: if a line does not have the expected number of fields or a
      field is not an integer.
  """
  node2label = {}
  node2degree = {}
  edges = defaultdict(set)

  # The context manager closes the file even when a malformed line raises.
  with open(filepath, "r", encoding="utf-8") as f:
    _, nodenum, edgenum = f.readline().split()

    for _ in range(int(nodenum)):
      _, nodeid, nodelabel, nodedegree = f.readline().split()
      node = int(nodeid)
      node2label[node] = int(nodelabel)
      node2degree[node] = int(nodedegree)

    for _ in range(int(edgenum)):
      _, node1, node2 = f.readline().split()
      a, b = int(node1), int(node2)
      edges[a].add(b)
      edges[b].add(a)

  return graph(filename, node2label, node2degree, edges)

In [5]:
# Counters kept for the `global` declarations in get_graph; nothing visible
# in this notebook updates them.
qcount = 0
gcount = 0

import os

# Load every query graph from the cloned repository's test directory.
qs = []
qdir = "SubgraphMatching/test/query_graph"
for f in os.listdir(qdir):
  filepath = os.path.join(qdir, f)
  qs.append(get_graph(filepath, f))

# Load every data graph the same way.
gs = []
gdir = "SubgraphMatching/test/data_graph"
for f in os.listdir(gdir):
  filepath = os.path.join(gdir, f)
  gs.append(get_graph(filepath, f))

print(len(qs))
print(len(gs))

# Read the expected results; the context manager closes the file even if
# reading fails.
with open("SubgraphMatching/test/expected_output.res", "r", encoding="utf-8") as f:
  lines = f.readlines()

# Each line is "<query name>:<expected number of matches>"; the key gets the
# ".graph" suffix so that it equals the query file name used as graphid.
expects = {}
for line in lines:
  if not line.strip():
    continue  # tolerate blank lines, e.g. a trailing newline at end of file
  name, times = line.strip().split(":")
  expects[name + ".graph"] = int(times)
print(len(expects))

200
1
200


In [6]:
def RI_MOG(q):
  """Compute the RI matching order of query graph ``q``.

  The order is appended to ``q.phi`` and, for every ordered node,
  ``q.phiparent`` receives the first node of the order that is adjacent to
  it (``-1`` for the start node and for any node that has no neighbour in
  the order, which only happens when the query is disconnected).
  ``q.phi`` and ``q.phiparent`` are expected to be empty on entry
  (see ``graph.reset``).

  The start node is the first node with the maximum degree. Each following
  node is the unordered node with the largest rank ``(v1, v2, v3)``, compared
  lexicographically, where for a candidate ``u``:
    v1: number of ordered nodes adjacent to ``u``;
    v2: number of ordered nodes adjacent to at least one unordered
        neighbour of ``u``;
    v3: number of unordered neighbours of ``u`` that are not adjacent to any
        ordered node.
  On a tie the candidate that comes last in ``q.node2degree`` wins.

  Args:
    q: object with ``node2degree`` (node -> degree), ``edges``
      (node -> set of neighbours; indexed for every node, so it must be a
      defaultdict or contain every node), ``phi`` (list) and ``phiparent``
      (dict).

  Returns:
    None; ``q.phi`` and ``q.phiparent`` are updated in place.
  """
  degrees = q.node2degree
  if not degrees:
    # Nothing to order; without this guard the loop below would never end.
    return

  # max() keeps the first maximum and, unlike a running "0 < degree" scan,
  # still picks a real node when every degree is 0.
  start = max(degrees, key=degrees.get)
  visited = {start}
  q.phi.append(start)
  q.phiparent[start] = -1

  while len(visited) != len(degrees):
    best = -1
    best_rank = (-1, -1, -1)
    for node in degrees:
      if node in visited:
        continue
      nbrs = q.edges[node]
      # Neighbours of the candidate that are still unordered.
      frontier = {
        w for w in nbrs
        if w in degrees and w != node and w not in visited
      }
      v1 = len(visited & nbrs)
      v2 = sum(
        1 for vis in visited
        if any(w in q.edges[vis] for w in frontier)
      )
      # Restricted to the candidate's own neighbours so that this term
      # actually discriminates between candidates.
      v3 = sum(
        1 for w in frontier
        if not any(w in q.edges[vis] for vis in visited)
      )
      rank = (v1, v2, v3)
      if best_rank <= rank:  # "<=" keeps the original last-wins tie-break
        best = node
        best_rank = rank

    # Parent = earliest ordered neighbour; -1 marks "no ordered neighbour".
    q.phiparent[best] = next((p for p in q.phi if p in q.edges[best]), -1)
    q.phi.append(best)
    visited.add(best)

In [7]:
def RI_EP(q, g, m, i, totalresult): # not equal to the original code
  """Enumerate the embeddings of query ``q`` in data graph ``g`` by backtracking.

  Query nodes are matched in the order given by ``q.phi``. A data node ``v``
  is accepted for query node ``u`` when it is not used yet, has the same
  label, has degree >= the degree of ``u``, and is adjacent to the image of
  every already matched query node that is adjacent to ``u``. Only query
  edges are checked, so extra edges between the images are allowed.

  Candidates are taken from the neighbours of the image of ``q.phiparent[u]``
  when that parent is matched; otherwise (start node, or a node without a
  parent in a disconnected query) from all data nodes with the label of ``u``.

  Args:
    q: query graph with ``phi``, ``phiparent``, ``node2label``,
      ``node2degree`` and ``edges``.
    g: data graph with ``label2node``, ``node2label``, ``node2degree`` and
      ``edges``.
    m: current partial mapping query node -> data node; modified during the
      search and restored before returning.
    i: 1-based recursion depth; call with ``1`` and an empty ``m``.
    totalresult: list that receives a copy of every complete mapping.

  Returns:
    None; results are appended to ``totalresult``.
  """
  if i == len(q.phi) + 1:
    totalresult.append(m.copy())
    return

  # Next query node to match: first node of the order without an image.
  u = next((node for node in q.phi if node not in m), -1)

  label = q.node2label[u]
  min_degree = q.node2degree[u]
  parent = q.phiparent.get(u, -1)
  has_parent = parent in m

  # Images of the earlier query nodes adjacent to u; the image of u must be
  # adjacent to all of them. The parent is skipped when the candidates are
  # already drawn from its image's adjacency.
  required = []
  for node in q.phi:
    if node == u:
      break
    if has_parent and node == parent:
      continue
    if node in q.edges[u] or u in q.edges[node]:
      required.append(m[node])

  pool = g.edges[m[parent]] if has_parent else g.label2node[label]

  # Built once per call: m is back in this state whenever the loop resumes.
  used = set(m.values())

  lc = set()
  for v in pool:
    if v in used:
      continue
    if g.node2label[v] != label or g.node2degree[v] < min_degree:
      continue
    if all(w in g.edges[v] for w in required):
      lc.add(v)

  for v in lc:
    m[u] = v
    RI_EP(q, g, m, i + 1, totalresult)
    m.pop(u)

In [8]:
# Run the matcher for every (data graph, query graph) pair and record, per
# query, how many embeddings were found.
queries = {}
for g in gs:
  for q in qs:
    # Drop the order / parent map left over from a previous run on this query.
    q.reset()
    # Fills q.phi (matching order) and q.phiparent.
    RI_MOG(q)
    totalresult = []
    # Start the backtracking search with an empty mapping at depth 1.
    RI_EP(q, g, {}, 1, totalresult)
    # NOTE(review): keyed by query id only, so with more than one data graph
    # the count from a later graph overwrites the earlier one.
    queries[q.graphid] = len(totalresult)

print(queries)

{'query_dense_16_116.graph': 1, 'query_dense_16_142.graph': 16, 'query_dense_16_63.graph': 44, 'query_dense_16_28.graph': 5, 'query_dense_16_92.graph': 18, 'query_dense_16_181.graph': 8, 'query_dense_16_76.graph': 41, 'query_dense_16_87.graph': 12, 'query_dense_16_98.graph': 8, 'query_dense_16_9.graph': 42, 'query_dense_16_23.graph': 6, 'query_dense_16_186.graph': 10, 'query_dense_16_47.graph': 16, 'query_dense_16_88.graph': 12, 'query_dense_16_71.graph': 9, 'query_dense_16_163.graph': 8, 'query_dense_16_80.graph': 13, 'query_dense_16_139.graph': 12, 'query_dense_16_141.graph': 20, 'query_dense_16_119.graph': 8, 'query_dense_16_57.graph': 3, 'query_dense_16_138.graph': 12, 'query_dense_16_140.graph': 8, 'query_dense_16_52.graph': 6, 'query_dense_16_136.graph': 1, 'query_dense_16_156.graph': 6, 'query_dense_16_200.graph': 4, 'query_dense_16_99.graph': 260, 'query_dense_16_13.graph': 12, 'query_dense_16_175.graph': 8, 'query_dense_16_130.graph': 16, 'query_dense_16_4.graph': 6, 'query_de

In [9]:
# Compare the computed match counts with the expected ones: print the name of
# every query whose count differs, or "correct" when all of them agree.
flag = True
for name, expected in expects.items():
  if queries[name] == expected:
    continue
  print(name)
  flag = False
if flag:
  print("correct")

correct
