**VF2++**

VF2++ is a simple but efficient subgraph matching algorithm that builds on its predecessors VF, VF2, and VF2+.

Unlike most subgraph matching algorithms, this algorithm consists of only two steps: matching order generation and the enumeration procedure. Candidate filtering is performed inside the enumeration step rather than as a separate step.

For matching order generation, the authors adopt the classic **BFS** order. Within each level of the BFS tree, they sort the nodes by **degree and label frequency**. The levels are then concatenated, level by level, to form the final order.

For the enumeration part, they check each candidate not only against the nodes that come **before** the current node in the matching order, but also against the nodes that come **after** it, which further reduces the search space.

In [None]:
!git clone https://github.com/RapidsAtHKUST/SubgraphMatching.git

fatal: destination path 'SubgraphMatching' already exists and is not an empty directory.


In [None]:
from collections import defaultdict

In [None]:
class graph():
  """Labelled graph plus the per-query state used by the matcher.

  Attributes:
    graphid: identifier supplied by the caller.
    node2label: mapping node id -> label.
    node2degree: mapping node id -> degree.
    edges: mapping node id -> set of neighbouring node ids.
    label2node: mapping label -> set of node ids carrying that label,
      derived from ``node2label`` at construction time.
    candidateset, phi, phiparent: matching state, cleared by ``reset``.
  """

  def __init__(self, graphid, node2label, node2degree, edges):
    self.graphid = graphid
    self.node2label = node2label
    self.node2degree = node2degree
    self.edges = edges

    # Inverse index of node2label.
    self.label2node = defaultdict(set)
    for node, label in self.node2label.items():
      self.label2node[label].add(node)

    # The matching state starts out exactly as reset() leaves it.
    self.reset()

  def reset(self):
    """Discard the matching state while keeping the graph structure."""
    self.candidateset = defaultdict(set)
    self.phi = []
    self.phiparent = {}

In [None]:
def get_graph(filepath, filename):
  """Parse a graph file and return it as a ``graph`` object.

  The file is read as whitespace-separated records:
    * first line: three fields; the second and third are the node count
      and the edge count,
    * one line per node: four fields; the last three are node id, label
      and degree,
    * one line per edge: three fields; the last two are the endpoint ids.
  The leading field of every line is ignored. Each edge is stored in both
  directions.

  Args:
    filepath: path of the file to read.
    filename: value stored as the ``graphid`` of the returned graph.

  Returns:
    A ``graph`` built from the parsed labels, degrees and adjacency sets.

  Raises:
    ValueError: if a line does not have the expected number of fields or a
      field is not an integer.
  """
  node2label = {}
  node2degree = {}
  edges = defaultdict(set)

  # The context manager closes the file even when a line fails to parse.
  with open(filepath, "r", encoding="utf-8") as f:
    _, nodenum, edgenum = f.readline().strip().split()

    for _ in range(int(nodenum)):
      _, nodeid, nodelabel, nodedegree = f.readline().strip().split()
      node2label[int(nodeid)] = int(nodelabel)
      node2degree[int(nodeid)] = int(nodedegree)

    for _ in range(int(edgenum)):
      _, node1, node2 = f.readline().strip().split()
      edges[int(node1)].add(int(node2))
      edges[int(node2)].add(int(node1))

  return graph(filename, node2label, node2degree, edges)

In [None]:
qcount = 0
gcount = 0

import os

# Load every query graph shipped with the cloned repository.
qs = []
qdir = "SubgraphMatching/test/query_graph"
for f in os.listdir(qdir):
  filepath = os.path.join(qdir, f)
  qs.append(get_graph(filepath, f))

# Load every data graph shipped with the cloned repository.
gs = []
gdir = "SubgraphMatching/test/data_graph"
for f in os.listdir(gdir):
  filepath = os.path.join(gdir, f)
  gs.append(get_graph(filepath, f))

print(len(qs))
print(len(gs))

# The context manager closes the file even if reading fails.
with open("SubgraphMatching/test/expected_output.res", "r", encoding="utf-8") as f:
  lines = f.readlines()

# Each line is "<query name>:<count>"; the key gets ".graph" appended so that
# it matches the file names used as graph ids above.
expects = {}
for line in lines:
  if not line.strip():
    # Tolerate blank lines instead of failing on the unpacking below.
    continue
  name, times = line.strip().split(":")
  expects[name + ".graph"] = int(times)
print(len(expects))

200
1
200


In [None]:
def VF2PP_CSG(q, g):
  """Candidate-set generation step; currently a no-op.

  Neither ``q`` nor ``g`` is read or modified.

  Returns:
    None.
  """
  return None

In [None]:
def _best_nodes(nodes, score, pick):
  """Return the members of ``nodes`` whose score equals ``pick`` of all scores.

  ``score`` is called once per member, in iteration order. ``pick`` is
  ``min`` or ``max``. ``nodes`` must not be empty.
  """
  scores = {node: score(node) for node in nodes}
  target = pick(scores.values())
  return {node for node, value in scores.items() if value == target}


def VF2PP_MOG(q, g):
  """Generate the matching order of query graph ``q`` for data graph ``g``.

  Steps:
    1. Root: among the query nodes whose label occurs least often in ``g``,
       take one of highest degree.
    2. BFS from the root assigns every reachable query node a level
       (root = level 1). Nodes not reachable from the root are not ordered.
    3. Level by level, repeatedly pick the node with the most neighbours
       already in the order, then the highest degree, then the label that
       is rarest in ``g``. Remaining ties are broken by set iteration order.
    4. For every ordered node, record the earliest ordered neighbour as its
       parent; the root gets parent -1.

  Args:
    q: query graph; reads ``label2node``, ``node2label``, ``node2degree``
      and ``edges``. Nodes are appended to ``q.phi`` and parents written to
      ``q.phiparent``; entries already in ``q.phi`` are kept and count as
      already ordered.
    g: data graph; reads ``node2label`` and ``label2node``.

  Returns:
    None. An empty query (no labels) leaves ``q`` untouched.
  """
  if not q.label2node:
    # Nothing to order. Without this guard the root would fall back to -1
    # and the degree lookup further down would raise KeyError.
    return

  # How often each label occurs in the data graph.
  glabelfreq = defaultdict(int)
  for label in g.node2label.values():
    glabelfreq[label] += 1

  # Ties further down are broken by set iteration order, so the label set is
  # built by plain insertion to keep that order reproducible.
  qlabels = {label for label in q.label2node}
  rarest_labels = _best_nodes(qlabels, lambda label: glabelfreq[label], min)

  rs = set()
  for label in rarest_labels:
    rs |= q.label2node[label]

  # max() returns the first maximal element it meets while iterating rs.
  root = max(rs, key=lambda node: q.node2degree[node])

  # Level-synchronous BFS: each frontier is one level, so no queue with
  # O(n) pops from the front is needed.
  level2node = defaultdict(set)
  level2node[1].add(root)
  visited = {root}
  frontier = [root]
  maxlevel = 1
  while frontier:
    next_frontier = []
    for top in frontier:
      for neighbor in q.edges[top]:
        if neighbor not in visited:
          visited.add(neighbor)
          next_frontier.append(neighbor)
    if next_frontier:
      maxlevel += 1
      level2node[maxlevel].update(next_frontier)
    frontier = next_frontier

  # Kept in sync with q.phi so membership does not require rebuilding a set
  # for every node that is scored.
  placed = set(q.phi)

  def ordered_neighbors(node):
    return len(q.edges[node] & placed)

  def degree(node):
    return q.node2degree[node]

  def label_frequency(node):
    return len(g.label2node[q.node2label[node]])

  for curlevel in range(1, maxlevel + 1):
    remaining = level2node[curlevel]
    while remaining:
      best = _best_nodes(remaining, ordered_neighbors, max)
      best = _best_nodes(best, degree, max)
      best = _best_nodes(best, label_frequency, min)
      curnode = next(iter(best))
      q.phi.append(curnode)
      placed.add(curnode)
      remaining.remove(curnode)

  # Parent of a node = its first neighbour that appears earlier in the order.
  q.phiparent[root] = -1
  for position, node in enumerate(q.phi):
    for earlier in q.phi[:position]:
      if earlier in q.edges[node]:
        q.phiparent[node] = earlier
        break

  return

In [None]:
def VF2PP_EP(q, g, m, i, totalresult): # not equal to the original code
  """Enumerate the embeddings of ``q`` in ``g`` by backtracking along ``q.phi``.

  At each depth the first node ``u`` of ``q.phi`` that is not yet in ``m`` is
  extended. A data node ``v`` is a candidate for ``u`` when
    * it has the same label and at least the same degree as ``u``,
    * (depth > 1) it is a neighbour of the image of ``u``'s parent, is not
      the image of another matched node, and is adjacent to the image of
      every matched neighbour of ``u``,
    * (depth > 1, look-ahead) for every label, it has at least as many
      unmatched neighbours with that label as ``u`` has neighbours that
      come later in ``q.phi``.

  Args:
    q: query graph; reads ``phi``, ``phiparent``, ``node2label``,
      ``node2degree`` and ``edges``.
    g: data graph; reads ``label2node``, ``node2label``, ``node2degree``
      and ``edges``.
    m: partial mapping query node -> data node. It is extended while the
      search runs and is back in its entry state when the call returns.
    i: 1-based depth; the top-level call passes 1.
    totalresult: list that receives a copy of every complete mapping.

  Returns:
    None; the results are appended to ``totalresult``.
  """
  if i == len(q.phi) + 1:
    totalresult.append(m.copy())
    return

  u = next((node for node in q.phi if node not in m), -1)

  # Deeper calls undo their own additions to m before returning, so the set
  # of occupied data nodes is the same for the whole body of this call.
  used = set(m.values())

  if i == 1:
    lc = {
        v for v in g.label2node[q.node2label[u]]
        if g.node2degree[v] >= q.node2degree[u]
    }
  else:
    lc = set()
    parent = q.phiparent[u]

    # Matched nodes other than the parent; the parent is covered by drawing
    # the candidates from its image's neighbourhood.
    earlier = []
    for node in q.phi:
      if node == u:
        break
      if node != parent:
        earlier.append(node)

    for v in g.edges[m[parent]]:
      if q.node2label[u] != g.node2label[v] or g.node2degree[v] < q.node2degree[u]:
        continue
      consistent = True
      for node in earlier:
        image = m[node]
        if v == image or (image not in g.edges[v] and (node in q.edges[u] or u in q.edges[node])):
          consistent = False
          break
      if consistent:
        lc.add(v)

    # Look-ahead: label histogram of u's neighbours that come after u in the
    # order, i.e. that still have to be matched.
    qlabels = defaultdict(int)
    for node in reversed(q.phi):
      if node == u:
        break
      if node in q.edges[u]:
        qlabels[q.node2label[node]] += 1

    if qlabels:
      for v in list(lc):
        glabels = defaultdict(int)
        for vneighbor in g.edges[v]:
          if vneighbor not in used:
            glabels[g.node2label[vneighbor]] += 1
        # v cannot host u if it lacks free neighbours of some needed label.
        if any(count > glabels[label] for label, count in qlabels.items()):
          lc.remove(v)

  for v in lc:
    if v not in used:
      m[u] = v
      VF2PP_EP(q, g, m, i + 1, totalresult)
      m.pop(u)

In [None]:
# Run the three VF2++ steps for every (data graph, query graph) pair and
# record how many embeddings were found for each query.
queries = {}
for g in gs:
  for q in qs:
    # Clear the candidate set, order and parents left on q by an earlier run.
    q.reset()
    VF2PP_CSG(q, g)
    VF2PP_MOG(q, g)
    totalresult = []
    # Start the enumeration at depth 1 with an empty partial mapping.
    VF2PP_EP(q, g, {}, 1, totalresult)
    # NOTE(review): keyed by query id only, so with more than one data graph
    # a later graph overwrites the count stored for an earlier one.
    queries[q.graphid] = len(totalresult)

print(queries)

{'query_dense_16_62.graph': 8, 'query_dense_16_46.graph': 30, 'query_dense_16_74.graph': 8, 'query_dense_16_113.graph': 6, 'query_dense_16_172.graph': 16, 'query_dense_16_170.graph': 18, 'query_dense_16_142.graph': 16, 'query_dense_16_98.graph': 8, 'query_dense_16_37.graph': 1, 'query_dense_16_114.graph': 4, 'query_dense_16_132.graph': 20, 'query_dense_16_165.graph': 208, 'query_dense_16_94.graph': 2, 'query_dense_16_68.graph': 256, 'query_dense_16_9.graph': 42, 'query_dense_16_39.graph': 1, 'query_dense_16_70.graph': 42, 'query_dense_16_196.graph': 2, 'query_dense_16_84.graph': 8, 'query_dense_16_22.graph': 9, 'query_dense_16_6.graph': 132, 'query_dense_16_107.graph': 21, 'query_dense_16_4.graph': 6, 'query_dense_16_1.graph': 3, 'query_dense_16_146.graph': 24, 'query_dense_16_112.graph': 2, 'query_dense_16_141.graph': 20, 'query_dense_16_139.graph': 12, 'query_dense_16_160.graph': 2688, 'query_dense_16_136.graph': 1, 'query_dense_16_179.graph': 184, 'query_dense_16_19.graph': 2, 'quer

In [None]:
# Compare the computed embedding counts with the expected ones and print the
# name of every query that disagrees; print "correct" when none does.
flag = True
for name in expects:
  # A query with no computed result is reported as a mismatch instead of
  # aborting the whole check with KeyError.
  if expects[name] != queries.get(name):
    print(name)
    flag = False
if flag:
  print("correct")

correct
