**DAF(DPiso)**

This is a subgraph matching algorithm built on the idea of dynamic programming.

Like other subgraph matching algorithms, it follows the classic three-step pipeline to get the result. At first, the authors generate a **DAG** to fix the parent-child relationship between all nodes of the query graph. Then they use the **classic filters**, such as NLF, DF and so on, to initialize the candidate set of each node in the query graph.

Then, for each candidate node in the data graph, the authors check those of its neighbors that have the same label as the node in the query graph. They first remove candidates based on **the number of appearances** among the children in the DAG, and then among the parents in the DAG. They iterate this procedure 3 times to get the optimal candidate set for each node.

For the matching order generation part, they use a **weight matrix**: **weights[qnode][gnode]**. To generate this weight matrix, they use an auxiliary structure **edgematrix[qnode][qneighbor][gnode]** that stores the intersection between the neighbors of gnode and the candidates of qneighbor, where qnode is the node being processed, qneighbor ranges over its neighbors, and gnode ranges over the candidates of qnode.

Based on the weights of the candidate set of each node, we can optimize the enumeration procedure while matching a node with its candidates. It also uses BFS order to process the nodes of the query graph, but for each query graph node being processed, **the order of candidates is not random**. Beyond that, the authors propose a **failing set** structure to reduce the search space of the enumeration procedure. The idea is that if, during enumeration, we meet a conflict or an empty candidate set, then this node, with its current candidate, conflicts with an ancestor in the BFS tree. In this case, for the current node, we mark the failing set as **itself and its ancestors** together with those of the conflicting candidate, so we don't have to keep processing this candidate in the other sub-trees of this one.

In [None]:
!git clone https://github.com/RapidsAtHKUST/SubgraphMatching.git

fatal: destination path 'SubgraphMatching' already exists and is not an empty directory.


In [None]:
from collections import defaultdict
from queue import PriorityQueue
from copy import deepcopy

In [None]:
class graph():
  """A labelled graph together with the scratch state the DAF phases fill in.

  The constructor stores the description it is given (id, labels, degrees,
  adjacency) without copying, derives a label -> nodes index, and starts with
  empty matching state. `reset()` empties that matching state again.
  """

  def __init__(self, graphid, node2label, node2degree, edges):
    # Static description of the graph, kept exactly as passed in.
    self.graphid = graphid
    self.node2label = node2label
    self.node2degree = node2degree
    self.edges = edges

    # Inverted index: label -> set of nodes carrying that label.
    self.label2node = defaultdict(set)
    for node, label in self.node2label.items():
      self.label2node[label].add(node)

    # The matching state starts out empty; reset() creates the containers.
    self.reset()

  def reset(self):
    """Replace every piece of matching state with a fresh, empty container.

    The graph description and the label index are left untouched.
    """
    self.candidateset = defaultdict(set)   # node -> candidate nodes
    self.phi, self.phiparent = [], {}      # node order and node -> parent map
    self.dagparents, self.dagchildren = defaultdict(set), defaultdict(set)
    self.weights = defaultdict(dict)       # node -> {candidate: weight}

In [None]:
def get_graph(filepath, filename):
  """Read one graph file and return it as a `graph` object.

  The reader consumes, in order:
    * a header line of three whitespace-separated fields, of which the second
      is the node count and the third the edge count;
    * one line per node with four fields: a tag (ignored), node id, label and
      degree, all parsed as integers;
    * one line per edge with three fields: a tag (ignored) and the two
      endpoint ids. Each edge is stored in both directions.

  `filepath` is the file to open; `filename` is stored as the graph's id.
  Raises ValueError if a line does not have the expected number of fields or
  a field is not an integer.
  """
  node2label = {}
  node2degree = {}
  edges = defaultdict(set)

  # The context manager closes the file even when a malformed line raises.
  with open(filepath, "r", encoding="utf-8") as f:
    _, nodenum, edgenum = f.readline().strip().split()

    for _ in range(int(nodenum)):
      _, nodeid, nodelabel, nodedegree = f.readline().strip().split()
      node2label[int(nodeid)] = int(nodelabel)
      node2degree[int(nodeid)] = int(nodedegree)

    for _ in range(int(edgenum)):
      _, node1, node2 = f.readline().strip().split()
      edges[int(node1)].add(int(node2))
      edges[int(node2)].add(int(node1))

  return graph(filename, node2label, node2degree, edges)

In [None]:
# Counters initialised here; nothing in this cell updates them.
qcount = 0
gcount = 0

import os

# Load every query graph of the test suite; the file name becomes the graph id.
qs = []
qdir = "SubgraphMatching/test/query_graph"
for f in os.listdir(qdir):
  filepath = os.path.join(qdir, f)
  qs.append(get_graph(filepath, f))

# Load every data graph of the test suite.
gs = []
gdir = "SubgraphMatching/test/data_graph"
for f in os.listdir(gdir):
  filepath = os.path.join(gdir, f)
  gs.append(get_graph(filepath, f))

print(len(qs))
print(len(gs))

# expected_output.res holds one "<query name>:<count>" entry per line.
# Keys get ".graph" appended so they can be compared with the ids set above.
expects = {}
with open("SubgraphMatching/test/expected_output.res", "r", encoding="utf-8") as f:
  for line in f:
    line = line.strip()
    if not line:
      continue  # tolerate blank or trailing empty lines instead of crashing
    name, times = line.split(":")
    expects[name + ".graph"] = int(times)
print(len(expects))

200
1
200


In [None]:
def DAF_CSG(q, g, k):
  """Build candidate sets and the query DAG for query `q` on data graph `g`.

  Steps, all of which write into `q`:
    1. Label/degree filter: a data node is a candidate of a query node when
       the labels match and its degree is at least the query node's degree.
    2. Neighbour-label filter: a candidate is dropped when, for some label,
       it has fewer neighbours with that label than the query node has.
    3. One pass that drops a candidate having no neighbour inside the
       candidate set of some neighbour of the query node.
    4. Root choice (smallest |candidates| / degree) and a BFS from the root.
       The BFS fills `q.dagchildren`, `q.dagparents`, `q.phiparent` and sets
       `q.phi` to the BFS order.
    5. `k` pruning passes over the DAG, alternating between "parents"
       (even passes, BFS order) and "children" (odd passes, reverse order
       without its first element).

  Parameters:
    q: query graph; needs node2label, node2degree, edges, candidateset,
       dagparents, dagchildren, phi, phiparent.
    g: data graph; needs node2label, node2degree, edges, label2node.
    k: number of DAG pruning passes.

  Returns None. Raises ValueError (from min) if `q` has no nodes.
  """

  def prunecandidates(q, g, qnode, pruneneighbors):
    """Keep only candidates of `qnode` adjacent to a candidate of every pivot.

    The pivots are `pruneneighbors[qnode]`. `flag[v]` counts how many pivots
    have reached `v` so far; it is advanced for a pivot only if all earlier
    pivots reached `v` too, so `flag[v] == count` at the end means "all".
    """
    samelabel = g.label2node[q.node2label[qnode]]
    qdegree = q.node2degree[qnode]
    count = 0
    flag = defaultdict(int)
    for pivot in pruneneighbors[qnode]:
      for vnode in q.candidateset[pivot]:
        for vneighbor in g.edges[vnode] & samelabel:
          if flag[vneighbor] == count and g.node2degree[vneighbor] >= qdegree:
            flag[vneighbor] += 1
      count += 1

    # With no pivots count stays 0 and every candidate is kept.
    for candidate in q.candidateset[qnode].copy():
      if flag[candidate] != count:
        q.candidateset[qnode].remove(candidate)

  # Step 1: label and degree filter.
  for qnode, label in q.node2label.items():
    qdegree = q.node2degree[qnode]
    for gnode in g.label2node[label]:
      if qdegree <= g.node2degree[gnode]:
        q.candidateset[qnode].add(gnode)

  # Per-node histogram of neighbour labels, for the query and the data graph.
  qneighborlabels = defaultdict(lambda: defaultdict(int))
  for qnode in q.node2label:
    for qneighbor in q.edges[qnode]:
      qneighborlabels[qnode][q.node2label[qneighbor]] += 1

  gneighborlabels = defaultdict(lambda: defaultdict(int))
  glabelfreq = defaultdict(int)  # label -> number of data nodes with it
  for gnode, glabel in g.node2label.items():
    glabelfreq[glabel] += 1
    for gneighbor in g.edges[gnode]:
      gneighborlabels[gnode][g.node2label[gneighbor]] += 1

  # Step 2: neighbour-label filter.
  for qnode in q.node2label:
    required = qneighborlabels[qnode]
    for gnode in q.candidateset[qnode].copy():
      available = gneighborlabels[gnode]
      if any(need > available[label] for label, need in required.items()):
        q.candidateset[qnode].remove(gnode)

  # Step 3: a candidate must touch the candidate set of every query neighbour.
  for qnode in q.node2label:
    for qneighbor in q.edges[qnode]:
      for nodecandidate in q.candidateset[qnode].copy():
        if not (g.edges[nodecandidate] & q.candidateset[qneighbor]):
          q.candidateset[qnode].remove(nodecandidate)

  # Step 4a: root = node minimising |candidates| / degree. A degree-0 node
  # scores infinity instead of raising ZeroDivisionError.
  scores = {}
  for qnode in q.node2label:
    qdegree = q.node2degree[qnode]
    if qdegree:
      scores[qnode] = len(q.candidateset[qnode]) / qdegree
    else:
      scores[qnode] = float("inf")
  r = min(scores, key=scores.get)

  # Step 4b: level-by-level BFS. queue[b:e] is the current level.
  queue = [r]
  visited = {r}
  popped = set()
  b, e = 0, 1
  while b < e:
    # Order the level by (label frequency in g, label), ties by larger degree.
    level = queue[b:e]
    level.sort(key=lambda x: (glabelfreq[q.node2label[x]], q.node2label[x], -q.node2degree[x]))
    queue[b:e] = level
    levelend = e
    while b < levelend:
      parent = queue[b]
      b += 1
      popped.add(parent)
      for child in q.edges[parent]:
        if child in popped:
          continue
        # Every edge is directed from the node popped first to the other.
        q.dagchildren[parent].add(child)
        q.dagparents[child].add(parent)
        if child not in visited:
          visited.add(child)
          q.phiparent[child] = parent
          queue.append(child)
          e += 1
  q.phi = queue

  # Step 5: alternate parent-based and child-based pruning passes.
  reverseorder = q.phi[::-1]
  for prune in range(k):
    if prune % 2 == 0:
      for qnode in q.phi:
        prunecandidates(q, g, qnode, q.dagparents)
    else:
      for qnode in reverseorder[1:]:
        prunecandidates(q, g, qnode, q.dagchildren)
  

In [None]:
def DAF_MOG(q, g):
  """Build the candidate edge index and the per-candidate weights of `q`.

  `edgematrix[qnode][qneighbor][gnode]` is the set of data nodes that are both
  neighbours of `gnode` in `g` and candidates of `qneighbor`, for every query
  edge (qnode, qneighbor) and every candidate `gnode` of `qnode`.

  `q.weights[node][candidate]` is then filled bottom-up over `q.phi`:
    * a node with no DAG child whose only DAG parent is that node gets weight
      1 for each of its candidates;
    * otherwise the weight is the minimum, over such single-parent children,
      of the summed weights of the child's candidates adjacent to `candidate`.
  Candidates of query nodes that do not appear in `q.phi` keep the initial
  weight `float("inf")`.

  Returns `edgematrix`.
  """
  edgematrix = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
  for qnode in q.node2label:
    for qneighbor in q.edges[qnode]:
      qneighborcandidateset = q.candidateset[qneighbor]
      for gnode in q.candidateset[qnode]:
        edgematrix[qnode][qneighbor][gnode] = g.edges[gnode] & qneighborcandidateset

  # Start from +infinity so that any real weight sum can replace it.
  unset = float("inf")
  for qnode in q.node2label:
    for gnode in q.candidateset[qnode]:
      q.weights[qnode][gnode] = unset

  # Children come after their parents in q.phi, so walking it backwards
  # guarantees a child's weights are final before its parent reads them.
  for node in reversed(q.phi):
    hasexclusivechild = False
    for child in q.dagchildren[node]:
      if len(q.dagparents[child]) != 1:
        continue
      hasexclusivechild = True
      for candidate in q.candidateset[node]:
        # Index by the node being processed (`node`), not by a leftover
        # loop variable from the initialisation loop above.
        weight = sum(q.weights[child][v] for v in edgematrix[node][child][candidate])
        if weight < q.weights[node][candidate]:
          q.weights[node][candidate] = weight
    if not hasexclusivechild:
      for candidate in q.candidateset[node]:
        q.weights[node][candidate] = 1

  return edgematrix

In [None]:
"""def DAF_EP(q, g, m, i, totalresult): # not equal to the original code
  if i == len(q.phi) + 1:
    totalresult.append(m.copy())
    return 
  result = {}
  u = -1
  for node in q.phi:
    if node not in m:
      u = node
      break

  lc = set()

  if i == 1:
    vs = g.label2node[q.node2label[u]]
    for v in vs:
      if g.node2degree[v] >= q.node2degree[u]:
        lc.add(v)
  else:
    for v in g.edges[m[q.phiparent[u]]]:
      if q.node2label[u] == g.node2label[v] and g.node2degree[v] >= q.node2degree[u]:
        flag = True
        for node in q.phi:
          if node == u:
            break
          if node == q.phiparent[u]:
            continue
          #if m[node] not in g.edges[v]:
          if v == m[node] or (m[node] not in g.edges[v] and (node in q.edges[u] or u in q.edges[node])):
            flag = False
            break
        if flag:
          lc.add(v)

  for node in lc:
    if node not in set(m.values()):
      m[u] = node
      DAF_EP(q, g, m, i + 1, totalresult)
      m.pop(u)"""

def DAF_EP(q, g, m, totalresult, edgematrix):
  """Enumerate the embeddings of `q` and append each one to `totalresult`.

  The search is an iterative backtracking over depths 0..n-1 (n = number of
  query nodes). Depth 0 is always `q.phi[0]`; at every other depth the next
  query node is taken from a priority queue of "extendable" nodes, i.e. nodes
  whose DAG parents are all mapped. A per-depth failing set is maintained:
  when a mapped node is not in the failing set collected below it, its
  remaining candidates are skipped.

  Parameters:
    q: query graph; reads node2label, node2degree, dagparents, dagchildren,
       candidateset (of the start node only) and weights. `q.phi` is
       overwritten in place with the nodes chosen at each depth.
    g: data graph. Not used by this function; kept for the call signature.
    m: dict used as the working mapping query node -> data node. It is
       mutated and is left holding stale entries afterwards.
    totalresult: list receiving a copy of `m` for every embedding found.
    edgematrix: edgematrix[parent][child][v] is the set of candidates of
       `child` adjacent to `v`, as returned by DAF_MOG.

  Returns None. Raises queue.Empty if no query node is extendable although
  unmapped nodes remain (e.g. part of the query is not reachable in the DAG).
  """
  class infomation(object):
    """Priority-queue entry: an extendable query node with its rank data."""
    def __init__(self, node, degree, weight):
      self.node = node
      self.degree = degree
      self.weight = weight
    def __lt__(self, other):
      # NOTE(review): with the min-heap of PriorityQueue this pops degree-1
      # nodes and larger weights first, and is not a strict order when both
      # degrees are 1. Left unchanged; confirm the intended priority.
      if self.degree == 1 and other.degree == 1:
        return True
      elif self.degree != 1 and other.degree == 1:
        return False
      else:
        return self.weight > other.weight

  def updateextendablenode(mappednode, rankqueue):
    """Account for `mappednode` being mapped: queue children that became extendable."""
    for node in q.dagchildren[mappednode]:
      extendable[node] -= 1
      if extendable[node] != 0:
        continue

      # Valid candidates = intersection over all DAG parents of the
      # candidates adjacent to the data node that parent is mapped to.
      bns = list(q.dagparents[node])
      candidates = set(edgematrix[bns[0]][node][m[bns[0]]])
      for curbn in bns[1:]:
        candidates &= edgematrix[curbn][node][m[curbn]]

      # Stored as a list once, so the search loop can index it directly.
      validcandidate[node] = list(candidates)
      idcount[node] = len(candidates)
      weight = sum(q.weights[node][candidate] for candidate in candidates)
      rankqueue[-1].put(infomation(node, q.node2degree[node], weight))

  maxdepth = len(q.node2label)
  startnode = q.phi[0]

  if maxdepth == 1:
    # Nothing to extend: each candidate of the only node is an embedding.
    # (The general loop below would wait forever on an empty queue.)
    for gnode in q.candidateset[startnode]:
      m[startnode] = gnode
      totalresult.append(m.copy())
    return

  # Number of still-unmapped DAG parents per query node.
  extendable = {node: len(q.dagparents[node]) for node in q.node2label}

  # ancestors[u] = u plus everything above it in the DAG. Parents precede
  # children in q.phi, so a single forward pass is enough.
  ancestors = defaultdict(set)
  for qnode in q.phi:
    ancestors[qnode].add(qnode)
    for qparent in q.dagparents[qnode]:
      ancestors[qnode] |= ancestors[qparent]

  # Every value stored in failingset is a private set: the in-place `|=`
  # below must never modify `ancestors` or the set of another depth.
  failingset = defaultdict(set)
  curdepth = 0
  visited = set()       # data nodes currently used by the mapping
  reversem = {}         # data node -> query node mapped to it
  rankqueue = []        # one priority queue per depth >= 1
  validcandidate = {}   # query node -> list of currently valid candidates
  idx = {}              # query node -> position of the next candidate to try
  idcount = {}          # query node -> len(validcandidate[node])

  for gnode in q.candidateset[startnode]:
    m[startnode] = gnode
    visited.add(gnode)
    reversem[gnode] = startnode
    rankqueue.append(PriorityQueue())
    updateextendablenode(startnode, rankqueue)
    u = rankqueue[-1].get_nowait().node

    failingset[curdepth] = set(ancestors[u]) if idcount[u] == 0 else set()

    curdepth += 1
    q.phi[curdepth] = u
    idx[u] = 0
    while curdepth > 0:
      while idx[u] < idcount[u]:
        validnode = validcandidate[u][idx[u]]
        idx[u] += 1

        if validnode in visited:
          # Conflict: blame u, the node already holding validnode, and
          # the ancestors of both.
          failingset[curdepth] = ancestors[u] | ancestors[reversem[validnode]]
          failingset[curdepth - 1] |= failingset[curdepth]
          continue

        m[u] = validnode
        visited.add(validnode)
        reversem[validnode] = u

        if curdepth == maxdepth - 1:
          # All query nodes are mapped: record the embedding.
          totalresult.append(m.copy())
          visited.remove(validnode)
          reversem.pop(validnode)

          # A success marks every query node, which disables pruning above.
          failingset[curdepth] = set(q.node2label)
          failingset[curdepth - 1] |= failingset[curdepth]
        else:
          curdepth += 1
          # The new depth starts from a copy of the parent's queue. Entries
          # are never modified, so copying the heap list is sufficient.
          copydp = PriorityQueue()
          copydp.queue = list(rankqueue[-1].queue)
          rankqueue.append(copydp)
          updateextendablenode(u, rankqueue)
          u = rankqueue[-1].get_nowait().node
          idx[u] = 0
          q.phi[curdepth] = u

          if idcount[u] == 0:
            failingset[curdepth - 1] = set(ancestors[u])
          else:
            failingset[curdepth - 1] = set()

      # Candidates of this depth are exhausted: undo the mapping one level up.
      curdepth -= 1
      rankqueue.pop()
      u = q.phi[curdepth]
      visited.remove(m[u])

      for child in q.dagchildren[u]:
        extendable[child] += 1

      reversem.pop(m[u])
      if curdepth != 0:
        if u not in failingset[curdepth]:
          # u played no part in the failures below it, so its other
          # candidates are skipped as well.
          failingset[curdepth - 1] = failingset[curdepth].copy()
          idx[u] = idcount[u]
        else:
          failingset[curdepth - 1] |= failingset[curdepth]

In [None]:
# Run the three DAF phases for every (data graph, query graph) pair and
# record the number of embeddings found under the query's graph id.
queries = {}
for g in gs:
  for q in qs:
    # Drop candidate sets, DAG and weights left over from an earlier pair.
    q.reset()

    DAF_CSG(q, g, 3)             # candidate sets + DAG, 3 pruning passes
    edgematrix = DAF_MOG(q, g)   # candidate edge index + weights
    print(q.graphid)
    totalresult = []
    DAF_EP(q, g, {}, totalresult, edgematrix)
    #DAF_EP(q, g, {}, 1, totalresult)
    queries[q.graphid] = len(totalresult)

print(queries)

query_dense_16_37.graph
query_dense_16_66.graph
query_dense_16_72.graph
query_dense_16_79.graph
query_dense_16_152.graph
query_dense_16_44.graph
query_dense_16_126.graph
query_dense_16_49.graph
query_dense_16_38.graph
query_dense_16_175.graph
query_dense_16_19.graph
query_dense_16_67.graph
query_dense_16_108.graph
query_dense_16_177.graph
query_dense_16_7.graph
query_dense_16_34.graph
query_dense_16_48.graph
query_dense_16_71.graph
query_dense_16_28.graph
query_dense_16_41.graph
query_dense_16_39.graph
query_dense_16_145.graph
query_dense_16_143.graph
query_dense_16_179.graph
query_dense_16_199.graph
query_dense_16_42.graph
query_dense_16_127.graph
query_dense_16_166.graph
query_dense_16_95.graph
query_dense_16_194.graph
query_dense_16_161.graph
query_dense_16_157.graph
query_dense_16_168.graph
query_dense_16_77.graph
query_dense_16_74.graph
query_dense_16_142.graph
query_dense_16_2.graph
query_dense_16_130.graph
query_dense_16_87.graph
query_dense_16_69.graph
query_dense_16_6.graph
qu

In [None]:
# Compare the counts in `queries` with the expected ones: print the name of
# every query whose count differs, or "correct" when none does.
flag = True
for name, expected in expects.items():
  if queries[name] == expected:
    continue
  print(name)
  flag = False
if flag:
  print("correct")