Finding hidden cliques of size $K$ in a hypergraph on $n$ vertices

In [76]:
import csv
import math
import time as tm

import numpy as np

In [77]:
# increment the degree of each vertex on graph g by 1
def add_all(i, j, k, g):
    g[i, j, k] = 1
    g[i, k, j] = 1
    g[j, i, k] = 1
    g[j, k, i] = 1
    g[k, i, j] = 1
    g[k, j, i] = 1
    return g

# decrement the degree of each vertex on graph g to 0
def remove_all(i, j, k, g):
    g[i, j, k] = 0
    g[i, k, j] = 0
    g[j, i, k] = 0
    g[j, k, i] = 0
    g[k, i, j] = 0
    g[k, j, i] = 0
    return g

In [78]:
##create a graph with planted clique
def generate_graph(num_vertices, pr, clq_size):
    def create_edges():
        for i in range(num_vertices):
            for j in range(i + 1, num_vertices):
                for k in range(j + 1, num_vertices):
                    if np.random.uniform(0, 1, 1) < pr: # probability of creating edges
                        vec[[i, j, k]] += 1
                        add_all(i, j, k, g)

    def plant_clique():
        for i in range(clq_size):
            for j in range(i + 1, clq_size):
                for k in range(j + 1, clq_size):
                    vec[[clq_vertex[i], clq_vertex[j], clq_vertex[k]]] += 1
                    add_all(clq_vertex[i], clq_vertex[j], clq_vertex[k], g)

    g = np.zeros((num_vertices, num_vertices, num_vertices)) # graph on n vertices
    vec = np.zeros(num_vertices, ) # edge
    create_edges()
    clq_vertex = np.random.choice(range(num_vertices), clq_size, replace=False) # randomly choose a given number of clique vertices
    plant_clique() # plant a clique to vertices (add edges between every two pairs of vertices)

    return g, vec, clq_vertex

In [79]:
# calculate total number of edges
def count_edge(g, num_vertices):
    num_edges = 0
    for i in range(num_vertices):
        for j in range(i + 1, num_vertices):
            for k in range(j + 1, num_vertices):
                if g[i, j, k] == 1:
                    num_edges += 1
    return num_edges

In [80]:
# check if the graph is a clique
def is_clique(g, vec, num_vertices):
    active_count = np.count_nonzero(vec)  # number of vertices that are associated with at least 1 edge
    edge_sum = count_edge(g, num_vertices)  # number of edges
    return edge_sum == math.comb(active_count, 3)

In [81]:
# remove edge
def remove_edges(g, num_vertices, vec, curr_idx):
    vec[curr_idx] = 0
    for j in range(0, num_vertices):
        for k in range(j+1, num_vertices):
            if g[curr_idx, j, k] == 1:
                vec[[j, k]] -= 1
                remove_all(curr_idx, j, k, g)
    return g, vec

### Generate a graph

In [82]:
N = 50  # number of vertices
P = 0.5  # probability of an edge being included
K = 5  # clique size

start_generate = tm.time()
res = generate_graph(N, P, K)
G, V, planted_clique = res[0], res[1], res[-1] # G: graph V: vector storing the number of edges associated with each vertex
G_0 = G.copy()

time_generate = np.round(tm.time() - start_generate, 3)
planted_clique

array([18, 38, 15, 22,  9])

### Removal Phase

In [83]:
start_removal = tm.time()
itr = 0
removed = []  # keep track of the vertices removed from the original graph to form a clique

while not is_clique(G, V, N):
    itr += 1
    curr = -1
    idx_sorted = np.argsort(V)

    for idx in range(N):
        if V[idx_sorted[idx]] != 0:
            curr = idx_sorted[idx]
            removed.append(curr)
            break

    A = remove_edges(G, N, V, curr)
    
    G, V = A[0], A[1]

print(f"is a clique at iteration #{itr}!!!")
time_removal = tm.time() - start_removal

is a clique at iteration #46!!!


In [84]:
# print(f"number of iterations in the removal phase: {itr}")

In [85]:
included = np.setdiff1d(np.arange(N), removed)
included

array([14, 17, 22, 38])

### Inclusion Phase

In [86]:
def inclusion_phase(target, in_set, g):
    def connected(): 
        for j in range(len(in_set)):
            for k in range(j + 1, len(in_set)):
                if g[target_idx, in_set[j], in_set[k]] != 1:
                    return False
        return True

    for target_idx in target:
        if connected():
            print(f"add {target_idx} to clique!")
            in_set = np.append(in_set, target_idx)
            

    return in_set

In [87]:
start_include = tm.time()
res = inclusion_phase(removed, included, G_0)

time_include = tm.time() - start_include
total_run_time = tm.time() - start_generate

add 39 to clique!


In [88]:
print(f"Set of clique vertices after removal phase: {np.sort(included)}\n")
print(f"Set of clique vertices after inclusion phase: {np.sort(res)}")

Set of clique vertices after removal phase: [14 17 22 38]

Set of clique vertices after inclusion phase: [14 17 22 38 39]


In [89]:
found = 0
if set(planted_clique) ==  set(res):
    found = 1
found
planted_clique, res

(array([18, 38, 15, 22,  9]), array([14, 17, 22, 38, 39]))

In [90]:
def write_to_csv(n, k, output):
    header = ['Number of Vertices', 'Clique Size', 'Planted Clique', 'Clique Vertices after Inclusion', 'Run Time (graph generation) ', 'Run Time (exclusion)', 'Run Time (inclusion)', 'Total Run time', 'Success']

    with open('N=' + str(n) + '_K=' + str(k) + '.csv', 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerow(output)

In [91]:
out = [N, K, np.sort(clique_vertices).tolist(), res.tolist(), time_generate, time_removal,time_include, total_run_time, found]
print(out)
write_to_csv(str(N), str(K), out)

[50, 5, [12, 23, 49, 52, 76], [14, 17, 22, 38, 39], 0.048, 0.10766410827636719, 0.00011181831359863281, 0.23654413223266602, 0]
