# Finding Planted Clique in Hypergraphs

Efficient recovery of planted cliques in hypergraphs.
A model on $n$ vertices with order $d$.
$K$ vertices forming a clique, meaning that any size-d tuple within the clique is connected by a hyperedge.
All other size-$d$ tuples form a hyperedge with probability $q = 1/2$ .

In [1]:

import math
import time as tm

import numpy as np

In [2]:
# increment the degree of each vertex on graph g by 1
def add_all(i, j, k, g):
    g[i, j, k] = 1
    g[i, k, j] = 1
    g[j, i, k] = 1
    g[j, k, i] = 1
    g[k, i, j] = 1
    g[k, j, i] = 1
    return g

In [3]:
# decrement the degree of each vertex on graph g to 0
def remove_all(i, j, k, g):
    g[i, j, k] = 0
    g[i, k, j] = 0
    g[j, i, k] = 0
    g[j, k, i] = 0
    g[k, i, j] = 0
    g[k, j, i] = 0
    return g


In [4]:
### create a graph with planted clique of size = clique_size

# g: graph (represented as a tensor)
# num_vertices: number of vertices on the graph (dimension, or rank of the tensor) 
# pr: probability of creating edges
# clique_size: clique size

def generate_graph(num_vertices, pr, clique_size):
    def plant_clique():
        for ii in range(clique_size):
            for jj in range(ii + 1, clique_size):
                for kk in range(jj + 1, clique_size):
                    aa = clique_v[ii]
                    # print(a)
                    vec[aa] += 1
                    bb = clique_v[jj]
                    # print(b)
                    vec[bb] += 1
                    cc = clique_v[kk]
                    # print(c)
                    vec[cc] += 1
                    add_all(aa, bb, cc, g)

    g = np.array([np.array([np.array([0 for _ in range(0, num_vertices)]) for _ in range(num_vertices)]) for _ in
                  range(num_vertices)])
    vec = np.array([0 for _ in range(0, num_vertices)])

    # Set edges
    for i in range(num_vertices):
        for j in range(i + 1, num_vertices):
            for k in range(j + 1, num_vertices):
                a = np.random.uniform(0, 1, 1)
                # every edge is included independently with probability 1/2
                if a < pr:
                    vec[i] += 1
                    vec[j] += 1
                    vec[k] += 1
                    add_all(i, j, k, g)

    clique_v = np.random.choice(range(num_vertices), clique_size, replace=False)

    plant_clique()

    return g, vec, clique_v


In [5]:
# calculate the total number of edges on a graph with num_vertices vertices
def find_num_edges(g, num_vertices):
    num_edges = 0
    for i in range(num_vertices):
        for j in range(i + 1, num_vertices):
            for k in range(j + 1, num_vertices):
                if g[i, j, k] == 1:
                    num_edges += 1
    return num_edges

In [6]:
# check if the graph is a clique
def is_clique(g, vec, num_vertices):
    # active_count: number of vertices that are associated with at least 1 edge
    active_count = np.count_nonzero(vec)
    # number of edges
    edge_sum = find_num_edges(g, num_vertices)
    if edge_sum == math.comb(active_count, 3):
        return True
    return False

In [7]:
# remove edge
def remove_edges(g, num_vertices, vec, curr_idx):
    vec[curr_idx] = 0
    graph_copy = g.copy()
    for j in range(0, num_vertices):
        for k in range(0, num_vertices):
            if graph_copy[curr_idx, j, k] == 1:
                vec[j] -= 1
                vec[k] -= 1
                remove_all(curr_idx, j, k, graph_copy)
    return graph_copy, vec

### Generate a graph
n: number of vertices
p: probability of an edge being included
k: clique size

In [8]:
# driver program

N = 100  # number of vertices
P = 0.5  # probability of an edge being included
K = 10  # clique size

# generate graph
start_generate = tm.time()
res = generate_graph(N, P, K)

# G graph with planted clique
# V: vector storing the number of edges associated with each vertex in the graph
# clique_vertices: the set of clique vertices
G, V, clique_vertices = res[0], res[1], res[-1]

# print(f"graph: {G}\n")
G_0 = G.copy()
print(f"edge-occurrence vector: {V}\n")
print(f"set of clique vertices after removal phase: {clique_vertices}\n")

time_generate = np.round(tm.time() - start_generate, 3)
# print("Time taken to generate graph: ", "seconds")


edge-occurrence vector: [2452 2474 2412 2458 2419 2391 2411 2465 2393 2477 2363 2418 2359 2459
 2451 2446 2514 2412 2383 2424 2407 2501 2436 2416 2479 2458 2439 2426
 2403 2409 2409 2466 2368 2392 2410 2451 2475 2416 2411 2379 2424 2410
 2409 2455 2456 2419 2438 2437 2446 2415 2398 2440 2448 2447 2388 2410
 2504 2466 2422 2435 2454 2433 2454 2413 2428 2464 2419 2417 2445 2442
 2445 2431 2453 2405 2414 2495 2426 2362 2448 2379 2470 2439 2386 2420
 2450 2435 2418 2357 2431 2441 2408 2431 2508 2442 2356 2442 2397 2370
 2466 2435]

set of clique vertices after removal phase: [ 1 17 69 16 36  7 53 29 24 75]



### Removal Phase

In [9]:
start_removal = tm.time()
itr = 0
removed = []  # keep track of the vertices removed from the original graph to form a clique

# clique = is_clique(G, V, N)

while not is_clique(G, V, N):

    itr += 1

    curr = -1
    idx_sorted = np.argsort(V)

    print(f"i = {itr}\nindex sorted: {idx_sorted}\n")
    # print(f"number of edges associated with each vertex: {V}")

    for idx in range(N):
        if V[idx_sorted[idx]] != 0:
            curr = idx_sorted[idx]
            removed.append(curr)
            break
    # print(f"vertex removed: {curr} number of edges: {V[curr]}\n")
    A = remove_edges(G, N, V, curr)
    G = A[0]

    clique = is_clique(G, V, N)

print(f"is a clique at iteration #{itr}!!!")
time_remove = np.round(tm.time() - start_removal, 3)

i = 1
index sorted: [94 87 12 77 10 32 97 79 39 18 82 54  5 33  8 96 50 28 73 20 90 42 29 30
 41 34 55  6 38 17  2 63 74 49 23 37 67 86 11 45  4 66 83 58 40 19 76 27
 64 71 91 88 61 85 59 99 22 47 46 26 81 51 89 95 93 69 70 68 15 48 53 52
 78 84 35 14  0 72 62 60 43 44 25  3 13 65  7 31 98 57 80  1 36  9 24 75
 21 56 92 16]

i = 2
index sorted: [94 87 12 77 10 32 97 79 82 18 39  5 54 96 33 50  8 20 28 37  2 41 42  6
 29 38 17 90 11 58 23 45 30 63 73 40 49 34 55  4 67 74 76 64 19 27 83 86
 66 59 91 22 81 71 88 47 69 61 46 89 85 99 51 95 72 84 15 48 52 68 93 78
 53 62 26  0 44 35 14 43 13 60 70 25  7 98 57 65 31  3 80  9 36 24  1 75
 56 21 92 16]

i = 3
index sorted: [94 87 77 12 97 10 32 18 79 82  5 39 33 54  8 96 58 11 50 63 23 28 20  6
 29 37 73 41 42 40 17 45  2  4 76 38 30 34 49 90 55 86 22 81 19 91 74 67
 27 83 71 47 88 64 69 89 59 61 99 46 85 51 72 95 66 48 84 35 78 52 26 68
 14  0 13 15 25 93 43 53 60 62 44 70 57 98  7 80 65 31 24  3  9 36  1 75
 56 92 21 16]

i = 4
index sorted:

In [10]:
print(f"number of iterations in the removal phase: {itr}")

number of iterations in the removal phase: 91


In [11]:
full_vertices = np.arange(N)
included = np.setdiff1d(full_vertices, removed)
assert len(removed) + len(included) == N
print(f"vertices included after the removal phase: {included}")

vertices included after the removal phase: [ 1  7 16 24 29 36 53 69 75]


### Inclusion Phase

In [12]:
def inclusion_phase(target, in_set, g):
    def connected():
        for j in range(len(in_set)):
            for k in range(j + 1, len(in_set)):
                if g[target_idx, in_set[j], in_set[k]] != 1:
                    # print(f" vertex {idx} is not connected to all clique vertices")
                    return False
        return True

    for target_idx in target:
        if connected():
            print(f"add {target_idx} to clique!")
            in_set = np.append(in_set, idx)

    return in_set


In [13]:
start_include = tm.time()
res = inclusion_phase(removed, included, G_0)
time_include = np.round(tm.time() - start_include, 3)

add 17 to clique!


In [14]:
print(f"Set of clique vertices after generation phase: {np.sort(clique_vertices)}\n")
print(f"Set of clique vertices after removal phase: {np.sort(included)}\n")
# print(f"set of vertices removed: {removed}, number of elements = {len(removed)}\n")
print(f"Set of clique vertices after inclusion phase: {np.sort(res)}")

Set of clique vertices after generation phase: [ 1  7 16 17 24 29 36 53 69 75]

Set of clique vertices after removal phase: [ 1  7 16 24 29 36 53 69 75]

Set of clique vertices after inclusion phase: [ 1  7 16 24 29 36 53 69 75 90]
