## Setup

In [1]:
here::here()

In [2]:
library(pacman)
p_load("dplyr", "here", "igraph", "RandomWalkRestartMH", install = FALSE)

In [3]:
df <- read.csv(here("03_outputs", "network-ready-layers", "network_dataframe.csv"))

In [4]:
g <- graph_from_data_frame(df, directed = FALSE)

In [6]:
g

IGRAPH 94b478d UN-- 2696 187121 -- 
+ attr: name (v/c), source_type (e/c), approved (e/c)
+ edges from 94b478d (vertex names):
 [1] (4) ATOH8  --(3) A1CF   (4) BARX2  --(3) A1CF   (4) BHLHA15--(3) A1CF  
 [4] (4) E2F5   --(3) A1CF   (4) EOMES  --(3) A1CF   (4) FOXH1  --(3) A1CF  
 [7] (4) GLIS2  --(3) A1CF   (4) HOXC5  --(3) A1CF   (4) HOXD13 --(3) A1CF  
[10] (4) IRF4   --(3) A1CF   (4) KLF15  --(3) A1CF   (4) LHX2   --(3) A1CF  
[13] (4) MAFA   --(3) A1CF   (4) MLXIPL --(3) A1CF   (4) PAX3   --(3) A1CF  
[16] (4) T      --(3) A1CF   (4) TCF21  --(3) A1CF   (4) ZBTB16 --(3) A1CF  
[19] (4) ZBTB47 --(3) A1CF   (4) ZBTB7C --(3) A1CF   (4) ZIC5   --(3) A1CF  
[22] (4) AHDC1  --(3) A4GALT (4) BHLHA15--(3) A4GALT (4) GLIS2  --(3) A4GALT
+ ... omitted several edges

## Calculating DMNC (Density of the max neighborhood component)

In [16]:
calculate_dmnc <- function(graph) {
  nodes <- V(graph)
  dmnc_values <- sapply(nodes, function(node) {
    neighbors <- neighbors(graph, node)
    subgraph <- induced_subgraph(graph, c(node, neighbors))
    components <- components(subgraph)
    largest_component_size <- max(components$csize)
    largest_component_nodes <- V(subgraph)[components$membership == which.max(components$csize)]
    largest_component_subgraph <- induced_subgraph(subgraph, largest_component_nodes)
    density <- edge_density(largest_component_subgraph, loops = FALSE)
    return(density * largest_component_size)
  })
  dmnc_df <- data.frame(node = names(dmnc_values), dmnc = dmnc_values)
  return(dmnc_df)
}

In [17]:
dmnc_df <- calculate_dmnc(g)

## MCC

In [10]:
approximate_max_clique_centrality <- function(graph, verbose = FALSE) {
  if (verbose) cat("Performing k-core decomposition...\n")
  cores <- coreness(graph)
  if (verbose) cat("K-core decomposition completed.\n")
  
  if (verbose) cat("Calculating neighborhood densities...\n")
  node_density <- rep(0, vcount(graph))
  names(node_density) <- V(graph)$name
  
  for (v in V(graph)) {
    node <- V(graph)[v]
    neighbors <- neighbors(graph, node)
    if (length(neighbors) > 0) {
      subgraph <- induced_subgraph(graph, c(node, neighbors))
      density <- edge_density(subgraph)
      node_density[node$name] <- density
      if (verbose) cat("Node", node$name, "density:", density, "\n")
    }
  }
  
  centrality_df <- data.frame(node = names(node_density), max_clique_centrality = node_density)
  return(centrality_df)
}

In [11]:
centrality_df <- approximate_max_clique_centrality(g, verbose = TRUE)

Performing k-core decomposition...
K-core decomposition completed.
Calculating neighborhood densities...
Node (4) ATOH8 density: 0.136253 
Node (4) BARX2 density: 0.1389046 
Node (4) BHLHA15 density: 0.6016254 
Node (4) E2F5 density: 0.1999538 
Node (4) EOMES density: 0.5636075 
Node (4) FOXH1 density: 0.004309078 
Node (4) GLIS2 density: 0.06281867 
Node (4) HOXC5 density: 0.05396741 
Node (4) HOXD13 density: 0.02305065 
Node (4) IRF4 density: 0.4256273 
Node (4) KLF15 density: 0.119131 
Node (4) LHX2 density: 0.01444788 
Node (4) MAFA density: 0.02946606 
Node (4) MLXIPL density: 0.1104968 
Node (4) PAX3 density: 0.07918447 
Node (4) T density: 0.1277607 
Node (4) TCF21 density: 0.119917 
Node (4) ZBTB16 density: 0.2097414 
Node (4) ZBTB47 density: 0.1582 
Node (4) ZBTB7C density: 0.1505998 
Node (4) ZIC5 density: 0.01917319 
Node (4) AHDC1 density: 0.07108027 
Node (4) HIF3A density: 0.1582079 
Node (4) KCNIP3 density: 0.4312705 
Node (4) NFIX density: 0.1392329 
Node (4) NR1D1 dens

## Results

In [22]:
res <- left_join(dmnc_df, centrality_df, by = "node")

In [30]:
prioritized_genes <- res %>% top_n(50, dmnc) %>% top_n(25, max_clique_centrality)

In [31]:
prioritized_genes

node,dmnc,max_clique_centrality
<chr>,<dbl>,<dbl>
(3) AURKA,534.2191,1.24237
(3) BIRC3,541.261,1.247145
(3) BTLA,532.9423,1.278039
(3) CCR2,541.0526,1.291295
(3) CD2,538.8863,1.247422
(3) CD38,540.0688,1.235855
(3) CD48,536.0187,1.249461
(3) CD69,538.3493,1.284843
(3) CD80,535.4499,1.245232
(3) CRTAM,546.9932,1.237541


In [36]:
df$source_type <- as.factor(df$source_type)

In [37]:
df %>% select(source, source_type) %>% distinct()  %>% summary()

    source           source_type 
 Length:1544        gcn    :758  
 Class :character   drug   :388  
 Mode  :character   ppin   :312  
                    grn    : 67  
                    gwas   : 14  
                    gda    :  3  
                    (Other):  2  