In [67]:
library(tidyverse)
library(igraph)

In [68]:
# Input data: one node table and one edge list (source, target, weight).
# Alternative inputs, kept for reference -- uncomment a pair to switch dataset:
#   simulated network
# edges = read.csv('../datasets/wits/simulated/edgelist.csv')
# nodes = read.csv('../datasets/wits/simulated/nodelist.csv')
#   2011 snapshot
# edges = read.csv('../datasets/wits/edgelist_2011.csv')
# nodes = read.csv('../datasets/wits/nodelist_2011.csv')
# Active dataset (file names suggest missing values were already removed
# upstream -- TODO confirm).
nodes <- read.csv(file = "../datasets/wits/nl_no_missing.csv")
edges <- read.csv(file = "../datasets/wits/el_no_missing.csv")

In [69]:
# Preview the first three rows of the edge list.
head(x = edges, n = 3L)

Unnamed: 0_level_0,source,target,weight
Unnamed: 0_level_1,<chr>,<chr>,<dbl>
1,NPL,LBR,97.061
2,NPL,AZE,46.502
3,NPL,JOR,25.551


In [70]:
# Number of rows in the node table.
n <- nrow(nodes)
# NOTE(review): N is not used in the cells visible here; presumably an
# iteration/simulation count consumed later -- confirm.
N <- 1000
# ISO3 country codes, in node-table order.
country_names <- nodes[["country_iso3"]]
# Coerce the landlocked flag to a proper logical column.
nodes$landlocked <- as.logical(nodes$landlocked)

In [71]:
# Build the trade network from the edge list.
# - Vertices are named by the country codes found in `source`/`target`.
# - Edges are created in the row order of `edges`, so assigning
#   `edges$weight` afterwards lines each weight up with its own edge.
# - `directed = FALSE` (spelled out: `F` is an ordinary variable that can be
#   reassigned) drops the source -> target orientation.
# NOTE(review): if `edges` holds both A -> B and B -> A rows, the undirected
# graph keeps them as two parallel edges rather than one merged edge --
# confirm this is intended.
trade_graph <- edges[, c("source", "target")] %>%
    as.matrix() %>%
    igraph::graph_from_edgelist(directed = FALSE) %>%
    igraph::set_edge_attr(name = "weight", value = edges$weight)

In [72]:
# Detect communities in the trade network with the Louvain method.
# NOTE(review): no RNG seed is set in the visible cells; if the installed
# igraph version randomises Louvain, partitions may differ between runs --
# consider calling set.seed() beforehand.
louvain_partitions <- trade_graph %>%
    igraph::cluster_louvain()

In [73]:
# Store the membership vector on the graph object under "community".
# `$` on an igraph object addresses a graph-level attribute (not a vertex
# attribute), so the whole vector is kept as a single attribute whose order
# follows the vertex order of `trade_graph`.
trade_graph <- igraph::set_graph_attr(
    graph = trade_graph,
    name = "community",
    value = louvain_partitions$membership
)

In [74]:
# Summarise every Louvain community: its id, its number of vertices, and the
# vertex (or vertices) with the highest betweenness inside the community's
# induced subgraph.
#
# Result: data frame with columns `community`, `size`, `central_node`, in the
# order in which community ids first appear in the membership vector. When
# several vertices tie for the maximum betweenness, the community contributes
# one row per tied vertex.
#
# Rows are built per community and bound once at the end instead of growing
# the data frame on every iteration. Vertices are selected straight from the
# membership vector (which is in vertex order), so this cell does not rely on
# an attribute written by another cell.
#
# NOTE(review): igraph's betweenness() uses the `weight` edge attribute as a
# path length when one is present, so heavily traded links count as "long"
# -- confirm this is the intended interpretation of trade weights.
communities_summary <- unique(louvain_partitions$membership) %>%
    lapply(function(community_id) {
        member_vertices <- which(louvain_partitions$membership == community_id)
        community_graph <- igraph::induced_subgraph(
            graph = trade_graph,
            vids = member_vertices
        )

        btwn_cent <- igraph::betweenness(graph = community_graph)

        data.frame(
            community = community_id,
            size = igraph::gorder(graph = community_graph),
            # max() returns one of the elements, so `==` is an exact match here.
            central_node = names(which(btwn_cent == max(btwn_cent)))
        )
    }) %>%
    dplyr::bind_rows()

In [75]:
# Display the per-community summary table (auto-printed by the notebook).
communities_summary

community,size,central_node
<dbl>,<int>,<chr>
3,63,MDG
1,55,NER
2,23,SUR


In [76]:
# Names of the node-table columns that are numeric (integer or double).
numerical_columns <- names(nodes)[vapply(nodes, is.numeric, logical(1))]

In [77]:
# Discretise every numeric node attribute into three ordered bins:
#   "Low"    : values up to the 1st quartile
#   "Medium" : values above the 1st quartile, up to the 3rd quartile
#   "High"   : values above the 3rd quartile
# Each column in `numerical_columns` is replaced in place by a factor.
#
# The outer breaks are pushed one unit beyond the min/max so the extreme
# values fall inside the (right-closed) intervals produced by cut().
# cut() raises an error when two breaks coincide (e.g. Q1 equals Q3).
tags <- c("Low", "Medium", "High")
for (col in numerical_columns) {
    values <- nodes[[col]]

    # Already binned (e.g. the cell was re-run): leave the column alone.
    if (!is.numeric(values)) {
        next
    }

    # One quantile call per column; NAs are ignored here and stay NA after cut().
    quartiles <- quantile(
        values,
        probs = c(0, 0.25, 0.75, 1),
        na.rm = TRUE,
        names = FALSE
    )
    breaks <- c(quartiles[1] - 1, quartiles[2], quartiles[3], quartiles[4] + 1)

    nodes[[col]] <- cut(values, breaks = breaks, labels = tags)
}

In [78]:
# Preview the first three rows of the node table after binning.
head(x = nodes, n = 3L)

Unnamed: 0_level_0,country_iso3,gdp_us_dollar,inflation_rate,gdp_growth,colonizer,area,landlocked,continent,langoff_1,population,gdp_per_capita,life_expectancy,gni_atlas,agriculture_forestry_fishing_of_gdp,industry_of_gdp,merchandise_of_gdp,net_barter_of_trade,foreign_direct_investment_inflows,happiness
Unnamed: 0_level_1,<chr>,<fct>,<fct>,<fct>,<chr>,<fct>,<lgl>,<chr>,<chr>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>
1,AFG,Medium,High,Low,USA,Medium,True,Asia,Persian,Medium,Low,Low,Medium,High,Medium,Low,Medium,Low,Medium
2,AGO,Medium,High,Medium,PRT,High,False,Africa,Portuguese,Medium,Medium,Low,Medium,Medium,High,Medium,High,Low,Medium
3,ALB,Low,Medium,Medium,NONE,Low,False,Europe,Albanian,Low,Medium,Medium,Low,High,Medium,Medium,Medium,Medium,Low


In [87]:
# Lookup table: vertex name (country code) -> Louvain community id.
# `$names` and `$membership` are parallel vectors on the communities object.
communities_df <- data.frame(
    country   = louvain_partitions$names,
    community = louvain_partitions$membership,
    stringsAsFactors = FALSE
)

In [91]:
# Attach each country's community id to the node table.
# merge() defaults to an inner join, so countries missing from either side
# are dropped, and the result is sorted by the key column.
nodes <- merge(
    nodes,
    communities_df,
    by.x = "country_iso3",
    by.y = "country"
)

In [95]:
# Number of distinct communities present in the merged node table.
num_communities <- nodes$community %>%
    unique() %>%
    length()

In [103]:
# Count the rows of `edges` that go from community i to community j, for
# every ordered pair (i, j) of community ids 1..num_communities.
# Diagonal pairs (i == j) are within-community links; the rest are
# between-community links.
#
# Result: data frame with integer columns `source`, `target` (community ids)
# and `n` (number of matching edge rows), ordered by source then target.
# Pairs with no matching edge get n = 0.
#
# The per-community membership masks are computed once up front rather than
# once per (i, j) pair, and the result is assembled in a single data.frame()
# call instead of being grown row by row. seq_len() keeps the computation
# well-defined (empty result) when there are no communities.
btwn_wthn_trade_counts <- local({
    community_ids <- seq_len(num_communities)

    members_of <- function(id) {
        nodes$country_iso3[which(nodes$community == id)]
    }
    # One logical mask over the edge rows per community and endpoint role.
    source_masks <- lapply(
        community_ids,
        function(id) edges$source %in% members_of(id)
    )
    target_masks <- lapply(
        community_ids,
        function(id) edges$target %in% members_of(id)
    )

    # expand.grid() varies its first column fastest, so listing `target`
    # first yields rows ordered by source, then target.
    community_pairs <- expand.grid(
        target = community_ids,
        source = community_ids
    )

    data.frame(
        source = community_pairs$source,
        target = community_pairs$target,
        n = vapply(
            seq_len(nrow(community_pairs)),
            function(k) {
                sum(
                    source_masks[[community_pairs$source[k]]] &
                        target_masks[[community_pairs$target[k]]]
                )
            },
            integer(1)
        )
    )
})

In [104]:
# Display the community-to-community edge counts (auto-printed by the notebook).
btwn_wthn_trade_counts

source,target,n
<int>,<int>,<int>
1,1,2517
1,2,972
1,3,2707
2,1,983
2,2,466
2,3,1010
3,1,2283
3,2,845
3,3,2584
