In [114]:
library(tidyverse)
library(igraph)

In [115]:
# Alternative input files, kept commented out for reference:
# edges = read.csv('../datasets/wits/simulated/edgelist.csv')
# nodes = read.csv('../datasets/wits/simulated/nodelist.csv')
# edges = read.csv('../datasets/wits/edgelist_2011.csv')
# nodes = read.csv('../datasets/wits/nodelist_2011.csv')

# Active inputs: node attribute table and edge list.
nodes <- read.csv(file = '../datasets/wits/nl_no_missing.csv')
edges <- read.csv(file = '../datasets/wits/el_no_missing.csv')

In [116]:
head(edges, 3)

Unnamed: 0_level_0,source,target,weight
Unnamed: 0_level_1,<chr>,<chr>,<dbl>
1,NPL,LBR,97.061
2,NPL,AZE,46.502
3,NPL,JOR,25.551


In [117]:
# Number of rows in the node table.
n <- nrow(nodes)
# NOTE(review): `N` is not referenced in the visible cells — confirm it is used elsewhere.
N <- 1000
country_names <- nodes[["country_iso3"]]
# Store the landlocked flag as a logical vector
# (presumably so it is not treated as a numeric column later — confirm).
nodes[["landlocked"]] <- as.logical(nodes[["landlocked"]])

In [118]:
# Build an undirected graph from the (source, target) pairs of the edge list,
# one edge per row, then attach each row's `weight` as the edge attribute
# "weight". `FALSE` is spelled out because `F` is an ordinary variable that can
# be reassigned.
trade_graph <- igraph::graph_from_edgelist(
    as.matrix(edges[, c("source", "target")]),
    directed = FALSE
) %>%
    igraph::set_edge_attr(name = "weight", value = edges$weight)

In [119]:
# Detect communities with the Louvain method.
# NOTE(review): no RNG seed is set in the visible cells, so the community
# numbering may differ between runs — confirm whether reproducibility matters.
louvain_partitions <- igraph::cluster_louvain(trade_graph)

In [120]:
# Keep the membership vector on the graph object itself. This is a graph-level
# attribute (the same thing `trade_graph$community <- ...` writes), not a
# vertex attribute; it is read back later with `trade_graph$community`.
graph_attr(trade_graph, "community") <- louvain_partitions$membership

In [121]:
# Summarise every Louvain community: its id, its number of vertices and the
# vertex (or vertices) with the highest betweenness inside the community.
# One small data frame is built per community and all pieces are bound once at
# the end, instead of re-copying a growing data frame on every iteration.
# NOTE(review): `betweenness()` uses the `weight` edge attribute as a path
# length by default; if `weight` measures trade volume, heavier links count as
# longer paths — confirm this is intended.
communities_summary <- unique(louvain_partitions$membership) %>%
    lapply(function(community_id) {
        # Vertices of this community, read from the membership vector stored
        # on the graph object.
        member_ids <- which(trade_graph$community == community_id)
        community_graph <- induced_subgraph(graph = trade_graph, vids = member_ids)

        btwn_cent <- betweenness(graph = community_graph)

        data.frame(
            community = community_id,
            size = igraph::gorder(graph = community_graph),
            # Ties are kept: every vertex reaching the maximum gets its own row.
            central_node = names(btwn_cent)[btwn_cent == max(btwn_cent)]
        )
    }) %>%
    dplyr::bind_rows()

In [122]:
communities_summary

community,size,central_node
<dbl>,<int>,<chr>
3,63,MDG
1,55,NER
2,23,SUR


In [123]:
# Names of the columns of `nodes` that hold numeric (integer or double) data.
numerical_columns <- names(nodes)[vapply(nodes, is.numeric, logical(1))]

In [124]:
# Replace every numeric column of `nodes` by a three-level factor based on the
# column's own quartiles:
#   "Low"    : values up to the 1st quartile,
#   "Medium" : values above the 1st and up to the 3rd quartile,
#   "High"   : values above the 3rd quartile.
tags <- c("Low", "Medium", "High")
for (col in numerical_columns) {
    values <- nodes[[col]]
    # Compute the two inner break points once per column rather than calling
    # `quantile()` once per break point. Missing values are ignored here and
    # stay NA in the resulting factor.
    quartiles <- quantile(values, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
    # Open-ended outer breaks cover the whole range without relying on
    # `min - 1` / `max + 1`, which are no longer distinct from min / max once
    # the magnitudes exceed double precision for unit steps.
    breaks <- c(-Inf, quartiles, Inf)
    nodes[[col]] <- cut(values, breaks = breaks, labels = tags)
}

In [125]:
head(nodes, 3)

Unnamed: 0_level_0,country_iso3,gdp_us_dollar,inflation_rate,gdp_growth,colonizer,area,landlocked,continent,langoff_1,population,gdp_per_capita,life_expectancy,gni_atlas,agriculture_forestry_fishing_of_gdp,industry_of_gdp,merchandise_of_gdp,net_barter_of_trade,foreign_direct_investment_inflows,happiness
Unnamed: 0_level_1,<chr>,<fct>,<fct>,<fct>,<chr>,<fct>,<lgl>,<chr>,<chr>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>
1,AFG,Medium,High,Low,USA,Medium,True,Asia,Persian,Medium,Low,Low,Medium,High,Medium,Low,Medium,Low,Medium
2,AGO,Medium,High,Medium,PRT,High,False,Africa,Portuguese,Medium,Medium,Low,Medium,Medium,High,Medium,High,Low,Medium
3,ALB,Low,Medium,Medium,NONE,Low,False,Europe,Albanian,Low,Medium,Medium,Low,High,Medium,Medium,Medium,Medium,Low


In [126]:
# Lookup table pairing each name stored in the clustering result with its
# community id.
communities_df <- data.frame(
    country = louvain_partitions[["names"]],
    community = louvain_partitions[["membership"]]
)

In [127]:
# Attach the community id to each country, matching `country_iso3` against
# `country`. With the default settings `merge()` keeps only keys present in
# both tables.
nodes <- merge(nodes, communities_df, by.x = "country_iso3", by.y = "country")

In [143]:
# Number of distinct community ids among the merged nodes.
num_communities <- length(unique(nodes[["community"]]))

In [144]:
# Count the rows of `edges` falling into every combination of source
# community, target community, source GDP level and target GDP level.
#
# Args:
#   nodes: data frame with the columns `country_iso3`, `community` and
#          `gdp_us_dollar` (the latter holding "Low" / "Medium" / "High").
#   edges: data frame with the columns `source` and `target`, whose values are
#          compared against `nodes$country_iso3`.
#
# Returns:
#   A data frame with one row per cell and the columns `source_community`,
#   `target_community`, `source_gdp_level`, `target_gdp_level` and `n` (the
#   number of matching rows of `edges`). Rows are ordered with the target
#   level varying fastest, then the source level, then the target community,
#   and the source community varying slowest.
get_between_within_counts <- function(nodes, edges) {
    gdp_levels <- c("Low", "Medium", "High")

    # Take the community ids from `nodes` itself instead of a global counter,
    # so the function is self-contained and still works when the ids are not
    # exactly 1..k.
    community_ids <- sort(unique(nodes$community))
    if (is.numeric(community_ids) && all(community_ids == round(community_ids))) {
        community_ids <- as.integer(community_ids)
    }

    # Country codes whose `column` equals `value`.
    members_of <- function(column, value) {
        nodes$country_iso3[which(nodes[[column]] == value)]
    }

    # One logical mask over the rows of `edges` per community / level, computed
    # once up front rather than inside every cell.
    source_in_community <- lapply(
        community_ids,
        function(id) edges$source %in% members_of("community", id)
    )
    target_in_community <- lapply(
        community_ids,
        function(id) edges$target %in% members_of("community", id)
    )
    source_at_level <- lapply(
        gdp_levels,
        function(lv) edges$source %in% members_of("gdp_us_dollar", lv)
    )
    target_at_level <- lapply(
        gdp_levels,
        function(lv) edges$target %in% members_of("gdp_us_dollar", lv)
    )

    # `expand.grid()` varies its first argument fastest, so the arguments are
    # listed from the innermost to the outermost dimension.
    cells <- expand.grid(
        target_level = seq_along(gdp_levels),
        source_level = seq_along(gdp_levels),
        target_community = seq_along(community_ids),
        source_community = seq_along(community_ids)
    )

    edge_counts <- vapply(
        seq_len(nrow(cells)),
        function(k) {
            sum(
                source_in_community[[cells$source_community[k]]] &
                    target_in_community[[cells$target_community[k]]] &
                    source_at_level[[cells$source_level[k]]] &
                    target_at_level[[cells$target_level[k]]]
            )
        },
        integer(1)
    )

    data.frame(
        source_community = community_ids[cells$source_community],
        target_community = community_ids[cells$target_community],
        source_gdp_level = gdp_levels[cells$source_level],
        target_gdp_level = gdp_levels[cells$target_level],
        n = edge_counts
    )
}

In [145]:
# Tabulate edge counts for every community pair and GDP-level pair.
btwn_wthn_trade_counts <- get_between_within_counts(nodes = nodes, edges = edges)

In [149]:
head(btwn_wthn_trade_counts, 3)

Unnamed: 0_level_0,source_community,target_community,source_gdp_level,target_gdp_level,n
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<int>
1,1,1,Low,Low,41
2,1,1,Low,Medium,196
3,1,1,Low,High,140


In [148]:
# Total edge count for each (source community, target community) pair.
# `.groups = "drop"` returns an ungrouped result and avoids the message about
# the output still being grouped by `source_community`.
btwn_wthn_trade_counts %>%
    dplyr::group_by(source_community, target_community) %>%
    dplyr::summarise(sum = sum(n), .groups = "drop")

`summarise()` has grouped output by 'source_community'. You can override using the `.groups` argument.



source_community,target_community,sum
<int>,<int>,<int>
1,1,2517
1,2,972
1,3,2707
2,1,983
2,2,466
2,3,1010
3,1,2283
3,2,845
3,3,2584


In [268]:
head(btwn_wthn_trade_counts, 3)

Unnamed: 0_level_0,source_community,target_community,source_gdp_level,target_gdp_level,n
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<int>
1,1,1,Low,Low,41
2,1,1,Low,Medium,196
3,1,1,Low,High,140


In [166]:
# Lay the cell counts out as a 9 x 9 table. `matrix()` fills column by column,
# so each column receives nine consecutive rows of `btwn_wthn_trade_counts`.
# NOTE(review): the fixed 9 x 9 shape assumes 3 communities and 3 GDP levels.
reshaped_btwn_wthn_trade_counts <- matrix(
    btwn_wthn_trade_counts[["n"]],
    nrow = 9,
    ncol = 9
)

In [167]:
# Row labels of the form "<first level> <second level>", with the second level
# varying fastest.
rownames(reshaped_btwn_wthn_trade_counts) <- paste(
    rep(c('Low', 'Medium', 'High'), each = 3),
    rep(c('Low', 'Medium', 'High'), times = 3)
)

In [169]:
# Column labels of the form "<first id> <second id>" for ids 1..3, with the
# second id varying fastest.
colnames(reshaped_btwn_wthn_trade_counts) <- paste(
    rep(1:3, each = 3),
    rep(1:3, times = 3)
)

In [170]:
reshaped_btwn_wthn_trade_counts

Unnamed: 0,1 1,1 2,1 3,2 1,2 2,2 3,3 1,3 2,3 3
Low Low,41,8,72,4,8,4,54,17,164
Low Medium,196,50,157,33,29,38,248,60,276
Low High,140,52,101,47,19,33,237,80,179
Medium Low,218,72,421,69,46,129,156,60,367
Medium Medium,662,248,659,270,133,259,569,209,610
Medium High,398,174,298,192,83,142,419,177,313
High Low,160,64,345,64,27,130,106,40,236
High Medium,462,192,462,192,80,193,318,125,318
High High,240,112,192,112,41,82,176,77,121


In [261]:
# Pearson chi-squared test on the count matrix, which `chisq.test()` treats as
# a two-way contingency table when given a matrix.
chisq.result <- chisq.test(x = reshaped_btwn_wthn_trade_counts)

In [264]:
chisq.result


	Pearson's Chi-squared test

data:  reshaped_btwn_wthn_trade_counts
X-squared = 1042, df = 64, p-value < 2.2e-16


In [265]:
chisq.result$observed

Unnamed: 0,1 1,1 2,1 3,2 1,2 2,2 3,3 1,3 2,3 3
Low Low,41,8,72,4,8,4,54,17,164
Low Medium,196,50,157,33,29,38,248,60,276
Low High,140,52,101,47,19,33,237,80,179
Medium Low,218,72,421,69,46,129,156,60,367
Medium Medium,662,248,659,270,133,259,569,209,610
Medium High,398,174,298,192,83,142,419,177,313
High Low,160,64,345,64,27,130,106,40,236
High Medium,462,192,462,192,80,193,318,125,318
High High,240,112,192,112,41,82,176,77,121


In [267]:
# Expected cell counts under the null hypothesis, rounded to two decimals.
round(chisq.result[["expected"]], digits = 2)

Unnamed: 0,1 1,1 2,1 3,2 1,2 2,2 3,3 1,3 2,3 3
Low Low,65.17,25.17,70.09,25.45,12.07,26.15,59.11,21.88,66.91
Low Medium,190.43,73.54,204.81,74.37,35.26,76.42,172.73,63.93,195.5
Low High,155.57,60.08,167.32,60.76,28.8,62.43,141.11,52.23,159.71
Medium Low,269.45,104.05,289.79,105.23,49.89,108.12,244.4,90.46,276.62
Medium Medium,634.02,244.84,681.88,247.61,117.38,254.42,575.08,212.85,650.9
Medium High,384.72,148.57,413.77,150.25,71.23,154.38,348.96,129.16,394.97
High Low,205.33,79.29,220.83,80.19,38.01,82.39,186.24,68.93,210.79
High Medium,410.3,158.45,441.27,160.24,75.96,164.64,372.16,137.75,421.22
High High,202.0,78.01,217.25,78.89,37.4,81.06,183.22,67.81,207.37
