### Loading the export data along with the nodal attributes available for each country

In [1]:
# Read the trade edge list and the per-country attribute table from disk.
edges <- read.csv(file = '../datasets/wits/simulated/edgelist.csv')
nodes <- read.csv(file = '../datasets/wits/simulated/nodelist.csv')

In [2]:
# n: number of countries (rows of `nodes`); N: number of permutations drawn
# by the permutation tests further down.
n <- nrow(nodes)
N <- 1000
country_names <- nodes[["country_iso3"]]
# Store the landlocked flag as a logical column.
nodes[["landlocked"]] <- as.logical(nodes[["landlocked"]])
# Remove the columns that are not used in the analysis below.
nodes <- subset(nodes, select = -c(population, area, gdp_per_capita))

### Importing the libraries required for the permutation tests

In [3]:
library(coin)
library(sna)
library(ergm)
library(dplyr)

Loading required package: survival

Loading required package: statnet.common


Attaching package: 'statnet.common'


The following object is masked from 'package:base':

    order


Loading required package: network

network: Classes for Relational Data
Version 1.16.1 created on 2020-10-06.
copyright (c) 2005, Carter T. Butts, University of California-Irvine
                    Mark S. Handcock, University of California -- Los Angeles
                    David R. Hunter, Penn State University
                    Martina Morris, University of Washington
                    Skye Bender-deMoll, University of Washington
 For citation information, type citation("network").
 Type help("network-package") to get started.


sna: Tools for Social Network Analysis
Version 2.6 created on 2020-10-5.
copyright (c) 2005, Carter T. Butts, University of California-Irvine
 For citation information, type citation("sna").
 Type help(package="sna") to get started.



Attaching package: 'sna'


The followin

In [4]:
# Preview the first three rows of the node attribute table.
head(nodes, n = 3)

Unnamed: 0_level_0,country_iso3,industry_of_gdp,merchandise_of_gdp,continent,landlocked,agriculture_forestry_fishing_of_gdp,gdp_growth,gdp_us_dollar,langoff_1,foreign_direct_investment_inflows,inflation_rate,net_barter_of_trade
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<chr>,<lgl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
1,AFG,22.74025,38.70415,Asia,True,23.743664,0.4263548,17804292964,Persian,52173421,11.804186,144.7519
2,AGO,56.02651,78.30597,Africa,False,5.845681,3.4719763,111789686464,Portuguese,-3023770966,13.482468,244.3292
3,ALB,24.48441,56.86241,Europe,False,18.226765,2.5453218,12890765324,Albanian,1048706682,3.429123,94.3372


In [5]:
# Preview the first three rows of the edge list.
head(edges, n = 3)

Unnamed: 0_level_0,source,target,weight
Unnamed: 0_level_1,<chr>,<chr>,<dbl>
1,AFG,TKM,0.189
2,ALB,SLE,22.012
3,ALB,KEN,10.836


### Examining the effect of homophily on the network of trades

In [6]:
# Count the edges whose two endpoints carry the same attribute value.
#
# Arguments:
#   edges: data frame with `source` and `target` columns of country codes.
#   nodes: data frame with a `country_iso3` column; row k of `nodes`
#          corresponds to attrs[k].
#   attrs: vector of attribute values, one per row of `nodes`.
#   ...:   accepted and ignored, so the function can be called with the same
#          argument list as the other network statistics.
#
# Returns the number of edges with equal endpoint attributes (0 for an empty
# edge list). An edge where either endpoint attribute is NA is not counted.
# Stops with an error when an edge refers to a country missing from `nodes`.
count_homophily <- function(edges, nodes, attrs, ...) {
    # Resolve every endpoint to its row in `nodes` in one vectorised lookup
    # instead of scanning all country codes once per edge.
    source_idx <- match(edges$source, nodes$country_iso3)
    target_idx <- match(edges$target, nodes$country_iso3)
    if (anyNA(source_idx) || anyNA(target_idx)) {
        stop("every edge endpoint must appear in nodes$country_iso3",
             call. = FALSE)
    }
    sum(attrs[source_idx] == attrs[target_idx], na.rm = TRUE)
}

In [7]:
# Permutation test of a network statistic against shuffled node attributes.
#
# The statistic `func` is evaluated once on the observed attribute vector and
# once for each of N random reorderings of that vector over the nodes. The
# reported p-value is the share of permuted statistics strictly below the
# observed one, replaced by one minus that share when it exceeds 0.5.
#
# Arguments:
#   edges, nodes: passed unchanged to `func`.
#   func:  function(edges, nodes, attrs, params) returning a single number.
#   attrs: attribute vector, one value per node.
#   attr:  label used in the printed line, the plot title and the PDF name.
#   ...:   extra settings; they are bundled into one list and handed to
#          `func` as its fourth argument.
#
# Relies on the globals `n` (number of nodes) and `N` (number of
# permutations) defined earlier in the notebook.
#
# Side effects: prints "<attr> , <p_value>" and writes a histogram of the
# permuted statistics (observed value marked in red) to a PDF under
# '../results/'.
#
# Returns a one-row data frame with columns `effect` and `p_value`.
execute_permutation_test <- function(edges, nodes, func, attrs, attr, ...) {
    params <- list(...)
    observed_statistic <- func(edges, nodes, attrs, params)

    # Generate the permuted attribute vectors, one per column.
    permutated_samples <- matrix(0, nrow = n, ncol = N)
    for (i in seq_len(N)) {
        permutated_samples[, i] <- sample(x = attrs, size = n, replace = FALSE)
    }

    # Evaluate the statistic on every permuted attribute vector. The result
    # is stored at index i (the loop variable), one slot per permutation.
    permutated_statistics <- matrix(0, nrow = N, ncol = 1)
    for (i in seq_len(N)) {
        permutated_statistics[i] <- func(edges, nodes, permutated_samples[, i], params)
    }

    p_value <- mean(permutated_statistics < observed_statistic)
    if (p_value > .5) {
        p_value <- 1 - p_value
    }

    print(paste(attr, ',', p_value))
    pdf(paste('../results/', attr, 'residuals.pdf'))
    # Close the PDF device even when plotting fails, so it is never left open.
    on.exit(dev.off(), add = TRUE)
    par(bg = 'white')
    hist(
        permutated_statistics,
        xlim = range(c(permutated_statistics, observed_statistic)),
        main = paste('Comparison of the observed statistic and \npermutated ones with respect to \n', attr, 'with p-value: ', p_value)
    )
    abline(v = observed_statistic, col = 'red')
    data.frame(
        effect = attr,
        p_value = p_value
    )
}

In [8]:
# Names of the non-numeric node attributes, excluding the country identifier.
# The names are taken from the logical mask directly: subsetting the data
# frame first would drop to a plain vector (and lose the name) when exactly
# one column qualifies.
is_categorical <- !vapply(nodes, is.numeric, logical(1))
is_categorical[names(is_categorical) == 'country_iso3'] <- FALSE
categories <- names(nodes)[is_categorical]

In [9]:
# Run the homophily permutation test once per categorical attribute and
# stack the one-row results into a single data frame.
homophily_rows <- lapply(categories, function(category_name) {
    execute_permutation_test(
        edges,
        nodes,
        count_homophily,
        nodes[[category_name]],
        paste('homophily ', as.character(category_name))
    )
})
homophily_results <- dplyr::bind_rows(data.frame(), homophily_rows)

[1] "homophily  continent , 0"
[1] "homophily  landlocked , 0"
[1] "homophily  langoff_1 , 0"


### Examining the effect of closeness

In [10]:
# Sum, over all edges, of the mean-centred attribute values of both endpoints.
#
# Arguments:
#   edges: data frame with `source` and `target` columns of country codes.
#   nodes: data frame with a `country_iso3` column; row k of `nodes`
#          corresponds to attrs[k].
#   attrs: numeric vector, one value per row of `nodes`.
#   ...:   the first extra argument is the log flag. It may be a bare logical
#          or a logical wrapped in a list; when missing it defaults to FALSE.
#          When TRUE, log() is applied to each per-edge sum before adding up
#          (a non-positive per-edge sum therefore yields NaN/-Inf).
#
# Returns a single number (0 for an empty edge list). Stops with an error
# when an edge refers to a country missing from `nodes`.
compute_nodecov <- function(edges, nodes, attrs, ...) {
    params <- list(...)
    use_log <- length(params) > 0 &&
        isTRUE(as.logical(unlist(params[[1]]))[1])

    centred <- attrs - mean(attrs)
    source_idx <- match(edges$source, nodes$country_iso3)
    target_idx <- match(edges$target, nodes$country_iso3)
    if (anyNA(source_idx) || anyNA(target_idx)) {
        stop("every edge endpoint must appear in nodes$country_iso3",
             call. = FALSE)
    }

    per_edge <- centred[source_idx] + centred[target_idx]
    if (use_log) {
        per_edge <- log(per_edge)
    }
    sum(per_edge)
}

In [11]:
# Sum, over all edges, of the absolute attribute difference between the two
# endpoints.
#
# Arguments:
#   edges: data frame with `source` and `target` columns of country codes.
#   nodes: data frame with a `country_iso3` column; row k of `nodes`
#          corresponds to attrs[k].
#   attrs: numeric vector, one value per row of `nodes`.
#   ...:   the first extra argument is the log flag. It may be a bare logical
#          or a logical wrapped in a list; when missing it defaults to FALSE.
#          When TRUE, each per-edge difference d contributes log(1 + d).
#
# Returns a single number (0 for an empty edge list). Stops with an error
# when an edge refers to a country missing from `nodes`.
compute_absdiff <- function(edges, nodes, attrs, ...) {
    params <- list(...)
    use_log <- length(params) > 0 &&
        isTRUE(as.logical(unlist(params[[1]]))[1])

    centred <- attrs - mean(attrs)
    source_idx <- match(edges$source, nodes$country_iso3)
    target_idx <- match(edges$target, nodes$country_iso3)
    if (anyNA(source_idx) || anyNA(target_idx)) {
        stop("every edge endpoint must appear in nodes$country_iso3",
             call. = FALSE)
    }

    per_edge <- abs(centred[source_idx] - centred[target_idx])
    if (use_log) {
        per_edge <- log(1 + per_edge)
    }
    sum(per_edge)
}

In [12]:
# Correlation-style homophily score for a numeric node attribute: the mean,
# over edges, of the product of the two mean-centred endpoint values, divided
# by the mean squared centred value over all nodes.
#
# Arguments:
#   edges: data frame with `source` and `target` columns of country codes.
#   nodes: data frame with a `country_iso3` column; row k of `nodes`
#          corresponds to attrs[k].
#   attrs: numeric vector, one value per row of `nodes`.
#   ...:   accepted and ignored.
#
# Returns a single number; NaN when there are no edges or when `attrs` has
# zero variance. Stops with an error when an edge refers to a country missing
# from `nodes`.
numerical_homophily <- function(edges, nodes, attrs, ...) {
    # nrow(), not length(): length() of a data frame is its column count.
    edge_count <- nrow(edges)
    node_count <- nrow(nodes)

    centred <- attrs - mean(attrs)
    source_idx <- match(edges$source, nodes$country_iso3)
    target_idx <- match(edges$target, nodes$country_iso3)
    if (anyNA(source_idx) || anyNA(target_idx)) {
        stop("every edge endpoint must appear in nodes$country_iso3",
             call. = FALSE)
    }

    cross_products <- sum(centred[source_idx] * centred[target_idx])
    cross_products / edge_count / sum(centred^2) * node_count
}

In [13]:
# Names of the numeric node attributes and the sub-table holding only them.
# drop = FALSE keeps `nodes_numerical` a data frame even when a single numeric
# column exists, and the names are read from the mask rather than from a
# possibly-dropped subset.
is_numeric_column <- vapply(nodes, is.numeric, logical(1))
nums <- names(nodes)[is_numeric_column]
nodes_numerical <- nodes[, nums, drop = FALSE]

In [14]:
# For every numeric attribute: regress it on all the other numeric attributes,
# then run the nodecov and absdiff permutation tests on the residuals (log
# flag FALSE). One-row results are collected in preallocated lists and stacked
# once at the end instead of growing a data frame on every iteration;
# seq_along() keeps the loop from running when `nums` is empty.
nodecov_rows <- vector("list", length(nums))
absdiff_rows <- vector("list", length(nums))
for (i in seq_along(nums)) {
    f <- paste(nums[i], "~", paste(nums[-i], collapse=" + "))
    linear.model <- lm(f, data = nodes_numerical)
    nodecov_rows[[i]] <- execute_permutation_test(
        edges,
        nodes,
        compute_nodecov,
        linear.model$residuals,
        paste('nodecov ', as.character(nums[i])),
        FALSE
    )
    absdiff_rows[[i]] <- execute_permutation_test(
        edges,
        nodes,
        compute_absdiff,
        linear.model$residuals,
        paste('absdiff ', as.character(nums[i])),
        FALSE
    )
}
nodecov_results <- dplyr::bind_rows(data.frame(), nodecov_rows)
absdiff_results <- dplyr::bind_rows(data.frame(), absdiff_rows)

[1] "nodecov  industry_of_gdp , 0.413"
[1] "absdiff  industry_of_gdp , 0.304"
[1] "nodecov  merchandise_of_gdp , 0.439"
[1] "absdiff  merchandise_of_gdp , 0.29"
[1] "nodecov  agriculture_forestry_fishing_of_gdp , 0.251"
[1] "absdiff  agriculture_forestry_fishing_of_gdp , 0.116"
[1] "nodecov  gdp_growth , 0.124"
[1] "absdiff  gdp_growth , 0.039"
[1] "nodecov  gdp_us_dollar , 0.016"
[1] "absdiff  gdp_us_dollar , 0"
[1] "nodecov  foreign_direct_investment_inflows , 0.06"
[1] "absdiff  foreign_direct_investment_inflows , 0"
[1] "nodecov  inflation_rate , 0.131"
[1] "absdiff  inflation_rate , 0.476"
[1] "nodecov  net_barter_of_trade , 0.393"
[1] "absdiff  net_barter_of_trade , 0.177"


### Testing homophily on the numerical attributes (using regression residuals)

In [15]:
# For every numeric attribute: regress it on all the other numeric attributes
# and run the numerical homophily permutation test on the residuals. One-row
# results are collected in a preallocated list and stacked once at the end;
# seq_along() keeps the loop from running when `nums` is empty.
numerical_homophily_rows <- vector("list", length(nums))
for (i in seq_along(nums)) {
    f <- paste(nums[i], "~", paste(nums[-i], collapse=" + "))
    linear.model <- lm(f, data = nodes_numerical)
    values <- linear.model$residuals
    numerical_homophily_rows[[i]] <- execute_permutation_test(
        edges,
        nodes,
        numerical_homophily,
        values,
        paste('numerical homophily ', as.character(nums[i]))
    )
}
numerical_homophily_results <- dplyr::bind_rows(
    data.frame(),
    numerical_homophily_rows
)

[1] "numerical homophily  industry_of_gdp , 0.396"
[1] "numerical homophily  merchandise_of_gdp , 0.336"
[1] "numerical homophily  agriculture_forestry_fishing_of_gdp , 0"
[1] "numerical homophily  gdp_growth , 0.257"
[1] "numerical homophily  gdp_us_dollar , 0.239"
[1] "numerical homophily  foreign_direct_investment_inflows , 0.327"
[1] "numerical homophily  inflation_rate , 0.056"
[1] "numerical homophily  net_barter_of_trade , 0.199"


In [16]:
# Stack the four result tables, in this order, into one data frame.
result_tables <- list(
    nodecov_results,
    absdiff_results,
    homophily_results,
    numerical_homophily_results
)
all_results <- do.call(rbind, result_tables)

In [17]:
# Save the combined test results without the row-name column.
write.csv(
    x = all_results,
    file = '../results/residual_permutation_tests.csv',
    row.names = FALSE
)