# Analysis

## Setup

In [2]:
# Attach lsr up front. None of the cells visible here call it directly;
# presumably later cells rely on it -- TODO confirm.
library(lsr)

# Per-user table; the first line of the CSV supplies the column names.
d <- read.csv(
  file = "../data/wiki/user_data.csv",
  header = TRUE
)

# Peek at the first rows as a quick sanity check on the import.
head(d)

user_id,edit_count,gender,admin_ascention,admin,eigen_central,eigen_central_bin,community,coord_given_all,coord_received_all,coord_given_ingroup,coord_received_ingroup,post_count,italics_freq,bold_freq,link_freq,function_words_freq,avg_length_tokens
K-F.U.N 2,165,male,,False,3.761916e-07,False,0,,,,,1,,,,,1.0
JFreeman,9270,unknown,,False,0.001671275,False,1,,0.0,,,1,,,,28.0,4.0
Extcetc,27,unknown,,False,4.017304e-06,False,2,,0.0,,,1,,,,4.0,2.0
Merteselle,295,unknown,,False,4.261994e-08,False,3,,0.0,,,1,,,,2.0,2.0
The Sunshine Man,0,unknown,,False,6.056635e-07,False,9,0.0,0.0,,,3,,,,27.33333,3.333333
Kuebie,1607,unknown,,False,5.176361e-05,False,5,0.08333333,0.11875,,,9,,,0.1111111,23.77778,4.0


In [3]:
# Split the users by the two power indicators: adminship (`admin`) and the
# binned eigenvector centrality (`eigen_central_bin`).
#
# Both flags are written in the CSV as the text "True"/"False". Depending on
# how read.csv() infers the column type (and on the R version), they may come
# back as character, factor or logical. A logical column never equals the
# string "True" (TRUE is coerced to "TRUE"), which would silently produce
# empty subsets. Normalising through as.character() + toupper() makes the
# comparison work for every representation. Rows whose flag is NA are still
# dropped, because subset() treats NA conditions as FALSE.
flag_is <- function(x, value) {
  toupper(as.character(x)) == value
}

d.admin <- subset(d, flag_is(admin, "TRUE"))
d.nonadmin <- subset(d, flag_is(admin, "FALSE"))
d.higheigen <- subset(d, flag_is(eigen_central_bin, "TRUE"))
d.loweigen <- subset(d, flag_is(eigen_central_bin, "FALSE"))

## Replicating *Centre Stage* results

Relationship between power (adminship & centrality) and coordination.

In [4]:
# Eigenvector centrality vs. coordination received.
# One-sided Spearman rank correlation (H1: rho > 0); exact = FALSE asks for
# the asymptotic p-value instead of an exact one.
cor.test(
  x = d$eigen_central,
  y = d$coord_received_all,
  alternative = "greater",
  method = "spearman",
  exact = FALSE
)


	Spearman's rank correlation rho

data:  d$eigen_central and d$coord_received_all
S = 8.921e+11, p-value < 2.2e-16
alternative hypothesis: true rho is greater than 0
sample estimates:
      rho 
0.1898093 


In [6]:
# Coordination received: admins vs. high-centrality users
# (two-sided, two-sample t-test with t.test()'s defaults).
# NOTE(review): a user can presumably be both an admin and high-centrality,
# so the two samples may overlap -- confirm that treating them as independent
# is intended.
t.test(
  x = d.admin$coord_received_all,
  y = d.higheigen$coord_received_all
)


	Welch Two Sample t-test

data:  d.admin$coord_received_all and d.higheigen$coord_received_all
t = -0.99532, df = 412.59, p-value = 0.3202
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.008057243  0.002640557
sample estimates:
 mean of x  mean of y 
0.01898618 0.02169453 


## Ingroup / outgroup coordination

In [7]:
# Coordination given: towards everyone vs. towards the user's own group
# (two-sided, two-sample t-test with t.test()'s defaults).
# NOTE(review): both vectors are columns of the same data frame `d`, i.e. two
# measurements on the same rows; a paired test (paired = TRUE) may be what is
# intended here -- confirm.
t.test(
  x = d$coord_given_all,
  y = d$coord_given_ingroup
)



	Welch Two Sample t-test

data:  d$coord_given_all and d$coord_given_ingroup
t = -1.0902, df = 343.58, p-value = 0.2764
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.01283783  0.00368176
sample estimates:
  mean of x   mean of y 
0.005561945 0.010139981 


In [8]:
# Coordination received: from everyone vs. from the user's own group
# (two-sided, two-sample t-test with t.test()'s defaults).
# NOTE(review): both vectors are columns of the same data frame `d`, i.e. two
# measurements on the same rows; a paired test (paired = TRUE) may be what is
# intended here -- confirm.
t.test(
  x = d$coord_received_all,
  y = d$coord_received_ingroup
)


	Welch Two Sample t-test

data:  d$coord_received_all and d$coord_received_ingroup
t = 0.90586, df = 542.45, p-value = 0.3654
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.002468075  0.006692489
sample estimates:
  mean of x   mean of y 
0.006075535 0.003963327 


## Social power & linguistic style

In [9]:
# avg_length_tokens compared across both power splits.
# Adminship: admins vs. non-admins.
t.test(
  x = d.admin$avg_length_tokens,
  y = d.nonadmin$avg_length_tokens
)

# Centrality: high vs. low eigenvector-centrality bin.
t.test(
  x = d.higheigen$avg_length_tokens,
  y = d.loweigen$avg_length_tokens
)


	Welch Two Sample t-test

data:  d.admin$avg_length_tokens and d.nonadmin$avg_length_tokens
t = 5.7325, df = 2998.1, p-value = 1.088e-08
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 0.09953836 0.20302950
sample estimates:
mean of x mean of y 
 3.228846  3.077562 



	Welch Two Sample t-test

data:  d.higheigen$avg_length_tokens and d.loweigen$avg_length_tokens
t = 3.6608, df = 206.98, p-value = 0.0003192
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 0.1299045 0.4331084
sample estimates:
mean of x mean of y 
 3.366401  3.084895 


In [10]:
# italics_freq compared across both power splits.
# Adminship: admins vs. non-admins.
t.test(
  x = d.admin$italics_freq,
  y = d.nonadmin$italics_freq
)

# Centrality: high vs. low eigenvector-centrality bin.
t.test(
  x = d.higheigen$italics_freq,
  y = d.loweigen$italics_freq
)


	Welch Two Sample t-test

data:  d.admin$italics_freq and d.nonadmin$italics_freq
t = -27.07, df = 3130.7, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.1722308 -0.1489658
sample estimates:
mean of x mean of y 
0.1459422 0.3065405 



	Welch Two Sample t-test

data:  d.higheigen$italics_freq and d.loweigen$italics_freq
t = -15.736, df = 226.93, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.1691757 -0.1315231
sample estimates:
mean of x mean of y 
0.1132400 0.2635894 


In [11]:
# bold_freq compared across both power splits.
# Adminship: admins vs. non-admins.
t.test(
  x = d.admin$bold_freq,
  y = d.nonadmin$bold_freq
)

# Centrality: high vs. low eigenvector-centrality bin.
t.test(
  x = d.higheigen$bold_freq,
  y = d.loweigen$bold_freq
)


	Welch Two Sample t-test

data:  d.admin$bold_freq and d.nonadmin$bold_freq
t = -2.7144, df = 168.98, p-value = 0.007329
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.13496216 -0.02130944
sample estimates:
mean of x mean of y 
0.1026712 0.1808070 



	Welch Two Sample t-test

data:  d.higheigen$bold_freq and d.loweigen$bold_freq
t = -9.3721, df = 301.14, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.1809507 -0.1181483
sample estimates:
 mean of x  mean of y 
0.01554881 0.16509832 


In [12]:
# link_freq compared across both power splits.
# Adminship: admins vs. non-admins.
t.test(
  x = d.admin$link_freq,
  y = d.nonadmin$link_freq
)

# Centrality: high vs. low eigenvector-centrality bin.
t.test(
  x = d.higheigen$link_freq,
  y = d.loweigen$link_freq
)


	Welch Two Sample t-test

data:  d.admin$link_freq and d.nonadmin$link_freq
t = -9.6584, df = 800, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.12999727 -0.08608217
sample estimates:
 mean of x  mean of y 
0.03408172 0.14212144 



	Welch Two Sample t-test

data:  d.higheigen$link_freq and d.loweigen$link_freq
t = -10.81, df = 697.03, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.10907081 -0.07554059
sample estimates:
 mean of x  mean of y 
0.01707753 0.10938323 
