# Analysis

## Setup

In [None]:
# Read the per-user table into `d` (the first CSV line holds the column
# names), attach lsr, which supplies the cohensD() calls used in later cells,
# and preview the first rows.
d <- read.csv(file = "../data/wiki/user_data.csv", header = TRUE)
library(lsr)
head(d, n = 6L)

In [None]:
# Split the users along each of the two power dimensions (adminship and
# binarised eigenvector centrality).
# The flag columns are coerced with as.logical() so the split works whether
# read.csv() parsed the "True"/"False" fields as logical values or kept them
# as text/factor labels; comparing a logical column with the string "True"
# would match no rows. Rows whose flag is missing or unparseable end up in
# neither group, as before.
# d <- subset(d, eigen_central < 0.6) # eliminates 2 outliers
d.admin <- subset(d, as.logical(admin))
d.nonadmin <- subset(d, !as.logical(admin))
d.higheigen <- subset(d, as.logical(eigen_central_bin))
d.loweigen <- subset(d, !as.logical(eigen_central_bin))

In [None]:
# Cross the two power dimensions: centrality (high/low) x adminship.
# as.logical() makes the filters independent of whether read.csv() parsed the
# "True"/"False" fields as logical values or kept them as text/factor labels;
# comparing a logical column with the string "True" would match no rows.
d.higheigen.admin <- subset(d, as.logical(eigen_central_bin) & as.logical(admin))
d.higheigen.nonadmin <- subset(d, as.logical(eigen_central_bin) & !as.logical(admin))
d.loweigen.admin <- subset(d, !as.logical(eigen_central_bin) & as.logical(admin))
d.loweigen.nonadmin <- subset(d, !as.logical(eigen_central_bin) & !as.logical(admin))

# Group sizes recorded by the authors:
# nrow(d.loweigen.admin) # 1703
# nrow(d.higheigen.admin) # 77
# nrow(d.higheigen.nonadmin) # 120
# nrow(d.loweigen.nonadmin) # 20197

## Replicating *Centre Stage* results

Relationship between power (adminship & centrality) and coordination.

In [None]:
# One-sided Spearman rank correlation: does coordination received increase
# with eigenvector centrality? exact = FALSE skips the exact p-value.
cor.test(
  x = d$eigen_central,
  y = d$coord_received_all,
  alternative = "greater",
  method = "spearman",
  exact = FALSE
)

In [None]:
# Coordination received, compared between groups with a two-sample t-test and
# Cohen's d as effect size. The YES/NO after each question is the recorded
# outcome.

# Highly central vs. non-central users: different amounts received? YES
t.test(
  x = d.loweigen$coord_received_all,
  y = d.higheigen$coord_received_all
)
cohensD(
  x = d.loweigen$coord_received_all,
  y = d.higheigen$coord_received_all
)

# Admins vs. non-admins: different amounts received? YES
t.test(
  x = d.nonadmin$coord_received_all,
  y = d.admin$coord_received_all
)
cohensD(
  x = d.nonadmin$coord_received_all,
  y = d.admin$coord_received_all
)

# Highly central users vs. admins: different amounts received? NO
t.test(
  x = d.higheigen$coord_received_all,
  y = d.admin$coord_received_all
)
cohensD(
  x = d.higheigen$coord_received_all,
  y = d.admin$coord_received_all
)

In [None]:
# The comparison tested in Centre Stage: effect of adminship on coordination
# received, separately within each centrality class.

# Among highly central users, do admins receive more coordination? NO
t.test(
  x = d.higheigen.admin$coord_received_all,
  y = d.higheigen.nonadmin$coord_received_all
)
cohensD(
  x = d.higheigen.admin$coord_received_all,
  y = d.higheigen.nonadmin$coord_received_all
)

# Among non-central users, do admins receive more coordination? YES
t.test(
  x = d.loweigen.admin$coord_received_all,
  y = d.loweigen.nonadmin$coord_received_all
)
cohensD(
  x = d.loweigen.admin$coord_received_all,
  y = d.loweigen.nonadmin$coord_received_all
)

In [None]:
# The counterpart that had not been tested before: effect of centrality on
# coordination received, separately within each adminship class.

# Among admins, do highly central users receive more coordination? NO
t.test(
  x = d.loweigen.admin$coord_received_all,
  y = d.higheigen.admin$coord_received_all
)
cohensD(
  x = d.loweigen.admin$coord_received_all,
  y = d.higheigen.admin$coord_received_all
)

# Among non-admins, do highly central users receive more coordination? YES
t.test(
  x = d.loweigen.nonadmin$coord_received_all,
  y = d.higheigen.nonadmin$coord_received_all
)
cohensD(
  x = d.loweigen.nonadmin$coord_received_all,
  y = d.higheigen.nonadmin$coord_received_all
)

-  Weak positive correlation between coordination received and centrality: ρ = 0.19, p < 0.001
-  More coordination received by highly central than low central (p < 0.001, d=0.2)
-  More coordination received by admins than non-admins (p < 0.001, d=0.18)
-  The amount of coordination received by admins vs highly central users is not significantly different.
-  Among highly central users, there is no significant effect of adminship
-  Among admins, there is no significant effect of centrality


## Ingroup / outgroup coordination

In [None]:
# Is more coordination directed towards users of one's own sub-group? NO
# Overall vs. in-group coordination given, for all users and for each group.
# NOTE(review): in every call both vectors come from the same rows, so a
# paired test (paired = TRUE) may be what is intended -- confirm.
t.test(x = d$coord_given_all, y = d$coord_given_ingroup)
t.test(
  x = d.higheigen$coord_given_all,
  y = d.higheigen$coord_given_ingroup
)
t.test(
  x = d.loweigen$coord_given_all,
  y = d.loweigen$coord_given_ingroup
)
t.test(
  x = d.admin$coord_given_all,
  y = d.admin$coord_given_ingroup
)
t.test(
  x = d.nonadmin$coord_given_all,
  y = d.nonadmin$coord_given_ingroup
)

In [None]:
# Is more coordination received from users within one's own sub-group? NO
# Overall vs. in-group coordination received, for all users and for each group.
# NOTE(review): in every call both vectors come from the same rows, so a
# paired test (paired = TRUE) may be what is intended -- confirm.
t.test(x = d$coord_received_all, y = d$coord_received_ingroup)
t.test(
  x = d.higheigen$coord_received_all,
  y = d.higheigen$coord_received_ingroup
)
t.test(
  x = d.loweigen$coord_received_all,
  y = d.loweigen$coord_received_ingroup
)
t.test(
  x = d.admin$coord_received_all,
  y = d.admin$coord_received_ingroup
)
t.test(
  x = d.nonadmin$coord_received_all,
  y = d.nonadmin$coord_received_ingroup
)

# Social power & linguistic style

In [None]:
# One-sided Spearman rank correlations between eigenvector centrality and each
# linguistic style feature: a positive association is tested for post length,
# a negative one for italics, boldface and links.
cor.test(
  x = d$eigen_central,
  y = d$avg_length_tokens,
  alternative = "greater",
  method = "spearman",
  exact = FALSE
)
cor.test(
  x = d$eigen_central,
  y = d$italics_freq,
  alternative = "less",
  method = "spearman",
  exact = FALSE
)
cor.test(
  x = d$eigen_central,
  y = d$bold_freq,
  alternative = "less",
  method = "spearman",
  exact = FALSE
)
cor.test(
  x = d$eigen_central,
  y = d$link_freq,
  alternative = "less",
  method = "spearman",
  exact = FALSE
)

## Slight positive correlation with post length
## Clear negative correlations for italics, bold and links

In [None]:
# Post length (avg_length_tokens): admins vs. non-admins, then highly central
# vs. low-centrality users. Each t-test is followed by Cohen's d for the same
# two samples.
t.test(
  x = d.admin$avg_length_tokens,
  y = d.nonadmin$avg_length_tokens
)
cat(
  "effect size (Cohen's d): ",
  cohensD(x = d.admin$avg_length_tokens, y = d.nonadmin$avg_length_tokens)
)
t.test(
  x = d.higheigen$avg_length_tokens,
  y = d.loweigen$avg_length_tokens
)
cat(
  "Effect size (Cohen's d): ",
  cohensD(x = d.higheigen$avg_length_tokens, y = d.loweigen$avg_length_tokens)
)

In [None]:
# Italics frequency: admins vs. non-admins, then highly central vs.
# low-centrality users. Each t-test is followed by Cohen's d for the same two
# samples.
t.test(
  x = d.admin$italics_freq,
  y = d.nonadmin$italics_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(x = d.admin$italics_freq, y = d.nonadmin$italics_freq)
)
t.test(
  x = d.higheigen$italics_freq,
  y = d.loweigen$italics_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(x = d.higheigen$italics_freq, y = d.loweigen$italics_freq)
)

In [None]:
# Boldface frequency: admins vs. non-admins, then highly central vs.
# low-centrality users. Each t-test is followed by Cohen's d for the same two
# samples.
t.test(
  x = d.admin$bold_freq,
  y = d.nonadmin$bold_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(x = d.admin$bold_freq, y = d.nonadmin$bold_freq)
)
t.test(
  x = d.higheigen$bold_freq,
  y = d.loweigen$bold_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(x = d.higheigen$bold_freq, y = d.loweigen$bold_freq)
)

In [None]:
# Link frequency: admins vs. non-admins, then highly central vs.
# low-centrality users. Each t-test is followed by Cohen's d for the same two
# samples.
t.test(
  x = d.admin$link_freq,
  y = d.nonadmin$link_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(x = d.admin$link_freq, y = d.nonadmin$link_freq)
)
t.test(
  x = d.higheigen$link_freq,
  y = d.loweigen$link_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(x = d.higheigen$link_freq, y = d.loweigen$link_freq)
)

### Interim summary
 
-  admins and highly-central users tend to post longer messages (p < 0.001, but low effect size: 0.08 and 0.15 for admins and highly central, respectively). There is a weak positive correlation between centrality and post length
-  admins and highly-central users use significantly fewer italics, boldface, and URLs in their posts than non-admins and non-highly-central users (p < 0.001, strong effect size of around 0.5 in all cases). There are clear negative correlations between centrality and use of italics, boldface, and links.


In [None]:
# Admins vs. highly central users: do their posts differ in length?
# t-test followed by Cohen's d for the same two samples.
t.test(
  x = d.admin$avg_length_tokens,
  y = d.higheigen$avg_length_tokens
)
cat(
  "effect size (Cohen's d): ",
  cohensD(x = d.admin$avg_length_tokens, y = d.higheigen$avg_length_tokens)
)

In [None]:
# Admins vs. highly central users: do they differ in how often they use links?
# t-test followed by Cohen's d for the same two samples.
t.test(
  x = d.admin$link_freq,
  y = d.higheigen$link_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(x = d.admin$link_freq, y = d.higheigen$link_freq)
)

In [None]:
# Admins vs. highly central users: do they differ in how often they use
# italics? t-test followed by Cohen's d for the same two samples.
t.test(
  x = d.admin$italics_freq,
  y = d.higheigen$italics_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(x = d.admin$italics_freq, y = d.higheigen$italics_freq)
)

In [None]:
# Admins vs. highly central users: do they differ in how often they use
# boldface? t-test followed by Cohen's d for the same two samples.
t.test(
  x = d.admin$bold_freq,
  y = d.higheigen$bold_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(x = d.admin$bold_freq, y = d.higheigen$bold_freq)
)

In [None]:
# Effect of adminship among highly central users: admins vs. non-admins on
# each style feature, t-test plus Cohen's d for the same two samples.

# post length
t.test(
  x = d.higheigen.admin$avg_length_tokens,
  y = d.higheigen.nonadmin$avg_length_tokens
)
cat(
  "effect size (Cohen's d): ",
  cohensD(
    x = d.higheigen.admin$avg_length_tokens,
    y = d.higheigen.nonadmin$avg_length_tokens
  )
)
# italics
t.test(
  x = d.higheigen.admin$italics_freq,
  y = d.higheigen.nonadmin$italics_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(
    x = d.higheigen.admin$italics_freq,
    y = d.higheigen.nonadmin$italics_freq
  )
)
# boldface
t.test(
  x = d.higheigen.admin$bold_freq,
  y = d.higheigen.nonadmin$bold_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(
    x = d.higheigen.admin$bold_freq,
    y = d.higheigen.nonadmin$bold_freq
  )
)
# links
t.test(
  x = d.higheigen.admin$link_freq,
  y = d.higheigen.nonadmin$link_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(
    x = d.higheigen.admin$link_freq,
    y = d.higheigen.nonadmin$link_freq
  )
)

In [None]:
# Effect of adminship among low-centrality users: admins vs. non-admins on
# each style feature, t-test plus Cohen's d for the same two samples.

# post length
t.test(
  x = d.loweigen.admin$avg_length_tokens,
  y = d.loweigen.nonadmin$avg_length_tokens
)
cat(
  "effect size (Cohen's d): ",
  cohensD(
    x = d.loweigen.admin$avg_length_tokens,
    y = d.loweigen.nonadmin$avg_length_tokens
  )
)
# italics
t.test(
  x = d.loweigen.admin$italics_freq,
  y = d.loweigen.nonadmin$italics_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(
    x = d.loweigen.admin$italics_freq,
    y = d.loweigen.nonadmin$italics_freq
  )
)
# boldface
t.test(
  x = d.loweigen.admin$bold_freq,
  y = d.loweigen.nonadmin$bold_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(
    x = d.loweigen.admin$bold_freq,
    y = d.loweigen.nonadmin$bold_freq
  )
)
# links
t.test(
  x = d.loweigen.admin$link_freq,
  y = d.loweigen.nonadmin$link_freq
)
cat(
  "effect size (Cohen's d): ",
  cohensD(
    x = d.loweigen.admin$link_freq,
    y = d.loweigen.nonadmin$link_freq
  )
)

In [None]:
# High vs. low centrality within the class of admins.
# Each effect size is computed on the same two samples as the t-test directly
# above it (highly central admins vs. low-centrality admins), so the reported
# Cohen's d belongs to the comparison being tested.

# post length:
t.test(d.higheigen.admin$avg_length_tokens, d.loweigen.admin$avg_length_tokens)
cat("effect size (Cohen's d): ", cohensD(d.higheigen.admin$avg_length_tokens, d.loweigen.admin$avg_length_tokens))
# italics
t.test(d.higheigen.admin$italics_freq, d.loweigen.admin$italics_freq)
cat("effect size (Cohen's d): ", cohensD(d.higheigen.admin$italics_freq, d.loweigen.admin$italics_freq))
# boldface
t.test(d.higheigen.admin$bold_freq, d.loweigen.admin$bold_freq)
cat("effect size (Cohen's d): ", cohensD(d.higheigen.admin$bold_freq, d.loweigen.admin$bold_freq))
# links
t.test(d.higheigen.admin$link_freq, d.loweigen.admin$link_freq)
cat("effect size (Cohen's d): ", cohensD(d.higheigen.admin$link_freq, d.loweigen.admin$link_freq))

In [None]:
# High vs. low centrality within the class of non-admins.
# Each effect size is computed on the same two samples as the t-test directly
# above it (low-centrality non-admins vs. highly central non-admins), so the
# reported Cohen's d belongs to the comparison being tested.

# post length:
t.test(d.loweigen.nonadmin$avg_length_tokens, d.higheigen.nonadmin$avg_length_tokens)
cat("effect size (Cohen's d): ", cohensD(d.loweigen.nonadmin$avg_length_tokens, d.higheigen.nonadmin$avg_length_tokens))
# italics
t.test(d.loweigen.nonadmin$italics_freq, d.higheigen.nonadmin$italics_freq)
cat("effect size (Cohen's d): ", cohensD(d.loweigen.nonadmin$italics_freq, d.higheigen.nonadmin$italics_freq))
# boldface
t.test(d.loweigen.nonadmin$bold_freq, d.higheigen.nonadmin$bold_freq)
cat("effect size (Cohen's d): ", cohensD(d.loweigen.nonadmin$bold_freq, d.higheigen.nonadmin$bold_freq))
# links
t.test(d.loweigen.nonadmin$link_freq, d.higheigen.nonadmin$link_freq)
cat("effect size (Cohen's d): ", cohensD(d.loweigen.nonadmin$link_freq, d.higheigen.nonadmin$link_freq))

### Interim summary

-  the posts of highly-central users are not significantly longer than those of admins
-  but highly-central users use fewer URLs, italics, and boldface than admins (p < 0.001, effect size between 0.2 and 0.4)
-  within the class of users with low centrality, the effect of adminship is present (admins use fewer italics, boldface, and links: p < 0.05, with effects between 0.2 and 0.5)
-  similarly, among non-admins the effect of centrality is present.
-  However, while within the class of highly-central users, adminship does not have an effect (p > 0.05 for all linguistic style features: length, italics, bold, links), among admins, highly-central users use significantly fewer italics, boldface, and links


In [None]:
# Read the per-post table into `d` (this replaces whatever `d` held before),
# attach lsr for cohensD(), and preview the first rows.
d <- read.csv(file = "../data/wiki/post_data.csv", header = TRUE)
library(lsr)
head(d, n = 6L)

In [None]:
# Scatter plot of the `ppl` column against the `len` column, with both axes
# limited to the range 0-500.
plot(x = d$len, y = d$ppl, xlim = c(0, 500), ylim = c(0, 500))

In [None]:
# Split the posts on the `align` flag.
# as.logical() makes the split independent of whether read.csv() parsed the
# "True"/"False" fields as logical values or kept them as text/factor labels;
# comparing a logical column with the string "True" would match no rows.
# Rows whose flag is missing or unparseable end up in neither group.
d.alignedto <- subset(d, as.logical(align))
d.notalignedto <- subset(d, !as.logical(align))

In [None]:
# Compare the `ppl` column between the d.alignedto and d.notalignedto subsets:
# two-sample t-test, then Cohen's d for the same two samples.
t.test(
  x = d.alignedto$ppl,
  y = d.notalignedto$ppl
)
cohensD(
  x = d.alignedto$ppl,
  y = d.notalignedto$ppl
)

In [None]:
# Preview the first rows of the d.notalignedto subset.
head(x = d.notalignedto, n = 6L)