In [1]:
# Libraries
library(tidyverse)
library(viridis)
devtools::install_github("thomasp85/patchwork")
devtools::install_github("jokergoo/circlize")

library(patchwork)
library(circlize)
library(readxl)

# ### Load the package or install if not present
# if (!require("RColorBrewer")) {
install.packages("RColorBrewer")
library(RColorBrewer)
# }

“running command 'timedatectl' had status 1”
── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.4     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Loading required package: viridisLite

Skipping install of 'patchwork' from a github remote, the SHA1 (79223d30) has not changed since last install.
  Use `force = TRUE` to force installation

Skipping install of 'circlize' from a github remote, the SHA1 (620a1ad8) has not changed since last install.
  Use `fo

In [2]:
##### Set up #####

# Define the names of the collaboration survey data and the Primary Category/group data
collab_excel_file = "DMCBH Members Survey 2020_as of September 20, 2021.xlsx"
group_excel_file = "EDITED_Primary_Category_for_each_PI.xlsx"


# Define the title, Primary categroy names, and the colour pallete you want to use 
# Colors palletes: https://www.r-graph-gallery.com/38-rcolorbrewers-palettes.html
title = "Brain Injury & Repair"
Primary_c = c("Mental Health & Addictions",
              "Brain Development & Neurodevelopmental Disorders",
              "Learning/ Memory & Dementias",
              "Sensory/ Motor Systems & Movement Disorders",
              "Brain Injury & Repair")
c_pallete <- brewer.pal(12,"Paired")

# load in collaboration survey data
df = read_xlsx(collab_excel_file)


# renaming the columns that include the first and last names of participants for simplicity
df = rename(df, first_name = Q36_1, last_name = Q36_2)

# # Use this line of code for publication collaborations OR active collaborations
# # This line of code must be altered depending on if you want to see the publications or 
# # the active collaborators by using Q4 for publications and Q7_2 for active collaborations
#  df = rename(df, collab = Q7_2)

# Use this instead of above for combined publication and active collabs. 
df$combined = paste(df$Q4,df$Q7_2)
df = rename(df, collab = combined)

# creating a subset of our survey data that extracts the useful columns. 
df_collab = subset(df, select = c(first_name, last_name, collab))

# remove the uneccesary first row and get rid of rows containing NAs
df_collab = df_collab[-c(1),]
df_collab=df_collab[rowSums(is.na(df_collab)) != ncol(df_collab), ]

##### create an edge list using for loop. ####
origin = c()
destination = c()

for (i in 1:nrow(df_collab)) {
  x = df_collab$last_name[i]
  y = df_collab$first_name[i]
  for (n in 1:nrow(df_collab)) {
    if(is.na(df_collab$collab[n]) == FALSE) {
      if(str_detect(df_collab$collab[n], x) == TRUE) {
        origin = append(origin, paste(paste(substr(df_collab$first_name[n], 1, 1),
                                            ".", sep = ""), df_collab$last_name[n]))
        destination = append(destination, paste(paste(substr(y, 1, 1), ".", sep = ""), x)) 
      }
    }
  }
}

edge_l = data.frame(origin, destination)

# cleaning up the edge list by removing duplicates
edge_l = unique(edge_l)
edge_l$temp = apply(edge_l, 1, function(x) paste(sort(x), collapse=""))
edge_l = edge_l[!duplicated(edge_l$temp), 1:2]



##### download and set up the group names #####
# loading in the group names
df_group = read_xlsx(group_excel_file)

# creating a group column by pivoting 
df_group = df_group %>%
  pivot_longer(Primary_c,
               names_to = "group",
               values_to = "junk")


df_group = na.omit(df_group)


# Assigning a colour to each group. 
color =  c()

for (i in 1:nrow(df_group)) {
    j=i%%12
    if(j==0){
        j= 12
        }
      color = append(color,c_pallete[j])
}

# adding the color column to the dataframe.
df_group$color = color
                    
    

##### Integrating the grouping data into the collaboration data #####

# Making the naming of the groups dataframe the same the naming 
#of the collaboration dataframe
df_group$name = paste(substr(df_group$`First Name`, 1, 1), df_group$`Last Name`, sep=". ")
df_group =  subset(df_group, select = c("name", "group"
                                         ,"color"
                                       ))
# !!! Filter for subgroup
# This line of code should be modified depending on the intended subgroup.
# Subgroups are: "Mental Health & Addictions",
#                "Brain Development & Neurodevelopmental Disorders",
#                "Learning/ Memory & Dementias",
#                "Sensory/ Motor Systems & Movement Disorders",
#                "Brain Injury & Repair"
df_group = filter(df_group, group == "Brain Injury & Repair")

# removing any names from our group list not found in the collaboration list.
all_edges = data.frame(stack(edge_l))
nodes = semi_join(df_group, all_edges, by = c("name"="values"))
nodes = df_group
                    

# Removing any colaborations that involve names not found on the group list. 
links = semi_join(edge_l, nodes, by = c("origin"="name"))
links = semi_join(links, nodes, by = c("destination"="name"))

# Creating a data frame that counts the number of times a person appears in the links data frame                    
names = c(nodes$name)
count = c()
for (i in 1:length(names)){
    count[i]=sum(links$origin == names[i]) + sum(links$destination == names[i])
}
link_count= data.frame(names, count)


                    
# add a self-link for PIs with no links.
for(i in 1:nrow(nodes)){
    if (is.na(match(nodes$name[i], c(links$origin, links$destination)))) {
        links = rbind(links, c(nodes$name[i], nodes$name[i]))
     }
}
                    
# Creating a data frame that counts the number of times a PI appears in the links data frame                    
names = c(nodes$name)
count = c()
for (i in 1:length(names)){
    count[i]=sum(links$origin == names[i]) + sum(links$destination == names[i])
}
link_count= data.frame(names, count)
                    
# cleaning up the edge list by removing self connections for nodes with more than one connection**
# because if we remove a self connection for a node that only has a self-connection, they will be removed from the diagram.\# Colouring remaining self connections (representing people with no connections) white.  
# Colouring remaining self connections (representing people with no connections) white.  
i=1
while (i<=nrow(links)) {
       if (identical(links$origin[i],links$destination[i])) {
           if (!is.na(count[match(links$origin[i], names)]) && count[match(links$origin[i], names)]!=2) {
               links = links[-i,]
               i=i-1
           }
             else if(!is.na(count[match(links$origin[i], names)]) && count[match(links$origin[i], names)]==2){
                 nodes$color[which(df_group[,1] == links$origin[i])] = "white"
              }
      }
    i=i+1
}

                    
# cleaning up nodes
nodes = data.frame(nodes)
                    

# creating the groupings
group_ind = structure(nodes$group, names = nodes$name)

# creating colors for the groupings
color_ind = structure(nodes$color, names = nodes$name)

# create an adjacency list. 
adjacencyData = data.frame(with(links, table(origin, destination)))
                    

##### creating the chord diagram #####

# !!! Modify the name of the output PDF
pdf("BrainInjury_CombinedCollab-Oct4.pdf") 

# set up the parameters
circos.clear()
circos.par(start.degree = 90,gap.degree = 1, 
           track.margin = c(-0.1, 0.1), 
           points.overflow.warning = FALSE, canvas.xlim = c(-1.3,1.3),
           canvas.ylim = c(-1.3,1.3))
par(mar = c(0,0,2,0),xpd = TRUE, cex.main = 1.5)

# create the chord diagram
chordDiagram(adjacencyData, group = group_ind, 
              grid.col = color_ind,
             transparency = 0.25,
             diffHeight  = -0.04,
             annotationTrack = "grid", 
             annotationTrackHeight = c(0.05, 0.1),
             link.sort = TRUE, 
             link.largest.ontop = FALSE,
              self.link = 1, 
              small.gap = 1,
              big.gap = 1
)
                    

# Add the text and the axis surrounding the diagram.
circos.trackPlotRegion(
  track.index = 1, 
  bg.border = NA, 
  panel.fun = function(x, y) {
    
    xlim = get.cell.meta.data("xlim")
    sector.index = get.cell.meta.data("sector.index")
    
    # Add names to the sector. 
    circos.text(
      x = mean(xlim), 
      y = 5.2, 
      labels = sector.index, 
      facing = "clockwise", 
      niceFacing = TRUE,
      cex = 0.7,
    )
    
     #Add graduation on axis
    circos.axis(
      h = "top", 
      labels.cex = 0.001,
      minor.ticks = 2, 
      major.tick.length = 0.1, 
      labels.niceFacing = FALSE)
      
  }
)

# Add a title
title(title,outer=FALSE)
                    
                    
dev.off()
                    


Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(Primary_c)` instead of `Primary_c` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m

