## I. Modules

In [1]:
library(mongolite) 
library(jsonlite)
library(data.table)
library(dplyr) 
library(tidyr)
library(readr)
library(stringi)
library(plotly)

"package 'dplyr' was built under R version 3.6.3"
Attaching package: 'dplyr'

The following objects are masked from 'package:data.table':

    between, first, last

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

"package 'readr' was built under R version 3.6.3"Loading required package: ggplot2
Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout



## process_human.R

In [2]:

# PART 1. Read data from MongoDB
#-------------------------------------------------------------------------------
search_individuals <- function(db, vec){
   # Input
   #  db: MongoDB object
   #  vec: a vector of individualCode
   # Output:
   #  df
   # Usage:
   # df <- search_individuals(db, vec=c('CPI555', 'CPI515'))
   
   vec <- toJSON(vec)
   query = paste0('{"individualCode": {"$in": ', vec, '}}')
   df <- db$find(query)
   return (df)    
}


search_individuals_nin <- function(db, vec){
   # Input
   #  db: MongoDB object
   #  vec: a vector of individualCode
   # Output:
   #  df
   # Usage:
   # df <- search_individuals_nin(db, vec=c('CPI555', 'CPI515'))
   
   vec <- toJSON(vec)
   query = paste0('{"individualCode": {"$nin": ', vec, '}}')
   df <- db$find(query)
   return (df)    
}


search_all_control <- function(db, 
      query='{"individualCode": {"$regex" : "^HBD|^APOC", "$options" : "i"}}'){
   # Input
   #  db: MongoDB object
   #  query: a query to get all individualCode of control
   # Output:
   #  df
   # Usage:
   #  df <- search_all_control(db, 
   #     query='{"individualCode": {"$regex" : "^HBD|^APOC", "$options" : "i"}}')
   
   df <- db$find(query)
   return (df)    
}


search_control_via_runId <- function(db, vec_runIds){
   # Input
   #  db: MongoDB object
   #  vec_runIds: a vector of runId
   # Output:
   #  df
   # Usage:
   #  df <- search_control_via_runId(db, vec_runIDs = c('CPI_20181031','CPI_20201104'))
   #     
   
   if (length(vec_runIds) == 0){ 
      df <- data.frame()
      return (df)
   } else {
      query_control_study_code <- '{"individualCode": {"$regex" : "^HBD|^APOC", "$options" : "i"}'
      
      runIds_str <- create_str_dQuotes(vec = vec_runIds)
      query_runIds <- paste('"runId": {"$in": [', runIds_str, ']} }')
      
      query = paste(query_control_study_code, query_runIds, sep = ",")
      df <- db$find(query)
      return (df) 
   }
}

## heatmap.R

In [3]:
# PART 1: Dataframe manipulation
#-------------------------------------------------------------------------------
transform_df <- function(df_one_study_code){
   # Input 
   #  df_one_study_code: a dataframe of only one study_code
   # Output: 
   #  new dataframe after transforming df_one_study_code
   # Note: meaning of study_code and individualCode is the same.
   # Usage:
   #  df <- search_individuals(db, vec=c('CPI203', 'CPI248', 'CPI515'))
   #  df_one_study_code <- df %>%
   #                          filter(individualCode == 'CPI515')
   # df_markers <- transform_df(df_one_study_code)
   # Example of df_markers
   #     name	    value	studyCode
   #  NK (%LC)	    22.13	CPI515
   #  NK- 1 (%LC)	 0.75	   CPI515
   
   if (dim(df_one_study_code)[1] == 0){ # Empty dataframe
      return(df_one_study_code)
   }
   
   study_code <- unique(df_one_study_code$individualCode)[1]
   
   list_Samples <- df_one_study_code$Samples
   df_Samples <- rbindlist(list_Samples, use.names=TRUE, fill=TRUE)
   list_markers <- df_Samples$markers
   df_markers <- rbindlist(list_markers, use.names=TRUE, fill=TRUE)
   df_markers$studyCode <- study_code
   
   # remove "confidence" and "interpretation" columns
   df_markers <- subset(df_markers, select = -c(confidence, interpretation))
   return (df_markers)
}


filter_transform_df <- function(study_code, df){
   # Input
   #  study_code: one study code. Eg. study_code = 'CPI515'
   #  df: a dataframe
   # Output:
   #  new df
   # Usage:
   # df <- search_individuals(db, vec=c('CPI203', 'CPI248', 'CPI515'))
   # df_filter <- filter_transform_df(study_code = 'CPI515', df = df)
   # Example of df_filter
   #     name	    value	studyCode
   #  NK (%LC)	    22.13	CPI515
   #  NK- 1 (%LC)	 0.75	   CPI515
   
   if (dim(df)[1] == 0){ 
      return(df)
   }
   df_filter <- df %>%
      filter(individualCode == study_code) %>%
      transform_df()
   return (df_filter)
}


concat_df <- function(study_codes, df){
   # Input
   #  study_codes: a vector. Eg. study_codes <- c('CPI248', 'CPI515')
   #  df: a dataframe
   # Output:
   #  new df after transforming and concat
   # Usage:
   #   df <- search_individuals(db, vec=c('CPI203', 'CPI248', 'CPI515'))
   #   study_codes <- c('CPI248', 'CPI515')
   #   df_concat <- concat_df(study_codes, df)
   # Example of df_concat
   #   name	        value	  studyCode
   #   NK (%LC)	  22.13	  CPI515
   #   NK- 1 (%LC)  0.75	  CPI515
   # Note that for one study code, we may have several value of a marker
   # with different runs (it sometimes happens in control group but not much
   # in treatment one). For instance:
   #   name	value	studyCode
   #   NK- 1 (%LC)	1.35	HBD001
   #   NK- 1 (%LC)	1.06	HBD001
   #   NK- 1 (%LC)	1.13	HBD001
   #   NK- 1 (%LC)	1.15	HBD001
   #   Classical Monocytes(%APC)	47.9	HBD001
   
   
   list_dfs <- lapply(study_codes, filter_transform_df, df)
   df_concat <- do.call("rbind", list_dfs)
   return (df_concat)
}


concat_agg_mean_df <- function(study_codes, df){
   # Input
   #  study_codes: a vector. Eg. study_codes <- c('CPI248', 'CPI515')
   #  df: a dataframe
   # Output:
   #  new df
   # Usage:
   #   df <- search_individuals(db, vec=c('APO014', 'CPI203', 'CPI248', 'CPI515'))
   #   study_codes <- c('APO014', 'CPI248', 'CPI515')
   #   df_agg <- concat_agg_mean_df(study_codes, df)
   # Usage:
   #   df <- search_individuals(db, vec=c('HBD001'))
   #   study_codes <- c('HBD001')
   #   df_agg <- concat_agg_mean_df(study_codes, df)
   # Example of df_agg
   #   studyCode	name	       value
   #   HBD001	   NK- 1 (%LC)	 1.1725 <- mean of the example at concat_df()
   #   HBD001	   Bm (%B)	    5.79
   
   list_dfs <- lapply(study_codes, filter_transform_df, df)
   df_agg <- bind_rows(list_dfs) %>%
      group_by_at(vars(one_of(c( "studyCode", "name")))) %>%
      summarize(value = mean(value))            
   return (df_agg)
}


concat_pivot_df_DEPRECATED <- function(study_codes, df){
   # Note: it is DEPRECATED: to be removed
   # Input
   #  study_codes: a vector. Eg. study_codes <- c('CPI248', 'CPI515')
   #  df: a dataframe
   # Output:
   #  new df
   # Usage:
   #   df <- search_individuals(db, vec=c('APO014', 'CPI203', 'CPI248', 'CPI515'))
   #   study_codes <- c('APO014', 'CPI248', 'CPI515')
   #   df_pivot <- concat_pivot_df(study_codes, df)
   
   list_dfs <- lapply(study_codes, filter_transform_df, df)
   df_pivot <- bind_rows(list_dfs) %>%
      pivot_wider(names_from = studyCode, values_from = value)    
   return (df_pivot)
}


concat_pivot_df_name <- function(study_codes, df){
   # Input
   #  study_codes: a vector. Eg. study_codes <- c('CPI248', 'CPI515')
   #  df: a dataframe
   # Output:
   #  new df
   # Usage:
   #   df <- search_individuals(db, vec=c('APO014', 'CPI203', 'CPI248', 'CPI515'))
   #   study_codes <- c('APO014', 'CPI248', 'CPI515')
   #   df_pivot <- concat_pivot_df_name(study_codes, df)
   # Ref: https://rdrr.io/github/tidyverse/tidyr/man/pivot_wider.html
   # Note: Add id column to dataframe to have a unique pivot for each cell
   #       instead of aggregate a mean
   
   list_dfs <- lapply(study_codes, filter_transform_df, df)
   df_all <- bind_rows(list_dfs)
   df_all <- cbind(df_all, id = as.numeric(rownames(df_all)))
   df_pivot <- pivot_wider(data = df_all,
                           id_cols = c(id, studyCode),
                           names_from = name, 
                           values_from = value, 
                           ## values_fn = mean # do not need
   )    
   return (df_pivot)
}


concat_agg_mean_pivot_df_studyCode <- function(study_codes, df){
   # Input
   #  study_codes: a vector. Eg. study_codes <- c('CPI248', 'CPI515')
   #  df: a dataframe
   # Output:
   #  new df
   # Usage:
   #   df <- search_individuals(db, vec=c('APO014', 'CPI203', 'CPI248', 'CPI515'))
   #   study_codes <- c('APO014', 'CPI248', 'CPI515')
   #   df_pivot <- concat_agg_mean_pivot_df_studyCode(study_codes, df)
   
   list_dfs <- lapply(study_codes, filter_transform_df, df)
   df_pivot <- bind_rows(list_dfs) %>%
      group_by_at(vars(one_of(c( "studyCode", "name")))) %>%
      summarize(value = mean(value)) %>%  
      pivot_wider(names_from = studyCode, values_from = value)    
   return (df_pivot)
}


concat_agg_mean_pivot_df_name <- function(study_codes, df){
   # Input
   #  study_codes: a vector. Eg. study_codes <- c('CPI248', 'CPI515')
   #  df: a dataframe
   # Output:
   #  new df
   # Usage:
   #   df <- search_individuals(db, vec=c('APO014', 'CPI203', 'CPI248', 'CPI515'))
   #   study_codes <- c('APO014', 'CPI248', 'CPI515')
   #   df_pivot <- concat_agg_mean_pivot_df_name(study_codes, df)
   
   list_dfs <- lapply(study_codes, filter_transform_df, df)
   df_pivot <- bind_rows(list_dfs) %>%
      group_by_at(vars(one_of(c( "studyCode", "name")))) %>%
      summarize(value = mean(value)) %>%  
      pivot_wider(names_from = name, values_from = value)    
   return (df_pivot)
}


# PART 2: Percentile calculation and normalization
#-------------------------------------------------------------------------------
# length of vector without NA
len <- function(x) { 
   # Input: 
   #  x: a vector
   # Output:
   #  length of x without NA
   # Usage:
   #  len(x = c(10, 20, NA, 99.99, NA))  -> Output: 3
   return (length(x[!is.na(x)])) 
} 


# percentile by JCSMR
percentile_JCSMR <- function(vec){
   # Input: 
   #  vec: a vector
   # Output:
   #  JCSMR percentile of vec
   # Usage:
   #  vec = c(50.0, 80.0, 90.0, 119.0, 119.0, 120, NA)
   #  percentiles <- percentile_JCSMR(vec)
   
   percentiles <- rank(vec, na.last = 'keep', ties.method = c("average")) * (1. / (len(vec) + 1))
   return (percentiles)
}

# common percentile
percentile <- function(vec){
   # Input: 
   #  vec: a vector
   # Output:
   #  JCSMR percentile of vec
   # Usage:
   #  vec = c(50.0, 80.0, 90.0, 119.0, 119.0, 120, NA)
   #  percentiles <- percentile(vec)
   
   percentiles <- rank(vec, na.last = 'keep', ties.method = c("average")) * (1. / len(vec))
   return (percentiles)    
}


# Calculate percentile of a value w.r.t a vector
percentile_norm <- function(value, vec, method='JCSMR'){
   # Input: 
   #  value:  a value to calculate its percentile w.r.t vec
   #  vec: a vector of values
   #  method: 'JCSMR' or a normal way
   # Output:
   #  percentile of value
   # Usage:
   #  vec = c(50.0, 80.0, 90.0, 119.0, 119.0, 120, NA)
   #  per <- percentile_norm(39, vec, method='JCSMR') -> output=0
   #  per <- percentile_norm(90, vec, method='JCSMR') -> output=0.49
   #  per <- percentile_norm(120, vec, method='JCSMR') -> output=0.857
   #  per <- percentile_norm(121, vec, method='JCSMR') -> output=1
   
   # method == 'JCSMR' or a normal way
   if (method == 'JCSMR'){
      epsilon <- 1e-6
      maxVal <- max(vec, na.rm = TRUE)
      vec <- c(vec, maxVal + epsilon)
   }   
   return (ecdf(vec)(value))   
} 


# normalize percentile one column
norm_perc_one_col <- function(col_name, df_control, df_data){
   # Input: 
   #  col_name: A colunm name exists in both dataframes
   #  df_control: A dataframe of control
   #  df_data: A dataframe of treatment
   # Output:
   #  df_data in which col_name has been norm percentile
   # Usage:
   #  df_data <- norm_perc_one_col(col_name, df_control, df_data)
   
   df_data[[col_name]] <- unlist(lapply(df_data[[col_name]], percentile_norm, df_control[[col_name]]))
   return (df_data)
}


# normalize percentile of a df w.r.t other df
normalize_percentile <- function(df_control, df_data, col_not_cal_per="studyCode"){
   # Input: 
   #  df_control: A dataframe of control
   #  df_data: A dataframe of treatment
   #  col_not_cal_per: A colunm name to be ignore when calculating percentile
   # Output:
   #  data: a R list (a Python dict)
   #  Access to: 
   #     df_data: data$df
   #     columns (calculate percentile by itself): data$cols_cal_self_percentile
   # Usage:
   #  data <- normalize_percentile(df_control, df_data, col_not_cal_per)
   
   # columns existed both in df_control and df_data
   common_cols <- intersect(names(df_control), names(df_data))
   # Remove not related col 
   common_cols <- common_cols[common_cols != col_not_cal_per]
   
   # If df_control[col] has less than 2 real values, it cannot use for percentile_norm()
   cols_len_smaller_2 <- c()
   for (col in common_cols){
      if (len(df_control[[col]]) < 2){
         cols_len_smaller_2 <- c(cols_len_smaller_2, col)
      }
   }
   
   # Get columns and calculate normalized percentile (w.r.t control data) 
   selected_cols <- setdiff(common_cols, cols_len_smaller_2)
   for (col_name in selected_cols){
      df_data[[col_name]] <- unlist(lapply(df_data[[col_name]], percentile_norm, df_control[[col_name]]))
   }
   
   # columns exist in df_data ONLY or existed in both but it is cols_len_smaller_2 
   # -> Calculate percentile by itself
   cols_in_data_only <- setdiff(names(df_data), names(df_control))
   cols_cal_self_percentile <- c(cols_in_data_only, cols_len_smaller_2)    
   for (col_name in cols_cal_self_percentile){
      df_data[[col_name]] <- percentile_JCSMR(df_data[[col_name]])
   }
   
   return (list(df=df_data, cols_cal_self_percentile=cols_cal_self_percentile))  
}


# normalize percentile of a df by itself
self_normalize_percentile <- function(df_data, col_not_cal_per="studyCode"){
   # Input: 
   #  df_data: A dataframe of treatment
   #  col_not_cal_per: A colunm name to be ignore when calculating percentile
   # Output:
   #  data: a R list (a Python dict)
   #  Access to: 
   #     df_data: data$df
   #     columns (calculate percentile by itself): data$cols_cal_self_percentile
   # Usage:
   #  data <- normalize_percentile(df_control, df_data, col_not_cal_per)
   
   cols <- names(df_data)
   # Remove not related col 
   cols <- cols[cols != col_not_cal_per]
   
   for (col_name in cols){
      df_data[[col_name]] <- percentile_JCSMR(df_data[[col_name]])
   }
   
   return (list(df=df_data, cols_cal_self_percentile=cols))
}


get_heatmap_data <- function(df_control, df_data){
   # Input:
   #   df_control: dataframe of control group
   #   df_data: dataframe of treatment group
   # Ouput:
   #   data: a list with "df" and "cols_cal_self_percentile" keys
   # Usage example:
   # data <- get_heatmap_data(
   #           df_control = search_individuals(db, vec = c('HBD056', 'HBD044', 'HBD039')),
   #           df_data = search_individuals(db, vec = c('CPI515', 'CPI545')))
   # df <- data$df
   # cols_cal_self_percentile <- data$cols_cal_self_percentile
   
   # Control data  
   study_codes_control <- unique(df_control$individualCode)
   df_control <- concat_pivot_df_name(study_codes_control, df_control)
   ## df_control <- concat_agg_mean_pivot_df_name(study_codes_control, df_control)
   
   # Treatment data
   study_codes_data <- unique(df_data$individualCode)      
   df_data <- concat_agg_mean_pivot_df_name(study_codes_data, df_data)
   
   # Percentile
   data <- normalize_percentile(df_control, df_data, col_not_cal_per="studyCode")
   df <- transpose(data$df, keep.names="marker", make.names="studyCode", fill=NA, ignore.empty=FALSE)
   return (list(df = df, cols_cal_self_percentile = data$cols_cal_self_percentile))   
}


get_self_heatmap_data <- function(df_data){
   # Input:
   #   df_data: dataframe of treatment group
   # Ouput:
   #   data: a list with "df" and "cols_cal_self_percentile" keys
   # Usage example:
   #   data <- get_self_heatmap_data(                          
   #              df_data = search_individuals(db, vec = c('CPI515', 'CPI545')))
   #   df <- data$df
   #   cols_cal_self_percentile <- data$cols_cal_self_percentile
   
   study_codes_data <- unique(df_data$individualCode)      
   df_data <- concat_agg_mean_pivot_df_name(study_codes_data, df_data)
   
   # Percentile
   data <- self_normalize_percentile(df_data, col_not_cal_per="studyCode")
   df <- transpose(data$df, keep.names="marker", make.names="studyCode", fill=NA, ignore.empty=FALSE)
   return (list(df = df, cols_cal_self_percentile = data$cols_cal_self_percentile))   
}


get_heatmap_data_by_vecs <- function(db, vec_control, vec_data){
   # This function is very similar to get_heatmap_data() -> keep it for reference 
   # Input:
   #   db: MongoDB object
   #   vec_control: array of study codes in control group
   #   vec_data: array of study codes in data group
   # Ouput:
   #   data: a list with "df" and "cols_cal_self_percentile" keys
   # Usage example:
   # data <- get_heatmap_data_by_vecs(db, 
   #                          vec_control = c('HBD056', 'HBD044', 'HBD039', 'HBD040'), 
   #                          vec_data = c('CPI515', 'CPI545')) 
   # df <- data$df
   # cols_cal_self_percentile <- data$cols_cal_self_percentile
   
   # Control data  
   df_control <- search_individuals(db, vec_control)
   study_codes_control <- unique(df_control$individualCode)
   df_control <- concat_pivot_df_name(study_codes_control, df_control)
   ## df_control <- concat_agg_mean_pivot_df_name(study_codes_control, df_control)
   
   # Real data
   df_data <- search_individuals(db, vec_data)
   study_codes_data <- unique(df_data$individualCode)      
   df_data <- concat_agg_mean_pivot_df_name(study_codes_data, df_data)
   
   # Percentile
   data <- normalize_percentile(df_control, df_data, col_not_cal_per="studyCode")
   df <- transpose(data$df, keep.names="marker", make.names="studyCode", fill=NA, ignore.empty=FALSE)
   return (list(df = df, cols_cal_self_percentile = data$cols_cal_self_percentile))   
}


# PART 3: Co-efficients of variation (CV) and Ranking
#-------------------------------------------------------------------------------
calculate_CV <- function(df){
   # Input:
   #    df: a dataframe has a format as below example
   #                 name	                  value	 studyCode
   #       CD3 T cells (% Lymphocytes/live)	36.70	 GEM177
   #       CD4+ T cells (%Lymphocytes/live)	9.38	 GEM177
   #       Activated CD4+ T cells(% CD4)	   6.03	 GEM177
   # Output:
   #   new dataframe has a format as below example
   #                 names	                  CVs
   #       low density neutrophils (%APC)	   247.5
   #       TEMRA (% CD4)	                  167.0
   #       CD4-TEMRA (%CD4)	               139.3
   #       NK-4 (%LC)	                     110.9
   #       R5 Th1-17 (%CD4)	               103.3
   
   names <- c()
   CVs <- c()
   markers <- unique(df[["name"]])
   
   for (marker in markers){
      df_one_maker <- df[df$name == marker, ]
      CV <- sd(df_one_maker[["value"]], na.rm=TRUE) / mean(df_one_maker[["value"]], na.rm=TRUE) * 100 # percentage
      
      # Update name and CV
      names <- c(names, unique(df_one_maker[["name"]]))
      CVs <- c(CVs, CV)
   }
   return (data.frame(names, CVs))
}


get_df_CV <- function(df, sort_desc=TRUE){
   # Input:
   #   df: a dataframe getting from MongoDB
   #     Eg. df <- search_individuals(db, vec=c('CPI018', 'CPI043', 'CPI063'))
   #   sort_desc: if TRUE sort dataframe in descending order else keep as usual
   # Output:
   #   df_CV: a dataframe has a format as below example
   #                 names	                    CVs
   #       low density neutrophils (%APC)	  315.5030
   #       Activated CD8+ T cells (% CD8)	  218.1074
   #       PBs (%B)	                       162.2459
   #       Tfh effector (%CD4)	           154.0654
   #       TEMRA (% CD4)	                 152.6803
   # Usage:
   # df_CV <- get_df_CV(df = search_individuals(db, vec=c('CPI018', 'CPI043', 'CPI063')))
   
   if (dim(df)[1] == 0){ # Empty dataframe
      return (empty_df(columns = c("names", "CVs")))
   }
   
   df <- concat_df(study_codes = unique(df$individualCode),
                   df = df)
   
   df_CV <- calculate_CV(df)
   
   if (sort_desc == TRUE){
      df_CV <- df_CV[order(- df_CV$CVs), ]
   }
   return (df_CV)
}


rank_CV_Disease_Control <- function(df_control, df_data, sort_desc=TRUE){
   # Input:
   #   df_control: dataframe of control group
   #   df_data: dataframe of treatment (disease) group
   # Ouput:
   #   a dataframe of Disease (treatment) and Control CV
   # Usage example:
   # df_CV_rank <- rank_CV_Disease_Control(
   #           df_control = search_individuals(db, vec = c('HBD056', 'HBD044', 'HBD039')),
   #           df_data = search_individuals(db, vec = c('CPI515', 'CPI545')))
   # Example of df_CV_rank is as below:
   #  Rank     marker 	            Control_sample_CV	 Disease_CV	 Disease_over_Control
   #   1	  Trans-a (%B)	            26.42399	          137.1557469	5.190576016
   #   2	  MZ B (%Lymphocytes/live)	22.06382	          110.7516645	5.019604389
   #   3	  MZ B (%B)	               33.91442	          115.0823818	3.393316914
   
   # get df_CV for control and data (treatment)
   df_control_CV <- get_df_CV(df = df_control)
   df_data_CV <- get_df_CV(df = df_data)
   
   # Get value > 0  (in this case: remove 0 and NA)
   df_control_CV <- df_control_CV %>% filter(CVs > 0)
   df_data_CV <- df_data_CV %>% filter(CVs > 0)
   
   # Change column name: names -> marker
   df_control_CV <- df_control_CV %>% rename(marker = names,
                                             Control_sample_CV = CVs)
   df_data_CV <- df_data_CV %>% rename(marker = names,
                                       Disease_CV = CVs)
   
   # Merge: inner join
   df_CV <- merge(x = df_control_CV, 
                  y = df_data_CV,
                  by = "marker")
   
   # Calculate column: Disease/Control
   df_CV <- transform(df_CV, Disease_over_Control = Disease_CV/Control_sample_CV)
   
   # Sorted desc by Disease_over_Control
   if (sort_desc == TRUE){
      df_CV <- df_CV[order(-df_CV$Disease_over_Control), ]
   }
   
   # Rank df_CV[Disease_over_Control] in descending order
   df_CV <- cbind(Rank = rank(-df_CV$Disease_over_Control), df_CV)
   
   return (df_CV)
}


filter_rank_top_n <- function(df_control, df_data, top_n = 10, set_index=TRUE){
   # To filter heatmap data corresponding to the top n ranking of CV Disease/Control
   # Input:
   #   df_control: dataframe of control group
   #   df_data: dataframe of treatment (disease) group
   #   top_n: top n of ranking. Default top ten: top_n = 10
   #   set_index: if TRUE, set marker column as index
   # Ouput: 
   #   df_heatmap: heatmap data of the top n ranking of CV Disease/Control
   # Usage example:
   # df_heatmap_top_n <- filter_rank_top_n(
   #           df_control = search_individuals(db, vec = c('HBD056', 'HBD044', 'HBD039')),
   #           df_data = search_individuals(db, vec = c('CPI515', 'CPI545', 'CPI255')),
   #           top_n = 10)
   # Example of df_heatmap_top_n is as below:
   #   marker	      CPI255	   CPI515	 CPI545
   #  Bm (%B)	      0.00	      0.667	    0.00
   #  IgA smB (%B)	0.00	      0.444	    0.00
   
   # Get CV Disease/Control
   df_CV_rank <- rank_CV_Disease_Control(df_control, df_data, sort_desc=TRUE)
   
   # Get top n marker
   marker_top_n <- df_CV_rank[1:top_n, ][["marker"]]
   
   # Get heatmap data
   data <- get_heatmap_data(df_control, df_data)
   df_heatmap = data$df
   
   # Get top n for heatmap
   df_heatmap_top_n <- df_heatmap %>% filter(marker %in% marker_top_n)
   
   # Set 'marker' column as index
   if (set_index == TRUE){
      row.names(df_heatmap_top_n) <- df_heatmap_top_n$marker
      df_heatmap_top_n <- subset(df_heatmap_top_n, select = -c(marker))
   }
   
   return (df_heatmap_top_n)
}

## utils.R

In [4]:
# Customization R functions
not_all_na <- function(x) any(!is.na(x))


parseInput <- function(string){
  # split new line
  str <- strsplit(string, '\\s')
  # remove empty string and NA
  str <- stri_remove_empty(str[[1]], TRUE)
  # remove , or . or 'NA'
  str <- str[! str %in% c(',', '.', 'NA')]
  # remove trailing ,
  str <- gsub(',$', "", str)
  # remove leading and trailing ' or " of each word in string
  str <- gsub("^\\'|\\'$", "", str)
  str <- gsub('^\\"|\\"$', "", str)
  
  return (str)
}


chunk <- function(x, n){split(x, sort(rep_len(1:n, length(x))))}


empty_df <- function(columns){
  # Input:
  #   columns: a vector of column names
  # Output:
  # An empty dataframe
  # Usage:
  #   Eg1. Empty datafram with column names
  #        df_empty_1 <- empty_df(columns= c("id", "names", "address")) 
  #   Eg2. Empty datafram without any column. Same as df_empty_2 <- data.frame()
  #        df_empty_2 <- empty_df(columns= c()) 
  
  # pass this columns length to ncol parameter and nrow with 0
  df_empty = data.frame(matrix(nrow = 0, ncol = length(columns)))
  # assign column names
  colnames(df_empty) = columns
  return (df_empty)
}


create_str_dQuotes <- function(vec){
  # Input:
  #   vec: a vector 
  # Output:
  #   a string with double quote for every element of vec
  # Usage:
  #   vec_str <- create_str_dQuotes(vec = c('AA', 'BB', 99, 'CC'))
  #   Output: ' "AA", "BB", "99", "CC" '     
  vec_str <- sapply(strsplit(paste(vec, collapse = ","), ","), function(x) toString(dQuote(x)))
  return (vec_str)
} 

### Connect to MongoDB

In [5]:
# Template: mongodb://[username:password@]host1[:port1][,host2[:port2],...[/[database][?options]]
# Eg. m <- mongo("mtcars", url = "mongodb://a_user_name:a_password@mongo.org:2021/test")
# Ref: https://jeroen.github.io/mongolite/connecting-to-mongodb.html
# Eg. Simple way
# db <- mongo(collection = "markers", 
#             db = "facs",
#             url = "mongodb://localhost:27017")

# Envirovment file (.env) example
# MONGODB_HOST="127.0.0.1"  
# MONGODB_PORT=27017
# MONGODB_USER="an username or an empty string"
# MONGODB_PASSWORD="a password or an empty string"
# MONGODB_DB_NAME='facs'
# MONGODB_COLLECTION_NAME='markers'

# MongoDB instance
readRenviron(".env")  #  read Environment file

mongo_host <- Sys.getenv("MONGODB_HOST", "127.0.0.1")
mongo_port <- Sys.getenv("MONGODB_PORT", 27017)
mongo_db <- Sys.getenv("MONGODB_DB_NAME", "facs")
mongo_collection <- Sys.getenv("MONGODB_COLLECTION_NAME", "markers")
mongo_user <- Sys.getenv("MONGODB_USER", "")
mongo_password <- Sys.getenv("MONGODB_PASSWORD", "")

if (!stri_isempty(mongo_user) & !stri_isempty(mongo_password)){
   db <- mongo(url = paste("mongodb://", 
                           mongo_user, ":", mongo_password, "@", 
                           mongo_host, ":", toString(mongo_port), sep = ""),
               db = mongo_db,
               collection = mongo_collection)
} else {
   db <- mongo(url = paste("mongodb://", 
                           mongo_host, ":", toString(mongo_port), sep = ""),
               db = mongo_db,
               collection = mongo_collection)
}
db

Registered S3 method overwritten by 'openssl':
  method      from
  print.bytes Rcpp


<Mongo collection> 'markers' 
 $aggregate(pipeline = "{}", options = "{\"allowDiskUse\":true}", handler = NULL, pagesize = 1000, iterate = FALSE) 
 $count(query = "{}") 
 $disconnect(gc = TRUE) 
 $distinct(key, query = "{}") 
 $drop() 
 $export(con = stdout(), bson = FALSE, query = "{}", fields = "{}", sort = "{\"_id\":1}") 
 $find(query = "{}", fields = "{\"_id\":0}", sort = "{}", skip = 0, limit = 0, handler = NULL, pagesize = 1000) 
 $import(con, bson = FALSE) 
 $index(add = NULL, remove = NULL) 
 $info() 
 $insert(data, pagesize = 1000, stop_on_error = TRUE, ...) 
 $iterate(query = "{}", fields = "{\"_id\":0}", sort = "{}", skip = 0, limit = 0) 
 $mapreduce(map, reduce, query = "{}", sort = "{}", limit = 0, out = NULL, scope = NULL) 
 $remove(query, just_one = FALSE) 
 $rename(name, db = NULL) 
 $replace(query, update = "{}", upsert = FALSE) 
 $run(command = "{\"ping\": 1}", simplify = TRUE) 
 $update(query, update = "{\"$set\":{}}", filters = NULL, upsert = FALSE, multiple = FALSE

# II. Test modules 

### search_individuals()

In [6]:
df <- search_individuals(db, vec=c('CPI555', 'CPI515'))
dim(df)
df

individualCode,runId,type,folder,Samples
CPI555,CPI_20180808,FACS,/jcsmr/CPI/Fulcher/FACS files/CPI_20180808,"PBMCs_B cell CPI555 19Jul18_051·fcs , PBMCs_T cell CPI555 19Jul18_021·fcs , PBMCs_APCCPI555 19Jul18_066·fcs , PBMCs_Th cell CPI555 19Jul18_036·fcs, 2018-08-08 , 2018-08-08 , 2018-08-08 , 2018-08-08 , AylaL , AylaL , AylaL , AylaL , LSRII , LSRII , LSRII , LSRII , 1 , 1 , 1 , 1 , CPI , CPI , CPI , CPI , BD FACSDiva Software Version 8.0.1 , BD FACSDiva Software Version 8.0.1 , BD FACSDiva Software Version 8.0.1 , BD FACSDiva Software Version 8.0.1 , B cells (%Lymphocytes/live) , Anergic B (%B) , Bm (%B) , Bm (%Lymphocytes/live) , MZ B (%B) , MZ B (%Lymphocytes/live) , smB (%B) , smB (%Lymphocytes/live) , IgA smB (%B) , IgG smB (%B) , PBs (%B) , Trans-a (%B) , Trans-b (%B) , Trans-c (%B) , Trans-d (%B) , 10.3 , 4.31 , 15.2 , 1.57 , 9.56 , 0.99 , 3.29 , 0.34 , 1.47 , 1.52 , 0.19 , 7.94 , 6.41 , 11.2 , 11.1 , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , CD3 T cells (% Lymphocytes/live) , CD4+ T cells (%Lymphocytes/live) , Activated CD4+ T cells(% CD4) , Naive (%CD4) , TCM (%CD4) , TEM (% CD4) , TEMRA (% CD4) , CD8+ T cells (% of Lymphocytes/live), Activated CD8+ T cells (% CD8) , Naïve (% CD8) , TCM (% CD8) , TEM (% CD8) , TEMRA (% CD8) , 80.1 , 41.5 , 3.15 , 56.8 , 28.1 , 13.1 , 2.1 , 33.4 , 5.01 , 26.1 , 10.5 , 20.6 , 42.8 , , , , , , , , , , , , , , , , , , , , , , , , , , , NK (%LC) , NK- 1 (%LC) , NK-2 (%LC) , NK-3 (%LC) , NK-4 (%LC) , Classical Monocytes(%APC) , mDCs(%APC) , CD16+ mDCs (%APC) , CD16neg mDCs (%APC) , pDCs (%APC) , low density neutrophils (%APC) , non-classical monocytes (%APC) , 6.352 , 0.24 , 4.17 , 1.88 , 0.062 , 63.3 , 12.7 , 9.52 , 3.13 , 1.41 , 0.045 , 7.05 , , , , , , , , , , , , , , , , , , , , , , , , , Exhausted (%CD4) , R5 Th1 (%CD4) , R5 Th1-17 (%CD4) , R5 Th2 (%CD4) , R5 Th17 (%CD4) , Tfh effector (%CD4) , Tfh effector (%CXCR5) , Tfh memory (CD4%) , Tfh memory (%CXCR5) , Th1 (%CD4) , Th1-17 (CD4) , Th2 (CD4) , Th17 (%CD4) , Treg (%CD4) , Tfh (%CD4) , 2.06 , 3.6 , 1.19 , 0.56 , 1.47 , 1.35 , 19.7 , 3.18 , 46.6 , 15.8 , 5.9 , 2.14 , 1.97 , 8.51 , 6.83 , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,"
CPI515,CPI_20181031,FACS,/jcsmr/CPI/Fulcher/FACS files/CPI_20181031,"PBMCs_APC CPI515 04Jul18_109·fcs , PBMCs_Th cell CPI515 04Jul18_049·fcs, PBMCs_B cell CPI515 04Jul18_079·fcs , PBMCs_T cell CPI515 04Jul18_019·fcs , 2018-11-01 , 2018-11-01 , 2018-11-01 , 2018-11-01 , AylaL , AylaL , AylaL , AylaL , LSRII , LSRII , LSRII , LSRII , 1 , 1 , 1 , 1 , CPI , CPI , CPI , CPI , BD FACSDiva Software Version 8.0.1 , BD FACSDiva Software Version 8.0.1 , BD FACSDiva Software Version 8.0.1 , BD FACSDiva Software Version 8.0.1 , NK (%LC) , NK- 1 (%LC) , NK-2 (%LC) , NK-3 (%LC) , NK-4 (%LC) , Classical Monocytes(%APC) , mDCs(%APC) , CD16+ mDCs (%APC) , CD16neg mDCs (%APC) , pDCs (%APC) , low density neutrophils (%APC) , non-classical monocytes (%APC) , 22.13 , 0.75 , 16.1 , 1.44 , 3.84 , 62.7 , 19.7 , 11.8 , 7.88 , 5.03 , 0.23 , 1.39 , , , , , , , , , , , , , , , , , , , , , , , , , Exhausted (%CD4) , R5 Th1 (%CD4) , R5 Th1-17 (%CD4) , R5 Th2 (%CD4) , R5 Th17 (%CD4) , Tfh effector (%CD4) , Tfh effector (%CXCR5) , Tfh memory (CD4%) , Tfh memory (%CXCR5) , Th1 (%CD4) , Th1-17 (CD4) , Th2 (CD4) , Th17 (%CD4) , Treg (%CD4) , Tfh (%CD4) , 0.28 , 1.83 , 0.48 , 2.47 , 1.98 , 0.13 , 1.95 , 5.82 , 86.1 , 3.05 , 4 , 6.49 , 3.65 , 7.05 , 6.76 , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , B cells (%Lymphocytes/live) , Anergic B (%B) , Bm (%B) , Bm (%Lymphocytes/live) , MZ B (%B) , MZ B (%Lymphocytes/live) , smB (%B) , smB (%Lymphocytes/live) , IgA smB (%B) , IgG smB (%B) , PBs (%B) , Trans-a (%B) , Trans-b (%B) , Trans-c (%B) , Trans-d (%B) , 9.91 , 13.9 , 24.8 , 2.45 , 14.9 , 1.48 , 8.37 , 0.83 , 4.7 , 3.58 , 0.26 , 0.13 , 0.12 , 0.19 , 0.49 , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , CD3 T cells (% Lymphocytes/live) , CD4+ T cells (%Lymphocytes/live) , Activated CD4+ T cells(% CD4) , Naive (%CD4) , TCM (%CD4) , TEM (% CD4) , TEMRA (% CD4) , CD8+ T cells (% of Lymphocytes/live), Activated CD8+ T cells (% CD8) , Naïve (% CD8) , TCM (% CD8) , TEM (% CD8) , TEMRA (% CD8) , 67.8 , 33.3 , 0.37 , 73.8 , 21.8 , 3.51 , 0.87 , 27.9 , 0.31 , 46.4 , 4.25 , 24.4 , 25 , , , , , , , , , , , , , , , , , , , , , , , , , ,"


### search_individuals_nin

In [7]:
df_nin <- search_individuals_nin(db, vec=c('CPI555', 'CPI515'))
dim(df_nin)

### search_all_control()

In [8]:
df_control <- search_all_control(db, query='{"individualCode": {"$regex" : "^HBD|^APOC", "$options" : "i"}}')
dim(df_control)

### search_control_via_runId()

In [9]:
df_control_via_runId <- search_control_via_runId(db, vec_runIds = c('CPI_20181031','CPI_20201104'))
dim(df_control_via_runId)

In [10]:
df <- search_individuals(db, vec=c('CPI555', 'CPI515'))
df_control_via_runId <- search_control_via_runId(db, vec_runIds = df$runId)
df$runId
dim(df_control_via_runId)

In [11]:
df_treatment_init <- search_individuals(db, 
                      vec = c('CPI203', 'CPI204', 'CPI236', 'CPI237', 'CPI238', 
                              'CPI248', 'CPI249', 'CPI255', 'CPI270', 'CPI280', 
                              'CPI282', 'CPI302', 'CPI317', 'CPI318', 'CPI464'))
print('dim of df_treatment_init')
dim(df_treatment_init)

print('vec_runIds')
vec_runIds = df_treatment_init$runId
print(vec_runIds)

df_control_init <- search_control_via_runId(db, vec_runIds = vec_runIds)
print('dim of df_control_init')
dim(df_control_init)

[1] "dim of df_treatment_init"


[1] "vec_runIds"
 [1] "CPI_20180919" "CPI_20181031" "CPI_20181031" "CPI_20181031" "CPI_20181031"
 [6] "CPI_20181031" "CPI_20181031" "CPI_20181031" "CPI_20181128" "CPI_20181205"
[11] "CPI_20181128" "CPI_20181205" "CPI_20181205" "CPI_20181128" "CPI_20181128"
[16] "CPI_20181205" "CPI_20181128" "CPI_20181205" "CPI_20181205" "CPI_20181128"
[21] "CPI_20181128" "CPI_20181205"
[1] "dim of df_control_init"


### transform_df(): using for one study code only

In [12]:
df <- search_individuals(db, vec=c('APO014', 'CPI203', 'CPI248', 'CPI515'))
df_one_study_code <- df %>% filter(individualCode == 'CPI515')

df_markers <- transform_df(df_one_study_code)
dim(df_markers)
head(df_markers, 4)
tail(df_markers, 4)

name,value,studyCode
NK (%LC),22.13,CPI515
NK- 1 (%LC),0.75,CPI515
NK-2 (%LC),16.1,CPI515
NK-3 (%LC),1.44,CPI515


name,value,studyCode
Naïve (% CD8),46.4,CPI515
TCM (% CD8),4.25,CPI515
TEM (% CD8),24.4,CPI515
TEMRA (% CD8),25.0,CPI515


In [13]:
df_one_study_code <- df %>% filter(individualCode == 'APO014')

df_markers <- transform_df(df_one_study_code)
dim(df_markers)
head(df_markers, 2)
tail(df_markers, 2)

name,value,studyCode
singlets/Single Cells/live/Lymphocytes/CD3+ | Freq· of Lymphocytes,53.4,APO014
singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell | Freq· of Lymphocytes,31.6,APO014


name,value,studyCode
Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th17 | Freq· of CD4+ T cells,14.6,APO014
Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/Treg | Freq· of CD4+ T cells,3.25,APO014


### filter_transform_df()

In [14]:
df <- search_individuals(db, vec=c('APO014', 'CPI203', 'CPI248', 'CPI515'))

df_filter <- filter_transform_df(study_code = 'CPI515', df = df)
dim(df_filter)
head(df_filter, 2)
tail(df_filter, 2)

name,value,studyCode
NK (%LC),22.13,CPI515
NK- 1 (%LC),0.75,CPI515


name,value,studyCode
TEM (% CD8),24.4,CPI515
TEMRA (% CD8),25.0,CPI515


### concat_df()

In [15]:
df_control <- search_all_control(db)
dim(df_control)
study_codes_control <- unique(df_control$individualCode) # Get study code from df_control

df_control_concat <- concat_df(study_codes_control, df_control)
dim(df_control_concat)
write.csv(df_control_concat, "df_control_concat.csv")

In [16]:
df <- search_individuals(db, vec=c('CPI203', 'CPI248', 'CPI515'))
study_codes <- c('CPI248', 'CPI515') # Select an array of study code
df_concat <- concat_df(study_codes, df)
dim(df_concat)
head(df_concat, 3)
tail(df_concat, 3)
write.csv(df_concat, "df_concat_example.csv")

name,value,studyCode
B cells (%Lymphocytes/live),8.95,CPI248
Anergic B (%B),3.22,CPI248
Bm (%B),5.28,CPI248


name,value,studyCode
TCM (% CD8),4.25,CPI515
TEM (% CD8),24.4,CPI515
TEMRA (% CD8),25.0,CPI515


### concat_agg_mean_df

In [17]:
df <- search_individuals(db, vec=c('APO014', 'CPI203', 'CPI248', 'CPI515'))
study_codes <- c('APO014', 'CPI248', 'CPI515')

df_agg <- concat_agg_mean_df(study_codes, df)

dim(df_agg)
head(df_agg, 2)
tail(df_agg, 2)

`summarise()` has grouped output by 'studyCode'. You can override using the `.groups` argument.


studyCode,name,value
APO014,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 1 | Freq· of LC,0.33
APO014,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 2 | Freq· of LC,13.8


studyCode,name,value
CPI515,Trans-d (%B),0.49
CPI515,Treg (%CD4),7.05


In [18]:
df <- search_individuals(db, vec=c('HBD001'))
study_codes <- c('HBD001')

df_agg <- concat_agg_mean_df(study_codes, df)

dim(df_agg)
head(df_agg, 2)
tail(df_agg, 2)
write.csv(df_agg, "df_concat_agg_mean_HBD001.csv")

`summarise()` has grouped output by 'studyCode'. You can override using the `.groups` argument.


studyCode,name,value
HBD001,Activated CD4+ T cells(% CD4),0.8175
HBD001,Activated CD8+ T cells (% CD8),0.5425


studyCode,name,value
HBD001,Trans-d (%B),5.3875
HBD001,Treg (%CD4),5.7175


### concat_pivot_df_DEPRECATED()

#### Example 1

In [19]:
options(repr.matrix.max.cols=150, repr.matrix.max.rows=1000)

In [20]:
study_codes <- c('APO014', 'CPI203', 'CPI248', 'CPI515')
length(study_codes)

df <- search_individuals(db, vec = study_codes)

df_pivot <- concat_pivot_df_DEPRECATED(study_codes, df)
# write.csv(df_pivot, 'df_concat_pivot_treatment.csv')
dim(df_pivot)
head(df_pivot, 2)
tail(df_pivot, 2)

name,APO014,CPI203,CPI248,CPI515
singlets/Single Cells/live/Lymphocytes/CD3+ | Freq· of Lymphocytes,53.4,,,
singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell | Freq· of Lymphocytes,31.6,,,


name,APO014,CPI203,CPI248,CPI515
Th2 (CD4),,,49.6,6.49
Th17 (%CD4),,,8.29,3.65


#### Example 2: one study code and one marker may have several values (measurements)

In [21]:
df_control = search_individuals(db, vec = c('HBD056', 'HBD044', 'HBD039', 'HBD040'))
df_control_pivot <- concat_pivot_df_DEPRECATED(
    study_codes = df_control$individualCode, 
    df = df_control
)
dim(df_control_pivot)
head(df_control_pivot, 2)
tail(df_control_pivot, 2)

"Values are not uniquely identified; output will contain list-cols.
* Use `values_fn = length` to identify where the duplicates arise
* Use `values_fn = {summary_fun}` to summarise duplicates"

name,HBD056,HBD044,HBD039,HBD040
Exhausted (%CD4),"0.80, 0.87, 0.54, 0.64","0.39, 0.53, 0.39, 0.53, 0.39, 0.53","0.89, 1.09, 0.89, 1.09, 0.89, 1.09, 0.89, 1.09",0.98
R5 Th1 (%CD4),"4.23, 3.18, 2.46, 0.37","2.57, 2.57, 2.57","2.18, 1.89, 2.18, 1.89, 2.18, 1.89, 2.18, 1.89",4.26


name,HBD056,HBD044,HBD039,HBD040
Th1-17 (%CD4),,,"8.96, 8.96, 8.96, 8.96",
Th2 (%CD4),,,"8.78, 8.78, 8.78, 8.78",


### concat_pivot_df_name()

In [22]:
df_control <- search_all_control(db)
dim(df_control)
study_codes_control <- unique(df_control$individualCode) # Get study code from df_control

df_control_concat_pivot <- concat_pivot_df_name(study_codes_control, df_control)
dim(df_control_concat_pivot)
head(df_control_concat_pivot, 3)
write.csv(df_control_concat_pivot, "df_control_concat_pivot.csv")

id,studyCode,NK (%LC),NK- 1 (%LC),NK-2 (%LC),NK-3 (%LC),NK-4 (%LC),Classical Monocytes(%APC),mDCs(%APC),CD16+ mDCs (%APC),CD16neg mDCs (%APC),pDCs (%APC),low density neutrophils (%APC),non-classical monocytes (%APC),Exhausted (%CD4),R5 Th1 (%CD4),R5 Th1-17 (%CD4),R5 Th2 (%CD4),R5 Th17 (%CD4),Tfh effector (%CD4),Tfh effector (%CXCR5),Tfh memory (CD4%),Tfh memory (%CXCR5),Th1 (%CD4),Th1-17 (CD4),Th2 (CD4),Th17 (%CD4),Treg (%CD4),Tfh (%CD4),B cells (%Lymphocytes/live),Anergic B (%B),Bm (%B),Bm (%Lymphocytes/live),MZ B (%B),MZ B (%Lymphocytes/live),smB (%B),smB (%Lymphocytes/live),IgA smB (%B),IgG smB (%B),PBs (%B),Trans-a (%B),Trans-b (%B),Trans-c (%B),Trans-d (%B),CD3 T cells (% Lymphocytes/live),CD4+ T cells (%Lymphocytes/live),Activated CD4+ T cells(% CD4),Naive (%CD4),TCM (%CD4),TEM (% CD4),TEMRA (% CD4),CD8+ T cells (% of Lymphocytes/live),Activated CD8+ T cells (% CD8),Naïve (% CD8),TCM (% CD8),TEM (% CD8),TEMRA (% CD8),CD19 (%LC),B-CD21lo (%B),B-mem (%B),B-mem (%LC),B-MZ (%B),B-MZ (%LC),B-SM (%B),B-SM (%LC),B-SM-IgA (%B),B-SM-IgG (%B),B-trans-b (%B),Mono-class (%APC),mDCs (%APC),mDC-CD16+ (%APC),mDC-CD16- (%APC),Mono-non-class (%APC),CD3 (%LCs),CD4 (%LCs),...,CD4-Naive (%CD4),CD4-TCM (%CD4),CD4-TEM (%CD4),CD4-TEMRA (%CD4),CD8 (%LC),CD8-Act (%CD8),CD8-Naive (%CD8),CD8-TCM (%CD8),CD8-TEM (%CD8),CD8-TEMRA (%CD8),CD4-Exh (%CD4),R5-Th1 (%CD4),R5-Th1/17 (%CD4),R5-Th2 (%CD4),R5-Th17 (%CD4),cTfh (%CD4),Tfh-eff (%CD4),Tfh-eff (%CXCR5),Tfh-mem (%CD4),Tfh-mem (%CXCR5),Th1-17 (%CD4),Th2 (%CD4),Single Cells/Single Cells/live/Lymphocytes/CD19+ | Freq․ of Lymphocytes,Single Cells/Single Cells/live/Lymphocytes/CD19+/Anergic | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm | Freq․ of Lymphocytes,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/MZ | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/MZ | Freq․ of Lymphocytes,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/smB | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/smB | Freq․ of Lymphocytes,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/smB/IgA+ smB | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/smB/IgG+ smB | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/PBs | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Trans-a | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Trans-b | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Trans-c | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Trans-d | Freq․ of CD19+,singlets/Single Cells/live/Lymphocytes/CD3+ | Freq․ of Lymphocytes,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell | Freq․ of Lymphocytes,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/Activated CD4+ T cells | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/Naive | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/TCM | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/TEM | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/TEMRA | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells | Freq․ of Lymphocytes,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/Activated CD8+ T cell | Freq․ of CD8+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/Naive | Freq․ of CD8+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/TCM | Freq․ of CD8+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/TEM | Freq․ of CD8+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/TEMRA | Freq․ of CD8+ T cells,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 1 | Freq․ of LC,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 2 | Freq․ of LC,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/Nk 3 | Freq․ of LC,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 4 | Freq․ of LC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/Classical monocytes | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/DC/mDC | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/DC/mDC/CD16+ mDC | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/DC/mDC/CD16- mDC | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/DC/pDC | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/LDN | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/non-classical monocytes | Freq․ of APC,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/Exhausted | Freq․ of Parent,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/R5 Th1 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/R5 Th1-17 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/R5 Th2 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/R5 Th17 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/Tfh eff | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/Tfh eff | Freq․ of R5+,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/Tfh mem | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/Tfh mem | Freq․ of R5+,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th1 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th1-17 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th2 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th17 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/Treg | Freq․ of CD4+ T cells
1,HBD001,17.92,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,HBD001,,1.35,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,HBD001,,,13.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### concat_agg_mean_pivot_df_studyCode()

In [23]:
df <- search_individuals(db, vec=c('APO014', 'CPI203', 'CPI248', 'CPI515'))
study_codes <- c('APO014', 'CPI248', 'CPI515')

df_pivot <- concat_agg_mean_pivot_df_studyCode(study_codes, df)

dim(df_pivot)
head(df_pivot, 2)
tail(df_pivot, 2)

`summarise()` has grouped output by 'studyCode'. You can override using the `.groups` argument.


name,APO014,CPI248,CPI515
Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 1 | Freq· of LC,0.33,,
Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 2 | Freq· of LC,13.8,,


name,APO014,CPI248,CPI515
Trans-d (%B),,3.02,0.49
Treg (%CD4),,5.85,7.05


### concat_agg_mean_pivot_df_name()

In [24]:
df <- search_individuals(db, vec=c('APO014', 'CPI203', 'CPI248', 'CPI515'))
study_codes <- c('APO014', 'CPI248', 'CPI515')

df_pivot <- concat_agg_mean_pivot_df_name(study_codes, df)

dim(df_pivot)
head(df_pivot, 2)
tail(df_pivot, 2)

`summarise()` has grouped output by 'studyCode'. You can override using the `.groups` argument.


studyCode,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 1 | Freq․ of LC,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 2 | Freq․ of LC,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/Nk 3 | Freq․ of LC,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 4 | Freq․ of LC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/Classical monocytes | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/DC/mDC | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/DC/mDC/CD16- mDC | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/DC/mDC/CD16+ mDC | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/DC/pDC | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/LDN | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/non-classical monocytes | Freq․ of APC,Single Cells/Single Cells/live/Lymphocytes/CD19+ | Freq․ of Lymphocytes,Single Cells/Single Cells/live/Lymphocytes/CD19+/Anergic | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm | Freq․ of Lymphocytes,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/MZ | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/MZ | Freq․ of Lymphocytes,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/smB | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/smB | Freq․ of Lymphocytes,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/smB/IgA+ smB | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/smB/IgG+ smB | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/PBs | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Trans-a | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Trans-b | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Trans-c | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Trans-d | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/Exhausted | Freq․ of Parent,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th1-17 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th1 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th17 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th2 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/R5 Th1-17 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/R5 Th1 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/R5 Th17 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/R5 Th2 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/Tfh eff | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/Tfh eff | Freq․ of R5+,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/Tfh mem | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/Tfh mem | Freq․ of R5+,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/Treg | Freq․ of CD4+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+ | Freq․ of Lymphocytes,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell | Freq․ of Lymphocytes,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/Activated CD4+ T cells | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/Naive | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/TCM | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/TEM | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/TEMRA | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells | Freq․ of Lymphocytes,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/Activated CD8+ T cell | Freq․ of CD8+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/Naive | Freq․ of CD8+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/TCM | Freq․ of CD8+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/TEM | Freq․ of CD8+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/TEMRA | Freq․ of CD8+ T cells,Activated CD4+ T cells(% CD4),Activated CD8+ T cells (% CD8),Anergic B (%B),B cells (%Lymphocytes/live),Bm (%B),Bm (%Lymphocytes/live),CD16+ mDCs (%APC),CD16neg mDCs (%APC),CD3 T cells (% Lymphocytes/live),CD4+ T cells (%Lymphocytes/live),CD8+ T cells (% of Lymphocytes/live),Classical Monocytes(%APC),Exhausted (%CD4),IgA smB (%B),IgG smB (%B),low density neutrophils (%APC),mDCs(%APC),MZ B (%B),MZ B (%Lymphocytes/live),Naïve (% CD8),Naive (%CD4),NK- 1 (%LC),NK-2 (%LC),NK-3 (%LC),NK-4 (%LC),NK (%LC),non-classical monocytes (%APC),PBs (%B),pDCs (%APC),R5 Th1-17 (%CD4),R5 Th1 (%CD4),R5 Th17 (%CD4),R5 Th2 (%CD4),smB (%B),smB (%Lymphocytes/live),TCM (% CD8),TCM (%CD4),TEM (% CD4),TEM (% CD8),TEMRA (% CD4),TEMRA (% CD8),Tfh (%CD4),Tfh effector (%CD4),Tfh effector (%CXCR5),Tfh memory (%CXCR5),Tfh memory (CD4%),Th1-17 (CD4),Th1 (%CD4),Th17 (%CD4),Th2 (CD4),Trans-a (%B),Trans-b (%B),Trans-c (%B),Trans-d (%B),Treg (%CD4)
APO014,0.33,13.8,0.69,5.83,23.9,18.6,15.8,2.77,4.42,31.8,1.72,20.9,34.2,23.4,4.9,6.57,1.38,12.3,2.57,5.0,6.57,0.21,4.4,2.94,4.96,7.49,0.29,2.29,2.55,14.6,20.4,0.28,0.79,1.72,3.95,0.32,4.79,5.25,77.7,3.25,53.4,31.6,0.19,41.2,46.4,11.6,0.89,14.6,0.55,60.1,16.7,12.7,10.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
CPI248,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.51,6.73,3.22,8.95,5.28,0.47,11.1,3.65,44.2,23.5,18.2,66.1,3.42,1.38,1.74,0.38,14.8,1.04,0.093,5.78,14.3,1.46,36.9,2.67,1.78,42.81,6.44,1.18,2.01,0.97,2.74,0.89,1.52,3.2,0.29,9.59,37.5,42.3,37.5,5.88,47.1,6.11,0.43,7.08,62.0,3.79,2.52,6.78,8.29,49.6,2.0,1.78,1.75,3.02,5.85


studyCode,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 1 | Freq․ of LC,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 2 | Freq․ of LC,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/Nk 3 | Freq․ of LC,Single Cells/Single Cells/live/LC and Mono/LC/non BT/CD15neg/NK 4 | Freq․ of LC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/Classical monocytes | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/DC/mDC | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/DC/mDC/CD16- mDC | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/DC/mDC/CD16+ mDC | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/DC/pDC | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/LDN | Freq․ of APC,Single Cells/Single Cells/live/LC and Mono/non BT/CD56-/APC/non-classical monocytes | Freq․ of APC,Single Cells/Single Cells/live/Lymphocytes/CD19+ | Freq․ of Lymphocytes,Single Cells/Single Cells/live/Lymphocytes/CD19+/Anergic | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm | Freq․ of Lymphocytes,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/MZ | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/MZ | Freq․ of Lymphocytes,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/smB | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/smB | Freq․ of Lymphocytes,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/smB/IgA+ smB | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Bm/smB/IgG+ smB | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/PBs | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Trans-a | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Trans-b | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Trans-c | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD19+/Trans-d | Freq․ of CD19+,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/Exhausted | Freq․ of Parent,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th1-17 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th1 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th17 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5-/Th2 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/R5 Th1-17 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/R5 Th1 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/R5 Th17 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/R5 Th2 | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/Tfh eff | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/Tfh eff | Freq․ of R5+,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/Tfh mem | Freq․ of CD4+ T cells,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/non Treg/R5+/Tfh mem | Freq․ of R5+,Single Cells/Single Cells/live/Lymphocytes/CD4+ T cells/Treg | Freq․ of CD4+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+ | Freq․ of Lymphocytes,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell | Freq․ of Lymphocytes,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/Activated CD4+ T cells | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/Naive | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/TCM | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/TEM | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD4+ T cell/TEMRA | Freq․ of CD4+ T cell,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells | Freq․ of Lymphocytes,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/Activated CD8+ T cell | Freq․ of CD8+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/Naive | Freq․ of CD8+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/TCM | Freq․ of CD8+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/TEM | Freq․ of CD8+ T cells,singlets/Single Cells/live/Lymphocytes/CD3+/CD8+ T cells/TEMRA | Freq․ of CD8+ T cells,Activated CD4+ T cells(% CD4),Activated CD8+ T cells (% CD8),Anergic B (%B),B cells (%Lymphocytes/live),Bm (%B),Bm (%Lymphocytes/live),CD16+ mDCs (%APC),CD16neg mDCs (%APC),CD3 T cells (% Lymphocytes/live),CD4+ T cells (%Lymphocytes/live),CD8+ T cells (% of Lymphocytes/live),Classical Monocytes(%APC),Exhausted (%CD4),IgA smB (%B),IgG smB (%B),low density neutrophils (%APC),mDCs(%APC),MZ B (%B),MZ B (%Lymphocytes/live),Naïve (% CD8),Naive (%CD4),NK- 1 (%LC),NK-2 (%LC),NK-3 (%LC),NK-4 (%LC),NK (%LC),non-classical monocytes (%APC),PBs (%B),pDCs (%APC),R5 Th1-17 (%CD4),R5 Th1 (%CD4),R5 Th17 (%CD4),R5 Th2 (%CD4),smB (%B),smB (%Lymphocytes/live),TCM (% CD8),TCM (%CD4),TEM (% CD4),TEM (% CD8),TEMRA (% CD4),TEMRA (% CD8),Tfh (%CD4),Tfh effector (%CD4),Tfh effector (%CXCR5),Tfh memory (%CXCR5),Tfh memory (CD4%),Th1-17 (CD4),Th1 (%CD4),Th17 (%CD4),Th2 (CD4),Trans-a (%B),Trans-b (%B),Trans-c (%B),Trans-d (%B),Treg (%CD4)
CPI248,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.51,6.73,3.22,8.95,5.28,0.47,11.1,3.65,44.2,23.5,18.2,66.1,3.42,1.38,1.74,0.38,14.8,1.04,0.093,5.78,14.3,1.46,36.9,2.67,1.78,42.81,6.44,1.18,2.01,0.97,2.74,0.89,1.52,3.2,0.29,9.59,37.5,42.3,37.5,5.88,47.1,6.11,0.43,7.08,62.0,3.79,2.52,6.78,8.29,49.6,2.0,1.78,1.75,3.02,5.85
CPI515,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.37,0.31,13.9,9.91,24.8,2.45,11.8,7.88,67.8,33.3,27.9,62.7,0.28,4.7,3.58,0.23,19.7,14.9,1.48,46.4,73.8,0.75,16.1,1.44,3.84,22.13,1.39,0.26,5.03,0.48,1.83,1.98,2.47,8.37,0.83,4.25,21.8,3.51,24.4,0.87,25.0,6.76,0.13,1.95,86.1,5.82,4.0,3.05,3.65,6.49,0.13,0.12,0.19,0.49,7.05


### len()

In [25]:
# len <- function(x) { return (length(x[!is.na(x)])) } 
len(c(10, 20, NA, 99.99, NA))

```
Eg. df = pd.DataFrame({'Values': [119, np.nan, 80, 50, 120, 90, 119]}).sort_values('Values')
        Values	Rank	Rank_Percentile_Pandas	Rank_Percentile_Manual	Rank_Percentile_JCSMR
    0	50.0	1.0	            0.166667	            0.166667	        0.142857
    1	80.0	2.0	            0.333333	            0.333333	        0.285714
    2	90.0	3.0	            0.500000	            0.500000	        0.428571
    3	119.0	4.5	            0.750000	            0.750000	        0.642857
    4	119.0	4.5	            0.750000	            0.750000	        0.642857
    5	120.0	6.0	            1.000000	            1.000000	        0.857143
    6	NaN	    NaN	            NaN	                    NaN	                NaN
```

### percentile_JCSMR()

In [26]:
vec = c(50.0, 80.0, 90.0, 119.0, 119.0, 120, NA)

percentiles <- percentile_JCSMR(vec)
percentiles

### percentile()

In [27]:
vec = c(50.0, 80.0, 90.0, 119.0, 119.0, 120, NA)

percentiles <- percentile(vec)
percentiles

### percentile_norm()

In [28]:
vec = c(50.0, 80.0, 90.0, 119.0, 119.0, 120, NA)

per <- percentile_norm(value = 39, vec = vec, method='JCSMR')
per

In [29]:
per <- percentile_norm(value = 90, vec = vec, method='JCSMR')
per

In [30]:
per <- percentile_norm(value = 120, vec = vec, method='JCSMR')
per

In [31]:
per <- percentile_norm(value = 121, vec = vec, method='JCSMR')
per

### norm_perc_one_col(): Normalize percentile at one column only

In [45]:
col_name <- "Treg (%CD4)"

# control
df_control <- search_individuals(db, vec = c('HBD056', 'HBD044', 'HBD039', 'HBD040'))
study_codes_control <- unique(df_control$individualCode)
df_control <- concat_agg_mean_pivot_df_name(study_codes_control, df_control)

# treatment
df_data <- search_individuals(db, vec = c('CPI515', 'CPI545'))
   study_codes_data <- unique(df_data$individualCode)      
   df_data <- concat_agg_mean_pivot_df_name(study_codes_data, df_data)

print('At control')
df_control[, col_name]
print('At treatment: Before calling norm_perc_one_col')
df_data[, col_name]  # output is a dataframe
df_data[[col_name]]  # output is a vector

`summarise()` has grouped output by 'studyCode'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'studyCode'. You can override using the `.groups` argument.


[1] "At control"


Treg (%CD4)
6.435
6.28
5.95
4.2975


[1] "At treatment: Before calling norm_perc_one_col"


Treg (%CD4)
7.05
7.43


In [46]:
df_data <- norm_perc_one_col(col_name, df_control, df_data)
print('At control')
df_control[, col_name]
print('At treatment: AFTER calling norm_perc_one_col')
df_data

[1] "At control"


Treg (%CD4)
6.435
6.28
5.95
4.2975


[1] "At treatment: AFTER calling norm_perc_one_col"


studyCode,Activated CD4+ T cells(% CD4),Activated CD8+ T cells (% CD8),Anergic B (%B),B cells (%Lymphocytes/live),Bm (%B),Bm (%Lymphocytes/live),CD16+ mDCs (%APC),CD16neg mDCs (%APC),CD3 T cells (% Lymphocytes/live),CD4+ T cells (%Lymphocytes/live),CD8+ T cells (% of Lymphocytes/live),Classical Monocytes(%APC),Exhausted (%CD4),IgA smB (%B),IgG smB (%B),low density neutrophils (%APC),mDCs(%APC),MZ B (%B),MZ B (%Lymphocytes/live),Naïve (% CD8),Naive (%CD4),NK- 1 (%LC),NK-2 (%LC),NK-3 (%LC),NK-4 (%LC),NK (%LC),non-classical monocytes (%APC),PBs (%B),pDCs (%APC),R5 Th1-17 (%CD4),R5 Th1 (%CD4),R5 Th17 (%CD4),R5 Th2 (%CD4),smB (%B),smB (%Lymphocytes/live),TCM (% CD8),TCM (%CD4),TEM (% CD4),TEM (% CD8),TEMRA (% CD4),TEMRA (% CD8),Tfh (%CD4),Tfh effector (%CD4),Tfh effector (%CXCR5),Tfh memory (%CXCR5),Tfh memory (CD4%),Th1-17 (CD4),Th1 (%CD4),Th17 (%CD4),Th2 (CD4),Trans-a (%B),Trans-b (%B),Trans-c (%B),Trans-d (%B),Treg (%CD4)
CPI515,0.37,0.31,13.9,9.91,24.8,2.45,11.8,7.88,67.8,33.3,27.9,62.7,0.28,4.7,3.58,0.23,19.7,14.9,1.48,46.4,73.8,0.75,16.1,1.44,3.84,22.13,1.39,0.26,5.03,0.48,1.83,1.98,2.47,8.37,0.83,4.25,21.8,3.51,24.4,0.87,25.0,6.76,0.13,1.95,86.1,5.82,4.0,3.05,3.65,6.49,0.13,0.12,0.19,0.49,1
CPI545,0.57,0.58,7.68,11.8,8.4,1.0,5.1,8.89,79.0,53.4,20.3,55.4,0.36,1.42,3.82,0.11,14.0,1.53,0.18,83.8,82.3,2.08,3.55,1.67,1.14,8.44,1.73,2.02,4.6,,,,,5.32,0.63,2.72,13.9,3.5,9.5,0.3,3.96,3.38,0.19,5.7,63.4,2.14,,,,,8.49,11.4,7.02,7.35,1


### normalize_percentile(): Normalize percentile at all columns execept one

In [34]:
col_not_cal_per <- "studyCode"  # column not calculate percentile

# control
df_control <- search_individuals(db, vec = c('HBD056', 'HBD044', 'HBD039', 'HBD040'))
study_codes_control <- unique(df_control$individualCode)
df_control <- concat_agg_mean_pivot_df_name(study_codes_control, df_control)

# treatment
df_data <- search_individuals(db, vec = c('CPI515', 'CPI545'))
   study_codes_data <- unique(df_data$individualCode)      
   df_data <- concat_agg_mean_pivot_df_name(study_codes_data, df_data)

print('At control')
head(df_control, 3)
print('At treatment: Before calling normalize_percentile')
head(df_data, 3)

`summarise()` has grouped output by 'studyCode'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'studyCode'. You can override using the `.groups` argument.


[1] "At control"


studyCode,Activated CD4+ T cells(% CD4),Activated CD8+ T cells (% CD8),Anergic B (%B),B-CD21lo (%B),B-mem (%B),B-mem (%LC),B-MZ (%B),B-MZ (%LC),B-SM-IgA (%B),B-SM-IgG (%B),B-SM (%B),B-SM (%LC),B-trans-b (%B),B cells (%Lymphocytes/live),Bm (%B),Bm (%Lymphocytes/live),CD16+ mDCs (%APC),CD16neg mDCs (%APC),CD19 (%LC),CD3 (%LCs),CD3 T cells (% Lymphocytes/live),CD4-Act (%CD4),CD4-Exh (%CD4),CD4-Naive (%CD4),CD4-TCM (%CD4),CD4-TEM (%CD4),CD4-TEMRA (%CD4),CD4 (%LCs),CD4+ T cells (%Lymphocytes/live),CD8-Act (%CD8),CD8-Naive (%CD8),CD8-TCM (%CD8),CD8-TEM (%CD8),CD8-TEMRA (%CD8),CD8 (%LC),CD8+ T cells (% of Lymphocytes/live),Classical Monocytes(%APC),cTfh (%CD4),Exhausted (%CD4),IgA smB (%B),IgG smB (%B),low density neutrophils (%APC),mDC-CD16- (%APC),mDC-CD16+ (%APC),mDCs (%APC),mDCs(%APC),Mono-class (%APC),Mono-non-class (%APC),MZ B (%B),MZ B (%Lymphocytes/live),Naïve (% CD8),Naive (%CD4),NK- 1 (%LC),NK-2 (%LC),NK-3 (%LC),NK-4 (%LC),NK (%LC),non-classical monocytes (%APC),PBs (%B),pDCs (%APC),R5-Th1 (%CD4),R5-Th1/17 (%CD4),R5-Th17 (%CD4),R5-Th2 (%CD4),R5 Th1-17 (%CD4),R5 Th1 (%CD4),R5 Th17 (%CD4),R5 Th2 (%CD4),smB (%B),smB (%Lymphocytes/live),TCM (% CD8),TCM (%CD4),TEM (% CD4),TEM (% CD8),TEMRA (% CD4),TEMRA (% CD8),Tfh-eff (%CD4),Tfh-eff (%CXCR5),Tfh-mem (%CD4),Tfh-mem (%CXCR5),Tfh (%CD4),Tfh effector (%CD4),Tfh effector (%CXCR5),Tfh memory (%CXCR5),Tfh memory (CD4%),Th1-17 (%CD4),Th1-17 (CD4),Th1 (%CD4),Th17 (%CD4),Th2 (%CD4),Th2 (CD4),Trans-a (%B),Trans-b (%B),Trans-c (%B),Trans-d (%B),Treg (%CD4)
HBD039,0.535,0.72,10.64,9.76,13.85,1.455,4.015,0.42,2.5,5.685,8.81,0.925,3.31,10.18,15.7,1.605,6.135,2.72,10.5,58.5,64.75,0.895,1.53,55.8,29.1,13.6,1.47,38.65,43.7,1.33,44.5,25.15,25.15,5.16,17.65,17.45,70.15,4.925,0.99,2.805,5.82,0.005185,6.125,8.04,14.2,8.86,73.0,1.31,5.265,0.54,51.3,58.75,0.745,23.375,1.475,0.195,25.9975,3.585,0.7675,2.6875,1.77,0.4,1.09,1.5,0.199,2.035,0.795,2.055,9.0,0.92,21.95,29.8,10.425,20.05,1.055,6.72,0.885,18.05,4.035,81.9,5.085,0.315,6.125,79.65,4.05,8.96,9.67,12.93333,4.826667,8.78,5.68,1.915,3.72,3.12,3.62,6.435
HBD040,0.8,1.15,12.8,,,,,,,,,,,11.1,27.0,3.01,35.5,7.59,,,64.5,,,,,,,,30.1,,,,,,,24.9,14.1,,0.98,3.51,14.6,0.13,,,,43.1,,,5.78,0.64,26.8,37.6,0.66,19.7,1.1,0.82,22.28,9.21,0.41,6.37,,,,,0.87,4.26,2.52,2.92,19.4,2.16,8.62,42.8,15.3,22.3,4.31,42.4,,,,,10.6,0.55,5.18,81.5,8.61,,12.4,22.8,5.58,,6.3,2.48,2.33,3.67,2.77,6.28
HBD044,0.51,0.345,8.78,,,,,,,,,,,7.235,22.75,1.645,9.135,7.94,,,78.05,,,,,,,,44.2,,,,,,,26.1,50.85,,0.46,2.24,7.675,0.0755,,,,17.05,,,8.805,0.635,66.45,67.05,1.935,10.885,1.625,0.52,14.965,4.37,0.695,5.055,,,,,0.77,2.57,1.72,2.02,11.0,0.795,6.98,27.15,5.195,11.7,0.62,14.9,,,,,6.045,0.25,4.125,82.0,4.925,,7.97,6.0,5.42,,6.9,1.49,2.0,1.96,2.595,5.95


[1] "At treatment: Before calling normalize_percentile"


studyCode,Activated CD4+ T cells(% CD4),Activated CD8+ T cells (% CD8),Anergic B (%B),B cells (%Lymphocytes/live),Bm (%B),Bm (%Lymphocytes/live),CD16+ mDCs (%APC),CD16neg mDCs (%APC),CD3 T cells (% Lymphocytes/live),CD4+ T cells (%Lymphocytes/live),CD8+ T cells (% of Lymphocytes/live),Classical Monocytes(%APC),Exhausted (%CD4),IgA smB (%B),IgG smB (%B),low density neutrophils (%APC),mDCs(%APC),MZ B (%B),MZ B (%Lymphocytes/live),Naïve (% CD8),Naive (%CD4),NK- 1 (%LC),NK-2 (%LC),NK-3 (%LC),NK-4 (%LC),NK (%LC),non-classical monocytes (%APC),PBs (%B),pDCs (%APC),R5 Th1-17 (%CD4),R5 Th1 (%CD4),R5 Th17 (%CD4),R5 Th2 (%CD4),smB (%B),smB (%Lymphocytes/live),TCM (% CD8),TCM (%CD4),TEM (% CD4),TEM (% CD8),TEMRA (% CD4),TEMRA (% CD8),Tfh (%CD4),Tfh effector (%CD4),Tfh effector (%CXCR5),Tfh memory (%CXCR5),Tfh memory (CD4%),Th1-17 (CD4),Th1 (%CD4),Th17 (%CD4),Th2 (CD4),Trans-a (%B),Trans-b (%B),Trans-c (%B),Trans-d (%B),Treg (%CD4)
CPI515,0.37,0.31,13.9,9.91,24.8,2.45,11.8,7.88,67.8,33.3,27.9,62.7,0.28,4.7,3.58,0.23,19.7,14.9,1.48,46.4,73.8,0.75,16.1,1.44,3.84,22.13,1.39,0.26,5.03,0.48,1.83,1.98,2.47,8.37,0.83,4.25,21.8,3.51,24.4,0.87,25.0,6.76,0.13,1.95,86.1,5.82,4.0,3.05,3.65,6.49,0.13,0.12,0.19,0.49,7.05
CPI545,0.57,0.58,7.68,11.8,8.4,1.0,5.1,8.89,79.0,53.4,20.3,55.4,0.36,1.42,3.82,0.11,14.0,1.53,0.18,83.8,82.3,2.08,3.55,1.67,1.14,8.44,1.73,2.02,4.6,,,,,5.32,0.63,2.72,13.9,3.5,9.5,0.3,3.96,3.38,0.19,5.7,63.4,2.14,,,,,8.49,11.4,7.02,7.35,7.43


In [35]:
data <- normalize_percentile(df_control, df_data, col_not_cal_per)

print('At treatment: AFTER calling normalize_percentile')
head(data$df, 3)

print('List column names to be calculated percentile by itself')
data$cols_cal_self_percentile

[1] "At treatment: AFTER calling normalize_percentile"


studyCode,Activated CD4+ T cells(% CD4),Activated CD8+ T cells (% CD8),Anergic B (%B),B cells (%Lymphocytes/live),Bm (%B),Bm (%Lymphocytes/live),CD16+ mDCs (%APC),CD16neg mDCs (%APC),CD3 T cells (% Lymphocytes/live),CD4+ T cells (%Lymphocytes/live),CD8+ T cells (% of Lymphocytes/live),Classical Monocytes(%APC),Exhausted (%CD4),IgA smB (%B),IgG smB (%B),low density neutrophils (%APC),mDCs(%APC),MZ B (%B),MZ B (%Lymphocytes/live),Naïve (% CD8),Naive (%CD4),NK- 1 (%LC),NK-2 (%LC),NK-3 (%LC),NK-4 (%LC),NK (%LC),non-classical monocytes (%APC),PBs (%B),pDCs (%APC),R5 Th1-17 (%CD4),R5 Th1 (%CD4),R5 Th17 (%CD4),R5 Th2 (%CD4),smB (%B),smB (%Lymphocytes/live),TCM (% CD8),TCM (%CD4),TEM (% CD4),TEM (% CD8),TEMRA (% CD4),TEMRA (% CD8),Tfh (%CD4),Tfh effector (%CD4),Tfh effector (%CXCR5),Tfh memory (%CXCR5),Tfh memory (CD4%),Th1-17 (CD4),Th1 (%CD4),Th17 (%CD4),Th2 (CD4),Trans-a (%B),Trans-b (%B),Trans-c (%B),Trans-d (%B),Treg (%CD4)
CPI515,0.0,0.0,0.6,0.2,0.4,0.4,0.6,0.6,0.6,0.4,1.0,0.4,0,0.6,0,0.6,0.6,1,1,0.4,1,0.4,0.2,0.2,1,0.2,0,0,0.4,0.2,0.0,0.4,0.4,0,0.2,0,0,0,0.6,0.4,0.6,0.4,0,0.0,1,0.4,0.0,0.0,0.0,0.4,0,0,0,0,1
CPI545,0.4,0.2,0.0,0.6,0.0,0.0,0.2,1.0,1.0,1.0,0.4,0.4,0,0.0,0,0.4,0.4,0,0,1.0,1,1.0,0.0,0.6,1,0.0,0,1,0.4,,,,,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0.4,0,0.0,,,,,1,1,1,1,1


[1] "List column names to be calculated percentile by itself"


### self_normalize_percentile()

In [36]:
col_not_cal_per <- "studyCode"  # column not calculate percentile

# treatment
df_data <- search_individuals(db, vec = c('CPI515', 'CPI545'))
study_codes_data <- unique(df_data$individualCode)      
df_data <- concat_agg_mean_pivot_df_name(study_codes_data, df_data)

print('At treatment: Before calling self_normalize_percentile()')
head(df_data, 3)

`summarise()` has grouped output by 'studyCode'. You can override using the `.groups` argument.


[1] "At treatment: Before calling self_normalize_percentile()"


studyCode,Activated CD4+ T cells(% CD4),Activated CD8+ T cells (% CD8),Anergic B (%B),B cells (%Lymphocytes/live),Bm (%B),Bm (%Lymphocytes/live),CD16+ mDCs (%APC),CD16neg mDCs (%APC),CD3 T cells (% Lymphocytes/live),CD4+ T cells (%Lymphocytes/live),CD8+ T cells (% of Lymphocytes/live),Classical Monocytes(%APC),Exhausted (%CD4),IgA smB (%B),IgG smB (%B),low density neutrophils (%APC),mDCs(%APC),MZ B (%B),MZ B (%Lymphocytes/live),Naïve (% CD8),Naive (%CD4),NK- 1 (%LC),NK-2 (%LC),NK-3 (%LC),NK-4 (%LC),NK (%LC),non-classical monocytes (%APC),PBs (%B),pDCs (%APC),R5 Th1-17 (%CD4),R5 Th1 (%CD4),R5 Th17 (%CD4),R5 Th2 (%CD4),smB (%B),smB (%Lymphocytes/live),TCM (% CD8),TCM (%CD4),TEM (% CD4),TEM (% CD8),TEMRA (% CD4),TEMRA (% CD8),Tfh (%CD4),Tfh effector (%CD4),Tfh effector (%CXCR5),Tfh memory (%CXCR5),Tfh memory (CD4%),Th1-17 (CD4),Th1 (%CD4),Th17 (%CD4),Th2 (CD4),Trans-a (%B),Trans-b (%B),Trans-c (%B),Trans-d (%B),Treg (%CD4)
CPI515,0.37,0.31,13.9,9.91,24.8,2.45,11.8,7.88,67.8,33.3,27.9,62.7,0.28,4.7,3.58,0.23,19.7,14.9,1.48,46.4,73.8,0.75,16.1,1.44,3.84,22.13,1.39,0.26,5.03,0.48,1.83,1.98,2.47,8.37,0.83,4.25,21.8,3.51,24.4,0.87,25.0,6.76,0.13,1.95,86.1,5.82,4.0,3.05,3.65,6.49,0.13,0.12,0.19,0.49,7.05
CPI545,0.57,0.58,7.68,11.8,8.4,1.0,5.1,8.89,79.0,53.4,20.3,55.4,0.36,1.42,3.82,0.11,14.0,1.53,0.18,83.8,82.3,2.08,3.55,1.67,1.14,8.44,1.73,2.02,4.6,,,,,5.32,0.63,2.72,13.9,3.5,9.5,0.3,3.96,3.38,0.19,5.7,63.4,2.14,,,,,8.49,11.4,7.02,7.35,7.43


In [37]:
df_data <- self_normalize_percentile(df_data, col_not_cal_per="studyCode")
print('At treatment: AFTER calling self_normalize_percentile()')
head(df_data$df, 3)

[1] "At treatment: AFTER calling self_normalize_percentile()"


studyCode,Activated CD4+ T cells(% CD4),Activated CD8+ T cells (% CD8),Anergic B (%B),B cells (%Lymphocytes/live),Bm (%B),Bm (%Lymphocytes/live),CD16+ mDCs (%APC),CD16neg mDCs (%APC),CD3 T cells (% Lymphocytes/live),CD4+ T cells (%Lymphocytes/live),CD8+ T cells (% of Lymphocytes/live),Classical Monocytes(%APC),Exhausted (%CD4),IgA smB (%B),IgG smB (%B),low density neutrophils (%APC),mDCs(%APC),MZ B (%B),MZ B (%Lymphocytes/live),Naïve (% CD8),Naive (%CD4),NK- 1 (%LC),NK-2 (%LC),NK-3 (%LC),NK-4 (%LC),NK (%LC),non-classical monocytes (%APC),PBs (%B),pDCs (%APC),R5 Th1-17 (%CD4),R5 Th1 (%CD4),R5 Th17 (%CD4),R5 Th2 (%CD4),smB (%B),smB (%Lymphocytes/live),TCM (% CD8),TCM (%CD4),TEM (% CD4),TEM (% CD8),TEMRA (% CD4),TEMRA (% CD8),Tfh (%CD4),Tfh effector (%CD4),Tfh effector (%CXCR5),Tfh memory (%CXCR5),Tfh memory (CD4%),Th1-17 (CD4),Th1 (%CD4),Th17 (%CD4),Th2 (CD4),Trans-a (%B),Trans-b (%B),Trans-c (%B),Trans-d (%B),Treg (%CD4)
CPI515,0.3333333,0.3333333,0.6666667,0.3333333,0.6666667,0.6666667,0.6666667,0.3333333,0.3333333,0.3333333,0.6666667,0.6666667,0.3333333,0.6666667,0.3333333,0.6666667,0.6666667,0.6666667,0.6666667,0.3333333,0.3333333,0.3333333,0.6666667,0.3333333,0.6666667,0.6666667,0.3333333,0.3333333,0.6666667,0.5,0.5,0.5,0.5,0.6666667,0.6666667,0.6666667,0.6666667,0.6666667,0.6666667,0.6666667,0.6666667,0.6666667,0.3333333,0.3333333,0.6666667,0.6666667,0.5,0.5,0.5,0.5,0.3333333,0.3333333,0.3333333,0.3333333,0.3333333
CPI545,0.6666667,0.6666667,0.3333333,0.6666667,0.3333333,0.3333333,0.3333333,0.6666667,0.6666667,0.6666667,0.3333333,0.3333333,0.6666667,0.3333333,0.6666667,0.3333333,0.3333333,0.3333333,0.3333333,0.6666667,0.6666667,0.6666667,0.3333333,0.6666667,0.3333333,0.3333333,0.6666667,0.6666667,0.3333333,,,,,0.3333333,0.3333333,0.3333333,0.3333333,0.3333333,0.3333333,0.3333333,0.3333333,0.3333333,0.6666667,0.6666667,0.3333333,0.3333333,,,,,0.6666667,0.6666667,0.6666667,0.6666667,0.6666667


### get_heatmap_data()

In [38]:
data <- get_heatmap_data(df_control = search_individuals(db, vec = c('HBD056', 'HBD044', 'HBD039', 'HBD040')), 
                         df_data = search_individuals(db, vec = c('CPI515', 'CPI545')))
print('After calling get_heatmap_data')
head(data$df, 3)
print('Columns')
data$cols_cal_self_percentile

`summarise()` has grouped output by 'studyCode'. You can override using the `.groups` argument.


[1] "After calling get_heatmap_data"


marker,CPI515,CPI545
Activated CD4+ T cells(% CD4),0.0,0.3
Activated CD8+ T cells (% CD8),0.1,0.3
Anergic B (%B),0.5,0.1


[1] "Columns"


### get_self_heatmap_data()

In [39]:
data <- get_self_heatmap_data(df_data = search_individuals(db, vec = c('CPI515', 'CPI545')))
print('After calling get_heatmap_data')
head(data$df, 3)
print('Columns')
# data$cols_cal_self_percentile

`summarise()` has grouped output by 'studyCode'. You can override using the `.groups` argument.


[1] "After calling get_heatmap_data"


marker,CPI515,CPI545
Activated CD4+ T cells(% CD4),0.3333333,0.6666667
Activated CD8+ T cells (% CD8),0.3333333,0.6666667
Anergic B (%B),0.6666667,0.3333333


[1] "Columns"


### calculate_CV()

In [40]:
df <- search_individuals(db, vec=c('CPI018', 'CPI043', 'CPI063'))

df <- concat_df(study_codes = unique(df$individualCode),
                df = df)
dim(df)
head(df, 2)

name,value,studyCode
B cells (%Lymphocytes/live),12.0,CPI043
Anergic B (%B),12.9,CPI043


In [41]:
df_CV <- calculate_CV(df)
df_CV <- df_CV[order(- df_CV$CVs), ]
dim(df_CV)
head(df_CV, 3)

Unnamed: 0,names,CVs
18,Tfh effector (%CXCR5),141.4214
17,Tfh effector (%CD4),141.4214
11,PBs (%B),101.1045


### get_df_CV()

In [42]:
df <- search_individuals(db, vec=c('CPI018', 'CPI043', 'CPI063'))
df_CV <- get_df_CV(df)
dim(df_CV)
head(df_CV, 3)

Unnamed: 0,names,CVs
18,Tfh effector (%CXCR5),141.4214
17,Tfh effector (%CD4),141.4214
11,PBs (%B),101.1045


### rank_CV_Disease_Control()

In [43]:
study_codes_control <- c('HBD086', 'HBD064', 'HBD083', 'HBD084', 'HBD114')
study_codes_data <- c('CPI318', 'CPI319', 'CPI365', 'CPI366')                 

df_control <- search_individuals(db, study_codes_control)
df_data    <- search_individuals(db, study_codes_data)

df_CV_rank <- rank_CV_Disease_Control(df_control, df_data)
dim(df_CV_rank)
head(df_CV_rank, 3)
write.csv(df_CV_rank, "df_CV_rank.csv")

Unnamed: 0,Rank,marker,Control_sample_CV,Disease_CV,Disease_over_Control
8,1,B-SM (%B),41.262142,102.87885,2.493299
6,2,B-SM-IgA (%B),45.98991,113.87382,2.476061
45,3,Tfh-mem (%CXCR5),6.721727,15.91114,2.36712


### filter_rank_top_n()

In [44]:
df_heatmap_top_n <- filter_rank_top_n(
             df_control = search_individuals(db, vec = c('HBD056', 'HBD044', 'HBD039')),
             df_data = search_individuals(db, vec = c('CPI515', 'CPI545', 'CPI255')),
             top_n = 5,
             set_index=TRUE
)
df_heatmap_top_n

`summarise()` has grouped output by 'studyCode'. You can override using the `.groups` argument.


Unnamed: 0,CPI255,CPI515,CPI545
Bm (%B),0,0.6666667,0
MZ B (%B),0,1.0,0
MZ B (%Lymphocytes/live),0,1.0,0
TEMRA (% CD4),0,0.5555556,0
Trans-a (%B),1,0.0,1
