In [1]:
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(pheatmap)
library(cowplot)
library(reshape2)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
# Read the per-cluster average log-expression table; the first CSV column is
# used for the row names.
avgLog2 <- read.csv(
  'figures/data_utils/somatic_average_log.csv',
  row.names = 1,
  stringsAsFactors = FALSE
)

# Remove the 'Germ.' and then the 'Somatic.' tag from the column labels. Both
# patterns are regular expressions, so the trailing '.' matches any character.
colnames(avgLog2) <- gsub('Somatic.', '', gsub('Germ.', '', colnames(avgLog2)))

# Relabel the 'preGC_II' column as 'preGC_IIb'.
colnames(avgLog2) <- replace(
  colnames(avgLog2),
  colnames(avgLog2) == 'preGC_II',
  'preGC_IIb'
)

In [3]:
# Load the TF measurement table; text columns stay character, not factor.
TF_interspecie <- read.csv(
  '../human_v2/figures_somatic/granulosa_crosspecie_TF_measurements_humanTFs.csv',
  stringsAsFactors = FALSE
)

In [4]:
# Clusters of interest, in the order listed. `cluster_order` keeps the complete
# list; `CLofInterest` keeps only the entries that are columns of avgLog2.
cluster_order <- c(
  'CoelEpi_LHX9', 'early_supporting', 'preGC_I',
  'OSE', 'preGC_IIa', 'preGC_IIb', 'granulosa'
)
CLofInterest <- intersect(cluster_order, colnames(avgLog2))

# One entry per distinct TF. The companion vector takes the `gene` value from
# the first row in which each TF appears, so both vectors line up by position.
TFofInterest <- unique(TF_interspecie$TF)
TFofInterest_monkey <- TF_interspecie$gene[!duplicated(TF_interspecie$TF)]

In [5]:
# Display the current column names of avgLog2 (interactive check only).
colnames(avgLog2)

In [6]:
# Restrict the table to the clusters of interest, then z-score every row
# across those clusters.
avgLog2 <- avgLog2[, CLofInterest]
rn <- colnames(avgLog2)
# apply() hands each row to scale() and stacks the results as columns, hence
# the transpose back to a rows-by-clusters layout.
avgLog2_scaled <- t(apply(as.matrix(avgLog2), 1, scale))
# apply() does not carry the cluster labels over, so restore them.
colnames(avgLog2_scaled) <- rn

In [7]:
# Zero-filled TF-by-cluster matrix; rows and columns are labelled so entries
# can be addressed by name.
M <- matrix(
  data = 0,
  nrow = length(TFofInterest),
  ncol = length(CLofInterest),
  dimnames = list(TFofInterest, CLofInterest)
)

In [8]:
# Copy the scaled expression of every TF that has a match in the expression
# table into the TF-by-cluster matrix. TFs without a match keep the value they
# have in the template matrix M.
# NOTE(review): the original comment said "mouse" while the variables are named
# "_monkey" - confirm which species this table describes.
MavgLog2 <- M

# TRUE where the species-specific gene symbol is a row of the expression table.
idx <- TFofInterest_monkey %in% rownames(avgLog2)

# Sanity check (auto-printed when run interactively): are the matched gene
# symbols, upper-cased, all among the matched TF names?
all(toupper(TFofInterest_monkey[idx]) %in% TFofInterest[idx])

# Rows are written under the TF name and read under the gene symbol. Both
# vectors are subset with the same mask, so one block assignment fills every
# cluster column at once; drop = FALSE keeps the matrix shape for a single hit.
MavgLog2[TFofInterest[idx], CLofInterest] <-
  avgLog2_scaled[TFofInterest_monkey[idx], CLofInterest, drop = FALSE]

In [9]:
# Diverging colour scale for the heatmap: blue for negative values, through
# white, to red for positive values.
paletteLength <- 50
myColor <- colorRampPalette(c(
  brewer.pal(n = 5, name = 'Blues')[4],
  "white",
  brewer.pal(n = 5, name = 'Reds')[4]
))(paletteLength)

# Range of the scaled values. A row with zero variance scales to NaN, and a
# single NaN would make min()/max() return NaN and seq() fail, so missing
# values are ignored here.
scaledRange <- range(avgLog2_scaled, na.rm = TRUE)

# pheatmap needs length(myBreaks) == paletteLength + 1.
# ceiling()/floor() split the palette around 0 for both even and odd lengths.
myBreaks <- c(
  seq(scaledRange[1], 0, length.out = ceiling(paletteLength / 2) + 1),
  seq(scaledRange[2] / paletteLength, scaledRange[2],
      length.out = floor(paletteLength / 2))
)


# Draw the TF-by-cluster heatmap without clustering rows or columns and write
# it straight to a PDF file.
pheatmap(
  MavgLog2,
  color = myColor,
  breaks = myBreaks,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  cellwidth = 10,
  cellheight = 10,
  filename = 'figures_somatic/granulosa_heatmap_humanTFs_Zscores.pdf'
)