
Commit

Add files via upload
yxr123321 committed Mar 28, 2024
1 parent 28cf665 commit 370f52a
Showing 10 changed files with 479 additions and 0 deletions.
5 changes: 5 additions & 0 deletions script/ASV.sh
@@ -0,0 +1,5 @@
#!/bin/bash
# ASV pipeline: DADA2 denoising, table clean-up, and closed-reference assignment
Rscript dada2.r                                   # infer ASVs and write table.rechim.xls
sed -i 's/"//g' table.rechim.xls                  # strip the quotes added by write.table()
perl dada2normal.pl table.rechim.xls              # reshape the DADA2 output table (helper script)
perl sample_order.pl asv_table.tmp.xls raw.fq.list asv_table.xls   # reorder samples to match raw.fq.list
usearch11 -closed_ref asv_rep.fasta -db silva_MBPD.fasta -strand both -otutabout close/otu_rep.txt -tabbedout close/closed.txt   # closed-reference matching against the silva_MBPD reference database
48 changes: 48 additions & 0 deletions script/dada2.r
@@ -0,0 +1,48 @@
library(Rcpp)
library(crayon)
library(withr)
library(ggplot2)
library(BiocGenerics)
library(S4Vectors)
library(IRanges)
library(XVector)
library(GenomeInfoDb)
library(matrixStats)
library(Biobase)
library(Matrix)
library(latticeExtra)
library(reshape2)
library(dada2)
##----filenames-------------
path <- "YourPath" # directory containing the demultiplexed .fq.gz files
fns <- list.files(path, pattern="\\.fq\\.gz$")
sample.names <- sapply(strsplit(basename(fns), "[.]"), `[`, 1)
sample.names
##----filter and trim-------
filtpath <- file.path(path, "filtered")
filterAndTrim(file.path(path,fns), file.path(filtpath,fns), maxEE=1, truncQ=2, rm.phix=TRUE, compress=TRUE, verbose=TRUE, multithread=TRUE)
filts <- list.files(filtpath, pattern="fq", full.names=TRUE)
names(filts) <- sample.names
##----learn Errors rate----------
set.seed(100)
err <- learnErrors(filts, nbases = 1e8, multithread=TRUE, randomize=TRUE)
##----Infer sequence variants----
dds <- vector("list", length(sample.names))
names(dds) <- sample.names
for(sam in sample.names) {
cat("Processing:", sam, "\n")
derep <- derepFastq(filts[[sam]]) ##Dereplication
dds[[sam]] <- dada(derep, err=err, multithread=TRUE) ##dada2
}
##----Construct sequence table---
seqtab <- makeSequenceTable(dds)
saveRDS(seqtab, "./seqtab.rds")
write.table(t(seqtab),"table.xls",sep=" ") # space-separated, consistent with table.rechim.xls below
# Merge multiple runs (if necessary)
#st1 <- readRDS("path/to/run1/output/seqtab.rds")
#st2 <- readRDS("path/to/run2/output/seqtab.rds")
#st3 <- readRDS("path/to/run3/output/seqtab.rds")
#st.all <- mergeSequenceTables(st1, st2, st3)
##----Remove chimeras------------
seqtabrechim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE)
write.table(t(seqtabrechim),"table.rechim.xls",sep=" ")
32 changes: 32 additions & 0 deletions script/effect_size.R
@@ -0,0 +1,32 @@
library(ggplot2)
meta.df <- read.csv("data/RR.csv",row.names = 1) # per-factor effect sizes with estimate, ci.lb, ci.ub, size and star columns
ggplot(meta.df,mapping = aes(x=factor,y=estimate,fill=group))+
geom_hline(yintercept=0,linetype = "dashed",size=0.2)+
geom_errorbar(position=position_dodge(-0.8),aes(ymin = ci.lb, ymax = ci.ub), width=0.2,size=0.3)+
geom_point(position=position_dodge(-0.8), size=3, stroke = 0.3,shape=21)+scale_fill_manual(values = CS_cols)+
geom_text(aes(x = factor, y = ci.ub+0.015, label = size),
position = position_dodge(width = -0.8),vjust = 0.4, hjust=0.4, size = 2, check_overlap = FALSE)+
geom_text(aes(x = factor, y = ci.ub+0.04, label = star),
position = position_dodge(width = -0.8),vjust = 0.4, hjust=0.4, size = 2, check_overlap = FALSE)+
scale_x_discrete(limits=rev(c("Total","Dryland","Wetland","Gramineae","Leguminosae","Solanaceae")))+
labs(x = " ", y = "Response ratio",colour = 'black')+
theme(legend.title = element_blank(),
legend.position=c(0.2,0.94),
#legend.direction = "horizontal",
legend.key = element_rect(fill = "white",size = 2),
legend.key.width = unit(0.5,"lines"),
legend.key.height= unit(0.8,"lines"),
legend.background = element_blank(),
legend.text=element_text(size=6),
panel.background = element_rect(fill = 'white', colour = 'white'),
axis.title=element_text(size=9),
#axis.title.x = element_blank(),
#axis.text.x = element_blank(),
axis.text.y = element_text(colour = 'black', size = 8),
axis.text.x = element_text(colour = 'black', size = 8),
axis.line = element_line(colour = 'black',size=0.4),
axis.line.y = element_blank(),
axis.ticks = element_line(colour = 'black',size=0.4),
axis.ticks.y = element_blank())+
guides(fill = "none")+
  coord_flip()+ggtitle("Bacterial diversity")+theme(plot.title = element_text(hjust = 0.5,size = 9,face = "bold"))
# Optional add-on (disabled): drop the y-axis title, labels and ticks for aligned multi-panel figures
# + theme(axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())
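Note: the plot above expects a fill palette `CS_cols` and `size`/`star` columns in `data/RR.csv`, none of which are defined in this commit. A minimal sketch of how they might be prepared, assuming the csv carries the columns produced by coef(summary(rma(...))) in script/meta_analysis.R; the palette values and the star rule are illustrative assumptions, not taken from the repository:

library(dplyr)
# Hypothetical palette; the actual CS_cols vector used above is defined elsewhere in the workflow
CS_cols <- c("Bacteria" = "#4477AA", "Fungi" = "#EE6677")
# Assumed rule for the significance stars drawn above each point (pval as returned by coef(summary(rma(...))))
meta.df <- meta.df %>%
  mutate(star = case_when(pval < 0.001 ~ "***",
                          pval < 0.01  ~ "**",
                          pval < 0.05  ~ "*",
                          TRUE         ~ ""))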
23 changes: 23 additions & 0 deletions script/funnel.R
@@ -0,0 +1,23 @@
library(meta)
library(metafor)

richness=read.csv("data/all_shannon.csv",header=T)

d1<-escalc(measure="ROM",data=richness,m1i=average.y,sd1i=sd.y,n1i=n.y,m2i=average.x,sd2i=sd.x,n2i=n.x)

total1<-rma(yi,vi, data=d1, method="DL")
funnel(total1,main="Bacterial shannon")

### Begg's rank correlation test #####
ranktest(total1)[2]   # p-value of the rank correlation (Begg's) test


### Trim-and-fill method
### Duval & Tweedie (2000). Trim and fill: A simple funnel-plot-based method
### of testing and adjusting for publication bias
### in meta-analysis. Biometrics, 56(2), 455–463.

summary(total1)
t1<-trimfill(total1,estimator="R0")
funnel(t1)
summary(t1)
30 changes: 30 additions & 0 deletions script/meta_analysis.R
@@ -0,0 +1,30 @@
library(metafor)
library(magrittr)
library(tidyr)
library(dplyr)
make_pct <- function(x) (exp(x) - 1) * 100 # back-transform a log response ratio into percent change

richness=read.csv("data/all_shannon.csv",header=T)
d2<-escalc(measure="ROM",data=richness,m1i=average.y,sd1i=sd.y,n1i=n.y,m2i=average.x,sd2i=sd.x,n2i=n.x)
###Total####
total <- rma(yi,vi, data=d2,method = "DL")
total
total.n <- d2 %>% drop_na(.,yi) %>% summarise(n = n())
total.df <- coef(summary(total)) %>% mutate(type="",
factor="Total",
size=total.n$n)
####Land use#####
Land.use<-rma(yi, vi, mods=~Crop_Managment -1, data=d2,method = "DL") # 'Crop_Managment' matches the column spelling in all_shannon.csv
Land.use.n <- d2 %>% drop_na(.,yi) %>% group_by(Crop_Managment) %>% summarise(n = n()) %>% drop_na()
Land.use.df <- coef(summary(Land.use)) %>% mutate(type="Land use",
factor=levels(as.factor(d2$Crop_Managment)),
size=Land.use.n$n)
##Crop#####
#d2 <- subset(d2,Crop_Managment!='Wetland')
Crop<-rma(yi, vi, mods=~Crop_Family -1, data=d2,method = "DL")
Crop.n <- d2 %>% drop_na(.,yi) %>% group_by(Crop_Family) %>% summarise(n = n()) %>% drop_na()
Crop.df <- coef(summary(Crop)) %>% mutate(type="Crops",
factor=levels(as.factor(Crop.n$Crop_Family)),
size=Crop.n$n)
meta.df <- bind_rows(total.df,Land.use.df,Crop.df)
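make_pct() is defined at the top of this script but never called here. A minimal sketch of the intended back-transformation from the pooled log response ratios to percent change; the column names follow the coef(summary()) output used above, and applying it to meta.df is an assumption:

# Back-transform the pooled log response ratios to percent change (illustrative sketch)
meta.pct <- meta.df %>%
  mutate(pct_change = make_pct(estimate),   # percent change in the treatment relative to the control
         pct_ci.lb  = make_pct(ci.lb),
         pct_ci.ub  = make_pct(ci.ub))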

7 changes: 7 additions & 0 deletions script/network.R
@@ -0,0 +1,7 @@
source("zi.pi.R")
source("network_analysis.R")
source("network_stat.R")
load("CK.RData")
CK1 <- network_analysis(CK,3,10,0.8,"spearman",F)
temp1 <- as.data.frame(CK1[1])
write.table(temp1,file = "temp1.txt",sep = "\t")
156 changes: 156 additions & 0 deletions script/network_analysis.R
@@ -0,0 +1,156 @@
#### Version 1.2.0 ###
### Author: Wang Ningqi ####
#' Conduct network analysis
#' @description A convenient and fast network analysis function, with output results suitable for Cytoscape and Gephi
#' @param input Input data frame with OTUs/genes/taxa in rows and sample IDs in columns; at least 5 replicates (more than 8 replicates are recommended).
#' @param inputtype Input data frame type:
#'
#' 1: data frame with OTU IDs in the first column and taxonomy in the last column
#'
#' 2: data frame with OTU IDs/taxonomy in the first column
#'
#' 3: data frame of all-numeric values
#'
#' @param input2 An optional second input data frame with OTUs/genes/taxa in rows and sample IDs in columns. Default: NULL
#' @param input2type Type of the second input data frame; coded the same way as `inputtype`. Default: NULL
#' @param n Only keep OTUs/genes/taxa present in at least n samples
#' @param threshold Absolute correlation (r) value below which edges are discarded
#' @param method A character string indicating which correlation coefficient is to be computed; one of "pearson" or "spearman"
#' @param display Logical; whether to display a preview plot of the network based on igraph. F is recommended for a first attempt in case there are too many vertices and edges.
#'
#' @details
#'
#' 1. The correlation algorithm has been optimized for speed: computing the correlation and p-value matrices for a data frame with more than 400 samples and 10,000 OTUs takes less than 2 minutes on a dual-core i5 computer.
#' However, too many vertices (>2000) or links (>10000) may slow down the statistics and visualization, so for a first attempt we recommend setting the `display` parameter to `F` to get a preview.
#' You can then adjust the n/threshold/method parameters to generate a suitable network for visualization.
#'
#' 2. The preview plot is meant to help you tune the network. In general, a global figure (as shown in the examples) with fewer than 1000 vertices and 5000 edges/links
#' is recommended. Furthermore, we recommend exporting the statistics and adjacency tables and using software such as Cytoscape or Gephi for better visualization.
#'
#' 3. \code{\link{left_join}} from \code{\link{dplyr}} can be used to match OTU/gene/taxa annotations to the output results for further analysis.
#'
#' @note
#' 1. There should be at least 5 replicates; more than 8 are recommended. See details in \code{\link{rcorr}}.
#'
#' 2. If there are too many edges/links, or the plot is not a global network, you can stop the process immediately to prevent wasting time.
#'
#' 3. The packages magrittr, tidyr, \code{\link{Hmisc}} (correlation analysis), \code{\link{fdrtool}} (p-value adjustment), and \code{\link{igraph}} (network construction for statistics and visualization) are required by this function.
#' @author Wang Ningqi <2434066068@qq.com>
#' @return A list containing a statistics table of network vertices/nodes, an adjacency table, and the complete adjacency matrix; a preview plot of the network in the plot window; and an igraph object (named `igraph1`) in the global environment.
#' @export
#'
#' @examples
#' ### data preparation ###
#' data(testotu)
#' rownames(testotu)<-testotu[,1]
#' inputotu<-testotu[,-c(1,ncol(testotu))]
#' head(inputotu)
#'
#' ###one input network analysis###
#' network_result<-network_analysis(inputotu,3,10,0.9,"spearman",T)
#'
#' network_stat<- as.data.frame(network_result[1]) ##vertices table
#' head(network_stat)
#' network_adjacency<-as.data.frame(network_result[2]) ##adjacency table
#' head(network_adjacency)
#' network_matrix<-as.data.frame(network_result[3]) ##complete adjacency matrix
#' network_matrix[1:10,1:10]
#' network_stat(igraph1) ##In case you want to see statistics again or do other analysis based on igraph.
#'
#' ###two inputs network analysis###
#' inputotu1<- inputotu[1:456,]
#' inputotu2<- inputotu[524:975,]
#' network_result<-network_analysis(input=inputotu1,inputtype=3,input2=inputotu2,input2type=3,n=10,threshold=0.85,method="spearman",display=T)
#'
#'
#' #### incorrect demonstration!! ##
#' ### WARNING: it may take too long to create the graph!! ###
#'
#' network_result<-network_analysis(inputotu,3,3,0.8,"spearman",T)
#'
#' # Total edges/links: 10199
#' # Total vertices: 826
#' ## too many edges and not a global network ####
#'
network_analysis<-function(input,inputtype,n,threshold,method,display,input2,input2type){
require(igraph);require(fdrtool);require(tidyr);require(Hmisc);require(magrittr)
if(inputtype==1){input1<-input[,-c(1,ncol(input))];rownames(input1)<-input[,1]}else
if(inputtype==2){input1<-input[,-1];rownames(input1)<-input[,1]}else
if(inputtype==3){input1<-input}else{stop("Please choose a correct inputtype (1, 2, or 3)")}
zero_count=function(input){length(which(input==0)) %>% return()}
zerocount=apply(input1,1,zero_count)
input1=input1[which(zerocount<=(ncol(input1)-n)),]
input1=input1[which(rowSums(input1)>0),]
if(missing(input2)){
corr=rcorr(as.matrix(t(input1)),type=method)
cor.p=corr$P;cor.p[is.na(cor.p)]<- 0
fdr=fdrtool(as.numeric(cor.p), statistic="pvalue",plot=F,verbose = F) ##Global fdr correlation##
cor.r=corr$r
cor.q=matrix(fdr$qval,ncol=ncol(cor.p),nrow=nrow(cor.p));cor.q[is.nan(cor.q)]<- 0
cor.r[cor.q>0.05|abs(cor.r)<threshold] = 0 ## filter by q value and r threshold ##
cor.r[is.na(cor.r)]<-0 ## replace NA with 0 ##
cor.r[which(cor.r>0.9999)]<-0 ## set the diagonal 1s to 0 ##
cor.r1=cor.r;cor.r1[which(cor.r1!=0)]<-1 ## undirected (presence/absence) matrix for igraph ##
cutoff=which(rowSums(cor.r1)== 0)
if(length(cutoff)==0){cor.r=cor.r;cor.r1=cor.r1}else{
cor.r=cor.r[-c(cutoff),-c(cutoff)];cor.r1=cor.r1[-c(cutoff),-c(cutoff)]}
cor.r1[which(cor.r>0)]<-1;cor.r1[which(cor.r<0)]<- -1;
adj_matrix=cor.r1
cor.r1[lower.tri(cor.r1,diag = T)]<-NA}else{
#####input&input2####
if(input2type==1){input3<-input2[,-c(1,ncol(input2))];rownames(input3)<-input2[,1]}else
if(input2type==2){input3<-input2[,-1];rownames(input3)<-input2[,1]}else
if(input2type==3){input3<-input2}else{stop("Please choose a correct input2type (1, 2, or 3)")}
zero_count=function(input){length(which(input==0)) %>% return()}
zerocount=apply(input3,1,zero_count)
input3=input3[which(zerocount<=(ncol(input3)-n)),]
input3=input3[which(rowSums(input3)>0),]
corr=rcorr(x=as.matrix(t(input1)),y=as.matrix(t(input3)),type=method)
cor.p=corr$P[1:nrow(input1),(nrow(input1)+1):(nrow(input1)+nrow(input3))];cor.p[is.na(cor.p)]<- 0
fdr=fdrtool(as.numeric(cor.p), statistic="pvalue",plot=F,verbose = F) ##Global fdr correlation##
cor.q=matrix(fdr$qval,ncol=ncol(cor.p),nrow=nrow(cor.p));cor.q[is.nan(cor.q)]<- 0
cor.r=corr$r[1:nrow(input1),(nrow(input1)+1):(nrow(input1)+nrow(input3))]
cor.r[cor.q>0.05|abs(cor.r)<threshold] = 0 ## filter by q value and r threshold ##
cor.r[is.na(cor.r)]<-0 ## replace NA with 0 ##
cor.r[which(cor.r>0.9999)]<-0 ## set correlations of exactly 1 to 0 ##
cor.r1=cor.r;cor.r1[which(cor.r1!=0)]<-1 ## undirected (presence/absence) matrix for igraph ##
cutoff1=which(rowSums(cor.r1)== 0);cutoff2=which(colSums(cor.r1)== 0)
cor.r=cor.r[-c(cutoff1),-c(cutoff2)];cor.r1=cor.r1[-c(cutoff1),-c(cutoff2)]
cor.r1[which(cor.r>0)]<-1;cor.r1[which(cor.r<0)]<- -1
adj_matrix=cor.r1
}
### build the adjacency (edge list) table ###
source<-rownames(cor.r1)
adjacency<-cor.r1%>%cbind(source,.) %>%as.data.frame %>% gather("target","value",-source) %>% subset(.,.$value!=0)
adjacency$value=as.numeric(adjacency$value)
igraph1<<-graph_from_data_frame(adjacency,directed = F) ## assigned to the global environment ##
network_stat(igraph1)
if(length(E(igraph1))>10000){cat("\nWarning: too many edges/links! Better stop the process.\n")}
if(display==T){
plot(igraph1,main="Co-occurrence network",vertex.frame.color=NA,vertex.label=NA,edge.width=1,
vertex.size=5,edge.lty=1,edge.curved=TRUE,margin=c(0,0,0,0))}
V(igraph1)$degree<-igraph::degree(igraph1)
set.seed(999)
V(igraph1)$modularity <- membership(cluster_fast_greedy(igraph1))%>%as.numeric()
nodes_list <- data.frame(nodes_id = V(igraph1)$name, degree = V(igraph1)$degree, modularity = V(igraph1)$modularity)
nodes_list<-nodes_list[order(nodes_list$nodes_id,decreasing = F),]
if(missing(input2)){
cor.tempr=cor.r[order(rownames(cor.r),decreasing = F),];cor.tempr=cor.tempr[,order(colnames(cor.tempr),decreasing = F)]
rownames(nodes_list)=rownames(cor.tempr)
zi_pi8 <- zi.pi(nodes_list, cor.tempr, degree = 'degree', modularity_class = 'modularity')}else
{cor.tempr=matrix(rep(0,nrow(nodes_list)^2),nrow=nrow(nodes_list),ncol=nrow(nodes_list),byrow=T)
cor.tempr[1:nrow(cor.r),1:ncol(cor.r)]=cor.r
rownames(cor.tempr)=c(rownames(cor.r),colnames(cor.r))
colnames(cor.tempr)=c(colnames(cor.r),rownames(cor.r))
cor.tempr=cor.tempr[order(rownames(cor.tempr),decreasing = F),];cor.tempr=cor.tempr[,order(colnames(cor.tempr),decreasing = F)]
rownames(nodes_list)=rownames(cor.tempr)
zi_pi8 <- zi.pi(nodes_list, cor.tempr, degree = 'degree', modularity_class = 'modularity')
}

output=data.frame(nodes_id = V(igraph1)$name, node_degree = V(igraph1)$degree, node_betw=betweenness(igraph1),
node_evcent=evcent(igraph1,scale = F)$vector,Clustering_coefficient=transitivity(igraph1,type="local"),
No.module = V(igraph1)$modularity,Zi=zi_pi8$Zi,Pi=zi_pi8$Pi)
outlist=c(list(output),list(adjacency),list(adj_matrix)) %>%return()
}
42 changes: 42 additions & 0 deletions script/network_stat.R
@@ -0,0 +1,42 @@
#### network_stat Version 1.0.0 ###
### Author: Wang Ningqi ####
#### total degree ###
#' Igraph network statistics
#'
#' @param input Igraph graph object
#'
#' @return Network summary statistics, printed to the console
#' @export
#' @author Wang Ningqi <2434066068@qq.com>
#' @examples network_stat(igraph1) ## see details in \code{\link{network_analysis}}
network_stat=function(input){require(igraph)
cat("Total degree:",sum(igraph::degree(input)))                     # total degree
cat("\nTotal edges/links:",length(E(input)))                        # size of the graph (number of edges)
cat("\nTotal vertices:",length(V(input)))                           # order of the graph (number of vertices)
cat("\nConnectance:",edge_density(input,loops=FALSE))               # realized fraction of possible edges
cat("\nAverage degree:",mean(igraph::degree(input)))                # degree/vertices
cat("\nDiameter:",diameter(input, directed = FALSE, unconnected = TRUE, weights = NULL))
cat("\nAverage path length:",average.path.length(input))
cat("\nGlobal clustering coefficient:",transitivity(input))
cat("\nNumber of clusters:",no.clusters(input))
cat("\nBetweenness centralization:",centralization.betweenness(input)$centralization)
cat("\nDegree centralization:",centralization.degree(input)$centralization,"\n")}
18 changes: 18 additions & 0 deletions script/wilcox_and_regression.R
@@ -0,0 +1,18 @@
#### Wilcoxon rank-sum tests (CK vs. Treat, per taxon) ####
aa <- data.frame()
for (i in 1:ncol(CK)) {   # CK and Treat: sample-by-taxon abundance tables with matching columns
bb=data.frame(CK=mean(CK[,i]),Treat=mean(Treat[,i]),p=wilcox.test(CK[,i],Treat[,i],paired =F)$p.value)
aa <- rbind(aa,bb)
}
row.names(aa) <- species$Group.1 # taxon names from the upstream aggregation step
aa <- subset(aa,aa$p<0.05)       # keep taxa that differ significantly between CK and Treat
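CK, Treat, and species are not created in this commit. One plausible way they could be prepared, purely as an illustration; the file names, the taxonomy vector, and the sample-name patterns are assumptions:

## Hypothetical preparation of CK / Treat / species (illustration only)
otu <- read.table("otu_table.txt", header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)  # OTUs in rows, samples in columns (assumed file)
tax_genus <- read.table("otu_taxonomy.txt", header = TRUE)$genus                                   # genus label per OTU (assumed file)
species <- aggregate(otu, by = list(tax_genus), FUN = sum)        # Group.1 holds the taxon names used above
abund   <- as.data.frame(t(species[ , -1]))                       # samples in rows, taxa in columns
CK      <- abund[grep("CK",    rownames(abund)), ]                # control samples (assumed sample naming)
Treat   <- abund[grep("Treat", rownames(abund)), ]                # treated samples (assumed sample naming)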

### exponential decay regression ####
library(ggplot2); library(ggpmisc); library(magrittr)  # stat_poly_eq() is provided by ggpmisc
paired_data %>% subset(.,pd!=0) %>%
ggplot(aes(x=all_shan,y=pd,color=type))+
geom_point()+theme_zg()+                                # theme_zg(): custom plotting theme defined elsewhere
geom_smooth(method = "lm",formula = y~exp(-x),size=.5,alpha=.2)+   # linear fit of pathogen abundance on exp(-diversity)
scale_color_manual(values = cols)+                      # cols: colour palette defined elsewhere
stat_poly_eq(formula = y ~exp(-x),aes(label = paste(..rr.label.., ..p.value.label..,sep = "~`,`~")),size=2.5)+
labs(y="Relative abundance",x="Bacterial diversity",title = "Total pathogens")
