https://forgemia.inra.fr/migale/metabarfood/
. The
code used is available in the code chunks.
source("functions.R")
plot_divergence <- function(dataset, divergence_file){
p1<-ggplot(as_tibble(read.table(divergence_file, sep="\t", header=TRUE)) %>% mutate(SampleID=paste0(sample,"-",its)) %>% filter(tool == "DADA2_FROGS") %>% mutate(its=replace(its, its=="D1D2", "D1/D2")) %>% mutate(sample=str_replace(sample,"ADN","DNA")) %>% mutate(its=factor(its, levels=c("ITS1","ITS2","D1/D2","RPB2"))) , aes(x=sample, color=data, y=Divergence, fill=SampleID)) +
geom_boxplot() + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), legend.position = "none",axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
#stat_summary(fun = "median", geom = "point", aes(color = tool)) +
#scale_fill_manual(values=method_palette_wo_real, aesthetics = c("colour", "fill")) +
#stat_compare_means(ref.group = "FROGS", method = "wilcox.test", label="p.signif" ) +
ggtitle("Divergence (%) at species level", subtitle = "") +
scale_y_continuous(expand = expansion(0, 0)) +
scale_x_discrete(expand = expansion(0, 0)) #+
#expand_limits(y = 0)
return(p1)
}
plot_metrics <- function(dataset, divergence_file){
p2<-ggplot(as_tibble(read.table(divergence_file, sep="\t", header=TRUE)) %>% mutate(SampleID=paste0(sample,"-",its)) %>% filter(tool == "DADA2_FROGS") %>% mutate(its=replace(its, its=="D1D2", "D1/D2")) %>% mutate(sample=str_replace(sample,"ADN","DNA")) %>% mutate(data=str_replace(data,"ADN","DNA")), aes(x=sample, color=data,y=False.Negative, fill=SampleID)) + facet_grid(~its)+
geom_boxplot() + guides(fill = FALSE) +
scale_y_continuous(expand = expansion(0, 0)) +
scale_x_discrete(expand = expansion(0, 0)) +
theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), legend.position = "none") #+
#expand_limits(y = 0)
p3<-ggplot(as_tibble(read.table(divergence_file, sep="\t", header=TRUE)) %>% mutate(SampleID=paste0(sample,"-",its)) %>% filter(tool == "DADA2_FROGS") %>% mutate(its=replace(its, its=="D1D2", "D1/D2")) %>% mutate(sample=str_replace(sample,"ADN","DNA")), aes(x=sample, color=data, y=False.Positive, fill=SampleID)) + facet_grid(~its)+
geom_boxplot() + guides(fill = FALSE) +
scale_y_continuous(expand = expansion(0, 0)) +
scale_x_discrete(expand = expansion(0, 0)) +
theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), legend.position = "none") #+
#expand_limits(y = 0)
p4<-ggplot(as_tibble(read.table(divergence_file, sep="\t", header=TRUE)) %>% mutate(SampleID=paste0(sample,"-",its)) %>% filter(tool == "DADA2_FROGS") %>% mutate(its=replace(its, its=="D1D2", "D1/D2")) %>% mutate(sample=str_replace(sample,"ADN","DNA")), aes(x=sample, color=data, y=True.Positive, fill=SampleID)) + facet_grid(~its)+
geom_boxplot() + guides(fill = FALSE) +
scale_y_continuous(expand = expansion(0, 0)) +
scale_x_discrete(expand = expansion(0, 0)) +
theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), legend.position = "none") #+
#expand_limits(y = 0)
p <- ggarrange(p2, p3, p4, ncol=3, nrow=1, common.legend = TRUE, legend="right")
p <- annotate_figure(p,
top = text_grob("Metrics at species level")
)
return(p)
}
plot_precision_and_recall <- function(dataset, divergence_file){
infos <- as_tibble(read.table(divergence_file, sep="\t", header=TRUE)) %>% mutate(SampleID=paste0(sample,"-",its)) %>% filter(tool == "DADA2_FROGS") %>% mutate(its=replace(its, its=="D1D2", "D1/D2")) %>% mutate(sample=str_replace(sample,"ADN","DNA")) %>% mutate(data=str_replace(data,"ADN","DNA")) %>% mutate(Precision = (True.Positive/(True.Positive + False.Positive))) %>% mutate(Recall.Rate = (True.Positive/(True.Positive + False.Negative)))
p1<-ggplot(infos, aes(x=sample, color=data, y=Recall.Rate, fill=SampleID)) + facet_grid(~its)+
geom_boxplot() + guides(fill = FALSE) + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), legend.position = "none", axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) #+
#theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank()) #+
#expand_limits(y = 0)
p2<-ggplot(infos, aes(x=sample, color=data, y=Precision, fill=SampleID)) + facet_grid(~its)+
geom_boxplot() + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), legend.position = "none", axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))# +
#theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank()) #+
#expand_limits(y = 0)
return(ggarrange(p1, p2, ncol=2, nrow=1, common.legend = TRUE, legend="right"))
}
metabarfood <- TRUE
amplicons <- c("ITS1","ITS2", "D1D2", "RPB2")
types <- c("ADN","PCR")
absent_color <- "#F6CFB8"
absent_color <- "black"
partially_color <- "#b1c5c4"
partially_color <- "#DAF7A6"
perfect_color <- "#cce5ff"
perfect_color <- "#86a5a4"
perfect_color <- "#cce5ff"
#tools <- c("DADA2_FROGS", "FROGS", "USEARCH", "DADA2-pe", "DADA2-se", "QIIME-se", "QIIME-pe")
tools <- c("DADA2_FROGS")
dedicated_palette <- c("#cccc00", "#8eadac", "#000000", "#56B4E9", "#8a6479", "#8a8964", "#ad8e9f", "#adac8e")
method_palette <- setNames(
object = dedicated_palette,
nm = c("EXPECTED", "FROGS", "DADA2_FROGS", "USEARCH", "DADA2-se", "QIIME-se", "DADA2-pe", "QIIME-pe")
)
dedicated_palette_wo_real <- c("#8eadac", "#000000", "#56B4E9", "#8a6479", "#8a8964", "#ad8e9f", "#adac8e")
method_palette_wo_real <- setNames(
object = dedicated_palette_wo_real,
nm = c("FROGS", "DADA2_FROGS", "USEARCH", "DADA2-se", "QIIME-se", "DADA2-pe", "QIIME-pe")
)
This report aims at comparing amplicons (ITS1, ITS2, D1/D2 and RPB2) on different metrics.
if(file.exists("data/all_heatmap.rds")){
all_hmp <- readRDS("data/all_heatmap.rds")
draw(all_hmp, annotation_legend_side = "bottom", row_title = "", width = unit(20, "cm"),
column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.2, "cm"), annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent (0)"), legend_gp = gpar(fill=absent_color)),
Legend(at = 2, title = "", labels=c("Partially reconstructed (0.75)"), legend_gp = gpar(fill=partially_color)),
Legend(at = 3, title = "", labels=c("Perfect (1)"), legend_gp = gpar(fill=perfect_color))
)
)
}
Heatmap of expected species for each mock community and barcode marker using the DADA2_FROGS approach. Dark green represents perfectly reconstructed sequences (score of 1), light green partially reconstructed sequences (score of 0.75) and orange species that are missing (0). Density of results are indicated for each line, representing the ability of markers to be efficient.
sp <- "MEAT"
divergence_file <- file.path(sp,"divergence.tsv")
tool <- "DADA2_FROGS"
#cat(" \n###",tool, " \n\n")
htmp_list <- c()
htmp_list_presence <- c()
min <- 0
max <- 0
num <- 0
for(its in amplicons){
for(type in types){
path <- file.path(sp,its,type)
mat_file <- file.path(path,paste0(sp,"_",its,"_",type,"_abundmat.rds"))
mat_presence_file <- file.path(path,paste0(sp,"_",its,"_",type,"_presencemat.rds"))
mat <- readRDS(mat_file)
mat_presence <- readRDS(mat_presence_file)
submat <- mat %>% as_tibble() %>% select(tool) %>% as.matrix()
submat_presence <- mat_presence %>% as_tibble() %>% select(tool) %>% as.matrix()
rownames(submat) <- rownames(mat)
rownames(submat_presence) <- rownames(mat_presence)
submat[submat==0] <- NA
#submat_presence[submat_presence==0] <- NA
max <- max(max,max(submat,na.rm = TRUE))
its_refac <- its
if(its_refac=="D1D2"){
its_refac<-"D1/D2"
}
type_refac <- type
if(type_refac=="ADN"){
type_refac<-"DNA"
}
colnames(submat) <- paste0(its_refac," ",type_refac)
colnames(submat_presence) <- paste0(its_refac," ",type_refac)
ht <- build_heatmap_one_tool(submat, min, max)
ht_presence <- build_heatmap_one_tool_presence(submat_presence, 0, 1)
htmp_list <- c(htmp_list,ht)
htmp_list_presence <- c(htmp_list_presence,ht_presence)
if(num>0){
meat_mp <- cbind(meat_mp,submat_presence)
}else{
meat_mp <- submat_presence
}
num <- num+1
}
}
meat_hmp <- build_heatmap_one_tool_presence_amplicons(t(meat_mp), 0, 1, sp)
ha <- rowAnnotation(bar = anno_density(t(meat_mp),
#type = "violin",
#gp = gpar(fill = "#fa995f"),
border = FALSE,
axis_param = list(gp = gpar(fontsize = 5), at = c(0,0.75, 1), labels = c("0","0.75", "1"))
),
width = unit(1.5, "cm"),
show_annotation_name = FALSE,
border = FALSE)
meat_hmp <- meat_hmp + ha
l <- c()
for(i in htmp_list){
l <- l + i
}
l_presence <- c()
for(i in htmp_list_presence){
l_presence <- l_presence + i
}
cat(" \n### Abundance \n\n")
draw(l, row_title = "Expected species",
column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.1, "cm"), annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent"), legend_gp = gpar(fill=absent_color))
))
cat(" \n")
meat_presence <- l_presence
cat(" \n### Reconstruction \n\n")
draw(l_presence, row_title = "Expected species",
annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent (0)"), legend_gp = gpar(fill=absent_color)),
Legend(at = 2, title = "", labels=c("Partially reconstructed (0.75)"), legend_gp = gpar(fill=partially_color)),
Legend(at = 3, title = "", labels=c("Perfect (1)"), legend_gp = gpar(fill=perfect_color))
)
)
cat(" \n")
cat(" \n###"," Divergence, Precion, Recall rate", " \n")
print(plot_divergence(type, divergence_file) + facet_grid(~its))
cat(" \n")
print(plot_metrics(type, divergence_file))
cat(" \n")
print(plot_precision_and_recall(type, divergence_file))
cat(" \n")
sp <- "CHEESE"
divergence_file <- file.path(sp,"divergence.tsv")
tool <- "DADA2_FROGS"
#cat(" \n###",tool, " \n\n")
htmp_list <- c()
htmp_list_presence <- c()
min <- 0
max <- 0
num <- 0
for(its in amplicons){
for(type in types){
path <- file.path(sp,its,type)
mat_file <- file.path(path,paste0(sp,"_",its,"_",type,"_abundmat.rds"))
mat_presence_file <- file.path(path,paste0(sp,"_",its,"_",type,"_presencemat.rds"))
mat <- readRDS(mat_file)
mat_presence <- readRDS(mat_presence_file)
submat <- mat %>% as_tibble() %>% select(tool) %>% as.matrix()
submat_presence <- mat_presence %>% as_tibble() %>% select(tool) %>% as.matrix()
rownames(submat) <- rownames(mat)
rownames(submat_presence) <- rownames(mat_presence)
submat[submat==0] <- NA
#submat_presence[submat_presence==0] <- NA
max <- max(max,max(submat,na.rm = TRUE))
its_refac <- its
if(its_refac=="D1D2"){
its_refac<-"D1/D2"
}
type_refac <- type
if(type_refac=="ADN"){
type_refac<-"DNA"
}
colnames(submat) <- paste0(its_refac," ",type_refac)
colnames(submat_presence) <- paste0(its_refac," ",type_refac)
ht <- build_heatmap_one_tool(submat, min, max)
ht_presence <- build_heatmap_one_tool_presence(submat_presence, 0, 1)
htmp_list <- c(htmp_list,ht)
htmp_list_presence <- c(htmp_list_presence,ht_presence)
if(num>0){
cheese_mp <- cbind(cheese_mp,submat_presence)
}else{
cheese_mp <- submat_presence
}
num <- num+1
}
}
cheese_hmp <- build_heatmap_one_tool_presence_amplicons(t(cheese_mp), 0, 1, sp)
ha <- rowAnnotation(bar = anno_density(t(cheese_mp),
#type = "violin",
#gp = gpar(fill = "#fa995f"),
border = FALSE,
axis_param = list(gp = gpar(fontsize = 5), at = c(0,0.75, 1), labels = c("0","0.75", "1"))
),
width = unit(1.5, "cm"),
show_annotation_name = FALSE,
border = FALSE)
cheese_hmp <- cheese_hmp + ha
l <- c()
for(i in htmp_list){
l <- l + i
}
l_presence <- c()
for(i in htmp_list_presence){
l_presence <- l_presence + i
}
cat(" \n### Abundance \n\n")
draw(l, row_title = "Expected species",
column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.1, "cm"), annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent"), legend_gp = gpar(fill=absent_color))
))
cat(" \n")
cheese_presence <- l_presence
cat(" \n### Reconstruction \n\n")
draw(l_presence, row_title = "Expected species",
annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent (0)"), legend_gp = gpar(fill=absent_color)),
Legend(at = 2, title = "", labels=c("Partially reconstructed (0.75)"), legend_gp = gpar(fill=partially_color)),
Legend(at = 3, title = "", labels=c("Perfect (1)"), legend_gp = gpar(fill=perfect_color))
)
)
cat(" \n")
cat(" \n###"," Divergence, Precion, Recall rate", " \n")
print(plot_divergence(type, divergence_file) + facet_grid(~its))
cat(" \n")
print(plot_metrics(type, divergence_file))
cat(" \n")
print(plot_precision_and_recall(type, divergence_file))
cat(" \n")
sp <- "BREAD"
divergence_file <- file.path(sp,"divergence.tsv")
tool <- "DADA2_FROGS"
#cat(" \n###",tool, " \n\n")
htmp_list <- c()
htmp_list_presence <- c()
min <- 0
max <- 0
num <- 0
for(its in amplicons){
for(type in types){
path <- file.path(sp,its,type)
mat_file <- file.path(path,paste0(sp,"_",its,"_",type,"_abundmat.rds"))
mat_presence_file <- file.path(path,paste0(sp,"_",its,"_",type,"_presencemat.rds"))
mat <- readRDS(mat_file)
mat_presence <- readRDS(mat_presence_file)
submat <- mat %>% as_tibble() %>% select(tool) %>% as.matrix()
submat_presence <- mat_presence %>% as_tibble() %>% select(tool) %>% as.matrix()
rownames(submat) <- rownames(mat)
rownames(submat_presence) <- rownames(mat_presence)
submat[submat==0] <- NA
#submat_presence[submat_presence==0] <- NA
max <- max(max,max(submat,na.rm = TRUE))
its_refac <- its
if(its_refac=="D1D2"){
its_refac<-"D1/D2"
}
type_refac <- type
if(type_refac=="ADN"){
type_refac<-"DNA"
}
colnames(submat) <- paste0(its_refac," ",type_refac)
colnames(submat_presence) <- paste0(its_refac," ",type_refac)
ht <- build_heatmap_one_tool(submat, min, max)
ht_presence <- build_heatmap_one_tool_presence(submat_presence, 0, 1)
htmp_list <- c(htmp_list,ht)
htmp_list_presence <- c(htmp_list_presence,ht_presence)
if(num>0){
bread_mp <- cbind(bread_mp,submat_presence)
}else{
bread_mp <- submat_presence
}
num <- num+1
}
}
bread_hmp <- build_heatmap_one_tool_presence_amplicons(t(bread_mp), 0, 1, sp)
ha <- rowAnnotation(bar = anno_density(t(bread_mp),
#type = "violin",
#gp = gpar(fill = "#fa995f"),
border = FALSE,
axis_param = list(gp = gpar(fontsize = 5), at = c(0,0.75, 1), labels = c("0","0.75", "1"))
),
width = unit(1.5, "cm"),
show_annotation_name = FALSE,
border = FALSE)
bread_hmp <- bread_hmp + ha
l <- c()
for(i in htmp_list){
l <- l + i
}
l_presence <- c()
for(i in htmp_list_presence){
l_presence <- l_presence + i
}
cat(" \n### Abundance \n\n")
draw(l, row_title = "Expected species",
column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.1, "cm"), annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent"), legend_gp = gpar(fill=absent_color))
))
cat(" \n")
bread_presence <- l_presence
cat(" \n### Reconstruction \n\n")
draw(l_presence, row_title = "Expected species",
annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent (0)"), legend_gp = gpar(fill=absent_color)),
Legend(at = 2, title = "", labels=c("Partially reconstructed (0.75)"), legend_gp = gpar(fill=partially_color)),
Legend(at = 3, title = "", labels=c("Perfect (1)"), legend_gp = gpar(fill=perfect_color))
)
)
cat(" \n")
cat(" \n###"," Divergence, Precion, Recall rate", " \n")
print(plot_divergence(type, divergence_file) + facet_grid(~its))
cat(" \n")
print(plot_metrics(type, divergence_file))
cat(" \n")
print(plot_precision_and_recall(type, divergence_file))
cat(" \n")
sp <- "WINE"
divergence_file <- file.path(sp,"divergence.tsv")
tool <- "DADA2_FROGS"
#cat(" \n###",tool, " \n\n")
htmp_list <- c()
htmp_list_presence <- c()
min <- 0
max <- 0
num <- 0
for(its in amplicons){
for(type in types){
path <- file.path(sp,its,type)
mat_file <- file.path(path,paste0(sp,"_",its,"_",type,"_abundmat.rds"))
mat_presence_file <- file.path(path,paste0(sp,"_",its,"_",type,"_presencemat.rds"))
mat <- readRDS(mat_file)
mat_presence <- readRDS(mat_presence_file)
submat <- mat %>% as_tibble() %>% select(tool) %>% as.matrix()
submat_presence <- mat_presence %>% as_tibble() %>% select(tool) %>% as.matrix()
rownames(submat) <- rownames(mat)
rownames(submat_presence) <- rownames(mat_presence)
submat[submat==0] <- NA
#submat_presence[submat_presence==0] <- NA
max <- max(max,max(submat,na.rm = TRUE))
its_refac <- its
if(its_refac=="D1D2"){
its_refac<-"D1/D2"
}
type_refac <- type
if(type_refac=="ADN"){
type_refac<-"DNA"
}
colnames(submat) <- paste0(its_refac," ",type_refac)
colnames(submat_presence) <- paste0(its_refac," ",type_refac)
ht <- build_heatmap_one_tool(submat, min, max)
ht_presence <- build_heatmap_one_tool_presence(submat_presence, 0, 1)
htmp_list <- c(htmp_list,ht)
htmp_list_presence <- c(htmp_list_presence,ht_presence)
if(num>0){
wine_mp <- cbind(wine_mp,submat_presence)
}else{
wine_mp <- submat_presence
}
num <- num+1
}
}
wine_hmp <- build_heatmap_one_tool_presence_amplicons(t(wine_mp), 0, 1, sp)
ha <- rowAnnotation(bar = anno_density(t(wine_mp),
#type = "violin",
#gp = gpar(fill = "#fa995f"),
border = FALSE,
axis_param = list(gp = gpar(fontsize = 5), at = c(0,0.75, 1), labels = c("0","0.75", "1"))
),
width = unit(1.5, "cm"),
show_annotation_name = FALSE,
border = FALSE)
wine_hmp <- wine_hmp + ha
l <- c()
for(i in htmp_list){
l <- l + i
}
l_presence <- c()
for(i in htmp_list_presence){
l_presence <- l_presence + i
}
cat(" \n### Abundance \n\n")
draw(l, row_title = "Expected species",
column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.1, "cm"), annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent"), legend_gp = gpar(fill=absent_color))
))
cat(" \n")
wine_presence <- l_presence
cat(" \n### Reconstruction \n\n")
draw(l_presence, row_title = "Expected species",
annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent (0)"), legend_gp = gpar(fill=absent_color)),
Legend(at = 2, title = "", labels=c("Partially reconstructed (0.75)"), legend_gp = gpar(fill=partially_color)),
Legend(at = 3, title = "", labels=c("Perfect (1)"), legend_gp = gpar(fill=perfect_color))
)
)
cat(" \n")
cat(" \n###"," Divergence, Precion, Recall rate", " \n")
print(plot_divergence(type, divergence_file) + facet_grid(~its))
cat(" \n")
print(plot_metrics(type, divergence_file))
cat(" \n")
print(plot_precision_and_recall(type, divergence_file))
cat(" \n")
if(!file.exists("data/all_heatmap.rds")){
all_hmp <- meat_hmp + bread_hmp + cheese_hmp + wine_hmp
draw(all_hmp, row_title = "", width = unit(20, "cm"),
column_title_gp = gpar(fontsize = 16), ht_gap = unit(0.2, "cm"), annotation_legend_list = list(
Legend(at = 1, title = "", labels=c("Absent (0)"), legend_gp = gpar(fill=absent_color)),
Legend(at = 2, title = "", labels=c("Partially reconstructed (0.75)"), legend_gp = gpar(fill=partially_color)),
Legend(at = 3, title = "", labels=c("Perfect (1)"), legend_gp = gpar(fill=perfect_color))
)
)
saveRDS(all_hmp,"data/all_heatmap.rds")
}
divergence_file <- file.path("data/divergence_all_ecos.tsv")
divergence_file
a <- as_tibble(read.table(divergence_file, sep="\t", header=TRUE)) %>% mutate(SampleID=paste0(sample,"-",its)) %>% filter(tool == "DADA2_FROGS") %>% mutate(its=replace(its, its=="D1D2", "D1/D2")) %>% mutate(sample=str_replace(sample,"ADN","DNA"), SampleID=str_replace(SampleID,"ADN","DNA"), data=str_replace(data,"ADN","DNA"), SampleID=str_replace(SampleID,"D1D2","D1/D2")) %>% mutate(its=factor(its, levels=c("ITS1","ITS2","D1/D2","RPB2"))) %>% mutate(SAMPLEID = paste0(Ecosystem,"-",data))
p <- ggplot(a, aes(x=SAMPLEID, y=Divergence)) +
geom_boxplot() + facet_grid(Ecosystem + its ~ data,scales="free_x", margin=TRUE)
p
ggplot(a, aes(x=SampleID, color=data, y=Divergence)) +
geom_boxplot() +
stat_summary(fun = "median", geom = "point", aes(color = data)) + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), legend.position = "right")+ facet_grid(~Ecosystem+its, scales = "free_x") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank())
create_frogs_object <- function(path, marker, type, file, multifile){
biom <- file.path(path,marker,type,file)
physeq <- import_frogs(biom, taxMethod = "blast")
metadata_file <- file.path(path,marker,type,"metadata.tsv")
metadata <- read.table(metadata_file, row.names=1, header=TRUE, sep="\t", stringsAsFactors = FALSE)
sample_data(physeq) <- metadata
multiaff_file <- file.path(path,marker,type,multifile)
physeq <- update_frogs_physeq_with_multi(physeq, multiaff_file)
physeq = transform_sample_counts(physeq, function(x) x / sum(x) )
sample_names(physeq) <- glue::glue(paste("{sample_names(physeq)}","frogs",marker, sep="_"))
sample_data(physeq)$method <- "DADA2_FROGS"
sample_data(physeq)$Type <- type
sample_data(physeq)$Marker <- marker
physeq_aglom <- phyloseq.extended::fast_tax_glom(physeq, taxrank = "Species")
#physeq_aglom <- prune_taxa(tax_table(physeq_aglom)[,7] %in% references, physeq_aglom)
taxa_names(physeq_aglom) <- cbind(tax_table(physeq_aglom)[,7])
return(physeq_aglom)
}
update_frogs_physeq_with_multi <- function(physeq, multiaff_file){
infos <- readr::read_tsv(file = multiaff_file) %>%
as_tibble() %>%
distinct(`#observation_name`, .keep_all = T) %>%
select(`#observation_name`, blast_taxonomy) %>%
separate(blast_taxonomy, ";(?=[\\S])", into = c("Kingdom","Phylum","Class","Order","Family","Genus","Species")) %>%
#mutate(across(Kingdom:Species, ~ stringr::str_remove(.x, ".:"))) %>%
tibble::column_to_rownames(var = "#observation_name") %>%
as("matrix")
good <- tax_table(subset_taxa(physeq, Species != "Multi-affiliation" & Genus != "Multi-affiliation" & Family != "Multi-affiliation" & Order != "Multi-affiliation" & Class != "Multi-affiliation" & Phylum != "Multi-affiliation" & Kingdom != "Multi-affiliation" )) %>% as("matrix")
#na <- tax_table(subset_taxa(physeq, is.na(Species))) %>% as("matrix")
all_infos <- unique(rbind(infos,good))#,na) # case where two species are identical but are multi-affiliated
tax_table(physeq) <- all_infos
return(physeq)
}
keep_only_interest_species <- function(physeq, true_species){
tax_tab <- tax_table(physeq) %>% as("matrix")
tax_tab <- cbind(tax_tab[ , 1:7],
Status = ifelse(tax_tab[ , "Species"] %in% true_species, "true species", "artefact"))
tax_table(physeq) <- tax_tab
return(physeq)
}
path <- "MEAT/ITS1/ADN"
metadata_file <- file.path(path,"metadata.tsv")
path <- "MEAT"
metadata <- read.table(metadata_file, row.names=1, header=TRUE, sep="\t", stringsAsFactors = FALSE)
real_biom <- file.path("MEAT/ITS1/ADN/real.biom")
real <- create_phyloseq_object(real_biom, "real", metadata, "blast")
taxa_names(real) <- cbind(tax_table(real)[,7])
true_species <- as.character(tax_table(real)[ , "Species"])
tax_tab <- tax_table(real) %>% as("matrix")
tax_tab <- cbind(tax_tab[ , 1:7],
Status = ifelse(tax_tab[ , "Species"] %in% true_species, "true species", "artefact"))
tax_table(real) <- tax_tab
biomfile <- "dada2_frogs.biom"
multiafffile <- "dada2_frogs_multiaff.tsv"
frogs_its1_adn <- create_frogs_object(path,"ITS1", "ADN", biomfile, multiafffile)
frogs_its1_adn <- keep_only_interest_species(frogs_its1_adn, true_species)
frogs_its1_pcr <- create_frogs_object(path,"ITS1", "PCR", biomfile, multiafffile)
frogs_its1_pcr <- keep_only_interest_species(frogs_its1_pcr, true_species)
frogs_its2_adn <- create_frogs_object(path,"ITS2", "ADN", biomfile, multiafffile)
frogs_its2_adn <- keep_only_interest_species(frogs_its2_adn, true_species)
frogs_its2_pcr <- create_frogs_object(path,"ITS2", "PCR", biomfile, multiafffile)
frogs_its2_pcr <- keep_only_interest_species(frogs_its2_pcr, true_species)
frogs_d1d2_adn <- create_frogs_object(path,"D1D2", "ADN", biomfile, multiafffile)
frogs_d1d2_adn <- keep_only_interest_species(frogs_d1d2_adn, true_species)
frogs_d1d2_pcr <- create_frogs_object(path,"D1D2", "PCR", biomfile, multiafffile)
frogs_d1d2_pcr <- keep_only_interest_species(frogs_d1d2_pcr, true_species)
frogs_rpb2_adn <- create_frogs_object(path,"RPB2", "ADN", biomfile, multiafffile)
frogs_rpb2_adn <- keep_only_interest_species(frogs_rpb2_adn, true_species)
frogs_rpb2_pcr <- create_frogs_object(path,"RPB2", "PCR", biomfile, multiafffile)
frogs_rpb2_pcr <- keep_only_interest_species(frogs_rpb2_pcr, true_species)
sample_data(frogs_its1_adn)$Type <- "DNA"
sample_data(frogs_its2_adn)$Type <- "DNA"
sample_data(frogs_d1d2_adn)$Type <- "DNA"
sample_data(frogs_rpb2_adn)$Type <- "DNA"
all <- merge_phyloseq(real, frogs_its1_adn, frogs_its1_pcr, frogs_its2_adn, frogs_its2_pcr, frogs_d1d2_adn, frogs_d1d2_pcr, frogs_rpb2_adn, frogs_rpb2_pcr)
sample_data(all)$Type <- factor(sample_data(all)$Type, levels=c("DNA","PCR"))
sample_data(all)$Marker <- factor(sample_data(all)$Marker, levels=c("ITS1","ITS2","D1D2","RPB2"))
Method <- get_variable(all, "method")
p <- plot_heatmap(
all,
"NMDS", "bray", "Type", taxa.label = "Species",
low="#66CCFF", high="#000033", na.value="white", title = paste0("Relative abundances with ",tool), sample.order = row.names(sample_data(all)), taxa.order = sort(true_species, decreasing=TRUE))
p <- p + facet_grid(~Marker, scales = "free") + guides(fill=guide_legend(title="White: 0"))
cat("\n")
print(p)
cat("\n")