https://forgemia.inra.fr/migale/metabarfood/
. The
code used is available in the code chunks.
This report contains complementary analyses performed for the METABARFOOD project.
We want to check if there is a correlation between abundance and amplicon length. We look at DNA and PCR samples separately, by plotting a linear regression.
amplicons <- c("ITS1","ITS2", "D1D2", "RPB2")
types <- c("ADN","PCR")
for(marker in c("ITS1","ITS2")){
cat(" \n##",marker, " \n\n")
for(type in types){
if(type=="ADN"){
cat(" \nDNA", " \n\n")
}else{
cat(" \n",type, " \n\n")
}
eco <- "MEAT"
if(!file.exists(paste(eco,marker,type,"dada2_frogs_physeq.rds",sep = "/"))){
biom <- paste(eco,marker,type,"dada2_frogs.biom",sep = "/")
physeq <- import_frogs(biom, taxMethod = "blast")
saveRDS(physeq,paste(eco,marker,type,"dada2_frogs_physeq.rds",sep = "/"))
}else{
physeq <- readRDS(paste(eco,marker,type,"dada2_frogs_physeq.rds",sep = "/"))
}
physeqr = transform_sample_counts(physeq, function(x) x / sum(x) )
physeqfr = filter_taxa(physeqr, function(x) sum(x) > .00005, TRUE)
tib <- psmelt(physeqfr) %>% as_tibble() %>% mutate(Length = nchar(OTU))
p1 <- ggplotRegression(lm(Abundance ~ Length, data = tib), eco, marker, type)
eco <- "CHEESE"
if(!file.exists(paste(eco,marker,type,"dada2_frogs_physeq.rds",sep = "/"))){
biom <- paste(eco,marker,type,"dada2_frogs.biom",sep = "/")
physeq <- import_frogs(biom, taxMethod = "blast")
saveRDS(physeq,paste(eco,marker,type,"dada2_frogs_physeq.rds",sep = "/"))
}else{
physeq <- readRDS(paste(eco,marker,type,"dada2_frogs_physeq.rds",sep = "/"))
}
physeqr = transform_sample_counts(physeq, function(x) x / sum(x) )
physeqfr = filter_taxa(physeqr, function(x) sum(x) > .00005, TRUE)
tib <- psmelt(physeqfr) %>% as_tibble() %>% mutate(Length = nchar(OTU))
p2 <- ggplotRegression(lm(Abundance ~ Length, data = tib), eco, marker, type)
eco <- "BREAD"
if(!file.exists(paste(eco,marker,type,"dada2_frogs_physeq.rds",sep = "/"))){
biom <- paste(eco,marker,type,"dada2_frogs.biom",sep = "/")
physeq <- import_frogs(biom, taxMethod = "blast")
saveRDS(physeq,paste(eco,marker,type,"dada2_frogs_physeq.rds",sep = "/"))
}else{
physeq <- readRDS(paste(eco,marker,type,"dada2_frogs_physeq.rds",sep = "/"))
}
physeqr = transform_sample_counts(physeq, function(x) x / sum(x) )
physeqfr = filter_taxa(physeqr, function(x) sum(x) > .00005, TRUE)
tib <- psmelt(physeqfr) %>% as_tibble() %>% mutate(Length = nchar(OTU))
p3 <- ggplotRegression(lm(Abundance ~ Length, data = tib), eco, marker, type)
eco <- "WINE"
if(!file.exists(paste(eco,marker,type,"dada2_frogs_physeq.rds",sep = "/"))){
biom <- paste(eco,marker,type,"dada2_frogs.biom",sep = "/")
physeq <- import_frogs(biom, taxMethod = "blast")
saveRDS(physeq,paste(eco,marker,type,"dada2_frogs_physeq.rds",sep = "/"))
}else{
physeq <- readRDS(paste(eco,marker,type,"dada2_frogs_physeq.rds",sep = "/"))
}
physeqr = transform_sample_counts(physeq, function(x) x / sum(x) )
physeqfr = filter_taxa(physeqr, function(x) sum(x) > .00005, TRUE)
tib <- psmelt(physeqfr) %>% as_tibble() %>% mutate(Length = nchar(OTU))
p4 <- ggplotRegression(lm(Abundance ~ Length, data = tib), eco, marker, type)
print(ggarrange(p1, p2, p3, p4, ncol=2, nrow=2, common.legend = TRUE, legend="right"))
cat("\n\n")
}
}
DNA
PCR
DNA
PCR
data <- readRDS("data/all_samples.rds")
data
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2566 taxa and 119 samples ]
## sample_data() Sample Data: [ 119 samples by 4 sample variables ]
## tax_table() Taxonomy Table: [ 2566 taxa by 7 taxonomic ranks ]
sample_variables(data)
## [1] "Eco" "SampleID" "Marker" "Type"
nsamples(data)
## [1] 119
For each marker, only samples having >1000 reads were kept for the analysis.
data_ITS1 <- subset_samples(data, Marker == "ITS1")
data_ITS1
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2566 taxa and 35 samples ]
## sample_data() Sample Data: [ 35 samples by 4 sample variables ]
## tax_table() Taxonomy Table: [ 2566 taxa by 7 taxonomic ranks ]
min(sample_sums(data_ITS1))
## [1] 266
data_ITS1_rare <- rarefy_even_depth(data_ITS1, sample.size = min(sample_sums(data_ITS1)), rngseed = 1234)
data_ITS1_rare
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 460 taxa and 35 samples ]
## sample_data() Sample Data: [ 35 samples by 4 sample variables ]
## tax_table() Taxonomy Table: [ 460 taxa by 7 taxonomic ranks ]
The final object ITS1 contains 34 samples.
data_ITS2 <- subset_samples(data, Marker == "ITS2")
data_ITS2
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2566 taxa and 35 samples ]
## sample_data() Sample Data: [ 35 samples by 4 sample variables ]
## tax_table() Taxonomy Table: [ 2566 taxa by 7 taxonomic ranks ]
min(sample_sums(data_ITS2))
## [1] 153
data_ITS2_rare <- rarefy_even_depth(data_ITS2, sample.size = min(sample_sums(data_ITS2)), rngseed = 1234)
data_ITS2_rare
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 322 taxa and 35 samples ]
## sample_data() Sample Data: [ 35 samples by 4 sample variables ]
## tax_table() Taxonomy Table: [ 322 taxa by 7 taxonomic ranks ]
The final object ITS1 contains 33 samples.
data_D1D2 <- subset_samples(data, Marker == "D1D2")
data_D1D2
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2566 taxa and 29 samples ]
## sample_data() Sample Data: [ 29 samples by 4 sample variables ]
## tax_table() Taxonomy Table: [ 2566 taxa by 7 taxonomic ranks ]
data_D1D2 <- subset_samples(data_D1D2, sample_names(data_D1D2) != "F4SR2_MEAT_D1D2")
min(sample_sums(data_D1D2))
## [1] 1085
data_D1D2_rare <- rarefy_even_depth(data_D1D2, sample.size = min(sample_sums(data_D1D2)), rngseed = 1234)
data_D1D2_rare
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 664 taxa and 28 samples ]
## sample_data() Sample Data: [ 28 samples by 4 sample variables ]
## tax_table() Taxonomy Table: [ 664 taxa by 7 taxonomic ranks ]
The final object D1D2 contains only 21 samples.
data_RPB2 <- subset_samples(data, Marker == "RPB2")
data_RPB2
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2566 taxa and 20 samples ]
## sample_data() Sample Data: [ 20 samples by 4 sample variables ]
## tax_table() Taxonomy Table: [ 2566 taxa by 7 taxonomic ranks ]
min(sample_sums(data_RPB2))
## [1] 14
data_RPB2_rare <- rarefy_even_depth(data_RPB2, sample.size = min(sample_sums(data_RPB2)), rngseed = 1234)
data_RPB2_rare
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 24 taxa and 20 samples ]
## sample_data() Sample Data: [ 20 samples by 4 sample variables ]
## tax_table() Taxonomy Table: [ 24 taxa by 7 taxonomic ranks ]
For RPB2, only CHEESE samples and one sample from WINE have enough sequencing depth. So we stopped analyzing this marker.
dist_bc_ITS1 <- distance(data_ITS1_rare, method = "bray")
PCoA_bc_ITS1 <- ordinate(data_ITS1_rare, method = "PCoA", distance = "bray")
pITS1 <- plot_ordination(data_ITS1_rare,ordination = PCoA_bc_ITS1, color = "Eco")
pITS1 + ggtitle("ITS1")
adonis2(formula = dist_bc_ITS1~Eco, data = data.frame(sample_data(data_ITS1_rare)))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_bc_ITS1 ~ Eco, data = data.frame(sample_data(data_ITS1_rare)))
## Df SumOfSqs R2 F Pr(>F)
## Eco 3 8.3985 0.57276 13.853 0.001 ***
## Residual 31 6.2648 0.42724
## Total 34 14.6634 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dist_bc_ITS2 <- distance(data_ITS2_rare, method = "bray")
PCoA_bc_ITS2 <- ordinate(data_ITS2_rare, method = "PCoA", distance = "bray")
pITS2 <- plot_ordination(data_ITS2_rare,ordination = PCoA_bc_ITS2, color = "Eco")
pITS2 + ggtitle("ITS2")
adonis2(formula = dist_bc_ITS2~Eco, data = data.frame(sample_data(data_ITS2_rare)))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_bc_ITS2 ~ Eco, data = data.frame(sample_data(data_ITS2_rare)))
## Df SumOfSqs R2 F Pr(>F)
## Eco 3 8.6562 0.59332 15.075 0.001 ***
## Residual 31 5.9333 0.40668
## Total 34 14.5895 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dist_bc_D1D2 <- distance(data_D1D2_rare, method = "bray")
PCoA_bc_D1D2 <- ordinate(data_D1D2_rare, method = "PCoA", distance = "bray")
pD1D2 <- plot_ordination(data_D1D2_rare,ordination = PCoA_bc_D1D2, color = "Eco")
pD1D2 + ggtitle("D1/D2")
adonis2(formula = dist_bc_D1D2~Eco, data = data.frame(sample_data(data_D1D2_rare)))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_bc_D1D2 ~ Eco, data = data.frame(sample_data(data_D1D2_rare)))
## Df SumOfSqs R2 F Pr(>F)
## Eco 3 7.3066 0.65087 14.914 0.001 ***
## Residual 24 3.9194 0.34913
## Total 27 11.2260 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Conclusion : in terms of capacity to discriminate samples based on their ecosystem of origin we can rank the markers as follow: D1/D2>ITS2>ITS1. However, given the low number of samples with exploitable data obtained with D1/D2 on real samples (21 out of 36), we should not recommend this marker. ITS1 and ITS2 exhibited similar results with a little advantage for ITS2.