0 25 50 75 100
Sample_5Sample_9Sample_15Sample_21Sample_33Sample_45Sample_71Sample_91Sample_95Sample_97
Relative abundance (%)
Cluster 1
0 25 50 75 100
Sample_17Sample_25Sample_29Sample_43Sample_47Sample_57Sample_59Sample_63Sample_79Sample_11
Relative abundance (%)
Cluster 2
0 25 50 75 100
Sample_83Sample_13Sample_23Sample_37Sample_41Sample_53Sample_65Sample_99
Relative abundance (%)
Cluster 3
0 25 50 75 100
Sample_35Sample_51Sample_55Sample_67Sample_73Sample_75
Relative abundance (%)
Cluster 4
0 25 50 75 100
Sample_31Sample_49Sample_61Sample_69Sample_81Sample_87Sample_89
Relative abundance (%)
Cluster 5
Genus
Staphylococcus Alloiococcus Turicella
Propionibacterium Corynebacterium Streptococcus
Enhydrobacter Kocuria
Chryseobacterium Snodgrassella other genera unclassified genus
# Export: arrange the plot object and write it to the Figure 1 PDF
# (Cairo PDF device, 180 x 180 mm)
ggsave(
  filename = "Outputs/fig1_clustering_and_genus_relabunds.pdf",
  plot = grid.arrange(ototype_plot_gen),
  device = cairo_pdf,
  width = 180,
  height = 180,
  units = "mm"
)
This matches what was seen in the table of dominant taxa per cluster: Cluster 1 has mainly Staphylococcus, Cluster 2 is Alloiococcus-dominated, Cluster 3 is Propionibacterium-dominated, Cluster 4 is Turicella-dominated, and Cluster 5 has both Staphylococcus and Turicella.
Alpha diversity
Plot the alpha diversity values in all right ear samples to see how they are distributed:
# Make a data frame for the comparisons: the right-ear metadata bound
# column-wise to the alpha diversity estimates (observed richness, inverse
# Simpson and Shannon) computed from the right-ear ASV object.
# NOTE(review): data.frame() binds by position, so this assumes earclin_right
# rows are in the same sample order as ear_ASV_right_R - TODO confirm.
right_adiv <- data.frame(
  earclin_right,
  estimate_richness(ear_ASV_right_R,
                    measures = c("Observed", "InvSimpson", "Shannon"))
)

# Theme shared by the three panels: black-and-white base, no grid lines, and
# no x-axis text or ticks (the sample names are drawn as the points instead)
adiv_panel_theme <- theme_bw() +
  theme(panel.grid = element_blank(),
        axis.text = element_text(color = "black"),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

# Plot: one panel per alpha diversity measure, each sample drawn as its label
grid.arrange(
  ggplot(right_adiv, aes(x = Sample, y = Observed, label = Sample)) +
    geom_text(size = 2) +
    adiv_panel_theme,
  ggplot(right_adiv, aes(x = Sample, y = Shannon, label = Sample)) +
    geom_text(size = 2) +
    adiv_panel_theme,
  ggplot(right_adiv, aes(x = Sample, y = InvSimpson, label = Sample)) +
    geom_text(size = 2) +
    adiv_panel_theme,
  nrow = 1
)
Sample_5
Sample_9
Sample_11 Sample_13
Sample_15 Sample_17
Sample_21Sample_23
Sample_25Sample_29Sample_31 Sample_33
Sample_35 Sample_37
Sample_41
Sample_43 Sample_45
Sample_47 Sample_49
Sample_51 Sample_53
Sample_55 Sample_57
Sample_59 Sample_61
Sample_63 Sample_65
Sample_67
Sample_69
Sample_71 Sample_73
Sample_75 Sample_79Sample_81
Sample_83 Sample_87
Sample_89 Sample_91Sample_95Sample_97
Sample_99
50 100 150 200
Sample
Observed
Sample_5
Sample_9 Sample_11
Sample_13Sample_15 Sample_17Sample_21
Sample_23 Sample_25
Sample_29 Sample_31Sample_33
Sample_35 Sample_37
Sample_41 Sample_43
Sample_45
Sample_47 Sample_49
Sample_51 Sample_53
Sample_55 Sample_57
Sample_59Sample_61 Sample_63
Sample_65 Sample_67Sample_69
Sample_71 Sample_73
Sample_75 Sample_79
Sample_81 Sample_83
Sample_87 Sample_89
Sample_91
Sample_95 Sample_97
Sample_99
0 1 2 3
Sample
Shannon Sample_5
Sample_9 Sample_11
Sample_13 Sample_15Sample_17Sample_21Sample_23
Sample_25 Sample_29Sample_31
Sample_33 Sample_35
Sample_37Sample_41 Sample_43Sample_45
Sample_47 Sample_49
Sample_51 Sample_53
Sample_55
Sample_57 Sample_59Sample_61
Sample_63 Sample_65
Sample_67 Sample_69
Sample_71 Sample_73
Sample_75 Sample_79
Sample_81 Sample_83
Sample_87
Sample_89
Sample_91 Sample_95Sample_97
Sample_99
2 4 6
Sample
InvSimpson
One sample (53) seems like an outlier, with particularly high values, especially when looking at the evenness-containing indices (Shannon and inverse Simpson). Drop this sample from comparisons and redraw the plot:
# Trim outliers based on Shannon: keep only samples with a Shannon index
# below 3 (subset() also drops rows where Shannon is NA)
right_adiv <- subset(right_adiv, Shannon < 3)

# Theme shared by the three panels: black-and-white base, no grid lines, and
# no x-axis text or ticks (the sample names are drawn as the points instead)
adiv_panel_theme <- theme_bw() +
  theme(panel.grid = element_blank(),
        axis.text = element_text(color = "black"),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

# Plot again with the trimmed data, one panel per alpha diversity measure
grid.arrange(
  ggplot(right_adiv, aes(x = Sample, y = Observed, label = Sample)) +
    geom_text(size = 2) +
    adiv_panel_theme,
  ggplot(right_adiv, aes(x = Sample, y = Shannon, label = Sample)) +
    geom_text(size = 2) +
    adiv_panel_theme,
  ggplot(right_adiv, aes(x = Sample, y = InvSimpson, label = Sample)) +
    geom_text(size = 2) +
    adiv_panel_theme,
  nrow = 1
)
Sample_5
Sample_9
Sample_11 Sample_13
Sample_15 Sample_17
Sample_21Sample_23
Sample_25Sample_29Sample_31 Sample_33
Sample_35 Sample_37
Sample_41
Sample_43 Sample_45
Sample_47 Sample_49
Sample_51
Sample_55 Sample_57
Sample_59
Sample_61 Sample_63
Sample_65 Sample_67
Sample_69
Sample_71 Sample_73
Sample_75
Sample_79Sample_81 Sample_83
Sample_87 Sample_89
Sample_91Sample_95Sample_97 Sample_99
50 100 150 200
Sample
Observed Sample_5
Sample_9 Sample_11
Sample_13Sample_15 Sample_17
Sample_21 Sample_23
Sample_25
Sample_29 Sample_31
Sample_33 Sample_35
Sample_37 Sample_41
Sample_43 Sample_45
Sample_47 Sample_49
Sample_51 Sample_55
Sample_57 Sample_59Sample_61
Sample_63
Sample_65 Sample_67Sample_69
Sample_71 Sample_73
Sample_75 Sample_79
Sample_81
Sample_83 Sample_87
Sample_89
Sample_91
Sample_95 Sample_97
Sample_99
0.0 0.5 1.0 1.5 2.0
Sample
Shannon
Sample_5
Sample_9 Sample_11
Sample_13 Sample_15Sample_17Sample_21Sample_23
Sample_25 Sample_29Sample_31
Sample_33 Sample_35
Sample_37Sample_41
Sample_43Sample_45
Sample_47 Sample_49
Sample_51 Sample_55
Sample_57 Sample_59Sample_61
Sample_63 Sample_65
Sample_67 Sample_69
Sample_71 Sample_73
Sample_75 Sample_79
Sample_81 Sample_83
Sample_87
Sample_89
Sample_91 Sample_95Sample_97
Sample_99
1 2 3 4 5 6
Sample
InvSimpson
Test for statistically significant differences in alpha diversity for variables of interest in the remaining samples:
# Run comparisons for the variables of interest (minus age, which is continuous).
# A logical mask is used instead of varlist[-grep(...)]: negative indexing with
# an empty grep() result would silently drop every variable.
varlist_binary <- varlist[!grepl("Age", varlist)]

# Wilcoxon rank-sum p-value (rounded to 3 digits) of one alpha diversity
# metric against each variable in varlist_binary; returns a named numeric
# vector with one element per variable.
adiv_wilcox_p <- function(metric) {
  vapply(varlist_binary, function(x) {
    round(
      wilcox.test(reformulate(x, response = metric), data = right_adiv)$p.value,
      digits = 3
    )
  }, numeric(1))
}

# Warnings from the tests are suppressed as in the original analysis
# (presumably the "cannot compute exact p-value with ties" warning - verify)
adiv_res <- suppressWarnings(data.frame(
  p_Observed = adiv_wilcox_p("Observed"),
  p_Shannon = adiv_wilcox_p("Shannon"),
  p_InvSimpson = adiv_wilcox_p("InvSimpson")
))
customKable(adiv_res, d = 3)
p_Observed p_Shannon p_InvSimpson
Sampling_season 0.660 0.210 0.457
Sex 0.910 0.408 0.424
Allergy 0.498 0.630 0.872
Dominant_side 0.785 0.791 0.526
Ear_cleaning_swab 0.463 0.229 0.262
Recent_common_cold 0.617 0.703 0.682
Staff_student 0.671 0.976 0.591
No significant differences in alpha diversity for any of the variables (when comparing right ears only).
Age is a continuous variable, so it needs to be tested separately. Run a significance test with the Pearson correlation and plot:
# Correlation test (cor.test default method) of age against each alpha
# diversity metric; keep only the p-value and the correlation estimate
sapply(
  c("Observed", "Shannon", "InvSimpson"),
  function(metric) {
    age_cor <- cor.test(right_adiv$Age, right_adiv[[metric]])
    age_cor[c("p.value", "estimate")]
  }
)
## Observed Shannon InvSimpson
## p.value 0.3985849 0.6929909 0.3287173
## estimate -0.1371896 -0.06440054 -0.1584764
# Scatter plots of each alpha diversity metric against age, side by side
grid.arrange(
  ggplot(right_adiv, aes(x = Age, y = Observed)) +
    geom_point() +
    theme_bw() +
    theme(panel.grid = element_blank()),
  ggplot(right_adiv, aes(x = Age, y = Shannon)) +
    geom_point() +
    theme_bw() +
    theme(panel.grid = element_blank()),
  ggplot(right_adiv, aes(x = Age, y = InvSimpson)) +
    geom_point() +
    theme_bw() +
    theme(panel.grid = element_blank()),
  nrow = 1
)
50 100 150 200
20 30 40 50 60
Age
Observed
0.0 0.5 1.0 1.5 2.0
20 30 40 50 60
Age
Shannon
1 2 3 4 5 6
20 30 40 50 60
Age
InvSimpson
Age does not seem to be significantly related to alpha diversity in ears.
Beta diversity
First, test each variable of interest individually with adonis to look for significant beta diversity differences.
# Metadata: pull the sample data out of the right-ear ASV phyloseq object as a
# plain data frame, for use as the adonis covariate table
ear_meta_bdivs <- as(sample_data(ear_ASV_right_R), "data.frame")
# Check for missing data: number of NA values in each variable of interest
colSums(is.na(ear_meta_bdivs[,varlist]))
## Sampling_season Sex Age
## 0 0 0
## Allergy Dominant_side Ear_cleaning_swab
## 0 1 0
## Recent_common_cold Staff_student
## 1 0
# One subject has missing data for the common cold variable and another for the
# handedness/dominant side; exclude them from comparisons
ear_meta_bdivs <- subset(ear_meta_bdivs, !is.na(Recent_common_cold) &
                           !is.na(Dominant_side))
# Apply the same exclusion to the phyloseq object once, so that the ordination
# and the distance matrix are built from exactly the same set of samples
ear_ASV_right_complete <- subset_samples(ear_ASV_right_R,
                                         !is.na(Recent_common_cold) &
                                           !is.na(Dominant_side))
# Run ordination (NMDS on Bray-Curtis dissimilarities)
ear_ASV_ord <- ordinate(ear_ASV_right_complete,
                        "NMDS", "bray", try = 999, trace = FALSE)
# Make data frames out of these (for plotting)
# NOTE(review): cbind() joins by position; assumes the NMDS points and the
# trimmed metadata are in the same sample order - TODO confirm.
ear_ASV_ord_df <- cbind(as.data.frame(ear_ASV_ord$points), ear_meta_bdivs)
# Calculate distance matrix
ear_dist <- vegdist(as(otu_table(ear_ASV_right_complete), "matrix"),
                    method = "bray")
# Run adonis for each variable and keep the p-value of the tested term
# NOTE(review): adonis() is deprecated in recent vegan releases in favour of
# adonis2(), whose result has no $aov.tab - adapt the extraction if migrating.
ear_bdiv_res <- data.frame(
  Variable = varlist,
  pVal = vapply(varlist, function(x) {
    adonis(as.formula(paste("ear_dist ~", x)), ear_meta_bdivs,
           permutations = 9999)$aov.tab[["Pr(>F)"]][1]
  }, numeric(1))
)
# Sort by ascending p-value and drop the row names inherited from varlist
ear_bdiv_res <- ear_bdiv_res[order(ear_bdiv_res$pVal), ]
rownames(ear_bdiv_res) <- NULL
Show results in a table:
customKable(ear_bdiv_res, d = 3, cn = c("Variable", "p-value"))
Variable p-value
Ear_cleaning_swab 0.016
Sex 0.069
Dominant_side 0.132
Recent_common_cold 0.165
Staff_student 0.272
Age 0.330
Allergy 0.785
Sampling_season 0.876
The variables with the lowest p-values for beta diversity differences are ear cleaning and sex. Plots for these variables:
# Ordination plotting function
# Ordination plotting function
#
# Draws an MDS1-vs-MDS2 scatter plot in which point colour and shape both
# encode one grouping column, with a 90% ellipse per group.
#
# Arguments:
#   df   - data frame containing MDS1, MDS2 and the grouping column
#   var  - name of the grouping column, as a string; underscores are replaced
#          by spaces for the plot title and the scale names
#   cols - colour values handed to scale_color_manual()
#
# Returns a ggplot object.
betadivplot <- function(df, var, cols) {
  var_label <- gsub("_", " ", var)
  # .data[[var]] looks the column up inside the plot data; indexing df[, var]
  # inside aes() bypasses data masking and fails for tibbles
  ggplot(df, aes(x = MDS1, y = MDS2,
                 color = .data[[var]], shape = .data[[var]])) +
    theme_bw() +
    geom_point(size = 2) +
    coord_fixed() +
    stat_ellipse(level = 0.9) +
    scale_color_manual(values = cols, name = var_label) +
    scale_shape_manual(values = c(15, 16, 17, 18), name = var_label) +
    ggtitle(var_label) +
    theme(panel.grid = element_blank(),
          legend.position = "bottom",
          legend.title = element_blank(),
          legend.margin = margin(t = -3, l = -5, unit = "mm"))
}
# NMDS panels for ear cleaning and sex, drawn side by side in a single row
grid.arrange(
  grobs = list(
    betadivplot(ear_ASV_ord_df, "Ear_cleaning_swab", c("gray30", "aquamarine3")),
    betadivplot(ear_ASV_ord_df, "Sex", c("orchid", "darkblue"))
  ),
  nrow = 1
)
−0.4
−0.2 0.0 0.2 0.4
−0.6 −0.3 0.0 0.3
MDS1
MDS2
0 1
Ear cleaning swab
−0.4
−0.2 0.0 0.2 0.4
−0.6 −0.3 0.0 0.3
MDS1
MDS2
female male
Sex
Finally, a combined adonis2 model with the variables that seem the most important (and season/batch, since it is potentially a major confounder):
# Combined model: marginal effect of each term given the other two
# (`permutations` is spelled out rather than relying on partial matching of `perm`)
adonis2(ear_dist ~ .,
        data = ear_meta_bdivs[, c("Sampling_season", "Sex", "Ear_cleaning_swab")],
        permutations = 9999, by = "margin")
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ear_dist ~ ., data = ear_meta_bdivs[, c("Sampling_season", "Sex", "Ear_cleaning_swab")], permutations = 9999, by = "margin")
## Df SumOfSqs R2 F Pr(>F)
## Sampling_season 1 0.1450 0.01316 0.5208 0.8107
## Sex 1 0.3653 0.03315 1.3116 0.2392
## Ear_cleaning_swab 1 0.5718 0.05189 2.0532 0.0599 .
## Residual 35 9.7477 0.88453
## Total 38 11.0203 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The swab cleaning variable is close to significant in the combined model, which is corrected for sex. Unfortunately, this model is not very good, since most males did not report cleaning their ears. As an alternative classification, split samples into three groups: females who swab their ears, non-swabbing females, and non-swabbing males, excluding the three males who do swab their ears. Run adonis again with this variable.
# Alternate grouping variable: swabbing females / non-swabbing females / non-swabbing males
# Built as "<Sex>_<Ear_cleaning_swab>" and stored on both the ASV-level and
# the genus-level phyloseq objects
sample_data(ear_ASV_right_R)$Swab_3 <- factor(paste(
  sample_data(ear_ASV_right_R)$Sex,
  sample_data(ear_ASV_right_R)$Ear_cleaning_swab, sep = "_"))
sample_data(ear_gen_right_R)$Swab_3 <- factor(paste(
  sample_data(ear_gen_right_R)$Sex,
  sample_data(ear_gen_right_R)$Ear_cleaning_swab, sep = "_"))
# Recalculate distance matrix (dropping the "male_1" category):
ear_dist2 <- vegdist(as(
  otu_table(subset_samples(ear_ASV_right_R, !is.na(Recent_common_cold)
                           & Swab_3 != "male_1")), "matrix"), method = "bray")
# Retrim metadata with the same filter, so its rows match the distance matrix
ear_meta_bdivs2 <- as(sample_data(ear_ASV_right_R), "data.frame")
ear_meta_bdivs2 <- subset(ear_meta_bdivs2,
                          !is.na(Recent_common_cold) & Swab_3 != "male_1")
Test only the 3-group variable:
# Fit the single-term model and keep the result for the Table 2 export below.
# NOTE(review): `by` is not an adonis() argument (it belongs to adonis2), so it
# appears to be ignored here - the printed output reports "Terms added
# sequentially". It is kept so the recorded call is unchanged; confirm and drop.
swab3_adonis <- adonis(ear_dist2 ~ Swab_3, data = ear_meta_bdivs2,
                       permutations = 9999, by = "margin")
swab3_adonis
##
## Call:
## adonis(formula = ear_dist2 ~ Swab_3, data = ear_meta_bdivs2, permutations = 9999, by = "margin")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Swab_3 2 1.1214 0.56068 2.0237 0.10638 0.0274 *
## Residuals 34 9.4197 0.27705 0.89362
## Total 36 10.5411 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Test the 3-group variable corrected for season/batch:
# Marginal effect of the 3-group variable with sampling season in the model
# (`permutations` is spelled out rather than relying on partial matching of `perm`)
adonis2(ear_dist2 ~ Sampling_season + Swab_3, data = ear_meta_bdivs2,
        permutations = 9999, by = "margin")
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ear_dist2 ~ Sampling_season + Swab_3, data = ear_meta_bdivs2, permutations = 9999, by = "margin")
## Df SumOfSqs R2 F Pr(>F)
## Sampling_season 1 0.1602 0.01520 0.5709 0.7612
## Swab_3 2 1.1474 0.10885 2.0447 0.0238 *
## Residual 33 9.2595 0.87842
## Total 36 10.5411 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
This new three-group variable is significant both on its own and when corrected for sampling batch. Add the uncorrected value to the earlier table and export:
Table 2
# Make sure Variable is character (not factor) so a new name can be appended
ear_bdiv_res$Variable <- as.character(ear_bdiv_res$Variable)
# Append the Swab_3 result as a one-row data frame. Binding a c() vector that
# mixes the name and the p-value would coerce the p-value to character and
# turn the whole pVal column into strings in the exported table.
ear_bdiv_res <- rbind(
  ear_bdiv_res,
  data.frame(Variable = "Swab_3",
             pVal = swab3_adonis$aov.tab[["Pr(>F)"]][1],
             stringsAsFactors = FALSE)
)
write.table(ear_bdiv_res, "Outputs/table2.txt")
NMDS ordination plot of the sex + ear-swabbing variable:
Figure 2
# NMDS (Bray-Curtis) on the samples used for the Swab_3 comparison
ear_ASV_ord2 <- ordinate(
  subset_samples(ear_ASV_right_R, !is.na(Recent_common_cold) & Swab_3 != "male_1"),
  "NMDS", "bray", try = 999, trace = FALSE)
# Attach the metadata to the NMDS coordinates
# NOTE(review): cbind() joins by position; assumes ear_meta_bdivs2 is in the
# same sample order as the ordination - TODO confirm.
ear_ASV_ord_df2 <- cbind(as.data.frame(ear_ASV_ord2$points), ear_meta_bdivs2)
# Fix the level order so it lines up with the colours, shapes and labels below
ear_ASV_ord_df2$Swab_3 <- factor(ear_ASV_ord_df2$Swab_3,
                                 levels = c("female_1", "female_0", "male_0"))
fig2 <- arrangeGrob(
  ggplot(ear_ASV_ord_df2, aes(x = MDS1, y = MDS2,
                              color = Swab_3, shape = Swab_3)) +
    theme_bw() +
    geom_point(size = 2) +
    coord_fixed() +
    stat_ellipse(level = 0.9) +
    scale_color_manual(
      values = c("seagreen3", "gray50", "black"),
      name = "Ear cleaning\nwith swabs",
      labels = c("female, swab", "female, no swab", "male, no swab")) +
    scale_shape_manual(
      values = c(20, 17, 17),
      name = "Ear cleaning\nwith swabs",
      labels = c("female, swab", "female, no swab", "male, no swab")) +
    theme(panel.grid = element_blank(),
          legend.position = "bottom",
          legend.margin = margin(t = -1, b = -5, unit = "mm"),
          legend.key.height = unit(4, "mm")) +
    guides(color = guide_legend(nrow = 3, byrow = TRUE)))
grid.arrange(fig2)
−0.25 0.00 0.25
−0.4 0.0 0.4
MDS1
MDS2
Ear cleaning with swabs
female, swab female, no swab male, no swab
# Write the arranged Figure 2 grob to PDF (Cairo PDF device, 80 x 85 mm)
ggsave(
  filename = "Outputs/fig2_nmds_plots.pdf",
  plot = fig2,
  device = cairo_pdf,
  width = 80,
  height = 85,
  units = "mm"
)