Microbiota and clinical variables - R Code for Sjövall, A. et al.: Microbiome of the external auditory canal

0 25 50 75 100

Sample_5Sample_9Sample_15Sample_21Sample_33Sample_45Sample_71Sample_91Sample_95Sample_97

Relative abundance (%)

Cluster 1

0 25 50 75 100

Sample_17Sample_25Sample_29Sample_43Sample_47Sample_57Sample_59Sample_63Sample_79Sample_11

Relative abundance (%)

Cluster 2

0 25 50 75 100

Sample_83Sample_13Sample_23Sample_37Sample_41Sample_53Sample_65Sample_99

Relative abundance (%)

Cluster 3

0 25 50 75 100

Sample_35Sample_51Sample_55Sample_67Sample_73Sample_75

Relative abundance (%)

Cluster 4

0 25 50 75 100

Sample_31Sample_49Sample_61Sample_69Sample_81Sample_87Sample_89

Relative abundance (%)

Cluster 5

Genus

Staphylococcus Alloiococcus Turicella

Propionibacterium Corynebacterium Streptococcus

Enhydrobacter Kocuria

Chryseobacterium Snodgrassella other genera unclassified genus

# Export the arranged genus-level plot as a 180 x 180 mm PDF

ggsave(
  filename = "Outputs/fig1_clustering_and_genus_relabunds.pdf",
  plot = grid.arrange(ototype_plot_gen),
  device = cairo_pdf,
  width = 180, height = 180, units = "mm"
)

This matches what was seen in the table of dominant taxa per cluster: Cluster 1 has mainly Staphylococcus, Cluster 2 is Alloiococcus-dominated, Cluster 3 is Propionibacterium-dominated, Cluster 4 is Turicella-dominated, and Cluster 5 has both Staphylococcus and Turicella.

Alpha diversity

Plot the alpha diversity values in all right ear samples to see how they are distributed:

# Make a data frame for the comparisons: the columns of earclin_right
# (presumably the right-ear clinical metadata - confirm) followed by the
# Observed, InvSimpson and Shannon estimates for each sample
right_adiv <- data.frame(
  earclin_right,
  estimate_richness(ear_ASV_right_R,
                    measures = c("Observed", "InvSimpson", "Shannon"))
)

# Plot: one panel per alpha diversity measure, side by side; each sample is
# drawn as its own name and the x axis text/ticks are hidden

grid.arrange(
  ggplot(data = right_adiv,
         mapping = aes(x = Sample, y = Observed, label = Sample)) +
    geom_text(size = 2) +
    theme_bw() +
    theme(axis.ticks.x = element_blank(),
          axis.text.x = element_blank(),
          axis.text = element_text(color = "black"),
          panel.grid = element_blank()),
  ggplot(data = right_adiv,
         mapping = aes(x = Sample, y = Shannon, label = Sample)) +
    geom_text(size = 2) +
    theme_bw() +
    theme(axis.ticks.x = element_blank(),
          axis.text.x = element_blank(),
          axis.text = element_text(color = "black"),
          panel.grid = element_blank()),
  ggplot(data = right_adiv,
         mapping = aes(x = Sample, y = InvSimpson, label = Sample)) +
    geom_text(size = 2) +
    theme_bw() +
    theme(axis.ticks.x = element_blank(),
          axis.text.x = element_blank(),
          axis.text = element_text(color = "black"),
          panel.grid = element_blank()),
  nrow = 1
)

Sample_5

Sample_9

Sample_11 Sample_13

Sample_15 Sample_17

Sample_21Sample_23

Sample_25Sample_29Sample_31 Sample_33

Sample_35 Sample_37

Sample_41

Sample_43 Sample_45

Sample_47 Sample_49

Sample_51 Sample_53

Sample_55 Sample_57

Sample_59 Sample_61

Sample_63 Sample_65

Sample_67

Sample_69

Sample_71 Sample_73

Sample_75 Sample_79Sample_81

Sample_83 Sample_87

Sample_89 Sample_91Sample_95Sample_97

Sample_99

50 100 150 200

Sample

Observed

Sample_5

Sample_9 Sample_11

Sample_13Sample_15 Sample_17Sample_21

Sample_23 Sample_25

Sample_29 Sample_31Sample_33

Sample_35 Sample_37

Sample_41 Sample_43

Sample_45

Sample_47 Sample_49

Sample_51 Sample_53

Sample_55 Sample_57

Sample_59Sample_61 Sample_63

Sample_65 Sample_67Sample_69

Sample_71 Sample_73

Sample_75 Sample_79

Sample_81 Sample_83

Sample_87 Sample_89

Sample_91

Sample_95 Sample_97

Sample_99

0 1 2 3

Sample

Shannon

Sample_5

Sample_9 Sample_11

Sample_13 Sample_15Sample_17Sample_21Sample_23

Sample_25 Sample_29Sample_31

Sample_33 Sample_35

Sample_37Sample_41 Sample_43Sample_45

Sample_47 Sample_49

Sample_51 Sample_53

Sample_55

Sample_57 Sample_59Sample_61

Sample_63 Sample_65

Sample_67 Sample_69

Sample_71 Sample_73

Sample_75 Sample_79

Sample_81 Sample_83

Sample_87

Sample_89

Sample_91 Sample_95Sample_97

Sample_99

2 4 6

Sample

InvSimpson

One sample (53) seems like an outlier, with particularly high values, especially when looking at the evenness-containing indices (Shannon and inverse Simpson). Drop this sample from comparisons and redraw the plot:

# Trim outliers based on Shannon: keep only the samples whose Shannon index
# is below 3
right_adiv <- subset(right_adiv, Shannon < 3)

# Plot again: same three panels as before, drawn from the trimmed data
grid.arrange(
  ggplot(right_adiv, aes(x = Sample, y = Observed, label = Sample)) +
    geom_text(size = 2) +
    theme_bw() +
    theme(panel.grid = element_blank(),
          axis.text = element_text(color = "black"),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank()),
  ggplot(right_adiv, aes(x = Sample, y = Shannon, label = Sample)) +
    geom_text(size = 2) +
    theme_bw() +
    theme(panel.grid = element_blank(),
          axis.text = element_text(color = "black"),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank()),
  ggplot(right_adiv, aes(x = Sample, y = InvSimpson, label = Sample)) +
    geom_text(size = 2) +
    theme_bw() +
    theme(panel.grid = element_blank(),
          axis.text = element_text(color = "black"),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank()),
  nrow = 1
)

Sample_5

Sample_9

Sample_11 Sample_13

Sample_15 Sample_17

Sample_21Sample_23

Sample_25Sample_29Sample_31 Sample_33

Sample_35 Sample_37

Sample_41

Sample_43 Sample_45

Sample_47 Sample_49

Sample_51

Sample_55 Sample_57

Sample_59

Sample_61 Sample_63

Sample_65 Sample_67

Sample_69

Sample_71 Sample_73

Sample_75

Sample_79Sample_81 Sample_83

Sample_87 Sample_89

Sample_91Sample_95Sample_97 Sample_99

50 100 150 200

Sample

Observed

Sample_5

Sample_9 Sample_11

Sample_13Sample_15 Sample_17

Sample_21 Sample_23

Sample_25

Sample_29 Sample_31

Sample_33 Sample_35

Sample_37 Sample_41

Sample_43 Sample_45

Sample_47 Sample_49

Sample_51 Sample_55

Sample_57 Sample_59Sample_61

Sample_63

Sample_65 Sample_67Sample_69

Sample_71 Sample_73

Sample_75 Sample_79

Sample_81

Sample_83 Sample_87

Sample_89

Sample_91

Sample_95 Sample_97

Sample_99

0.0 0.5 1.0 1.5 2.0

Sample

Shannon

Sample_5

Sample_9 Sample_11

Sample_13 Sample_15Sample_17Sample_21Sample_23

Sample_25 Sample_29Sample_31

Sample_33 Sample_35

Sample_37Sample_41

Sample_43Sample_45

Sample_47 Sample_49

Sample_51 Sample_55

Sample_57 Sample_59Sample_61

Sample_63 Sample_65

Sample_67 Sample_69

Sample_71 Sample_73

Sample_75 Sample_79

Sample_81 Sample_83

Sample_87

Sample_89

Sample_91 Sample_95Sample_97

Sample_99

1 2 3 4 5 6

Sample

InvSimpson

Test for statistically significant differences in alpha diversity for variables of interest in the remaining samples:

# Run comparisons for the variables of interest (minus age, which is continuous).
# A logical mask is used instead of -grep(): if nothing matched, -grep() would
# yield an empty index and silently drop every variable.
varlist_binary <- varlist[!grepl("Age", varlist)]

# One column of Wilcoxon test p-values (rounded to 3 digits) per alpha
# diversity measure, one row per variable. Warnings from wilcox.test() are
# suppressed, as in the original analysis.
adiv_res <- suppressWarnings(data.frame(
  p_Observed = vapply(varlist_binary, function(x) {
    round(wilcox.test(as.formula(paste("Observed", "~", x)),
                      data = right_adiv)$p.value, digits = 3)
  }, numeric(1)),
  p_Shannon = vapply(varlist_binary, function(x) {
    round(wilcox.test(as.formula(paste("Shannon", "~", x)),
                      data = right_adiv)$p.value, digits = 3)
  }, numeric(1)),
  p_InvSimpson = vapply(varlist_binary, function(x) {
    round(wilcox.test(as.formula(paste("InvSimpson", "~", x)),
                      data = right_adiv)$p.value, digits = 3)
  }, numeric(1))
))
customKable(adiv_res, d = 3)

p_Observed p_Shannon p_InvSimpson

Sampling_season 0.660 0.210 0.457

Sex 0.910 0.408 0.424

Allergy 0.498 0.630 0.872

Dominant_side 0.785 0.791 0.526

Ear_cleaning_swab 0.463 0.229 0.262

Recent_common_cold 0.617 0.703 0.682

Staff_student 0.671 0.976 0.591

No significant differences in alpha diversity for any of the variables (when comparing right ears only).

Age is a continuous variable, so it needs to be tested separately. Run a significance test with the Pearson correlation and plot:

# Correlation test of age against each alpha diversity measure; keeps only the
# p-value and the correlation estimate of each test
sapply(
  c("Observed", "Shannon", "InvSimpson"),
  function(measure) {
    age_cor <- cor.test(right_adiv$Age, right_adiv[[measure]])
    age_cor[c("p.value", "estimate")]
  }
)

## Observed Shannon InvSimpson

## p.value 0.3985849 0.6929909 0.3287173

## estimate -0.1371896 -0.06440054 -0.1584764

# Scatter plots of each alpha diversity measure against age, side by side
grid.arrange(
  ggplot(right_adiv, aes(x = Age, y = Observed)) +
    geom_point() +
    theme_bw() +
    theme(panel.grid = element_blank()),
  ggplot(right_adiv, aes(x = Age, y = Shannon)) +
    geom_point() +
    theme_bw() +
    theme(panel.grid = element_blank()),
  ggplot(right_adiv, aes(x = Age, y = InvSimpson)) +
    geom_point() +
    theme_bw() +
    theme(panel.grid = element_blank()),
  nrow = 1
)

50 100 150 200

20 30 40 50 60

Age

Observed

0.0 0.5 1.0 1.5 2.0

20 30 40 50 60

Age

Shannon

1 2 3 4 5 6

20 30 40 50 60

Age

InvSimpson

Age does not seem to be significantly related to alpha diversity in ears.

Beta diversity

First, test each variable of interest individually with adonis to look for significant beta diversity differences.

# Metadata: sample data of ear_ASV_right_R as a plain data frame

ear_meta_bdivs <- as(sample_data(ear_ASV_right_R), "data.frame")

# Check for missing data: number of NA values per variable of interest.
# drop = FALSE keeps a data frame even if varlist holds a single name, so
# colSums() always receives a two-dimensional object.

colSums(is.na(ear_meta_bdivs[, varlist, drop = FALSE]))

## Sampling_season Sex Age

## 0 0 0

## Allergy Dominant_side Ear_cleaning_swab

## 0 1 0

## Recent_common_cold Staff_student

## 1 0

# One subject has missing data for the common cold variable and another for the
# handedness/dominant side; exclude them from comparisons

ear_meta_bdivs <- subset(
  ear_meta_bdivs,
  !(is.na(Recent_common_cold) | is.na(Dominant_side))
)

# Run ordination (NMDS on "bray" distances) on the same complete cases

ear_ASV_ord <- ordinate(
  subset_samples(
    ear_ASV_right_R,
    !(is.na(Recent_common_cold) | is.na(Dominant_side))
  ),
  method = "NMDS", distance = "bray", try = 999, trace = FALSE
)

# Make data frames out of these (for plotting): ordination coordinates next to
# the filtered metadata

ear_ASV_ord_df <- cbind(as.data.frame(ear_ASV_ord$points), ear_meta_bdivs)

# Calculate distance matrix ("bray") for the samples without missing
# common-cold or dominant-side data
ear_dist <- vegdist(
  as(otu_table(subset_samples(ear_ASV_right_R,
                              !is.na(Recent_common_cold) &
                                !is.na(Dominant_side))),
     "matrix"),
  method = "bray"
)

# Run adonis for each variable and keep the p-value of its single term.
# NOTE(review): adonis() is deprecated in newer vegan releases in favour of
# adonis2() - confirm against the installed version before re-running.
ear_bdiv_res <- data.frame(
  Variable = varlist,
  pVal = vapply(varlist, function(x) {
    adonis(as.formula(paste("ear_dist ~", x)), ear_meta_bdivs,
           permutations = 9999)$aov.tab[["Pr(>F)"]][1]
  }, numeric(1))
)

# Sort by p-value and drop the row names inherited from varlist
ear_bdiv_res <- ear_bdiv_res[order(ear_bdiv_res$pVal), ]
rownames(ear_bdiv_res) <- NULL

Show results in a table:

customKable(ear_bdiv_res, d = 3, cn = c("Variable", "p-value"))

Variable p-value

Ear_cleaning_swab 0.016

Sex 0.069

Dominant_side 0.132

Recent_common_cold 0.165

Staff_student 0.272

Age 0.330

Allergy 0.785

Sampling_season 0.876

The variables with the lowest p-values for beta diversity differences are ear cleaning and sex. Plots for these variables:

# Ordination plotting function
#
# Draws an MDS1 vs MDS2 scatter plot with points coloured and shaped by one
# grouping column, with a stat_ellipse(level = 0.9) outline per group.
#
# df:   data frame holding the columns MDS1, MDS2 and the grouping column
# var:  name of the grouping column, given as a single string
# cols: colours handed to scale_color_manual()
#
# Returns the ggplot object. Only four point shapes are defined.

betadivplot <- function(df, var, cols) {
  stopifnot(is.character(var), length(var) == 1, var %in% names(df))

  # Underscores in the column name become spaces for the title
  var_label <- gsub("_", " ", var)

  # .data[[var]] looks the column up in the plot data itself, instead of
  # capturing an external df[, var] vector inside aes()
  ggplot(df, aes(x = MDS1, y = MDS2,
                 color = .data[[var]], shape = .data[[var]])) +
    theme_bw() +
    geom_point(size = 2) +
    coord_fixed() +
    stat_ellipse(level = 0.9) +
    scale_color_manual(values = cols, name = var_label) +
    scale_shape_manual(values = c(15, 16, 17, 18), name = var_label) +
    ggtitle(var_label) +
    theme(panel.grid = element_blank(),
          legend.position = "bottom",
          legend.title = element_blank(),
          legend.margin = margin(t = -3, l = -5, unit = "mm"))
}

# Ear cleaning and sex ordination plots, side by side
grid.arrange(
  betadivplot(ear_ASV_ord_df, var = "Ear_cleaning_swab",
              cols = c("gray30", "aquamarine3")),
  betadivplot(ear_ASV_ord_df, var = "Sex",
              cols = c("orchid", "darkblue")),
  nrow = 1
)

−0.4

−0.2 0.0 0.2 0.4

−0.6 −0.3 0.0 0.3

MDS1

MDS2

0 1

Ear cleaning swab

−0.4

−0.2 0.0 0.2 0.4

−0.6 −0.3 0.0 0.3

MDS1

MDS2

female male

Sex

Finally, a combined adonis2 model with the variables that seem the most important (and season/batch, since it is potentially a major confounder):

# Marginal test of each term in the combined model; the data argument is
# restricted to the three model variables so that `.` expands to exactly those.
# `permutations` is spelled out rather than relying on partial matching of
# `perm`.
adonis2(ear_dist ~ .,
        data = ear_meta_bdivs[, c("Sampling_season", "Sex", "Ear_cleaning_swab")],
        permutations = 9999, by = "margin")

## Permutation test for adonis under reduced model

## Marginal effects of terms

## Permutation: free

## Number of permutations: 9999

#### adonis2(formula = ear_dist ~ ., data = ear_meta_bdivs[, c("Sampling_season", "Sex", "Ear_cleaning_swab")], permutations = 9999, by = "margin")

## Df SumOfSqs R2 F Pr(>F)

## Sampling_season 1 0.1450 0.01316 0.5208 0.8107

## Sex 1 0.3653 0.03315 1.3116 0.2392

## Ear_cleaning_swab 1 0.5718 0.05189 2.0532 0.0599 .

## Residual 35 9.7477 0.88453

## Total 38 11.0203 1.00000

## ---

## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The swab cleaning variable is close to significant in the combined model, which is corrected for sex. Unfortunately, this model is not very good, since most males did not report cleaning their ears. As an alternative classification, split samples into three groups: females who swab their ears, non-swabbing females, and non-swabbing males, excluding the three males who do swab their ears. Run adonis again with this variable.

# Alternate grouping variable: swabbing females / non-swabbing females / non-swabbing males
# Built by pasting Sex and Ear_cleaning_swab together with "_" (the values are
# later referred to as "female_1", "female_0", "male_0" and "male_1"), on both
# the ASV-level and the genus-level object.
sample_data(ear_ASV_right_R)$Swab_3 <- factor(paste(
  sample_data(ear_ASV_right_R)$Sex,
  sample_data(ear_ASV_right_R)$Ear_cleaning_swab,
  sep = "_"
))

sample_data(ear_gen_right_R)$Swab_3 <- factor(paste(
  sample_data(ear_gen_right_R)$Sex,
  sample_data(ear_gen_right_R)$Ear_cleaning_swab,
  sep = "_"
))

# Recalculate distance matrix (dropping the "male_1" category):
ear_dist2 <- vegdist(
  as(otu_table(subset_samples(ear_ASV_right_R,
                              !is.na(Recent_common_cold) &
                                Swab_3 != "male_1")),
     "matrix"),
  method = "bray"
)

# Retrim metadata with the same filter as the distance matrix
ear_meta_bdivs2 <- as(sample_data(ear_ASV_right_R), "data.frame")
ear_meta_bdivs2 <- subset(ear_meta_bdivs2,
                          !is.na(Recent_common_cold) & Swab_3 != "male_1")

Test only the 3-group variable:

# adonis() has no `by` argument (that belongs to adonis2()); the by = "margin"
# previously passed here had no effect - the printed result reports sequential
# terms - so it is dropped. `permutations` is spelled out in full.
swab3_adonis <- adonis(ear_dist2 ~ Swab_3, data = ear_meta_bdivs2,
                       permutations = 9999)
swab3_adonis

#### Call:

## adonis(formula = ear_dist2 ~ Swab_3, data = ear_meta_bdivs2, permutations = 9999, by = "margin")

#### Permutation: free

## Number of permutations: 9999

#### Terms added sequentially (first to last)

#### Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)

## Swab_3 2 1.1214 0.56068 2.0237 0.10638 0.0274 *

## Residuals 34 9.4197 0.27705 0.89362

## Total 36 10.5411 1.00000

## ---

## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Test the 3-group variable corrected for season/batch:

# Marginal test of the 3-group variable alongside sampling season;
# `permutations` is spelled out rather than relying on partial matching of
# `perm`.
adonis2(ear_dist2 ~ Sampling_season + Swab_3, data = ear_meta_bdivs2,
        permutations = 9999, by = "margin")

## Permutation test for adonis under reduced model

## Marginal effects of terms

## Permutation: free

## Number of permutations: 9999

#### adonis2(formula = ear_dist2 ~ Sampling_season + Swab_3, data = ear_meta_bdivs2, permutations = 9999, by = "margin")

## Df SumOfSqs R2 F Pr(>F)

## Sampling_season 1 0.1602 0.01520 0.5709 0.7612

## Swab_3 2 1.1474 0.10885 2.0447 0.0238 *

## Residual 33 9.2595 0.87842

## Total 36 10.5411 1.00000

## ---

## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

This new three-group variable is significant both on its own and when corrected for sampling batch. Add the uncorrected value to the earlier table and export:

Table 2

# Make sure Variable is character so a new name can be appended
ear_bdiv_res$Variable <- as.character(ear_bdiv_res$Variable)

# Append the Swab_3 p-value as a one-row data frame. Appending
# c(Variable = "Swab_3", pVal = <number>) instead would build a character
# vector, and rbind() would then turn the whole pVal column into strings.
ear_bdiv_res <- rbind(
  ear_bdiv_res,
  data.frame(Variable = "Swab_3",
             pVal = swab3_adonis$aov.tab[["Pr(>F)"]][1],
             stringsAsFactors = FALSE)
)

write.table(ear_bdiv_res, "Outputs/table2.txt")

NMDS ordination plot of the sex + ear-swabbing variable:

Figure 2

# NMDS ordination ("bray") of the samples with common-cold data, excluding the
# "male_1" group
ear_ASV_ord2 <- ordinate(
  subset_samples(ear_ASV_right_R,
                 !is.na(Recent_common_cold) & Swab_3 != "male_1"),
  "NMDS", "bray", try = 999, trace = FALSE
)

# Ordination coordinates next to the matching metadata
ear_ASV_ord_df2 <- cbind(as.data.frame(ear_ASV_ord2$points), ear_meta_bdivs2)

# Fix the level order so it lines up with the colours, shapes and labels below
ear_ASV_ord_df2$Swab_3 <- factor(ear_ASV_ord_df2$Swab_3,
                                 levels = c("female_1", "female_0", "male_0"))

fig2 <- arrangeGrob(
  ggplot(ear_ASV_ord_df2, aes(x = MDS1, y = MDS2,
                              color = Swab_3, shape = Swab_3)) +
    theme_bw() +
    geom_point(size = 2) +
    coord_fixed() +
    stat_ellipse(level = 0.9) +
    # Colour and shape scales share name and labels
    scale_color_manual(values = c("seagreen3", "gray50", "black"),
                       name = "Ear cleaning\nwith swabs",
                       labels = c("female, swab", "female, no swab",
                                  "male, no swab")) +
    scale_shape_manual(values = c(20, 17, 17),
                       name = "Ear cleaning\nwith swabs",
                       labels = c("female, swab", "female, no swab",
                                  "male, no swab")) +
    theme(panel.grid = element_blank(),
          legend.position = "bottom",
          legend.margin = margin(t = -1, b = -5, unit = "mm"),
          legend.key.height = unit(4, "mm")) +
    guides(color = guide_legend(nrow = 3, byrow = TRUE))
)

grid.arrange(fig2)

−0.25 0.00 0.25

−0.4 0.0 0.4

MDS1

MDS2

Ear cleaning with swabs

female, swab female, no swab male, no swab

# Export figure 2 as an 80 x 85 mm PDF
ggsave(
  filename = "Outputs/fig2_nmds_plots.pdf",
  plot = fig2,
  device = cairo_pdf,
  width = 80, height = 85, units = "mm"
)

Dalam dokumen R Code for Sjövall, A. et al.: Microbiome of the external auditory canal (Halaman 43-51)