## The following supplement accompanies the article "Changes in zooplankton assemblages in the northern Monterey Bay, California, during a fall transition" by Julio B. J. Harvey*, Jennifer L. Fisher, John P. Ryan, Shannon B. Johnson, William T. Peterson, Robert C. Vrijenhoek ## *Corresponding author: jbjharvey@gmail.com ## Marine Ecology Progress Series 604: 99–120 (2018) ## ## R scripts for data analysis ## ############################################################ # required packages library(Hmisc) library(phyloseq) library(biom) library(vegan) library(marmap) library(lattice) library(permute) library(survival) library(gtools) library("doParallel") registerDoParallel(cores = 8) library(foreach) library(iterators) library(reshape) library(reshape2) library(ggplot2) library(plyr) library(tidyr) library(gtable) library(scales) library(dplyr) library(Formula) library(Rmisc) ############################################################ ## sampling map (Figure 1) # import bathymetric data MBbathy <- read.bathy("monterey.xyz") # summarize summary(MBbathy) # create coordinate subset to zoom on area to be mapped lat_h <- c(36.74, 37.05) lon_h <- c(-122.1, -121.75) # subset original data accordingly northMBbathy <- subsetBathy(MBbathy, x=lon_h, y=lat_h,locator=FALSE) # import points for August and October respectively stations_aug <- read.csv("simz_coords_aug2013.csv", header = TRUE) stations_oct <- read.csv("simz_coords_oct2013.csv", header = TRUE) # plot points for August and October with different colors plot(northMBbathy, deep = c(-1200, -100, 0), shallow = c(-100, -10, 0), step = c(100, 10, 0), lwd = c(1, 1, 1), lty = c(1, 1, 1), col = c("darkgrey", "darkgrey", "black"), drawlabels = c(TRUE, TRUE, FALSE)) + points(stations_aug$lon, stations_aug$lat, pch = 19, cex = 0.7, col = "gray") + # August stations points(stations_oct$lon, stations_oct$lat, pch = 17, cex = 0.7, col = "black") + # October stations points(c(-122), c(36.75), pch = 19, cex = 0.7, col = "black") + # M1 mooring scaleBathy(northMBbathy, deg = 0.055, x = "topright", inset = 10) ## import biomass data (morphology), convert to .biom format, add metadata biomass_JLF <- read.csv("biomass_JLF.csv", header =T, na.strings = c("0.0", "0", "")) # extract only the desired columns biomass_e <- biomass_JLF[,c(5,6,9)] # rename columns colnames(biomass_e) <- c("station", "taxonomy", "carbon_m3") # melt biomass_m <- melt(biomass_e, id.vars=c("station", "taxonomy"), measure.vars=c("carbon_m3")) # delete column 'variable' biomass_m$variable <- NULL # change NAs to 0s biomass_m[is.na(biomass_m)] <- 0 # sum duplicate taxa biomass_sum <- ddply(biomass_m, c("station", "taxonomy"), numcolwise(sum, na.rm = TRUE)) # cast to wide format biomass_cast <- dcast(biomass_sum, fun.aggregate = NULL, taxonomy ~ station, value.var="value") # append prefix to column names colnames(biomass_cast) <- paste("net", colnames(biomass_cast), sep = "") # rename first column colnames(biomass_cast) [1] <- "taxonomy" # convert NAs to 0s biomass_cast[is.na(biomass_cast)] <- 0 # export as a tab delimited text file # manually edit header and add 'taxonomy' column for conversion to .biom format write.table(biomass_cast, file = "biomass_cast.txt", sep="\t") # convert to biom format, add sample metadata, convert to .json (this step in QIIME, not R) # biom convert -i biomass_cast.txt -o biomass_cast.biom --to-hdf5 --table-type="OTU table" --process-obs-metadata taxonomy # biom add-metadata -i biomass_cast.biom -o biomass_smd.biom --sample-metadata-fp biomass_map.txt # biom convert -i biomass_smd.biom -o biomass_smd_json.biom --table-type="OTU table" --to-json # import biomass data from json formatted .biom file biomass_smd = import_biom("biomass_smd_json.biom", parseFunction = parse_taxonomy_default, parallel = TRUE) # remove low biomass sample 23 biomass_smd <- prune_samples(c("net1", "net3", "net5", "net7", "net8", "net9", "net10", "net12", "net14", "net17", "net28", "net35", "net36"), biomass_smd) # summary biomass_smd sample_data(biomass_smd) sample_names(biomass_smd) tax_table(biomass_smd)[ , 1:1] otu_table(biomass_smd)[ , 1:10] ############################################################ ## biomass calculations # summary biomass_smd sample_data(biomass_smd) sample_names(biomass_smd) tax_table(biomass_smd) otu_table(biomass_smd)[ , 1:10] taxa_names(biomass_smd) ## mean biomass between sampling periods # example: copepods biomass_copepods <- subset_taxa(biomass_smd, Rank1 %in% c("ACARTIA HUDSONICA", "ACARTIA LONGIREMIS", "ACARTIA TONSA", "CALANUS MARSHALLAE", "CALANUS PACIFICUS", "CALOCALANUS PAVO", "CALOCALANUS STYLIREMIS", "CALOCALANUS TENUIS", "CLAUSOCALANUS", "CLAUSOCALANUS ARCUICORNIS", "CLAUSOCALANUS FURCATUS", "CLAUSOCALANUS PERGENS", "COPEPODA", "CORYCAEUS ANGLICUS", "CTENOCALANUS VANUS", "EPILABIDOCERA AMPHITRITES", "EUCALANUS", "METRIDIA", "MICROCALANUS PUSILLUS", "OITHONA SIMILIS", "OITHONA SPINIROSTRIS", "ONCAEA", "PARACALANUS (Unknown)", "PARACALANUS PARVUS", "PSEUDOCALANUS", "RHINCALANUS NASUTUS", "TORTANUS DISCAUDATUS")) # convert to dataframe df <- psmelt(biomass_copepods) # order df by column df <- df[order(df[,"Date"], df[,"Station"], df[,"Rank1"]),] # remove unneded columns df_e <- df[, c(2,3,8,11)] # station not included # order df by column df_e <- df_e[order(df_e[,"Date"], df_e[,"Sample"]),] # sum taxa by 'Sample' and 'Date' df_sum <- ddply(df_e, c("Sample", "Date"), numcolwise(sum, na.rm = TRUE)) # station not included # order df by column df_sum <- df_sum[order(df_sum[,"Date"], df_sum[,"Sample"]),] # renumber rows row.names(df_sum) <- 1:nrow(df_sum) # average ave(df_sum[c(1:10), c("Abundance")]) # August copepods 25.88 ave(df_sum[c(11:13), c("Abundance")]) # October copepods 21.56117 ## total biomass at each station within each sampling period (month) # example: euphausiids # euphausia biomass_euphausia <- subset_taxa(biomass_smd, Rank1 %in% c("EUPHAUSIA PACIFICA")) # thysanoessa biomass_thysanoessa <- subset_taxa(biomass_smd, Rank1 %in% c("THYSANOESSA SPINIFERA")) # convert to data frame df <- psmelt(biomass_euphausia) df <- psmelt(biomass_thysanoessa) # order df by column df <- df[order(df[,"Date"], df[,"Station"], df[,"Rank1"]),] # remove unneded columns df_e <- df[, c(2,3,5,8,11)] # station included # renumber rows row.names(df_e) <- 1:nrow(df_e) # sum df_station <- ddply(df_e, c("Date", "Station"), numcolwise(sum, na.rm = TRUE)) ############################################################ ## import HTS data # example: COI data # convert to QIIME summary files to .biom format, add sample metadata, convert to .json (this step in QIIME, not R) # biom convert -i otu_table_COI_L6.txt -o COI_L6.biom --to-hdf5 --table-type="OTU table" --process-obs-metadata taxonomy # biom add-metadata -i COI_L6.biom -o COI_L6_smd.biom --sample-metadata-fp COI_map.txt # biom convert -i COI_L6_smd.biom -o COI_L6_smd_json.biom --table-type="OTU table" --to-json # import data into phyloseq netsCOI = import_biom("/Users/qiime/otu_table_COI_summary/COI_L6_smd_json.biom", parseFunction = parse_taxonomy_default, parallel = TRUE) # export tax table to isolate zooplankton results and double check taxonomic assignments tax_table_COI <- tax_table(netsCOI) write.csv(tax_table_COI, file = "tax_table_COI.csv") # isolate zooplankton taxa # check taxonomic assignments with independant BLAST searches # annotate original QIIME summary L6.txt file as needed # convert annotated QIIME summary to .biom format, add metadata, convert to json, reimport into phyloseq ############################################################ ## plot mean richness (Figures 4 and 6) # example: COI data # renew original phyloseq data object netsCOI_sp <- netsCOI # change theme to bw theme_set(theme_bw()) # rename samples for sorting purposes sample_names(netsCOI_sp) sample_data(netsCOI_sp) sample_names(netsCOI_sp) <- c("simz_001", "simz_006", "simz_018", "simz_012", "simz_010", "simz_005", "simz_009", "simz_016", "simz_013", "simz_017", "simz_004", "simz_002", "simz_014", "simz_008", "simz_003", "simz_011", "simz_007", "simz_015", "simz_019", "simz_020", "simz_021", "simz_032", "simz_034", "simz_028", "simz_030", "simz_024", "simz_036", "simz_035", "simz_029", "simz_033", "simz_025", "simz_022", "simz_026", "simz_027", "simz_031", "simz_023") # remove sample SIMZ15 (too few sequences), and SIMZ35, canyon site (only sampled once), and 22, 31 (sampled only in Oct) # - this set for comparing diversity and NMDS between August and October (so only 4 stations are included) netsCOI_sp <- prune_samples(c("simz_001", "simz_006", "simz_018", "simz_012", "simz_010", "simz_005", "simz_009", "simz_016", "simz_013", "simz_017", "simz_004", "simz_002", "simz_014", "simz_008", "simz_003", "simz_011", "simz_007", "simz_019", "simz_020", "simz_021", "simz_032", "simz_034", "simz_028", "simz_030", "simz_024", "simz_036", "simz_029", "simz_033", "simz_025", "simz_026", "simz_027", "simz_023"), netsCOI_sp) # estimate_richness/plot_richness 'Observed' only works with counts, not decimals, so have to convert data to presence/absence first # convert data to 1s and 0s for presence/absence netsCOI_pabs <- transform_sample_counts(netsCOI_sp, function(x) ifelse(x>0,1,0)) sample_data(netsCOI_pabs) otu_table(netsCOI_pabs)[1:10, 1:10] # isolate taxa for each plot # all taxa data_COI = subset_taxa(netsCOI_pabs, Rank5 %in% c("Balanidae", "Balanus", "Chthamalus", "Pollicipes", "Tetraclita", "Vesicomya", "Membranipora", "Evadne", "Penilia", "Lensia", "Sphaeronectes", "Acartia", "Calanus", "Calocalanus", "Candacia", "Clausocalanus", "Cosmocalanus", "Ctenocalanus", "Eucalanus", "Metridia", "Oncaea", "Paracalanus", "Pseudocalanus", "Rhincalanus", "Temora", "Cancer", "Emerita", "Lophopanopeus", "Mimulus", "Pinnixa", "Pugettia", "Amphiodia", "Amphipholis", "Dendraster", "Luidia", "Ophiopholis", "Ophiopteris", "Pisaster", "Rathbunaster", "Euphausia", "Nematoscelis", "Thysanoessa", "Citharichthys", "Genyonemus", "Paralichthys", "Psettichthys", "Acanthodoris", "Aeolidia", "Hermissenda", "Melibe", "Onchidoris", "Triopha", "Tritonia", "Discoconchoecia", "Aglaja", "Amphissa", "Corolla", "Crepipatella", "Lacuna", "Mitrella", "Nassarius", "Olivella", "Rictaxis", "Bipalponephtys", "Halosydna", "Harmothoe", "Lepidasthenia", "Phascolosoma", "Pholoides", "Phragmatopoma", "Urechis")) # copepods data_COI = subset_taxa(netsCOI_pabs, Rank5 %in% c("Acartia", "Calanus", "Calocalanus", "Clausocalanus", "Cosmocalanus", "Ctenocalanus", "Eucalanus", "Metridia", "Oncaea", "Paracalanus", "Psuedocalanus", "Rhincalanus", "Temora")) # echinoderms data_COI = subset_taxa(netsCOI_pabs, Rank5 %in% c("Amphiodia", "Amphipholis", "Luidia", "Ophiopholis", "Ophiopteris", "Pisaster", "Rathbunaster", "Dendraster")) # barnacles data_COI = subset_taxa(netsCOI_pabs, Rank5 %in% c("Pollicipes", "Balanus", "Balanidae", "Chthamalus", "Tetraclita")) # polychaetes data_COI = subset_taxa(netsCOI_pabs, Rank5 %in% c("Phascolosoma", "Bipalponephtys", "Halosydna", "Harmothoe", "Lepidasthenia", "Pholoides", "Phragmatopoma", "Urechis")) # gastropods data_COI = subset_taxa(netsCOI_pabs, Rank5 %in% c("Aeolidia", "Hermissenda", "Acanthodoris", "Melibe", "Tritonia", "Triopha", "Onchidoris", "Crepipatella", "Aglaja", "Rictaxis", "Lacuna", "Amphissa", "Mitrella", "Nassarius", "Olivella", "Corolla")) # crabs data_COI = subset_taxa(netsCOI_pabs, Rank5 %in% c("Cancer", "Lophopanopeus", "Mimulus", "Pinnixa", "Pugettia", "Emerita")) sample_data(data_COI) otu_table(data_COI) taxa_names(data_COI) # export data to average richness by station data_m <- psmelt(data_COI) # extract desired columns data_e <- data_m[, c(2,3,5,8,15,16)] # order by data_e <- data_e[order(data_e[,"Date"], data_e[,"Station"]),] # sum by sample data_sum <- ddply(data_e, c("Sample", "Station", "Date"), numcolwise(sum, na.rm = TRUE)) # order by data_sum <- data_sum[order(data_sum[,"Date"], data_sum[,"Station"]),] # average by station within each month data_ave <- ddply(data_sum, c("Station", "Date"), numcolwise(ave, na.rm = TRUE)) # remove duplicate rows data_uniq <- unique(data_ave) # order by data_uniq <- data_uniq[order(data_uniq[,"Date"], data_uniq[,"Station"]),] # boxplot ggplot(data_uniq, aes(x=Date, y=Abundance, color=NULL), alpha=0.1) + geom_boxplot() + geom_point(stat="identity") + #scale_y_continuous(limits = c(0, 8)) + theme(aspect.ratio=5/3) + xlab("month") + ylab("sampling station mean richness") + # ggtitle("COI all taxa") # ggtitle("COI copepods") # ggtitle("COI echinoderms") # ggtitle("COI barnacles") # ggtitle("COI polychaetes") # ggtitle("COI gastropods") # ggtitle("COI crabs") ggtitle("COI") # Shapiro-Wilk normality shapiro.test(data_uniq[c(1:4), 3]) shapiro.test(data_uniq[c(5:8), 3]) # t-test(Student's, two sample text) t.test(Abundance ~ Date, data_uniq, var.equal=TRUE) ############################################################ ## bar plots, variation between sampling periods (Figures 5 and 7) # plot mean percent sequence abundances # example: copepod COI data # renew original phyloseq data object netsCOI_sp <- netsCOI # rename samples sample_names(netsCOI_sp) <- c("simz_001", "simz_006", "simz_018", "simz_012", "simz_010", "simz_005", "simz_009", "simz_016", "simz_013", "simz_017", "simz_004", "simz_002", "simz_014", "simz_008", "simz_003", "simz_011", "simz_007", "simz_015", "simz_019", "simz_020", "simz_021", "simz_032", "simz_034", "simz_028", "simz_030", "simz_024", "simz_036", "simz_035", "simz_029", "simz_033", "simz_025", "simz_022", "simz_026", "simz_027", "simz_031", "simz_023") # remove sample 15 (too few sequences) netsCOI_sp <- prune_samples(c("simz_001", "simz_002", "simz_003", "simz_004", "simz_005", "simz_006", "simz_007", "simz_008", "simz_009", "simz_010", "simz_011", "simz_012", "simz_013", "simz_014", "simz_016", "simz_017", "simz_018", "simz_019", "simz_020", "simz_021", "simz_022", "simz_023", "simz_024", "simz_025", "simz_026", "simz_027", "simz_028", "simz_029", "simz_030", "simz_031", "simz_032", "simz_033", "simz_034", "simz_035", "simz_036"), netsCOI_sp) # isolate and plot taxa (example: copepods) # copepods netsCOI_sp_copepods = subset_taxa(netsCOI_sp, Rank5 %in% c("Acartia", "Calanus", "Calocalanus", "Clausocalanus", "Cosmocalanus", "Ctenocalanus", "Eucalanus", "Metridia", "Oncaea", "Paracalanus", "Psuedocalanus", "Rhincalanus", "Temora")) # create error bars for bar plot # substitute in each data set in turn, converting from phyloseq object to data frame df_error <- psmelt(netsCOI_sp_copepods) # clean up df_error <- df_error[, c(2,3,5,8,15)] # summarize standard deviation, standard error and 95% confidence interval # this takes the mean 'Abundance' for each genus df_error_se <- summarySE(df_error, measurevar="Abundance", groupvars=c("Date","Rank5")) # assign minmum values to all values that are too small to see on the plot # this facilitates presence/absence visualization for smallest values # min=0.001 df_error_se$Abundance[df_error_se$Abundance > 0 & df_error_se$Abundance < 0.001] <- 0.001 # plot ggplot(df_error_se, aes(x=Rank5, y=Abundance)) + geom_bar(stat="identity", position="stack", color="black", width=0.8) + facet_grid(Date~.) + ylab("mean % COI sequence abundance") + theme(axis.text.x = element_text(angle=-90, hjust=0, vjust=0.4)) + ggtitle("COI copepods") + geom_errorbar(data=df_error_se, aes(y=Abundance, ymin=Abundance-se, ymax=Abundance+se), width=0.5) + scale_y_continuous(limits = c(0, 0.085), expand = c(0, 0)) + theme(aspect.ratio=4/3) ############################################################ ## NMDS and ANOSIM (Figure 8) # only use data from the four stations that were sampled during both months (see above) # examples: copepod COI data # copepods netsCOI_sp_copepods = subset_taxa(netsCOI_sp, Rank5 %in% c("Acartia", "Calanus", "Calocalanus", "Clausocalanus", "Cosmocalanus", "Ctenocalanus", "Eucalanus", "Metridia", "Oncaea", "Paracalanus", "Psuedocalanus", "Rhincalanus", "Temora")) # check otu_table(netsCOI_sp_copepods) sample_data(netsCOI_sp_copepods) tax_table(netsCOI_sp_copepods) sample_names(netsCOI_sp_copepods) taxa_names(netsCOI_sp_copepods) nsamples(netsCOI_sp_copepods) ntaxa(netsCOI_sp_copepods) # arcsin square root transform percent sequence abundance data data_arc = transform_sample_counts(netsCOI_sp_copepods, function(x) asin(sqrt(x/100)) ) # remove columns (samples) with all 0 values data_arc = prune_samples(sample_sums(data_arc)>0, data_arc) # Calculate distance matrix iDist <- distance(data_arc, method="bray", type="samples") # Calculate ordination iNMDS <- ordinate(data_arc, "NMDS", distance=iDist) # plot plot_ordination(data_arc, iNMDS, color="Date", shape="Station") + ggtitle(paste("NMDS using distance method bray - copepods")) + scale_colour_manual(values = c("grey", "black")) + scale_shape_discrete(solid=TRUE) + geom_point(size = 4) + scale_y_continuous(limits = c(-0.65, 0.65)) + scale_x_continuous(limits = c(-0.65, 0.65)) # examine stress stressplot(iNMDS) iNMDS # calculate ANOSIM # grouping by date date_group = get_variable(data_arc, "Date") date_ano = anosim(distance(data_arc, "bray"), date_group) date_ano$signif date_ano$statistic summary(date_ano) plot(date_ano) # grouping by Station station_group = get_variable(data_arc, "Station") station_ano = anosim(distance(data_arc, "bray"), station_group) summary(station_ano) plot(station_ano) # grouping by Station for August only station_aug = subset_samples(data_arc, !Date == "October") station_group = get_variable(station_aug, "Station") station_ano = anosim(distance(station_aug, "bray"), station_group) summary(station_ano) plot(station_ano) # check sample_data(station_aug) sample_names(station_aug) ############################################################ ## bar plot, variation among stations and between sampling periods (Figure 9) # renew original phyloseq data object netsCOI_sp <- netsCOI # rename samples sample_names(netsCOI_sp) <- c("simz_001", "simz_006", "simz_018", "simz_012", "simz_010", "simz_005", "simz_009", "simz_016", "simz_013", "simz_017", "simz_004", "simz_002", "simz_014", "simz_008", "simz_003", "simz_011", "simz_007", "simz_015", "simz_019", "simz_020", "simz_021", "simz_032", "simz_034", "simz_028", "simz_030", "simz_024", "simz_036", "simz_035", "simz_029", "simz_033", "simz_025", "simz_022", "simz_026", "simz_027", "simz_031", "simz_023") # remove sample sample 15 (too few sequences) netsCOI_sp <- prune_samples(c("simz_001", "simz_002", "simz_003", "simz_004", "simz_005", "simz_006", "simz_007", "simz_008", "simz_009", "simz_010", "simz_011", "simz_012", "simz_013", "simz_014", "simz_016", "simz_017", "simz_018", "simz_019", "simz_020", "simz_021", "simz_022", "simz_023", "simz_024", "simz_025", "simz_026", "simz_027", "simz_028", "simz_029", "simz_030", "simz_031", "simz_032", "simz_033", "simz_034", "simz_035", "simz_036"), netsCOI_sp) # reorder 'Station' for the whole zooplankton genera data set sample_data(netsCOI_sp)$Station = factor(sample_data(netsCOI_sp)$Station, levels = c("offshore","midpoint","nearshore","canyon","south","LaSelva")) # isolate taxa for distributions plot # echinoderms netsCOI_sp_echinoderms = subset_taxa(netsCOI_sp, Rank5 %in% c("Amphiodia", "Amphipholis", "Luidia", "Ophiopholis", "Ophiopteris", "Pisaster", "Rathbunaster", "Dendraster")) # polychaetes netsCOI_sp_polychaetes = subset_taxa(netsCOI_sp, Rank5 %in% c("Phascolosoma", "Bipalponephtys", "Halosydna", "Harmothoe", "Lepidasthenia", "Pholoides", "Phragmatopoma", "Urechis")) # crabs netsCOI_sp_crabs = subset_taxa(netsCOI_sp, Rank5 %in% c("Cancer", "Lophopanopeus", "Mimulus", "Pinnixa", "Pugettia", "Emerita")) # scale y-axis values to enable plotting all taxa at a similar scale netsCOI_sp_2crabs_scaled = transform_sample_counts((subset_taxa(netsCOI_sp, Rank5 %in% c("Cancer", "Lophopanopeus"))), function(x) x/2) # generate object with other taxa netsCOI_sp_crabs_other = subset_taxa(netsCOI_sp, Rank5 %in% c("Mimulus", "Pinnixa", "Pugettia", "Emerita")) # merge netsCOI_sp_crabs_scaled <- merge_phyloseq(netsCOI_sp_2crabs_scaled, netsCOI_sp_crabs_other) # fish netsCOI_sp_fish = subset_taxa(netsCOI_sp, Rank5 %in% c("Citharichthys", "Genyonemus", "Paralichthys", "Psettichthys")) # euphausiids netsCOI_sp_euphausiids = subset_taxa(netsCOI_sp, Rank5 %in% c("Euphausia", "Thysanoessa")) # scale y-axis values for Thysanoessa netsCOI_sp_thysan_100 = transform_sample_counts((subset_taxa(netsCOI_sp_euphausiids, Rank5 %in% c("Thysanoessa"))), function(x) x*100) # scale y-axis values for Euphausia netsCOI_sp_euphausia_4 = transform_sample_counts((subset_taxa(netsCOI_sp, Rank5 %in% c("Euphausia"))), function(x) x/4) # merge with Euphausia data netsCOI_sp_euphausiids_scaled = merge_phyloseq(netsCOI_sp_thysan_100, netsCOI_sp_euphausia_4) # check taxa_names, merge all otus under the first taxon name in the list of names to be merged taxa_names(netsCOI_sp_echinoderms) netsCOI_dist_echinoderms <- merge_taxa(netsCOI_sp_echinoderms, taxa_names(netsCOI_sp_echinoderms)[c(1:3, 6:10, 12)], 1) netsCOI_dist_polychaetes <- merge_taxa(netsCOI_sp_polychaetes, taxa_names(netsCOI_sp_polychaetes)[c(1:5, 7, 8)], 1) netsCOI_dist_crabs <- merge_taxa(netsCOI_sp_crabs_scaled, taxa_names(netsCOI_sp_crabs_scaled)[c(3:7)], 1) netsCOI_dist_fish <- merge_taxa(netsCOI_sp_fish, taxa_names(netsCOI_sp_fish)[c(1:4)], 1) # merge phyloseq objects and check netsCOI_dist <- merge_phyloseq(netsCOI_dist_echinoderms, netsCOI_dist_polychaetes, netsCOI_dist_crabs, netsCOI_dist_fish, netsCOI_sp_euphausiids_scaled) taxa_names(netsCOI_dist) sample_data(netsCOI_dist) otu_table(netsCOI_dist) tax_table(netsCOI_dist) # rename merged taxa to govern x-axis plotting sort order tax_table(netsCOI_dist)[1, 5] <- c("d_echinoderms_other") tax_table(netsCOI_dist)[2, 5] <- c("a_Dendraster") tax_table(netsCOI_dist)[3, 5] <- c("c_Amphiodia") tax_table(netsCOI_dist)[4, 5] <- c("b_Ophiopteris") tax_table(netsCOI_dist)[5, 5] <- c("f_polychaetes_other") tax_table(netsCOI_dist)[6, 5] <- c("e_Phragmatopoma") tax_table(netsCOI_dist)[7, 5] <- c("g_Cancer") tax_table(netsCOI_dist)[8, 5] <- c("g_Cancer") tax_table(netsCOI_dist)[9, 5] <- c("h_crabs_other") tax_table(netsCOI_dist)[10, 5] <- c("j_flatfish") tax_table(netsCOI_dist)[11, 5] <- c("i_Genyonemus") tax_table(netsCOI_dist)[12, 5] <- c("l_Thysanoessa") tax_table(netsCOI_dist)[13, 5] <- c("k_Euphausia") # generate error bar values for netsCOI_dist df_error <- psmelt(netsCOI_dist) # clean up # df_error <- df_error[, c(2,3,5,8,15)] # summarize standard deviation, standard error and 95% confidence interval # this takes the mean 'Abundance' for each genus at each station for each date df_error_se <- summarySE(df_error, measurevar="Abundance", groupvars=c("Station","Date","Rank5")) # assign minmum values to values that are too small to see on the plot # min=0.005 df_error_se$Abundance[df_error_se$Abundance > 0 & df_error_se$Abundance < 0.005] <- 0.005 ggplot(df_error_se, aes(x=Rank5, y=Abundance)) + geom_bar(stat="identity", position="stack", color="black", width=0.8) + facet_grid(Date~Station) + ylab("mean % COI sequence abundance") + theme(axis.text.x = element_text(angle=-90, hjust=0, vjust=0.4)) + ggtitle("COI distributions") + geom_errorbar(data=df_error_se, aes(y=Abundance, ymin=Abundance-se, ymax=Abundance+se), width=0.5) + scale_y_continuous(limits = c(0, 0.31), expand = c(0, 0)) + theme(aspect.ratio=2/1) ############################################################