####boxplot of captive vs free living keto cardinals### library(ggplot2) library(dplyr) library(tidyr) library(patchwork) install.packages("rstatix") library(rstatix) install.packages("effectsize") # only run if not already installed library(effectsize) datumP=read.csv(file.choose()) #NOCA ZACH+AST head(datumP) # Keep other datasets as they are (no changes for unknown carotenoids) filtered_dataN <- datumP %>% filter(Sample == "Follicle", Age == "AHY", Sex == "Male", Color == "Non-Keto", Captivity.status != "Before", Concentration > 0) head(filtered_dataN) filtered_dataK <- datumP %>% filter(Sample == "Follicle", Age == "AHY", Sex == "Male", Color == "Keto", Captivity.status != "Before", Concentration > 0) filtered_dataKC <- datumP %>% filter(Sample == "Follicle", Age == "AHY", Sex == "Male", Color == "Keto", Captivity.status != "Before", Carotenoids != "Canthaxanthin", Concentration > 0) filtered_dataYC <- datumP %>% filter(Sample == "Follicle", Age == "AHY", Sex == "Male", Color == "Non-Keto", Captivity.status != "Before", Carotenoids %in% c("Canary Xanthophyll A", "Canary Xanthophyll B"), Concentration > 0) filtered_dataYD <- datumP %>% filter(Sample == "Follicle", Age == "AHY", Sex == "Male", Color == "Non-Keto", Captivity.status != "Before", Carotenoids %in% c("Lutein", "Zeaxanthin"), Concentration > 0) head(filtered_dataYD) # Summarize function to calculate total concentration by ID and Captivity.status summarize_data <- function(df) { df %>% filter(Captivity.status %in% c("Wild", "After")) %>% group_by(ID, Captivity.status) %>% summarise(total_concentration = sum(Concentration, na.rm = TRUE), .groups = "drop") %>% mutate(Captivity.status = recode(Captivity.status, "After" = "Captive", "Wild" = "Free-living")) } # Summarize all datasets dataK <- summarize_data(filtered_dataK) dataKC <- summarize_data(filtered_dataKC) dataN <- summarize_data(filtered_dataN) dataYC <- summarize_data(filtered_dataYC) dataYD <- summarize_data(filtered_dataYD) # Create a small dataframe for M04 and M10 add_rows <- datumF %>% filter(ID %in% c("LL"), Sample == "Plasma", Age == "AHY", Sex == "Male", Captivity.status != "Before") %>% distinct(ID, Captivity.status) %>% mutate( total_concentration = 0, Captivity.status = recode(Captivity.status, "After" = "Captive", "Wild" = "Free-living") ) # Combine with dataYC dataYC <- bind_rows(dataYC, add_rows) # Wilcoxon p-value function get_p_label <- function(df) { p <- wilcox.test(total_concentration ~ Captivity.status, data = df)$p.value if (p <= 0.001) return("***") if (p <= 0.01) return("**") if (p <= 0.05) return("*") return("ns") } head(dataKC) # Assign significance labels sigYC <- get_p_label(dataYC) sigYD <- get_p_label(dataYD) sigN <- get_p_label(dataN) sigKC <- get_p_label(dataKC) sigK <- get_p_label(dataK) # Define color schemes keto_colors <- c("Free-living" = "red3", "Captive" = "firebrick4") nonketo_colors <- c("Free-living" = "gold", "Captive" = "khaki") # Plot function make_plot <- function(data, title, y_label, ylims, fill_colors, show_legend = FALSE, sig_label = NULL) { p <- ggplot(data, aes(x = Captivity.status, y = total_concentration, fill = Captivity.status)) + geom_boxplot(width = 0.2, outlier.shape = NA, alpha = 0.7) + geom_jitter(shape = 21, width = 0.1, size = 2.5, color = "black", stroke = 0.2) + labs(title = title, x = NULL, y = y_label) + scale_fill_manual(values = fill_colors) + scale_y_continuous(limits = ylims) + theme_minimal(base_size = 14) + theme( panel.grid.major.y = element_line(color = "gray85"), panel.grid.minor.y = element_blank(), panel.grid.major.x = element_blank(), axis.line = element_line(color = "black"), axis.ticks = element_line(color = "black"), axis.title.y = element_text(size = 14, face = "bold"), axis.text = element_text(size = 12), plot.title = element_text(hjust = 0.5, size = 13), legend.position = if (show_legend) "right" else "none" ) if (!is.null(sig_label)) { p <- p + annotate("text", x = 1.5, y = ylims[2] * 0.95, label = sig_label, size = 5, fontface = "bold") } return(p) } # Y-axis limits keto_ylim <- c(-.01, 210.05) nonketo_ylim <- c(-0.01, 50.15) # Build plots plotK <- make_plot(dataK, "Total Ketocarotenoids", "Ketocarotenoid (µg/g)", keto_ylim, keto_colors, show_legend = TRUE, sig_label = sigK) plotKC <- make_plot(dataKC, "Ketocarotenoids without Canthaxanthin", "", keto_ylim, keto_colors,sig_label = sigKC) plotN <- make_plot(dataN, "Total Non-Ketocarotenoids", "Non-Ketocarotenoid (µg/g)", nonketo_ylim, nonketo_colors, show_legend = TRUE,sig_label = sigN) plotYC <- make_plot(dataYC, "Canary Xanthophylls", "", caxa_ylim, nonketo_colors, sig_label = sigYC) plotYD <- make_plot(dataYD, "Lutein and Zeaxanthin", "", nonketo_ylim, nonketo_colors, sig_label = sigYD) # Final patchwork layout final_plotP <- ( (plotK | plotKC) / (plotN | plotYD | plotYC) ) + plot_annotation( tag_levels = 'A', theme = theme(plot.title = element_text(hjust = 0.5, size = 16), plot.caption = element_text(hjust = 0.5, size = 14, face = "bold") ), caption = "Treatment" )& theme(legend.position = "none") print(final_plotP) #####STATS TIME#### # Total Ketocarotenoids # Example for Total Ketocarotenoids cat("Wilcoxon test for Total Ketocarotenoids:\n") resK <- dataK %>% wilcox_test(total_concentration ~ Captivity.status) %>% add_significance() print(resK) # Compute effect size (rank biserial correlation) effK <- rank_biserial(dataK$total_concentration, dataK$Captivity.status) print(effK) #KETO Without canthax cat("Wilcoxon test for Ketocarotenoids without canthax:\n") resKC <- dataKC %>% wilcox_test(total_concentration ~ Captivity.status) %>% add_significance() print(resKC) # Get median values by Captivity.status for dataKC dataKC %>% group_by(Captivity.status) %>% summarise(median_concentration = median(total_concentration, na.rm = TRUE), n = n()) # Compute effect size (rank biserial correlation) effKC <- rank_biserial(dataKC$total_concentration, dataKC$Captivity.status) print(effKC) #non keto## #Total nonKETO Without cat("Wilcoxon test for total non keto:\n") resN <- dataN %>% wilcox_test(total_concentration ~ Captivity.status) %>% add_significance() print(resN) # Compute effect size (rank biserial correlation) effN <- rank_biserial(dataN$total_concentration, dataN$Captivity.status) print(effN) #Total nonKETO Without cat("Wilcoxon test for total non keto:\n") resYC <- dataYC %>% wilcox_test(total_concentration ~ Captivity.status) %>% add_significance() print(resYC) head(dataYC) # Compute effect size (rank biserial correlation) effYC <- rank_biserial(dataYC$total_concentration, dataYC$Captivity.status) print(effYC) cat("Wilcoxon test for dietary:\n") resYD <- dataYD %>% wilcox_test(total_concentration ~ Captivity.status) %>% add_significance() print(resYD) head(dataYD) # Compute effect size (rank biserial correlation) effYD <- rank_biserial(dataYD$total_concentration, dataYD$Captivity.status) print(effYD)