######################################
#
# BUSCO summary figure
# @version 4.0.0
# @since BUSCO 2.0.0
# 
# Copyright (c) 2016-2023, Evgeny Zdobnov (ez@ezlab.org)
# Licensed under the MIT license. See LICENSE.md file.
#
######################################

# Load the required libraries
library(ggplot2)
library(grid)

# !!! CONFIGURE YOUR PLOT HERE !!! 
# Output
# Destination of the PDF. file.path() joins the components with a single
# separator (the previous paste(..., sep = "/") produced ".//busco_figure.pdf").
my_output <- file.path(".", "busco_figure.pdf")
# Size of the saved figure, expressed in `my_unit`.
my_width <- 6.4
my_height <- 4.8
my_unit <- "in"

# Colors
# One fill colour per category, in the legend order S, D, F, M.
my_colors <- c("#56B4E9", "#3492C7", "#F0E442", "#F04442")
# Bar height ratio
# Passed as the bar `width`; the axes are flipped, so it sets the bar thickness.
my_bar_height <- 0.75

# Legend
# Title printed above the plot.
my_title <- "BUSCO v5.5.0 Assessment Results"

# Font
# Font family used for every text element of the figure.
my_family <- "sans"
# Global multiplier applied to all text sizes and to the axis line width.
my_size_ratio <- 1

# !!! SEE YOUR DATA HERE !!! 
# Input data as emitted by the Python side; remove or add species as needed.
# Every species contributes four consecutive entries to each vector, which the
# plotting code reads in the order S, D, F, M.
#
# To reorder the bars, change the order of the names given to `levels`. The
# list is reversed because the species axis is flipped by the plot.
my_species <- factor(
  rep(
    c("otoArg", "troMau", "rhaChi2", "copChr",
      "astCal", "rhaChi", "aulStu", "mayZeb"),
    each = 4
  ),
  levels = rev(c("astCal", "mayZeb", "troMau", "aulStu",
                 "otoArg", "copChr", "rhaChi", "rhaChi2"))
)

# Percentages per species, one row per species (S, D, F, M).
my_percentage <- c(
  92.4, 1.2, 1.9, 4.5,
  96.8, 1.6, 0.5, 1.1,
  92.7, 1.2, 1.3, 4.8,
  91.2, 1.3, 2.2, 5.3,
  95.6, 1.6, 0.5, 2.3,
  96.8, 1.6, 0.5, 1.1,
  97.2, 1.1, 0.5, 1.2,
  95.3, 3.1, 0.5, 1.1
)

# Absolute BUSCO counts per species, same layout as `my_percentage`.
my_values <- c(
  3364, 42, 70, 164,
  3523, 57, 20, 40,
  3375, 45, 49, 171,
  3319, 47, 79, 195,
  3481, 58, 18, 83,
  3524, 57, 19, 40,
  3539, 40, 19, 42,
  3468, 112, 17, 43
)

######################################
######################################
######################################
# Code to produce the graph
# Scale factor for the per-bar text labels: shrink them when more than ten
# species have to share the figure.
labsize <- if (nlevels(my_species) > 10) 0.66 else 1

print("Plotting the figure ...")

# BUSCO categories in display order. Only one S/D/F/M cycle is stored here;
# data.frame() recycles it across the rows of every species.
category <- factor(c("S", "D", "F", "M"), levels = c("S", "D", "F", "M"))

# Plot data: one row per (species, category) pair.
df <- data.frame(my_species, my_percentage, my_values, category)

# Assemble the stacked horizontal bar chart: one bar per species, split into
# the four BUSCO categories and filled with `my_colors`.
#
# Two settings that were silently overridden further down the chain have been
# dropped (an earlier `axis.ticks.length` of 0.85 cm and an earlier
# `guides(fill = ...)` call); the values kept here are the ones that took
# effect, so the rendered figure is unchanged.
figure <- ggplot() +
  geom_bar(
    aes(y = my_percentage, x = my_species, fill = category),
    data = df,
    stat = "identity",
    position = position_stack(reverse = TRUE),
    width = my_bar_height
  ) +
  # Species on the vertical axis, percentages on the horizontal one.
  coord_flip() +
  theme_gray(base_size = 8) +
  scale_y_continuous(
    breaks = c(0, 20, 40, 60, 80, 100),
    labels = c("0", "20", "40", "60", "80", "100")
  ) +
  scale_fill_manual(
    values = my_colors,
    labels = c(
      " Complete (C) and single-copy (S)  ",
      " Complete (C) and duplicated (D)",
      " Fragmented (F)  ",
      " Missing (M)"
    )
  ) +
  ggtitle(my_title) +
  xlab("") +
  ylab("% BUSCOs") +
  # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line elements in
  # favour of `linewidth`; `size` is kept so the script still runs on older
  # ggplot2 -- confirm the minimum supported version before switching.
  theme(
    plot.title = element_text(
      family = my_family, hjust = 0.5, colour = "black",
      size = rel(2.2) * my_size_ratio, face = "bold"
    ),
    legend.position = "top",
    legend.title = element_blank(),
    legend.text = element_text(
      family = my_family, size = rel(1.2) * my_size_ratio
    ),
    panel.background = element_rect(color = "#FFFFFF", fill = "white"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    axis.text.y = element_text(
      family = my_family, colour = "black", size = rel(1.66) * my_size_ratio
    ),
    axis.text.x = element_text(
      family = my_family, colour = "black", size = rel(1.66) * my_size_ratio
    ),
    axis.line = element_line(size = 1 * my_size_ratio, colour = "black"),
    # No visible ticks on the species axis.
    axis.ticks.y = element_line(colour = "white", size = 0),
    axis.ticks.x = element_line(colour = "#222222"),
    axis.ticks.length = unit(0.4, "cm"),
    axis.title.x = element_text(
      family = my_family, size = rel(1.2) * my_size_ratio
    )
  ) +
  # Lay the four legend keys out on two rows, filled row by row.
  guides(fill = guide_legend(nrow = 2, byrow = TRUE))
  
  # Write a summary label ("C:.. [S:.., D:..], F:.., M:.., n:..") on each bar.
  # Bars sit at integer positions 1..k along the species axis, in factor-level
  # order, so the level index doubles as the x coordinate of the label.
  species_levels <- levels(my_species)
  for (i in rev(seq_along(species_levels))) {
    # Entries belonging to this species; they are expected in S, D, F, M order.
    in_species <- my_species == species_levels[i]
    detailed_values <- my_percentage[in_species]
    total_buscos <- sum(my_values[in_species])
    figure <- figure +
      annotate(
        "text",
        label = paste0(
          "C:", detailed_values[1] + detailed_values[2],
          " [S:", detailed_values[1],
          ", D:", detailed_values[2],
          "], F:", detailed_values[3],
          ", M:", detailed_values[4],
          ", n:", total_buscos
        ),
        y = 3, x = i,
        size = labsize * 4 * my_size_ratio,
        colour = "black", hjust = 0, family = my_family
      )
  }

# Write the figure to disk. Arguments are spelled out in full: the previous
# call relied on partial matching (`file` -> `filename`, `unit` -> `units`) and
# on the plot falling into the `plot` slot positionally.
ggsave(
  filename = my_output,
  plot = figure,
  width = my_width,
  height = my_height,
  units = my_unit
)
