#***********************************************************************************************
# Purpose = Test the IES-richness of the identified paralog families TPR and GFR
# Plot = box plots with pairwise comparisons against the random set and a global Kruskal-Wallis test
#***********************************************************************************************
library('ggplot2')
library('devtools')
library('easyGgplot2')
library('ggpubr')
library("extrafont")
#font_import()
#fonts()
# Full gene set
# Tab-separated table; Number_of_IES and Length are the columns read below.
# Arguments are spelled out (header, TRUE/FALSE) instead of relying on partial
# matching and on the reassignable T/F shortcuts.
GENES_v2 <- read.table(' path to Gene_v2_IES_count_VEG.txt ', header = TRUE, sep = "\t", fill = TRUE, quote = "", stringsAsFactors = FALSE)
# Group label used on the x axis of the box plots.
GENES_v2$Set <- "Random"
# IES density in IESs per kb: count / length * 1000 (presumably Length is in
# bp -- confirm against the input file). as.character() comes first so that a
# factor column would be converted by its labels, not by its integer codes.
GENES_v2$IES_density <- as.numeric(as.character(GENES_v2$Number_of_IES)) /
  as.numeric(as.character(GENES_v2$Length)) * 1000
#GENES_v2$IES_density <- as.numeric(as.character(GENES_v2$Number_of_IES))/as.numeric(as.character(GENES_v2$Length))*as.numeric(as.character(GENES_v2$VEG))
# IES counts of the genes carrying at least one IES. Missing or non-numeric
# counts are dropped instead of surviving as NA elements (fill = TRUE pads
# short rows with NA, and an NA in a logical index yields an NA element).
IES_Num_Gene_set <- as.numeric(as.character(GENES_v2$Number_of_IES))
IES_Num_Gene_set <- IES_Num_Gene_set[!is.na(IES_Num_Gene_set) & IES_Num_Gene_set >= 1]


# TPR Proteins (Identified with Blastp)
# stringsAsFactors = FALSE matches how the full gene set is read and keeps
# Number_of_IES from becoming a factor on R < 4.0, where as.numeric() on the
# column would return level codes (always >= 1) rather than the counts.
TRP_v2 <- read.table(' path to TRP_v2_IES_count_VEG.txt ', header = TRUE, sep = "\t", stringsAsFactors = FALSE)
# Group label used on the x axis of the box plots.
TRP_v2$Set <- "TPR"
# IES density in IESs per kb: count / length * 1000 (presumably Length is in
# bp -- confirm against the input file).
TRP_v2$IES_density <- as.numeric(as.character(TRP_v2$Number_of_IES)) /
  as.numeric(as.character(TRP_v2$Length)) * 1000
#TRP_v2$IES_density <- as.numeric(as.character(TRP_v2$Number_of_IES))/as.numeric(as.character(TRP_v2$Length))*as.numeric(as.character(TRP_v2$VEG))
# IES counts of the genes carrying at least one IES; missing or non-numeric
# counts are dropped instead of surviving as NA elements.
IES_Num_TPR <- as.numeric(as.character(TRP_v2$Number_of_IES))
IES_Num_TPR <- IES_Num_TPR[!is.na(IES_Num_TPR) & IES_Num_TPR >= 1]

# GF Receptors Cys-rich (Identified with Blastp)
# stringsAsFactors = FALSE matches how the full gene set is read and keeps
# Number_of_IES from becoming a factor on R < 4.0, where as.numeric() on the
# column would return level codes (always >= 1) rather than the counts.
GFR_v2 <- read.table(' path to GFR_Cy-rich_IES_count_VEG.txt ', header = TRUE, sep = "\t", stringsAsFactors = FALSE)
# Group label used on the x axis of the box plots.
GFR_v2$Set <- "GFR"
# IES density in IESs per kb: count / length * 1000 (presumably Length is in
# bp -- confirm against the input file).
GFR_v2$IES_density <- as.numeric(as.character(GFR_v2$Number_of_IES)) /
  as.numeric(as.character(GFR_v2$Length)) * 1000
#GFR_v2$IES_density  <- as.numeric(as.character(GFR_v2$Number_of_IES))/as.numeric(as.character(GFR_v2$Length))*as.numeric(as.character(GFR_v2$VEG))
# IES counts of the genes carrying at least one IES; missing or non-numeric
# counts are dropped instead of surviving as NA elements.
IES_Num_GFR <- as.numeric(as.character(GFR_v2$Number_of_IES))
IES_Num_GFR <- IES_Num_GFR[!is.na(IES_Num_GFR) & IES_Num_GFR >= 1]

# Draw up to 150 gene IDs at random from the full gene set; the matching rows
# form the "Random" reference group of the box plots.
# The draw is not seeded, so every run selects different genes -- call
# set.seed() beforehand if the figure has to be reproducible.
# The size is capped at the number of available IDs because sample() errors
# when asked for more elements than it is given (replace = FALSE).
rando <- sample(GENES_v2$Gene_ID, min(150, length(GENES_v2$Gene_ID)))
# %in% never returns NA, so the logical vector can index rows directly.
GENES_rando_v2 <- GENES_v2[GENES_v2$Gene_ID %in% rando, ]

# Number of IESs
# Stack the random, GFR and TPR sets, keeping only genes with at least one IES.
# Counts are coerced through as.character() so a factor column is read by its
# labels, and which() drops rows whose count is NA (indexing rows with an NA
# logical would insert all-NA rows into the plotted data).
df <- do.call(rbind, lapply(
  list(GENES_rando_v2, GFR_v2, TRP_v2),
  function(gene_set) {
    gene_set$Number_of_IES <- as.numeric(as.character(gene_set$Number_of_IES))
    gene_set[which(gene_set$Number_of_IES >= 1), ]
  }
))
# One box per set with jittered points; box-plot outlier markers are hidden
# (outlier.shape = NA), the points come from the jitter layer instead.
p <- ggboxplot(df, x = "Set", y = "Number_of_IES",
               color = "Set", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
               add = "jitter", shape = "Set", outline = FALSE, outlier.shape = NA,
               ylim = c(0, 48), xlab = "", ylab = "n° of IESs")

# Each family is compared against the random set only.
my_comparisons <- list(c("Random", "GFR"), c("Random", "TPR"))
# First layer: significance stars for the pairwise comparisons.
# NOTE(review): confirm that ggpubr applies p.adjust.method when `comparisons`
# is supplied; if not, the stars reflect unadjusted p-values.
# Second layer: p-value of the global test across all sets.
p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", p.adjust.method = "BH", size = 0.6) + # Add pairwise comparisons p-value
  stat_compare_means(label.x = 0.75, label.y = 49, size = 5) +
  scale_y_continuous(breaks = seq(0, 48, 5)) +
  theme(axis.text = element_text(size = 25),
        axis.title.y = element_text(size = 30, hjust = 0.5, vjust = +100),
        axis.title.x = element_text(size = 30, vjust = -100),
        legend.position = "top",
        legend.text = element_text(size = 20),
        plot.margin = unit(c(1, 1, 1.5, 1.2), "cm")) +
  # "none" replaces the deprecated guides(fill = FALSE) form.
  guides(color = guide_legend(title = NULL), fill = "none")

# IES density
# Stack the random, GFR and TPR sets, keeping only genes with at least one IES.
# Counts are coerced through as.character() so a factor column is read by its
# labels, and which() drops rows whose count is NA (indexing rows with an NA
# logical would insert all-NA rows into the plotted data).
df <- do.call(rbind, lapply(
  list(GENES_rando_v2, GFR_v2, TRP_v2),
  function(gene_set) {
    gene_set$Number_of_IES <- as.numeric(as.character(gene_set$Number_of_IES))
    gene_set[which(gene_set$Number_of_IES >= 1), ]
  }
))
# One box per set with jittered points; box-plot outlier markers are hidden
# (outlier.shape = NA), the points come from the jitter layer instead.
p <- ggboxplot(df, x = "Set", y = "IES_density",
               color = "Set", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
               add = "jitter", shape = "Set", outline = FALSE, outlier.shape = NA,
               xlab = "", ylab = "IES density (IESs / kb)")

# Each family is compared against the random set only.
my_comparisons <- list(c("Random", "GFR"), c("Random", "TPR"))
# First layer: significance stars for the pairwise comparisons.
# NOTE(review): confirm that ggpubr applies p.adjust.method when `comparisons`
# is supplied; if not, the stars reflect unadjusted p-values.
# Second layer: p-value of the global test across all sets.
p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", p.adjust.method = "BH", size = 0.6) +
  stat_compare_means(label.x = 0.75, label.y = 12.3, size = 5) +
  scale_y_continuous(breaks = seq(0, 15, 1)) +
  theme(axis.text = element_text(size = 25),
        axis.title.y = element_text(size = 30, hjust = 0.5, vjust = +5),
        axis.title.x = element_text(size = 30, vjust = -10),
        legend.position = "top",
        legend.text = element_text(size = 20),
        plot.margin = unit(c(1, 1, 1.5, 1.2), "cm")) +
  # "none" replaces the deprecated guides(fill = FALSE) form.
  guides(color = guide_legend(title = NULL), fill = "none")

# Sign32
# Tab-separated table; arguments are spelled out (header, TRUE/FALSE) instead
# of relying on partial matching and on the reassignable T/F shortcuts.
Sign_v2_32 <- read.table(' path to Sign32_IES_count_VEG.txt ', header = TRUE, sep = "\t", stringsAsFactors = FALSE)
# Group label used on the x axis of the box plots.
Sign_v2_32$Set <- "32°C"
Sign_v2_32$Gene_ID <- as.character(Sign_v2_32$Gene_ID)
# IES density in IESs per kb: count / length * 1000 (presumably Length is in
# bp -- confirm against the input file).
Sign_v2_32$IES_density <- as.numeric(as.character(Sign_v2_32$Number_of_IES)) /
  as.numeric(as.character(Sign_v2_32$Length)) * 1000
#Sign_v2_32$IES_density <- as.numeric(as.character(Sign_v2_32$Number_of_IES))/as.numeric(as.character(Sign_v2_32$Length))*as.numeric(as.character(Sign_v2_32$VEG))
# IES counts of the genes carrying at least one IES; missing or non-numeric
# counts are dropped instead of surviving as NA elements.
IES_Num_Sign_v2_32 <- as.numeric(as.character(Sign_v2_32$Number_of_IES))
IES_Num_Sign_v2_32 <- IES_Num_Sign_v2_32[!is.na(IES_Num_Sign_v2_32) & IES_Num_Sign_v2_32 >= 1]

# Sign18
# stringsAsFactors = FALSE matches how the Sign32 table is read and keeps
# Number_of_IES from becoming a factor on R < 4.0, where as.numeric() on the
# column would return level codes (always >= 1) rather than the counts.
Sign_v2_18 <- read.table(' path to Sign18_IES_count_VEG.txt ', header = TRUE, sep = "\t", stringsAsFactors = FALSE)
# Group label used on the x axis of the box plots.
Sign_v2_18$Set <- "18°C"
# IES density in IESs per kb: count / length * 1000 (presumably Length is in
# bp -- confirm against the input file).
Sign_v2_18$IES_density <- as.numeric(as.character(Sign_v2_18$Number_of_IES)) /
  as.numeric(as.character(Sign_v2_18$Length)) * 1000
#Sign_v2_18$IES_density <- as.numeric(as.character(Sign_v2_18$Number_of_IES))/as.numeric(as.character(Sign_v2_18$Length))*as.numeric(as.character(Sign_v2_18$VEG))
# IES counts of the genes carrying at least one IES; missing or non-numeric
# counts are dropped instead of surviving as NA elements.
IES_Num_Sign_v2_18 <- as.numeric(as.character(Sign_v2_18$Number_of_IES))
IES_Num_Sign_v2_18 <- IES_Num_Sign_v2_18[!is.na(IES_Num_Sign_v2_18) & IES_Num_Sign_v2_18 >= 1]

# Sign25
# stringsAsFactors = FALSE matches how the Sign32 table is read and keeps
# Number_of_IES from becoming a factor on R < 4.0, where as.numeric() on the
# column would return level codes (always >= 1) rather than the counts.
Sign_v2_25 <- read.table(' path to Sign25_IES_count_VEG.txt ', header = TRUE, sep = "\t", stringsAsFactors = FALSE)
# Group label used on the x axis of the box plots.
Sign_v2_25$Set <- "25°C"
# IES density in IESs per kb: count / length * 1000 (presumably Length is in
# bp -- confirm against the input file).
Sign_v2_25$IES_density <- as.numeric(as.character(Sign_v2_25$Number_of_IES)) /
  as.numeric(as.character(Sign_v2_25$Length)) * 1000
#Sign_v2_25$IES_density <- as.numeric(as.character(Sign_v2_25$Number_of_IES))/as.numeric(as.character(Sign_v2_25$Length))*as.numeric(as.character(Sign_v2_25$VEG))
# IES counts of the genes carrying at least one IES; missing or non-numeric
# counts are dropped instead of surviving as NA elements.
IES_Num_Sign_v2_25 <- as.numeric(as.character(Sign_v2_25$Number_of_IES))
IES_Num_Sign_v2_25 <- IES_Num_Sign_v2_25[!is.na(IES_Num_Sign_v2_25) & IES_Num_Sign_v2_25 >= 1]

## Boxplot and pairwise comparisons
# Number of IESs
# Stack the random set and the 25/18/32 degree sets, keeping only genes with at
# least one IES. Counts are coerced through as.character() so a factor column
# is read by its labels, and which() drops rows whose count is NA (indexing
# rows with an NA logical would insert all-NA rows into the plotted data).
df <- do.call(rbind, lapply(
  list(GENES_rando_v2, Sign_v2_25, Sign_v2_18, Sign_v2_32),
  function(gene_set) {
    gene_set$Number_of_IES <- as.numeric(as.character(gene_set$Number_of_IES))
    gene_set[which(gene_set$Number_of_IES >= 1), ]
  }
))
# One box per set with jittered points; box-plot outlier markers are hidden
# (outlier.shape = NA), the points come from the jitter layer instead.
p <- ggboxplot(df, x = "Set", y = "Number_of_IES",
               color = "Set", palette = c("#00AFBB", "#87CEEB", "#006400", "#FC4E07"),
               add = "jitter", shape = "Set", outline = FALSE, outlier.shape = NA,
               ylim = c(0, 55), xlab = "", ylab = "Number of IESs")

# Each temperature set is compared against the random set only.
my_comparisons <- list(c("Random", "25°C"), c("Random", "18°C"), c("Random", "32°C"))
# First layer: significance stars for the pairwise comparisons.
# NOTE(review): confirm that ggpubr applies p.adjust.method when `comparisons`
# is supplied; if not, the stars reflect unadjusted p-values.
# Second layer: p-value of the global test across all sets.
p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", p.adjust.method = "BH", size = 0.6) + # Add pairwise comparisons p-value
  stat_compare_means(label.x = 0.80, label.y = 54, size = 5) +
  scale_y_continuous(breaks = seq(0, 55, 5)) +
  theme(axis.text = element_text(size = 25),
        axis.title.y = element_text(size = 30, hjust = 0.5, vjust = +100),
        axis.title.x = element_text(size = 30, vjust = -100),
        legend.position = "top",
        legend.text = element_text(size = 20),
        plot.margin = unit(c(1, 1, 1.5, 1.2), "cm")) +
  # "none" replaces the deprecated guides(fill = FALSE) form.
  guides(color = guide_legend(title = NULL), fill = "none")
# IES density
# Reuses the stacked data frame `df` built for the IES-number plot of the
# random and temperature sets; only the y variable changes.
# Box-plot outlier markers are hidden (outlier.shape = NA); the points come
# from the jitter layer instead.
p <- ggboxplot(df, x = "Set", y = "IES_density",
               color = "Set", palette = c("#00AFBB", "#87CEEB", "#006400", "#FC4E07"),
               add = "jitter", shape = "Set", outline = FALSE, outlier.shape = NA,
               xlab = "", ylab = "IES density")

# Each temperature set is compared against the random set only.
my_comparisons <- list(c("Random", "25°C"), c("Random", "18°C"), c("Random", "32°C"))
# First layer: significance stars for the pairwise comparisons.
# NOTE(review): confirm that ggpubr applies p.adjust.method when `comparisons`
# is supplied; if not, the stars reflect unadjusted p-values.
# Second layer: p-value of the global test across all sets.
p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", p.adjust.method = "BH", size = 0.6) +
  stat_compare_means(label.x = 0.80, label.y = 12.4, size = 5) +
  scale_y_continuous(breaks = seq(0, 25, 1)) +
  theme(axis.text = element_text(size = 25),
        axis.title.y = element_text(size = 30, hjust = 0.5, vjust = +5),
        axis.title.x = element_text(size = 30, vjust = -100),
        legend.position = "top",
        legend.text = element_text(size = 20),
        plot.margin = unit(c(1, 1, 1.5, 1.2), "cm")) +
  # "none" replaces the deprecated guides(fill = FALSE) form.
  guides(color = guide_legend(title = NULL), fill = "none")
