

source("../headers.R")
source("../functions.R")
source("load_data.R")
source("load_retention.R")



# Output locations. Directories are created up front; an already-existing
# directory is not reported because warnings are switched off.
base_img_dir <- "images/"
base_result_dir <- "results/"

for (out_dir in c(base_img_dir, base_result_dir)) {
    dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
}

# All figures produced by this script go under the Clustering sub-directory.
img_dir <- paste0(base_img_dir, "Clustering/")
dir.create(img_dir, showWarnings = FALSE, recursive = TRUE)


# IES feature table, indexed by its ID column.
ies <- read.table(
    "../data/internal_eliminated_sequence_PGM_IES51_features.tab",
    header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
rownames(ies) <- ies$ID

# Dating table: keep only the three columns used here, drop rows lacking
# either an age class or a PGM identifier, then index by the PGM identifier.
dated <- read.table(
    "../data/date_internal_eliminated_sequence_MIC_tetraurelia.pt_51.tab",
    header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
dated <- dated[, c("ID_PGM", "LCA_ageclass", "LCA_age")]
missing_key <- is.na(dated$LCA_ageclass) | is.na(dated$ID_PGM)
dated <- dated[!missing_key, ]
rownames(dated) <- dated$ID_PGM


# Colour mapping functions over the [0, 1] score range. Every palette is
# reversed before being handed to colorRamp2.
pcolors <- rev(brewer.pal(9, "Reds"))

breaks_9 <- seq(0, 1, length.out = 9)
breaks_30 <- seq(0, 1, length.out = 30)

col_fun <- colorRamp2(breaks_9, pcolors)

col_fun_bw <- colorRamp2(breaks_9, rev(brewer.pal(9, "Greys")))
col_fun_orange <- colorRamp2(breaks_9, rev(c("white", brewer.pal(8, "YlOrBr"))))
col_fun_reds <- colorRamp2(
    breaks_30,
    rev(colorRampPalette(c("white", "orange", "red"))(30))
)
col_fun_blues <- colorRamp2(
    breaks_30,
    rev(colorRampPalette(c("white", "midnightblue"))(30))
)


# Excision score = 1 - retention score, one column per entry of `labels`.
# `scores` and `labels` come from the scripts sourced at the top of the file.
retention_cols <- paste0("RETENTION_SCORE_", labels)
es <- 1 - scores[, retention_cols]
colnames(es) <- labels

# Working copy used by the clustering steps below.
df <- es
#df=head(es,n=1000)


write.table(df, "IES_Excision_Score.tsv", sep = "\t", quote = FALSE)


#
##################



# Correlation between the columns of the excision score table, drawn first
# with pheatmap and then with ComplexHeatmap.
mat <- as.matrix(cor(es))
pdf(paste0(img_dir, "/pheatmap_samples.pdf"))
pheatmap(mat)
dev.off()

library(ComplexHeatmap)
library(circlize)

# From here on col_fun is a 10-step Blues ramp (replaces the earlier one).
nb_colors <- 10
blues_ramp <- colorRampPalette(brewer.pal(9, "Blues"))(nb_colors)
col_fun <- colorRamp2(seq(0, 1, length.out = nb_colors), blues_ramp)


library(dendsort)
# NOTE(review): this dendsort result is overwritten a few lines below and
# col_dend is not used in the visible part of the script.
orderedDend <- dendsort(hclust(dist(mat)))
col_dend <- dendsort(hclust(dist(t(mat))))

# Dendrogram actually used for the heatmap: hclust tree reordered with
# weights 1..n.
h <- hclust(dist(mat))
orderedDend <- reorder(as.dendrogram(h), seq_len(max(h$order)))


pdf(paste0(img_dir, "/Heatmap_samples.pdf"), width = 8)
legend_ticks <- seq(0, 1, 0.2)
Heatmap(
    mat,
    name = "Correlation",
    col = col_fun,
    border = TRUE,
    cluster_rows = orderedDend,
    cluster_columns = orderedDend,
    show_row_dend = TRUE,
    show_column_dend = FALSE,
    show_row_names = TRUE,
    row_names_side = "right",
    row_title_rot = 0,
    heatmap_legend_param = list(at = legend_ticks, labels = legend_ticks)
)
dev.off()

#
#######################

# Rows whose minimum score over the `frag_labels` columns is below 0.8 are
# set aside as group "None" and removed from the table that gets clustered.
min_frag_score <- apply(df[, frag_labels], 1, min)
below_cutoff <- min_frag_score < 0.8
groups <- data.frame(ID = rownames(df[below_cutoff, ]), GROUP_NAME = "None")
df <- df[setdiff(rownames(df), groups$ID), ]
nrow(df)

# Number of k-means clusters and their display names, in display order.
nb_groups <- 4
group_names <- c("Very early", "Early", "Intermediate", "Late")
#gcolors=c(brewer.pal(nb_groups,"Set1"),"grey")

# Successive palette choices; only the last assignment is in effect.
gcolors <- c(brewer.pal(5, "Set1")[-2], "grey")
gcolors <- c("#E41A1C", "#FF7F00", "#4DAF4A", "#377EB8", "grey")
gcolors <- c("#E41A1C", "#FF7F00", "#4DAF4A", "dodgerblue", "grey")
# One colour per group name, plus grey for "None".
names(gcolors) <- c(group_names, "None")

# k-means on the filtered score table; the seed is fixed so that cluster
# numbering is reproducible.
set.seed(1234)
kclus <- kmeans(df, nb_groups, iter.max = 100)

# Exploratory 2x2 panel: one box plot per raw cluster id. Titles index
# group_names by cluster id, i.e. before any cluster-to-name remapping.
par(mfrow = c(2, 2))
for (g in seq_len(nb_groups)) {
    cluster_members <- names(kclus$cluster)[kclus$cluster == g]
    panel_title <- paste("Group ", g, " : ", group_names[g], " N=", length(cluster_members))
    boxplot(df[cluster_members, ], ylim = c(0, 1), main = panel_title, outline = FALSE)
}

# Cluster ids listed in display order: position i of group_idx is the cluster
# that receives the i-th display name.
group_idx <- c(1, 2, 4, 3)
# Lookup indexed by cluster id (valid because swapping 3 and 4 is its own
# inverse): cluster 4 -> "Intermediate", cluster 3 -> "Late".
names_by_cluster <- group_names[group_idx]
#names(gcolors)=c(group_names,"None")


# Append the members of each cluster to `groups`, after the "None" rows.
for (g in group_idx) {
    gids <- names(kclus$cluster)[kclus$cluster == g]
    groups <- rbind(groups, data.frame(ID = gids, GROUP_NAME = names_by_cluster[g]))
}
rownames(groups) <- groups$ID

# group_names stays in display order for the rest of the script.
group_names <- c("Very early", "Early", "Intermediate", "Late")


# Group sizes, as counts and as percentages of all IES (including "None").
table(groups$GROUP_NAME)
round(table(groups$GROUP_NAME) / nrow(groups) * 100, 1)
write.table(groups, "IES_Groups.tsv", sep = "\t", quote = FALSE)

# 2x2 panel of box plots, one per k-means cluster, in display order and
# filled with the colour of the group the cluster was assigned to.
#
# group_names is in display order here while g is a raw cluster id, so the
# label must be looked up through the position of g in group_idx. Indexing
# group_names[g] directly swapped the titles and colours of clusters 3 and 4
# relative to the labels stored in `groups`.
par(mfrow = c(2, 2))
for (g in group_idx) {
    cluster_members <- names(kclus$cluster)[kclus$cluster == g]
    cluster_label <- group_names[match(g, group_idx)]
    boxplot(
        df[cluster_members, ],
        ylim = c(0, 1),
        main = paste("Group ", g, " : ", cluster_label, " N=", length(cluster_members)),
        outline = FALSE,
        col = gcolors[cluster_label]
    )
}




# Heatmap of the clustered scores, with rows split by k-means cluster. The
# factor levels follow group_idx so the slices appear in display order, and
# slices themselves are not re-clustered.
split <- factor(kclus$cluster, levels = group_idx)
hm <- Heatmap(
    df,
    name = "ES",
    col = col_fun,
    split = split,
    cluster_row_slices = FALSE,
    column_order = labels,
    show_row_names = FALSE,
    row_title = " "
)


pdf(paste0(img_dir, "HeatMap_IES_Excision_Score.pdf"))
# draw() returns the laid-out heatmap; keep it in hm.
hm <- draw(hm)
dev.off()



#~ hmlist=NULL
#~ for(gname in c("Very early","Early","Intermediate","Late","None")) {
#~     group_ies_ids= groups[groups$GROUP_NAME==gname,]$ID
    
#~     hm<-Heatmap(es[group_ies_ids,], show_column_dend = FALSE, show_row_dend = FALSE,border=TRUE,
#~                 column_order = labels,
#~                 row_title =gname,
#~                 show_row_names=FALSE, row_names_side = "right",name=gname, col =col_fun_bw,
#~                 show_heatmap_legend =gname==group_names[1],   
#~                 border_gp=gpar(col=gcolors[gname],lty=2,lwd=2),             
#~                 use_raster=TRUE
#~                 )
#~     #draw(hm)       
#~     if(is.null(hmlist)) {
#~         hmlist=hm
#~     } else {
#~         hmlist=add_heatmap(hmlist, hm, direction = "vertical")
#~     }    
#~ }

#~ pdf(paste0(img_dir,"HeatMap_IES_Excision_Score_BW.pdf"))
#~ draw(hmlist)
#~ dev.off()  

 

#~ hmlist=NULL
#~ for(gname in c("Very early","Early","Intermediate","Late")) {
#~     group_ies_ids= groups[groups$GROUP_NAME==gname,]$ID
    
#~     hm<-Heatmap(es[group_ies_ids,], show_column_dend = FALSE, show_row_dend = FALSE,border=TRUE,
#~                 column_order = labels,
#~                 row_title =gname,
#~                 show_row_names=FALSE, row_names_side = "right",name=gname, col =col_fun_orange,
#~                 show_heatmap_legend =gname==group_names[1],   
#~                 border_gp=gpar(col=gcolors[gname],lty=1,lwd=2),             
#~                 use_raster=TRUE
#~                 )
#~     #draw(hm)       
#~     if(is.null(hmlist)) {
#~         hmlist=hm
#~     } else {
#~         hmlist=add_heatmap(hmlist, hm, direction = "vertical")
#~     }    
#~ }

#~ pdf(paste0(img_dir,"HeatMap_IES_Excision_Score_Orange.pdf"))
#~ draw(hmlist)
#~ dev.off()  

# Supplementary table: group assignment joined to the excision scores of
# every IES (full outer join on the IES id), plus a GROUP_RANK column that is
# filled in below from the row order of each per-group heatmap.
supTable <- merge(
    cbind(groups, GROUP_RANK = NA), es,
    by.x = "ID", by.y = "row.names", all = TRUE
)
rownames(supTable) <- supTable$ID
# Next rank to hand out; ranks run consecutively across groups.
r <- 1

head(supTable)

hmlist <- NULL
for (gname in c("Very early", "Early", "Intermediate", "Late")) {
    group_ies_ids <- groups[groups$GROUP_NAME == gname, ]$ID
    hm <- NULL
    group_es <- es[group_ies_ids, ]
    hm <- Heatmap(
        group_es,
        name = gname,
        col = col_fun_blues,
        border = TRUE,
        border_gp = gpar(col = gcolors[gname], lty = 1, lwd = 3),
        column_order = labels,
        row_title = gname,
        show_row_names = FALSE,
        row_names_side = "right",
        show_row_dend = FALSE,
        show_column_dend = FALSE,
        # Only the first group contributes a legend to the stacked figure.
        show_heatmap_legend = gname == group_names[1],
        use_raster = TRUE
    )

    # Stack the per-group heatmaps vertically.
    hmlist <- if (is.null(hmlist)) {
        hm
    } else {
        add_heatmap(hmlist, hm, direction = "vertical")
    }

    # Draw the single-group heatmap to obtain its row order, then number the
    # IES of this group in that order, continuing from the previous group.
    hm <- draw(hm)
    ranked_ids <- rownames(group_es[row_order(hm), ])
    supTable[ranked_ids, ]$GROUP_RANK <- seq(r, length(group_ies_ids) + r - 1, 1)
    r <- max(supTable$GROUP_RANK, na.rm = TRUE) + 1

}

pdf(paste0(img_dir, "HeatMap_IES_Excision_Score_Blues.pdf"))
draw(hmlist)
dev.off()

# Final ordering is by rank; the preceding sort by group and id determines
# the order of rows that have no rank.
supTable <- supTable[order(supTable$GROUP_NAME, supTable$ID), ]
supTable <- supTable[order(supTable$GROUP_RANK), ]

head(supTable)
write.table(supTable, "SupTableS5.tsv", sep = "\t", quote = FALSE, row.names = FALSE)



# 2x2 panel of box plots, one per k-means cluster in display order, with
# rotated axis labels and a larger bottom margin to make room for them.
#
# group_names is in display order while g is a raw cluster id, so the title
# is looked up through the position of g in group_idx. Indexing
# group_names[g] directly swapped the titles of clusters 3 and 4 relative to
# the labels stored in `groups`.
par(mfrow = c(2, 2), mar = c(8, 4, 4, 2))
for (g in group_idx) {
    gids <- names(kclus$cluster)[kclus$cluster == g]
    cluster_label <- group_names[match(g, group_idx)]
    boxplot(
        df[gids, ],
        ylim = c(0, 1),
        main = paste0(cluster_label, " N=", length(gids)),
        las = 2,
        outline = FALSE
    )
}











# Violin plots of the excision score (1 - retention score) per column of
# `labels`, one figure per group, plus a combined 2x2 figure.
#
# Each per-group figure is saved under a file name built from the group being
# plotted (gname). The file name was previously built from group_names[g],
# where g was a stale index left over from an earlier loop, so every
# iteration overwrote the same file.
gp_list <- list()
for (gname in c("Very early", "Early", "Intermediate", "Late")) {
    group_ies_ids <- groups[groups$GROUP_NAME == gname, ]$ID

    # Scores of this group in long format (one row per IES/column pair).
    df <- 1 - scores[group_ies_ids, paste0("RETENTION_SCORE_", labels)]
    colnames(df) <- labels
    df$ID <- rownames(df)
    df.m <- reshape2::melt(df, id.vars = "ID", na.rm = TRUE)

    gp_irs <- ggplot(df.m, aes(x = variable, y = value, fill = variable)) +
        coord_cartesian(ylim = c(0, 1)) +
        scale_color_manual(values = c("white")) +
        # Every violin gets the group colour; ncol(df) - 1 excludes the ID column.
        scale_fill_manual(values = rep(as.vector(gcolors[gname]), ncol(df) - 1)) +
        geom_violin(trim = TRUE, scale = "width", color = "white") +
        # Black bar at the median of each violin.
        stat_summary(fun = median, fun.min = median, fun.max = median,
                     geom = "crossbar", width = 0.3, col = "black") +
        labs(title = paste0(gname, " N=", length(group_ies_ids)), x = "",
             y = "IES Excision Score", size = 13, color = "black") +
        theme(text = element_text(size = 13, color = "black"),
              legend.position = "none",
              panel.background = element_blank(),
              axis.line = element_line(color = "black"),
              axis.text.y = element_text(size = 13, color = "black"),
              axis.text.x = element_text(angle = 45, hjust = 1, size = 13, color = "black"))

    ggsave(
        paste0(img_dir, "violin_plot_IES_Excision_Score_Group_", sub(" ", "_", gname), ".pdf"),
        plot = gp_irs, width = 22, height = 15, units = "cm"
    )

    # Report the per-column medians for this group.
    print(gname)
    print(apply(df[, labels], 2, median))
    gp_list[[gname]] <- gp_irs
}

# All four groups on one page.
pdf(paste0(img_dir, "/violin_plot_IES_Excision_Score_Groups.pdf"), width = 10, height = 10)
grid.arrange(grobs = gp_list, ncol = 2, nrow = 2)
dev.off()




