

source("../headers.R")
source("../functions.R")

library(seqinr)
# Output locations, relative to the working directory: figures are written
# below base_img_dir, FASTA files below base_result_dir.
base_img_dir <- "images/"
base_result_dir <- "results/"

# Create both directories up front; an already existing directory is fine.
dir.create(base_img_dir, showWarnings = FALSE, recursive = TRUE)
dir.create(base_result_dir, showWarnings = FALSE, recursive = TRUE)


# Reads a tab-separated table that has a header line, keeping text columns as
# character vectors (arguments spelled out instead of relying on partial
# matching and the T/F shorthands).
read_tab_table <- function(path) {
    read.table(path, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
}

# Reference IES table, indexed by IES ID.
ies <- read_tab_table("/data/PARAMECIUM/GENOMIC/tetraurelia/micronucleus/IES/51/internal_eliminated_sequence_PGM_IES51_features.tab")
rownames(ies) <- ies$ID

# cindex() comes from the sourced helper files; it is applied row-wise to the
# (ID, sequence) pair of each IES.
ies$CINDEX <- apply(ies[, c("ID", "IES_SEQ")], 1, cindex, add_TA = FALSE)

# Internal IESs, indexed by NAME (row names are set after the CINDEX column is
# computed, as before).
internal_ies_basename <- "internal_IES.pt_51_with_ies"
internal_IES <- read_tab_table(paste0("internal_IES/", internal_ies_basename, ".tsv"))
internal_IES$CINDEX <- apply(internal_IES[, c("NAME", "SEQ")], 1, cindex)
rownames(internal_IES) <- internal_IES$NAME

# Distinct values of the IES_ID column of the internal-IES table; used below
# to select the "Encompassing" rows of `ies` and `IRS`.
encompassing_ies_ids <- unique(internal_IES$IES_ID)

# IESs located on imprecisely eliminated regions.
imp_ies_basename <- "IES_on_Imprecisely_Eliminated_Regions.mic2"
imp_IES <- read_tab_table(paste0("imprecise_EliminatedRegions/", imp_ies_basename, ".tsv"))
imp_IES$CINDEX <- apply(imp_IES[, c("NAME", "SEQ")], 1, cindex)

# Retention-score tables. The rest of the script reads their SUPPORT_<x>,
# RETENTION_SCORE_<x> and (for IRS) SIGNIFICANT_<x> columns and indexes
# IRS / internal_IRS by row name.
# NOTE(review): row-name indexing assumes these files carry IES identifiers as
# row names -- confirm against the files.
imp_IRS <- read_tab_table("imprecise_EliminatedRegions/imp_IRS.tsv")
IRS <- read_tab_table("internal_IES/IRS.tsv")
internal_IRS <- read_tab_table("internal_IES/internal_IRS.tsv")



# Plot colour of each IES category, looked up by name (e.g. colors["Imp"]).
# This is the only palette in effect: earlier definitions that were
# overwritten before being used have been dropped.
colors <- c(
    All = "black",
    Internal = "#1B9E77",
    Encompassing = "#7570B3",
    Imp = "#E7298A"
)

# length distribution
# Empirical cumulative distributions of lengths, x axis limited to 0-200 nt:
# ies$SIZE (black), internal_IES$LENGTH, internal_IES$DIFF_LENGTH (drawn with
# the "Encompassing" colour) and imp_IES$LENGTH.
pdf(paste0(base_img_dir, "ecdf_length_IES_in_MIC-limited.pdf"))

plot(
    ecdf(ies$SIZE),
    xlim = c(0, 200),
    xlab = "Size (nt)",
    main = "",
    ylab = "Cumulative fraction",
    col = colors["All"]
)

# Remaining curves, keyed by the name of their colour in `colors`.
overlay_lengths <- list(
    Internal = internal_IES$LENGTH,
    Encompassing = internal_IES$DIFF_LENGTH,
    Imp = imp_IES$LENGTH
)
for (category in names(overlay_lengths)) {
    lines(ecdf(overlay_lengths[[category]]), col = colors[category])
}

# Row counts behind the four curves, in drawing order. No legend is drawn at
# the moment, so these counts are informational only.
nbs <- c(nrow(ies), nrow(internal_IES), nrow(internal_IES), nrow(imp_IES))

dev.off()





# sensitivity
# Retention-score histograms per knockdown. Scores are read from the
# RETENTION_SCORE_<cname> columns; "covered" rows are those whose
# SUPPORT_<cname> is strictly greater than coverage_cutoff.
coverage_cutoff <- 10  # also read by the all-knockdown loop further down
bin <- 50              # number of equal-width histogram bins on [0, 1]

# Writes, just above the first and the last bar of histogram `h`, the
# percentage of all counted observations that fall in that bar.
label_extreme_bins <- function(h, n_bins) {
    for (b in c(1, n_bins)) {
        text(h$mids[b], h$counts[b] + 3,
             paste0(round(h$counts[b] / sum(h$counts) * 100, 0), "%"))
    }
}

for (cname in c("EZL1")) {

    img_dir <- paste0(base_img_dir, "/", cname, "/")
    dir.create(img_dir, showWarnings = FALSE, recursive = TRUE)

    support_col <- paste0("SUPPORT_", cname)
    score_col <- paste0("RETENTION_SCORE_", cname)
    significant_col <- paste0("SIGNIFICANT_", cname)
    irs_breaks <- seq(0, 1, 1 / bin)

    # ---- Figure 1: internal / encompassing / imp, one panel each ----
    pdf(paste0(img_dir, "hist_IRS_", cname, "_v2.pdf"), height = 7, width = 4)
    par(mfrow = c(3, 1), xpd = TRUE, mar = c(2.1, 2.1, 2.1, 2.1))

    # internal
    internal_IRS_cur_covered <- internal_IRS[internal_IRS[, support_col] > coverage_cutoff, ]
    h <- hist(internal_IRS_cur_covered[, score_col], breaks = irs_breaks,
              col = colors["Internal"], border = "white", xlab = "", main = "")
    label_extreme_bins(h, bin)

    # Encompassing: all of them first, then the SIGNIFICANT_<cname> subset
    # drawn on top; the percentages refer to the full set.
    h <- hist(IRS[encompassing_ies_ids, score_col], breaks = irs_breaks,
              col = "mediumpurple", border = "white", xlab = "", main = "")
    significant_ids <- intersect(encompassing_ies_ids,
                                 rownames(IRS[IRS[, significant_col], ]))
    hist(IRS[significant_ids, score_col], breaks = irs_breaks,
         col = colors["Encompassing"], border = "white", xlab = "", main = "",
         add = TRUE)
    label_extreme_bins(h, bin)

    # Imp IES
    imp_IRS_covered <- imp_IRS[imp_IRS[, support_col] > coverage_cutoff, ]
    h <- hist(imp_IRS_covered[, score_col], breaks = irs_breaks,
              col = colors["Imp"], border = "white", xlab = "", main = "")
    label_extreme_bins(h, bin)

    dev.off()

    # ---- Figure 2: lengths of internal IESs in the two extreme bins ----
    # The thresholds are the upper edge of the first bin and the lower edge of
    # the last bin. They are taken straight from the break vector, so no
    # histogram has to be drawn (and none leaks onto the default device).
    internal_IRS_cur_covered <- internal_IRS[internal_IRS[, support_col] > coverage_cutoff, ]
    internal_scores <- internal_IRS_cur_covered[, score_col]

    pdf(paste0(img_dir, "boxplot_length_internal_IRS_", cname, ".pdf"), height = 6, width = 3)
    boxplot(
        list(
            "IRS~0" = internal_IES[rownames(internal_IRS_cur_covered[internal_scores < irs_breaks[2], ]), ]$LENGTH,
            "IRS~1" = internal_IES[rownames(internal_IRS_cur_covered[internal_scores > irs_breaks[bin], ]), ]$LENGTH
        ),
        outline = FALSE, col = colors["Internal"], ylab = "IES length (nt)",
        log = "y", las = 2
    )
    dev.off()

    # ---- Figure 3: imp / internal >= DIFF_LENGTH / internal < DIFF_LENGTH ----
    pdf(paste0(img_dir, "hist_IRS_", cname, ".pdf"), height = 5, width = 4)
    par(mfrow = c(3, 1), xpd = TRUE, mar = c(2.1, 2.1, 2.1, 2.1))

    # Imp IES
    imp_IRS_covered <- imp_IRS[imp_IRS[, support_col] > coverage_cutoff, ]
    h <- hist(imp_IRS_covered[, score_col], breaks = irs_breaks,
              col = colors["Imp"], border = "white", xlab = "", main = "")
    label_extreme_bins(h, bin)

    # internal IES at least as long as DIFF_LENGTH
    cur_ies_ids <- internal_IES[internal_IES$LENGTH >= internal_IES$DIFF_LENGTH, ]$NAME
    internal_IRS_cur <- internal_IRS[cur_ies_ids, ]
    internal_IRS_cur_covered <- internal_IRS_cur[internal_IRS_cur[, support_col] > coverage_cutoff, ]
    h <- hist(internal_IRS_cur_covered[, score_col], breaks = irs_breaks,
              col = colors["Internal"], border = "white", xlab = "", main = "")
    label_extreme_bins(h, bin)

    # internal IES shorter than DIFF_LENGTH
    cur_ies_ids <- internal_IES[internal_IES$LENGTH < internal_IES$DIFF_LENGTH, ]$NAME
    internal_IRS_cur <- internal_IRS[cur_ies_ids, ]
    internal_IRS_cur_covered <- internal_IRS_cur[internal_IRS_cur[, support_col] > coverage_cutoff, ]
    h <- hist(internal_IRS_cur_covered[, score_col], breaks = irs_breaks,
              col = colors["Internal"], border = "white", xlab = "", main = "")
    label_extreme_bins(h, bin)

    dev.off()
}








# Per-knockdown sensitivity summary.
# For every knockdown and every IES category, one histogram PDF is written to
# <base_img_dir>/<cname>/ and one row (RNAi, Category, Number, then four
# percentages) is added to `results`, which is finally written as a TSV.

# Row names of covered internal IESs with a retention score > 0.8, per
# knockdown.
retained <- list()

# Handles one coverage-filtered category for one knockdown.
#   irs_table   table holding SUPPORT_<cname> and RETENTION_SCORE_<cname>
#   cname       knockdown name (column suffix)
#   file_tag    middle part of the PDF file name
#   title       first line of the histogram title
#   img_dir     output directory (with trailing slash)
#   min_support rows are kept when SUPPORT_<cname> > min_support
# Returns list(covered = <kept rows>, res = c(n score > 0.8, n score < 0.2,
# n covered but in neither group, n not covered)).
summarise_covered_irs <- function(irs_table, cname, file_tag, title, img_dir, min_support) {
    covered <- irs_table[irs_table[, paste0("SUPPORT_", cname)] > min_support, ]
    scores <- covered[, paste0("RETENTION_SCORE_", cname)]

    res <- c(sum(scores > 0.8), sum(scores < 0.2))
    res <- c(res, nrow(covered) - sum(res), nrow(irs_table) - nrow(covered))

    pdf(paste0(img_dir, "hist_IRS_", file_tag, "_", cname, ".pdf"))
    # Close the device even if plotting fails, so later figures are not drawn
    # into this file.
    on.exit(dev.off(), add = TRUE)
    h <- hist(scores, xlab = paste0("IRS ", cname), breaks = seq(0, 1, 0.01),
              main = paste0(title, "\n", "N=", sum(res[1:3]), " + ", res[4], " not covered"))
    abline(v = c(0.2, 0.8), lty = 2)
    text(c(0.9, 0.1, 0.5), max(h$counts), paste0(round(res[1:3] / sum(res) * 100, 1), "%"))

    list(covered = covered, res = res)
}

# Coverage-filtered categories, in output order. Internal subsets are selected
# by NAME from internal_IRS (row-name lookup).
nested_ids <- internal_IES[internal_IES$TYPE == "Nested", ]$NAME
juxtaposed_ids <- internal_IES[internal_IES$TYPE == "Juxtaposed", ]$NAME
bigger_ids <- internal_IES[internal_IES$LENGTH >= internal_IES$DIFF_LENGTH, ]$NAME
smaller_ids <- internal_IES[internal_IES$LENGTH < internal_IES$DIFF_LENGTH, ]$NAME

irs_categories <- list(
    list(label = "Internal", file_tag = "internalIES",
         title = "internal IESs", table = internal_IRS),
    list(label = "Internal (Nested)", file_tag = "NestedIES",
         title = "Nested IESs", table = internal_IRS[nested_ids, ]),
    list(label = "Internal (Juxtaposed)", file_tag = "JuxtaposedIES",
         title = "Juxtaposed IESs", table = internal_IRS[juxtaposed_ids, ]),
    list(label = "Internal (bigger than encompassing)", file_tag = "biggerEncompIES",
         title = "internal IES bigger than encompassing", table = internal_IRS[bigger_ids, ]),
    list(label = "Internal (smaller than encompassing)", file_tag = "smallerEncompIES",
         title = "internal IES smaller than encompassing", table = internal_IRS[smaller_ids, ]),
    list(label = "Imp", file_tag = "ImpIES",
         title = "Imp IES", table = imp_IRS)
)

result_rows <- list()
for (cname in c("PGM", "EZL1", "TFIIS4", "DCL23", "DCL5")) {

    img_dir <- paste0(base_img_dir, "/", cname, "/")
    dir.create(img_dir, showWarnings = FALSE, recursive = TRUE)

    score_col <- paste0("RETENTION_SCORE_", cname)
    significant_col <- paste0("SIGNIFICANT_", cname)

    # all IES: split on the SIGNIFICANT_<cname> flag; the "unknown" and
    # "not covered" slots are fixed at 0 for this category.
    res <- c(sum(IRS[, significant_col]), sum(!IRS[, significant_col]), 0, 0)
    result_rows[[length(result_rows) + 1]] <-
        c(cname, "All", nrow(IRS), round(res / sum(res) * 100, 1))

    pdf(paste0(img_dir, "hist_IRS_", "AllIES", "_", cname, ".pdf"))
    hist(IRS[, score_col], xlab = paste0("IRS ", cname), breaks = seq(0, 1, 0.01),
         main = paste0("Canonical IESs", " N=", sum(res[1:3])))
    hist(IRS[IRS[, significant_col], score_col], breaks = seq(0, 1, 0.01),
         add = TRUE, col = "indianred")
    dev.off()

    # encompassing IES: same split, restricted to encompassing_ies_ids.
    res <- c(sum(IRS[encompassing_ies_ids, significant_col]),
             sum(!IRS[encompassing_ies_ids, significant_col]), 0, 0)

    pdf(paste0(img_dir, "hist_IRS_", "EncompassingIES", "_", cname, ".pdf"))
    hist(IRS[encompassing_ies_ids, score_col], xlab = paste0("IRS ", cname),
         breaks = seq(0, 1, 0.01),
         main = paste0("Encompassing IESs", " N=", sum(res[1:3])))
    hist(IRS[intersect(encompassing_ies_ids, IRS[IRS[, significant_col], ]$ID), score_col],
         breaks = seq(0, 1, 0.01), add = TRUE, col = "indianred")
    dev.off()

    result_rows[[length(result_rows) + 1]] <-
        c(cname, "Encompassing", length(encompassing_ies_ids), round(res / sum(res) * 100, 1))

    # coverage-filtered categories (internal IESs and their subsets, imp IESs)
    for (category in irs_categories) {
        summary_cur <- summarise_covered_irs(category$table, cname, category$file_tag,
                                             category$title, img_dir, coverage_cutoff)
        res <- summary_cur$res
        result_rows[[length(result_rows) + 1]] <-
            c(cname, category$label, nrow(category$table), round(res / sum(res) * 100, 1))

        if (category$label == "Internal") {
            covered <- summary_cur$covered
            retained[[cname]] <- rownames(covered[covered[, score_col] > 0.8, ])
        }
    }
}

# Character matrix, one row per (knockdown, category).
results <- do.call(rbind, result_rows)
results[is.na(results)] <- 0

colnames(results) <- c("RNAi", "Category", "Number", "Retained", "Not Retained", "Unknown", "Not covered")

results <- results[order(results[, c("Category")], results[, c("RNAi")]), ]

write.table(results, "IES_MIC-limited_sensitivity.tsv", sep = "\t", quote = FALSE,
            row.names = FALSE, col.names = TRUE)
























# Bar plot of the "Retained" percentage per category for the EZL1 knockdown,
# with the category size ("Number") written above each bar.
ezl1_rows <- results[results[, "RNAi"] == "EZL1", , drop = FALSE]
prop <- data.frame(
    Retained = as.numeric(ezl1_rows[, "Retained"]),
    Number = as.numeric(ezl1_rows[, "Number"])
)
rownames(prop) <- ezl1_rows[, "Category"]

# Fixed display order of the categories.
prop <- prop[c("All", "Internal", "Internal (Nested)", "Internal (Juxtaposed)", "Encompassing",
               "Internal (bigger than encompassing)", "Internal (smaller than encompassing)", "Imp"), ]

# The figure is written once, with grey bars. A multi-colour variant used to
# be written to this same path first and was immediately overwritten (it also
# indexed a fifth palette entry that the 4-colour palette does not have), so
# it has been removed; the file on disk is unchanged.
pdf(paste0(base_img_dir, "proportion_EZL1-sensitive.pdf"))
bp <- barplot(prop$Retained, names.arg = rownames(prop), las = 2, ylim = c(0, 100),
              ylab = "% of IES retained in EZL1 KD", col = "grey")
text(bp, prop$Retained + 5, prop$Number)
dev.off()

# length comparison
# Log-log scatter plot of internal_IES$LENGTH (x) against
# internal_IES$DIFF_LENGTH (y); points are coloured by the value returned by
# get_density() and the red line is y = x.
df <- data.frame(x = internal_IES$LENGTH, y = internal_IES$DIFF_LENGTH)
df[["density"]] <- get_density(df[["x"]], df[["y"]])

# Both axes share the same limits.
length_limits <- c(20, 3700)

gp <- ggplot(df)
gp <- gp + geom_point(aes(x, y, color = density), size = 3)
gp <- gp + geom_abline(intercept = 0, slope = 1, col = "red")
gp <- gp + scale_x_log10(limits = length_limits) + scale_y_log10(limits = length_limits)
gp <- gp + scale_color_viridis() + theme_bw()
gp <- gp + labs(title = "", x = "internal IES length (nt)", y = "encompassing IES length (nt)")

ggsave(
    paste0(base_img_dir, "dotplot_IES_length_internal.pdf"),
    plot = gp,
    width = 17,
    height = 15,
    units = "cm"
)
   
# consensus
# For each IES category, write the first 13 characters of every sequence to a
# FASTA file in base_result_dir and render two weblogo PDFs from it
# (probability units and bits).

# Pairs identifiers with the first 13 characters of their sequences.
ies_start <- function(ids, seqs) {
    list(ID = ids, SEQ = substr(seqs, 1, 13))
}

# Row selections. Each mask is computed once and used for both the IDs and the
# sequences, so the two vectors always have matching lengths.
# NOTE(review): bigger/smaller are split here with `>` / `<=`, whereas the
# retention-score figures in this script use `>=` / `<` -- confirm which is
# intended; behaviour is left unchanged.
is_bigger <- internal_IES$LENGTH > internal_IES$DIFF_LENGTH
is_smaller <- internal_IES$LENGTH <= internal_IES$DIFF_LENGTH
is_nested <- internal_IES$TYPE == "Nested"
is_juxtaposed <- internal_IES$TYPE == "Juxtaposed"
is_imp_25_33 <- imp_IES$LENGTH >= 25 & imp_IES$LENGTH < 34
# 42..140 nt inclusive. The sequences used to be selected with `< 140` while
# the IDs used `< 141`, which misaligned names and sequences whenever a
# 140-nt IES was present.
is_imp_42_140 <- imp_IES$LENGTH >= 42 & imp_IES$LENGTH < 141

sequences <- list(
    All = ies_start(ies$ID, ies$IES_SEQ),
    Internal = ies_start(internal_IES$NAME, internal_IES$SEQ),
    Internal_Bigger = ies_start(internal_IES$NAME[is_bigger], internal_IES$SEQ[is_bigger]),
    Internal_Smaller = ies_start(internal_IES$NAME[is_smaller], internal_IES$SEQ[is_smaller]),
    Nested = ies_start(internal_IES$NAME[is_nested], internal_IES$SEQ[is_nested]),
    Juxtaposed = ies_start(internal_IES$NAME[is_juxtaposed], internal_IES$SEQ[is_juxtaposed]),
    Encompassing = ies_start(encompassing_ies_ids, ies[encompassing_ies_ids, ]$IES_SEQ),
    Imp = ies_start(imp_IES$NAME, imp_IES$SEQ),
    Imp25_33nt = ies_start(imp_IES$NAME[is_imp_25_33], imp_IES$SEQ[is_imp_25_33]),
    Imp42_140nt = ies_start(imp_IES$NAME[is_imp_42_140], imp_IES$SEQ[is_imp_42_140])
)

str(sequences)

cur_img_dir <- paste0(base_img_dir, "SeqLogo/")
dir.create(cur_img_dir, showWarnings = FALSE, recursive = TRUE)

# Path of the weblogo executable.
weglogo <- "/usr/local/src/anaconda/anaconda3/bin/weblogo"
for (cat_seq in names(sequences)) {
    print(paste(cat_seq, length(sequences[[cat_seq]][["SEQ"]])))

    fasta_path <- paste0(base_result_dir, cat_seq, "_IES_consensus.fa")
    write.fasta(as.list(sequences[[cat_seq]][["SEQ"]]), sequences[[cat_seq]][["ID"]],
                fasta_path, open = "w", nbchar = 60, as.string = FALSE)

    # Logo in probability units.
    system(paste0("cat ", fasta_path, " | ", weglogo,
                  " -A dna -c classic --units probability --resolution 300 --title '",
                  cat_seq, " IES' --format PDF > ",
                  cur_img_dir, cat_seq, "_IES_consensus_weblogo_prop.pdf"))
    # Logo in bits, with --composition 0.28.
    system(paste0("cat ", fasta_path, " | ", weglogo,
                  " -A dna -c classic --units bits --composition 0.28  --resolution 300 --title '",
                  cat_seq, " IES' --format PDF > ",
                  cur_img_dir, cat_seq, "_IES_consensus_weblogo_bits.pdf"))
}

# Density of the CINDEX values per IES category, drawn as one line per
# category on the current graphics device (no file is opened here).
cindexes <- list(
    All = ies$CINDEX,
    Nested = internal_IES[internal_IES$TYPE == "Nested", ]$CINDEX,
    Juxtaposed = internal_IES[internal_IES$TYPE == "Juxtaposed", ]$CINDEX,
    Encompassing = ies[encompassing_ies_ids, ]$CINDEX,
    Imp = imp_IES$CINDEX
)

# One colour per plotted category, looked up by name. Picking colours by
# position from the 4-entry `colors` palette gave Juxtaposed and Encompassing
# the wrong colours and left Imp without one, so its line was not drawn.
# All / Encompassing / Imp reuse the shared palette.
cindex_colors <- c(
    All = colors[["All"]],
    Nested = "#1B9E77",
    Juxtaposed = "#D95F02",
    Encompassing = colors[["Encompassing"]],
    Imp = colors[["Imp"]]
)

plot(NULL, xlim = c(0, 11), ylim = c(0, 1), axes = FALSE, ylab = "Density", xlab = "Cindex")
axis(2)
# Tick labels 0..10 are centred on the unit-width bins.
axis(1, at = seq(0.5, 11, 1), labels = seq(0, 10, 1))
for (dname in names(cindexes)) {
    h <- hist(cindexes[[dname]], breaks = seq(0, 11, 1), plot = FALSE)
    lines(h$mids, h$density, col = cindex_colors[[dname]], lwd = 2)
}
legend("topright", lwd = 2, col = cindex_colors[names(cindexes)], legend = names(cindexes))
