source("../headers.R")
source("../functions.R")
# Strongly favour fixed over scientific notation when numbers are printed.
options(scipen = 999)

# Directory receiving every figure written by this script.
img_dir <- "images/"
dir.create(img_dir, showWarnings = FALSE, recursive = TRUE)

# Per-sample accumulators, filled from the mapping logs below.
nb_mapped_reads <- c()
read_lengths <- c()

# IES feature table; rows are named by the ID column so that they can be
# selected by IES identifier.
ies <- read.table(
    "../data/internal_eliminated_sequence_PGM_IES51_features.tab",
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE
)
rownames(ies) <- ies$ID

# Where the mapping logs live and the reference name embedded in their
# file names.
mapping_dir <- "../data/mapping/ptetraurelia_mac_51_with_ies/DNAseq/"
ref_pref <- "pt_51_with_ies"

# number of mapped reads
# For each sample prefix, locate its two mapping logs below `mapping_dir` and
# extract:
#   - the 'len max' value of <prefix>.BOWTIE.<ref_pref>.log        -> read_lengths
#   - NB_READS_RMDUP of <prefix>.rmdup.BOWTIE.<ref_pref>.log       -> nb_mapped_reads
# Both vectors hold exactly one entry per prefix (NA when the log or the value
# is missing) so that they stay aligned with `prefixes` and `labels`.

# Run a shell command and return its output lines; a command that cannot be
# launched yields character(0) (with a warning) instead of a try-error object.
run_shell_lines <- function(cmd) {
    tryCatch(
        system(cmd, intern = TRUE),
        error = function(e) {
            warning("Command failed: ", cmd, " (", conditionMessage(e), ")",
                    call. = FALSE)
            character(0)
        }
    )
}

nb_mapped_reads <- rep(NA_real_, length(prefixes))
read_lengths <- rep(NA_real_, length(prefixes))
for (p in seq_along(prefixes)) {
    prefix <- prefixes[p]

    # Raw mapping log: maximal read length.
    log_file <- run_shell_lines(paste0(
        "find ", shQuote(mapping_dir), " -name ",
        shQuote(paste0(prefix, ".BOWTIE.", ref_pref, ".log"))
    ))
    # Without a log path grep would read from stdin, so only query real files.
    if (length(log_file) > 0) {
        read_length <- as.numeric(run_shell_lines(paste0(
            "grep 'len max' ", shQuote(log_file[1]),
            " | perl -p -e 's/len max\t//'"
        )))
        if (length(read_length) > 0) {
            read_lengths[p] <- read_length[1]
        }
    } else {
        warning("No mapping log found for ", prefix, call. = FALSE)
    }

    # Duplicate-removed mapping log: number of reads.
    log_file <- run_shell_lines(paste0(
        "find ", shQuote(mapping_dir), " -name ",
        shQuote(paste0(prefix, ".rmdup.BOWTIE.", ref_pref, ".log"))
    ))
    if (length(log_file) > 0) {
        nb_read <- as.numeric(run_shell_lines(paste0(
            "head ", shQuote(log_file[1]),
            " | grep 'NB_READS_RMDUP=' | perl -p -e 's/NB_READS_RMDUP=//'"
        )))
        if (length(nb_read) > 0) {
            nb_mapped_reads[p] <- nb_read[1]
        }
    } else {
        warning("No rmdup mapping log found for ", prefix, call. = FALSE)
    }

    print(paste(prefix, paste(log_file, collapse = " ")))
}
names(nb_mapped_reads) <- labels
nb_mapped_reads
read_lengths



# LOAD DATA
################################
# Fill `data` with one Concatemer table per sample prefix (keyed by prefix).
# Samples whose table file does not exist are reported and left out.
data <- list()
for (prefix in prefixes) {
    # First entry named after the prefix below ../data/ParTIES/.
    find_cmd <- paste0("find ", "../data/ParTIES/ -name ", prefix, " | head -1")
    sample_dir <- try(system(find_cmd, intern = TRUE))

    concatemer_file <- paste0(sample_dir, "/ParTIES/Concatemer/Concatemer.tab")
    print(concatemer_file)

    if (file.exists(concatemer_file)) {
        concatemer_tab <- read.delim(
            concatemer_file,
            header = TRUE,
            sep = "\t",
            stringsAsFactors = FALSE
        )
        # Keep only the rows that are not flagged in AMBIGUOUS_MATCHES.
        data[[prefix]] <- concatemer_tab[!concatemer_tab$AMBIGUOUS_MATCHES, ]
    } else {
        print(paste("No file", concatemer_file))
    }
}


# normalized number of IES-IES junctions
################################
# Number of rows of each sample's Concatemer table per million reads
# (nb_mapped_reads). Exactly one value is produced per prefix: a sample that is
# absent from `data` yields NA (then 0) instead of being silently dropped,
# which would shift every following value onto the wrong label.
results <- vapply(
    seq_along(prefixes),
    function(p) {
        tab <- data[[prefixes[p]]]
        if (is.null(tab)) {
            return(NA_real_)
        }
        as.numeric(nrow(tab) / nb_mapped_reads[p] * 1e6)
    },
    numeric(1)
)
# Missing samples and missing read counts are reported as 0.
results[is.na(results)] <- 0
names(results) <- labels



# Bar plot of the normalized junction counts, one bar per sample.
pdf(paste0(img_dir, "normalized_number_excision_product.pdf"))
# Larger bottom margin: the sample labels are drawn perpendicular to the axis.
par(mar = c(8.1, 4.1, 4.1, 2.1))
barplot(
    results,
    las = 2,
    col = "darkgrey",
    border = "white",
    ylab = "Number of excised IES junctions (RPM)",
    cex = 1.3,
    cex.axis = 1.3,
    cex.lab = 1.3
)
dev.off()

# Per TC
# One bar plot per entry of `cfg`, restricted to the labels of that entry.
# Entries whose EXP_CATEGORY is "APHI" and entries with a single prefix are
# skipped.
for (tc in names(cfg)) {
    tc_prefixes <- cfg[[tc]][["PREFIXES"]]
    tc_labels <- cfg[[tc]][["LABELS"]]
    # `&&` is the scalar, short-circuit operator intended for `if` conditions.
    if (cfg[[tc]][["EXP_CATEGORY"]] != "APHI" && length(tc_prefixes) > 1) {
        pdf(
            paste0(img_dir, "normalized_number_excision_product_", tc, ".pdf"),
            width = 9
        )
        par(mar = c(8.1, 4.1, 4.1, 2.1))
        barplot(
            results[tc_labels],
            width = 0.6,
            las = 2,
            col = "darkgrey",
            border = "white",
            ylab = "Number of excised IES junctions (RPM)",
            cex = 1.3,
            cex.axis = 1.3,
            cex.lab = 1.3
        )
        dev.off()
    }
}



# single IES circles and concatemerized IES-IES junctions
################################
# Build a 3 x n_samples matrix:
#   CONCATEMER       rows with MONOMER false, per million reads
#   SINGLE_CIRCLE    rows with MONOMER true, per million reads
#   PERCENT_MONOMER  percentage of MONOMER rows, rounded to one decimal
# Every sample contributes a full 3-value column; a sample absent from `data`
# gets zeros instead of a short column that cbind() would recycle.

# "Set1" has a minimum of 3 colours; asking for 3 avoids the warning raised by
# a request for 2.
colors <- brewer.pal(3, "Set1")

results <- vapply(
    seq_along(prefixes),
    function(p) {
        tab <- data[[prefixes[p]]]
        if (is.null(tab)) {
            return(c(0, 0, 0))
        }
        nb <- nrow(tab)
        nb_monomer <- sum(tab$MONOMER)
        as.numeric(c(
            c(nb - nb_monomer, nb_monomer) / nb_mapped_reads[p] * 1e6,
            round(nb_monomer / nb * 100, 1)
        ))
    },
    numeric(3)
)
# Undefined values (missing read count, empty table) are reported as 0.
results[is.na(results)] <- 0
rownames(results) <- c("CONCATEMER", "SINGLE_CIRCLE", "PERCENT_MONOMER")
colnames(results) <- labels



# Two overlaid needle plots sharing the x axis: row 1 of `results` in black
# (left y axis) and row 2 in grey, shifted by 0.4 (right y axis). Each series
# is drawn by its own plot() call, hence with its own y range.
n_samples <- ncol(results)
x_pos <- 1:n_samples

pdf(
    paste0(img_dir, "normalized_number_excision_products_2scales.pdf"),
    width = 6,
    height = 5
)
par(mar = c(8.1, 4.1, 2.1, 2.1), xpd = TRUE)

# First series, with the sample labels and the left axis.
plot(
    x_pos, results[1, ],
    type = "h", lwd = 8, lend = 1,
    xlim = c(1, n_samples),
    axes = F,
    xlab = "",
    ylab = "Number of excised IES junctions (RPM)",
    cex = 1.3, cex.lab = 1.3, cex.axis = 1.3
)
axis(
    1,
    at = x_pos + 0.2,
    labels = colnames(results),
    las = 2,
    cex = 1.3, cex.lab = 1.3,
    col = NA, col.ticks = 1
)
axis(2, cex = 1.3, cex.lab = 1.3)

# Second series drawn on top of the first one, with the right axis.
par(new = TRUE)
plot(
    x_pos + 0.4, results[2, ],
    type = "h", lwd = 8, lend = 1,
    col = "darkgrey",
    xlim = c(1, n_samples),
    axes = F,
    xlab = "", ylab = "",
    cex = 1.3, cex.lab = 1.3, cex.axis = 1.3
)
axis(4, cex = 1.3, cex.lab = 1.3)

legend(
    "topleft",
    legend = c("Concatemers", "Single-IES circles"),
    col = c("black", "darkgrey"),
    pch = 15,
    bty = "n",
    cex = 1.3
)
dev.off()

# Export the table with one row per sample.
write.table(
    t(results),
    "normalized_number_excision_products.tsv",
    row.names = TRUE,
    quote = FALSE,
    sep = "\t"
)


# NUMBER OF IES in CONCATEMERS
#######################
# For every sample, count the distinct IES IDs listed in its table, broken
# down by the GROUP_NAME found in IES_Groups.tsv.

# NOTE(review): groups[ids, ] below selects rows by name, which presumably
# relies on IES_Groups.tsv providing IES IDs as row names - confirm.
groups <- read.table("../IES_Excision_Score/IES_Groups.tsv", header = TRUE, sep = "\t")
group_names <- c("Very early", "Early", "Intermediate", "Late")
gcolors <- c("#E41A1C", "#FF7F00", "#4DAF4A", "dodgerblue", "grey")

# Distinct IES IDs per sample (the IES_IDs field is split on spaces).
ids_per_sample <- unname(lapply(prefixes, function(prefix) {
    unique(unlist(strsplit(as.character(data[[prefix]]$IES_IDs), split = " ")))
}))

# Distinct IES IDs over all samples.
all_ies_ids <- unique(unlist(ids_per_sample))

# One row per sample, one column per group.
results <- do.call(rbind, lapply(ids_per_sample, function(ids) {
    table(groups[ids, ]$GROUP_NAME)[c(group_names)]
}))
results[is.na(results)] <- 0
rownames(results) <- labels
colnames(results) <- group_names

# Total number of IES involved
length(all_ies_ids) / nrow(ies)


# Stacked bar plot of the per-group IES counts, one bar per sample, with the
# percentage of all grouped IESs (GROUP_NAME other than "None") printed above
# each bar.
nb_involved <- rowSums(results)
pct_involved <- round(nb_involved / sum(groups$GROUP_NAME != "None") * 100, 0)

pdf(paste0(img_dir, "nb_ies_involved.pdf"), width = 8)
par(mar = c(8.1, 4.1, 4.1, 2.1), xpd = TRUE)
bp <- barplot(
    t(results),
    ylab = "# IESs involved in excision product",
    names.arg = labels,
    las = 2,
    col = gcolors,
    border = "white"
)
# Labels sit 600 units above the top of each bar.
text(bp, nb_involved + 600, paste0(pct_involved, "%"))
legend(
    "topleft",
    legend = colnames(results),
    pch = 15,
    col = gcolors,
    border = "white",
    bty = "n"
)
dev.off()




# Stacked bar plot of the per-group proportions for the `alg_labels` samples,
# with the total number of IESs of each sample printed above its bar.
# drop = FALSE keeps a matrix even when `alg_labels` holds a single label;
# otherwise the selection collapses to a vector and rowSums() fails.
alg_counts <- results[alg_labels, , drop = FALSE]
alg_totals <- rowSums(alg_counts)

pdf(paste0(img_dir, "prop_ies_involved.pdf"), width = 8)
par(mar = c(8.1, 4.1, 4.1, 2.1), xpd = TRUE)
bp <- barplot(
    t(alg_counts / alg_totals),
    ylab = "Proportion of IESs involved in excision product",
    names.arg = alg_labels,
    las = 2,
    col = gcolors,
    border = "white",
    # Head room above 1 for the totals and the horizontal legend.
    ylim = c(0, 1.19)
)
legend(
    "topleft",
    legend = colnames(results),
    pch = 15,
    col = gcolors,
    border = "white",
    bty = "n",
    horiz = TRUE
)
text(bp, 1.03, alg_totals)
dev.off()




# IES Length for MONOMERs
#######################
# Size histogram of the IESs listed in MONOMER rows, pooled over all samples.

max_size <- 1000   # upper limit of the plotted x range
bin_size <- 15     # histogram bin width

# IES IDs of the MONOMER rows of each sample.
monomer_ids_per_sample <- lapply(prefixes, function(prefix) {
    monomer_tab <- data[[prefix]]
    monomer_tab <- monomer_tab[monomer_tab$MONOMER, ]
    unlist(strsplit(as.character(monomer_tab$IES_IDs), split = " "))
})
ies_in_monomers_ids <- unique(unlist(monomer_ids_per_sample))

pdf(paste0(img_dir, "ies_length_of_circle.pdf"))
hist(
    ies[ies_in_monomers_ids, ]$SIZE,
    # Bins start at 20 and extend past the largest IES of the feature table.
    breaks = seq(20, max(ies$SIZE) + bin_size, bin_size),
    xlim = c(25, max_size),
    main = "",
    xlab = "IES size involved in circles",
    col = "darkgrey",
    border = "white",
    cex.axis = 1.3,
    cex = 1.3,
    cex.lab = 1.3
)
dev.off()
            

# Per-sample size histogram of the IESs listed in MONOMER rows, written only
# for the `alg_prefixes` samples that have more than 50 such rows.
for (p in seq_along(alg_prefixes)) {
    tab <- data[[alg_prefixes[p]]]
    # A sample without a loaded table would make the row-count test below fail
    # with a zero-length condition, so skip it explicitly.
    if (is.null(tab)) {
        print(paste("No data for", alg_prefixes[p]))
        next
    }

    tab <- tab[tab$MONOMER, ]
    print(nrow(tab))
    if (nrow(tab) > 50) {
        ies_ids <- unique(unlist(strsplit(as.character(tab$IES_IDs), split = " ")))
        print(max(ies[ies_ids, ]$SIZE))
        # NOTE(review): sub() replaces only the first space of the label;
        # kept as is so existing file names do not change.
        pdf(paste0(img_dir, "ies_length_of_circle_", sub(" ", "_", alg_labels[p]), ".pdf"))
        hist(
            ies[ies_ids, ]$SIZE,
            breaks = seq(20, max(ies$SIZE) + bin_size, bin_size),
            xlim = c(25, max_size),
            main = alg_labels[p],
            xlab = "IES size involved in circles",
            col = "darkgrey",
            border = "white"
        )
        dev.off()
    }
}


