
library(ggplot2)
options(stringsAsFactors = FALSE)

library(gplots)
library(pheatmap)
library("RColorBrewer")


source("../headers.R")
source("../functions.R")

# Reference assemblies: full genome name plus the short prefix that appears
# in result and log file names, for the MIC and for the MAC.
mic_base_genome <- "ptetraurelia_mic2"
mic_ref_pref <- "mic2"

mac_base_genome <- "ptetraurelia_mac_51"
mac_ref_pref <- "pt_51"

# Analysis mode. It names the output sub-directory; further down, any value
# other than "normal" makes the read counts come from the rmdup mapping logs.
mode <- "normal"

# Figures are written to images/<mode>/ (created on demand).
image_dir <- paste0("images/", mode, "/")
dir.create(image_dir, showWarnings = FALSE, recursive = TRUE)

# Sequencing type; used as a path component of the mapping directory.
seqtype <- "DNAseq"

# Per-assembly directories holding the telomeric-region result tables.
mic_data_dir <- sprintf("/data/PARAMECIUM/TELOMEREs/Illumina/ptetraurelia/%s/results/", mic_base_genome)
mac_data_dir <- sprintf("/data/PARAMECIUM/TELOMEREs/Illumina/ptetraurelia/%s/results/", mac_base_genome)


# MIC
#########################################

# Sequence identifiers and lengths of the MIC assembly, read from a
# header-less table whose columns are named SEQ_ID and LENGTH here.
seqlength <- read.table(paste0("../data/", mic_base_genome, ".seqlength"), header = FALSE)
names(seqlength) <- c("SEQ_ID", "LENGTH")

# Sequence names, used later as the seqlevels of every GRanges object.
seq_ids <- seqlength[["SEQ_ID"]]


# Directory tree searched for the per-sample mapping logs of the MIC reference.
mapping_base_dir <- paste0("/data/PARAMECIUM/MAPPING/tetraurelia/", mic_base_genome, "/", seqtype, "/")

# The log-file suffix and the counter read from it depend on the mode:
# "normal" uses the plain mapping log, anything else the rmdup log.
if (mode == "normal") {
    log_suffix <- paste0(".BOWTIE.", mic_ref_pref, ".log")
    count_key <- "MAPPED_READS="
} else {
    log_suffix <- paste0(".rmdup.BOWTIE.", mic_ref_pref, ".log")
    count_key <- "NB_READS_RMDUP="
}

# nb_reads[p] is the read count reported in the log of prefixes[p].
# The vector is filled by index so that it always stays aligned with
# `prefixes`: a sample whose log or counter cannot be found gets NA (with a
# warning) instead of being silently dropped, which would shift the counts of
# all following samples.
nb_reads <- rep(NA_real_, length(prefixes))
for (p in seq_along(prefixes)) {
    prefix <- prefixes[p]

    # Locate the log anywhere below mapping_base_dir; arguments are quoted so
    # that unusual characters in paths or prefixes cannot break the command.
    find_cmd <- paste("find", shQuote(mapping_base_dir), "-name", shQuote(paste0(prefix, log_suffix)))
    log_file <- tryCatch(system(find_cmd, intern = TRUE), error = function(e) character(0))
    if (length(log_file) > 1) {
        warning("Several log files found for ", prefix, "; using ", log_file[1], call. = FALSE)
    }

    nb_read <- NA_real_
    if (length(log_file) >= 1) {
        # Only the first 10 lines are inspected, as `head` did previously.
        log_head <- tryCatch(
            readLines(log_file[1], n = 10L, warn = FALSE),
            error = function(e) character(0)
        )
        count_lines <- grep(count_key, log_head, fixed = TRUE, value = TRUE)
        counts <- as.numeric(sub(count_key, "", count_lines, fixed = TRUE))
        if (length(counts) > 1) {
            warning("Several '", count_key, "' lines in ", log_file[1], "; using the first", call. = FALSE)
        }
        if (length(counts) >= 1) {
            nb_read <- counts[1]
        }
    }
    if (is.na(nb_read)) {
        warning("No read count ('", count_key, "') available for ", prefix, call. = FALSE)
    }

    nb_reads[p] <- nb_read
    print(paste(prefix, log_file))
}
nb_reads



# Load, for every sample, the tab-separated table of telomeric regions found
# on the MIC reference. Tables are stored in `data` under the sample's label
# (labels[p] is the label of prefixes[p]).
data <- list()
# seq_along() so that an empty `prefixes` yields no iteration (1:0 would not).
for (p in seq_along(prefixes)) {
    mic_file <- paste0(mic_data_dir, "/", prefixes[p], ".TELOMERIC_regions.BOWTIE.", mic_ref_pref, ".gff3.tab")
    print(mic_file)
    data[[labels[p]]] <- read.table(mic_file, header = TRUE, sep = "\t")
}


# Number of telomerisation sites per sample (rows of its table), normalised
# to reads per million using nb_reads. The vector is preallocated and filled
# by index so that it stays aligned with `labels`.
results <- rep(NA_real_, length(prefixes))
for (p in seq_along(prefixes)) {
    tab <- data[[labels[p]]]
    print(paste(prefixes[p], nrow(tab)))
    results[p] <- nrow(tab) / nb_reads[p] * 1e6
}
names(results) <- labels

# One bar per sample; the enlarged bottom margin leaves room for the
# vertical (las = 2) sample labels.
pdf(paste0(image_dir, "normalized_number_telomerisation_site.", mic_ref_pref, ".pdf"))
par(mar = c(8.1, 4.1, 4.1, 4.1))
barplot(t(results), ylab = "# of telomerisation sites (RPM)", names.arg = labels, las = 2, col = c("grey"))
dev.off()


# Same normalised count as above, but after merging nearby sites: reduce()
# with min.gapwidth = 1000 collapses ranges separated by a gap shorter than
# 1000 positions (strand ignored), and the merged ranges are counted.
results <- rep(NA_real_, length(prefixes))
for (p in seq_along(prefixes)) {
    tab <- data[[labels[p]]]
    gr <- makeGRangesFromDataFrame(tab[, c("SEQ_ID", "START", "END")], seqnames.field = "SEQ_ID")
    seqlevels(gr) <- seq_ids

    merged_sites <- reduce(gr, min.gapwidth = 1000, ignore.strand = TRUE)
    results[p] <- length(merged_sites) / nb_reads[p] * 1e6
}

# xpd = TRUE allows drawing outside the plot region; the large bottom margin
# leaves room for the vertical sample labels.
pdf(paste0(image_dir, "normalized_number_telomerisation_site_reduce_1kb.", mic_ref_pref, ".pdf"))
par(mar = c(12.1, 4.1, 4.1, 4.1), xpd = TRUE)
barplot(results, ylab = "Normalized number of telomerisation site", names.arg = labels, las = 2)
dev.off()




# Table of IES groups; its GROUP_NAME column is attached to the IES
# annotation below.
groups <- read.table("../IES_Excision_Score/IES_Groups.tsv", header = TRUE, sep = "\t")

# Groups used in the analysis and one colour per group.
# The "None" group (colour "grey") is not included.
group_names <- c("Very early", "Early", "Intermediate", "Late")
gcolors <- setNames(c("#E41A1C", "#FF7F00", "#4DAF4A", "dodgerblue"), group_names)

# IES annotation as a GRanges object, using the MIC sequence names as seqlevels.
ies <- as(import.gff("internal_eliminated_sequence_PGM_IES51.mic2.gff3"), "GRanges")
seqlevels(ies) <- seq_ids

# NOTE(review): `groups` is indexed by ies$ID, which presumably relies on the
# row names of `groups` being IES identifiers — confirm against the TSV layout.
ies$group_name <- groups[ies$ID, ]$GROUP_NAME

# Keep only IESs whose group is not "None".
ies <- ies[ies$group_name != "None"]

# For every sample, classify telomerisation sites by their distance to the
# nearest IES (<10, 10-100, >100) and normalise each class to reads per
# million. One row per sample, one column per distance class; the matrix is
# preallocated instead of being grown with rbind().
results <- matrix(NA_real_, nrow = length(prefixes), ncol = 3)
for (p in seq_along(prefixes)) {
    tab <- data[[labels[p]]]
    gr <- makeGRangesFromDataFrame(tab[, c("SEQ_ID", "START", "END")], seqnames.field = "SEQ_ID")
    seqlevels(gr) <- seq_ids

    # Sites for which distanceToNearest() reports no hit keep an infinite
    # distance and are therefore counted in the ">100nt" class.
    site_dist <- rep(Inf, length(gr))
    d <- distanceToNearest(gr, ies, ignore.strand = TRUE)
    site_dist[queryHits(d)] <- mcols(d)$distance
    gr$DIST <- site_dist

    bin_counts <- c(
        sum(site_dist < 10, na.rm = TRUE),
        sum(site_dist >= 10 & site_dist <= 100, na.rm = TRUE),
        sum(site_dist > 100, na.rm = TRUE)
    )
    results[p, ] <- bin_counts / nb_reads[p] * 1e6
}
rownames(results) <- labels
colnames(results) <- c("<10nt", "10nt-100nt", ">100nt")

# Stacked bars: one bar per sample, one segment per distance class.
pdf(paste0(image_dir, "normalized_number_telomerisation_site.", mic_ref_pref, "_width_IES_distances.pdf"))
par(mar = c(8.1, 4.1, 4.1, 4.1))
barplot(t(results), ylab = "# of telomerisation sites (RPM)", names.arg = labels, las = 2, col = c("black", "darkgrey", "grey90"))
legend("topright", legend = colnames(results), title = "Distance from IES", col = c("black", "darkgrey", "grey90"), bty = "n", pch = 15)
dev.off()





# Work on the last sample only: telomerisation sites as a GRanges object.
p <- length(prefixes)
tab <- data[[labels[p]]]
gr <- makeGRangesFromDataFrame(tab[, c("SEQ_ID", "START", "END")], seqnames.field = "SEQ_ID")
seqlevels(gr) <- seq_ids

# For each IES group, count the IESs whose nearest telomerisation site lies
# at a distance of at most 10 (strand ignored, NA distances excluded).
# Alternative kept from the original, as a percentage of the group:
#~     length(queryHits(d))/nrow(mcols(ies_group))*100
results <- vapply(group_names, function(gname) {
    ies_group <- ies[ies$group_name == gname]
    d <- distanceToNearest(ies_group, gr, ignore.strand = TRUE)
    nearest_dist <- mcols(d)$distance
    sum(!is.na(nearest_dist) & nearest_dist <= 10)
}, integer(1))
#~ barplot(results,col=gcolors,las=2,ylab="% of IES close to telomere addition site",main=labels[p])
 results
# Output recorded in the original script:
#~   Very early        Early Intermediate         Late 
#~           10           13            9           16 

    

# MAC
###################################################

# Sequence identifiers and lengths of the MAC assembly, read from a
# header-less table whose columns are named SEQ_ID and LENGTH here.
# From this point on, `seqlength` and `seq_ids` describe the MAC, not the MIC.
seqlength <- read.table(paste0("../data/", mac_base_genome, ".seqlength"), header = FALSE)
names(seqlength) <- c("SEQ_ID", "LENGTH")
seq_ids <- seqlength[["SEQ_ID"]]




# Load, for every sample, the tab-separated table of telomeric regions found
# on the MAC reference. This replaces the MIC tables previously held in
# `data`; entries are keyed by the sample's label.
data <- list()
# seq_along() so that an empty `prefixes` yields no iteration (1:0 would not).
for (p in seq_along(prefixes)) {
    mac_file <- paste0(mac_data_dir, "/", prefixes[p], ".TELOMERIC_regions.BOWTIE.", mac_ref_pref, ".gff3.tab")
    print(mac_file)
    data[[labels[p]]] <- read.table(mac_file, header = TRUE, sep = "\t")
}


# Number of telomerisation sites per sample on the MAC reference, normalised
# to reads per million. The vector is preallocated and filled by index so that
# it stays aligned with `labels`.
# NOTE(review): nb_reads was collected from the logs of the mapping against
# the MIC reference (mic_ref_pref); confirm that this is the intended
# denominator for the MAC counts.
results <- rep(NA_real_, length(prefixes))
for (p in seq_along(prefixes)) {
    tab <- data[[labels[p]]]
    print(paste(prefixes[p], nrow(tab)))
    results[p] <- nrow(tab) / nb_reads[p] * 1e6
}
names(results) <- labels

# One bar per sample; the enlarged bottom margin leaves room for the
# vertical (las = 2) sample labels.
pdf(paste0(image_dir, "normalized_number_telomerisation_site.", mac_ref_pref, ".pdf"))
par(mar = c(8.1, 4.1, 4.1, 4.1))
barplot(t(results), ylab = "# of telomerisation sites (RPM)", names.arg = labels, las = 2, col = c("grey"))
dev.off()









