
source("../headers.R")
source("../functions.R")

# Sequence lengths: tab-separated table read with read.table's default header
# handling; its two columns are named SEQ_ID and SEQ_LENGTH here.
seqlength <- read.table(
  "../data/ptetraurelia_mic2.seqlength",
  sep = "\t"
)
names(seqlength) <- c("SEQ_ID", "SEQ_LENGTH")

# Directory receiving the PDF figures and the TSV table written further down.
# Created (with parents) if missing; an existing directory is not reported.
base_img_dir <- "images/"
dir.create(base_img_dir, showWarnings = FALSE, recursive = TRUE)


# TE annotation table. Filters applied below, in order: TYPE == "match",
# COPY_LENGTH > 500, and SEQ_LENGTH (joined from `seqlength`) > 2000.
# stringsAsFactors = FALSE keeps ID and TPN_TYPE as character on R < 4.0 too,
# so the TPN_TYPE relabelling and the later row lookups by te$ID act on names
# rather than on factor codes.
te <- read.table(
  "../data//ptetraurelia_mic2_TE_annotation_v1.0.tab",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
te <- te[te$TYPE == "match", ]

# Copies with an empty TPN_TYPE are labelled SOLO_ORF. Element-wise assignment
# is a no-op when nothing matches, whereas assigning through a zero-row subset
# of the data frame raises an error.
no_type <- !is.na(te$TPN_TYPE) & te$TPN_TYPE == ""
te$TPN_TYPE[no_type] <- "SOLO_ORF"

te <- te[te$COPY_LENGTH > 500, ]
te$LENGTH <- te$END - te$START + 1

# Left join: a SEQ_ID absent from `seqlength` gets SEQ_LENGTH = NA. which()
# drops those rows; a plain logical index would keep them as all-NA rows and
# make the row-name assignment below fail.
te <- merge(te, seqlength, by = "SEQ_ID", all.x = TRUE)
te <- te[which(te$SEQ_LENGTH > 2000), ]
rownames(te) <- te$ID

# Reference used for the TE part of the analysis: ref_pref appears in the
# mapping-log and count-table file names, base_genome in the mapping path.
ref_pref <- "mic2"
base_genome <- "ptetraurelia_mic2"


# Disabled step, kept for reference:
#~ mic= read.table(paste0(result_dir,"/","MicGSC_BCP_AAIOSF_2_HiSeq",".",mapper,".",ref_pref,".MIN_QUAL_30.count_reads.tab"),sep="\t",h=T)    
#~ rownames(mic)=mic$ID
#~ mic=mic[te$ID,]

#~ te=te[setdiff(te$ID,mic[mic$RESULT >as.numeric(quantile(mic$RESULT,probs=seq(0,1,0.05))["95%"]),]$ID),]


tpn_types <- c("LINE", "TIR", "SOLO_ORF", "SINE")

# Directory searched for the per-sample mapping logs.
seqtype <- "DNAseq"
mapping_base_dir <- paste0(
  "/data/PARAMECIUM/MAPPING/tetraurelia/", base_genome, "/", seqtype, "/"
)

mapper <- "BOWTIE"

# Both sample series handled together; prefixes/labels and
# aphi_prefixes/aphi_labels are not defined in this file.
all_prefixes <- c(prefixes, aphi_prefixes)
all_labels <- c(labels, aphi_labels)

# Mapped-read count per sample, parsed from the sample's mapping log
# (<prefix>.<mapper>.<ref_pref>.log, searched for under mapping_base_dir).
# A sample whose log or MAPPED_READS= line cannot be found keeps NA in its
# slot (with a warning), so the vector always lines up with all_labels.
nb_mapped_reads <- rep(NA_real_, length(all_prefixes))
for (p in seq_along(all_prefixes)) {
  prefix <- all_prefixes[p]
  log_name <- paste0(prefix, ".", mapper, ".", ref_pref, ".log")

  # First path reported by find; character(0) when nothing matches or the
  # command cannot be run.
  log_file <- tryCatch(
    system(
      paste0("find ", shQuote(mapping_base_dir), " -name ", shQuote(log_name),
             " | head -1"),
      intern = TRUE
    ),
    error = function(e) character(0)
  )
  print(paste(prefix, log_file))

  if (length(log_file) == 0 || !file.exists(log_file[1])) {
    warning("No mapping log found for ", prefix, call. = FALSE)
    next
  }

  # Only the first 10 lines of the log are inspected (what `head` returns).
  log_head <- readLines(log_file[1], n = 10, warn = FALSE)
  hits <- grep("MAPPED_READS=", log_head, fixed = TRUE, value = TRUE)
  if (length(hits) == 0) {
    warning("No MAPPED_READS= line in ", log_file[1], call. = FALSE)
    next
  }
  nb_mapped_reads[p] <- as.numeric(
    sub("MAPPED_READS=", "", hits[1], fixed = TRUE)
  )
}
names(nb_mapped_reads) <- all_labels
nb_mapped_reads

# Per-TE read depth for every sample, normalised by sequencing effort.
# Result: data_te, one row per TE ID (row names), one column per label.
result_dir <- "data/ptetraurelia_mic2/ptetraurelia_mic2_TE_annotation_v1.0/"

data <- data.frame()
for (p in seq_along(all_prefixes)) {
  prefix <- all_prefixes[p]
  label <- all_labels[p]

  tab <- read.table(
    paste0(result_dir, "/", prefix, ".", mapper, ".", ref_pref,
           ".MIN_QUAL_30.count_reads_DEPTH.tab"),
    sep = "\t", header = TRUE
  )
  rownames(tab) <- tab$ID

  # Keep only the TEs retained in `te`; the inner join also drops the NA rows
  # created for IDs missing from this table and sorts the result by ID.
  tab <- tab[te$ID, ]
  tab <- merge(tab, te[, "ID", drop = FALSE], by = "ID")

  # Scale relative to the sample with the fewest mapped reads.
  norm_fact <- nb_mapped_reads[label] / min(nb_mapped_reads)
  tab$DEPTH <- tab$RESULT / norm_fact

  # Decide "first sample" on the loop index, not on the row count of `data`,
  # so an empty first table cannot make the next sample overwrite it.
  sample_depth <- data.frame(ID = tab$ID, VALUE = tab$DEPTH)
  if (p == 1) {
    data <- sample_depth
  } else {
    data <- merge(data, sample_depth, by = "ID")
  }
  colnames(data)[ncol(data)] <- label
}
rownames(data) <- data$ID
# drop = FALSE keeps a data frame (and its row names) with a single sample.
data <- data[, -1, drop = FALSE]

data_te <- data


# Overview boxplots of the normalised TE depth on the current graphics
# device: first the `labels` columns, then the `aphi_labels` columns.
# Outliers are hidden and the bottom margin is enlarged for the rotated names.
for (sample_set in list(labels, aphi_labels)) {
  par(mar = c(8, 4, 4, 2))
  boxplot(data_te[, sample_set], outline = FALSE, las = 2)
}

#####################
# IES
##########################################

# IES groups table and the group names, in the order listed here.
groups <- read.table(
  "../IES_Excision_Score/IES_Groups.tsv",
  header = TRUE, sep = "\t"
)
group_names <- c("Very early", "Early", "Intermediate", "Late")


# From here on the reference is the pt_51_with_ies one: ref_pref is used in
# the count-table file names, base_genome in the result directory.
ref_pref <- "pt_51_with_ies"
seqtype <- "DNAseq"
base_genome <- "ptetraurelia_mac_51_with_ies"


# Directory holding the per-sample IES count tables.
result_dir <- paste0(
  "data/", base_genome, "/internal_eliminated_sequence_PGM_IES51/"
)



# Per-IES read depth for every sample, normalised by sequencing effort.
# Result: data_ies, one row per IES ID (row names), one column per label.
# NOTE(review): nb_mapped_reads was filled earlier in this script from the
# mic2 mapping logs, while these tables carry the pt_51_with_ies reference in
# their names -- confirm this is the intended normalisation.
data <- data.frame()
for (p in seq_along(all_prefixes)) {
  prefix <- all_prefixes[p]
  label <- all_labels[p]

  tab <- read.table(
    paste0(result_dir, "/", prefix, ".", mapper, ".", ref_pref,
           ".MIN_QUAL_30.count_reads_DEPTH.tab"),
    sep = "\t", header = TRUE
  )
  # Row names must be unique, so this also fails loudly on duplicated IDs.
  rownames(tab) <- tab$ID

  # Scale relative to the sample with the fewest mapped reads.
  norm_fact <- nb_mapped_reads[label] / min(nb_mapped_reads)
  tab$DEPTH <- tab$RESULT / norm_fact

  # Decide "first sample" on the loop index, not on the row count of `data`,
  # so an empty first table cannot make the next sample overwrite it.
  sample_depth <- data.frame(ID = tab$ID, VALUE = tab$DEPTH)
  if (p == 1) {
    data <- sample_depth
  } else {
    data <- merge(data, sample_depth, by = "ID")
  }
  colnames(data)[ncol(data)] <- label
}
rownames(data) <- data$ID
# drop = FALSE keeps a data frame (and its row names) with a single sample.
data <- data[, -1, drop = FALSE]


data_ies <- data




# Overview boxplots of the normalised IES depth on the current graphics
# device: first the `labels` columns, then the `aphi_labels` columns.
for (sample_set in list(labels, aphi_labels)) {
  par(mar = c(8, 4, 4, 2))
  boxplot(data_ies[, sample_set], outline = FALSE, las = 2)
}

# Table whose first column is printed above the boxes in the figures below;
# rows are looked up by sample label.
perc_covered_genome <- read.table(
  "../MIC_coverage/percent_covered_genome.txt",
  header = TRUE, sep = "\t"
)


# Wild type time course
########################

# One TE and one IES depth vector per label, interleaved:
# <label>_TE, <label>_IES, ...
lst <- list()
for (p in seq_along(prefixes)) {
  lst[[paste0(labels[p], "_TE")]] <- data_te[, labels[p]]
  lst[[paste0(labels[p], "_IES")]] <- data_ies[, labels[p]]
}

# Box positions 1,2, 4,5, 7,8, ...: every third slot is left empty so that
# each TE/IES pair is visually separated from the next one.
n_slots <- length(lst) * 2 - length(labels)
ats <- seq(1, n_slots, 1)[-seq(3, n_slots, 3)]
te_idx <- seq(1, length(ats), 2)   # boxes holding the TE vectors
ies_idx <- seq(2, length(ats), 2)  # boxes holding the IES vectors
pair_mid <- ats[te_idx] + 0.5      # x centre of each TE/IES pair

pdf(paste0(base_img_dir, "mean_depth_coverage.pdf"), width = 8)
# The device is closed even if a plotting call fails, so an error does not
# leave the PDF open and capture later plots.
tryCatch({
  cols <- c("darkgrey", "white")
  par(mar = c(9, 4, 4, 2), xpd = TRUE)
  bp <- boxplot(lst, outline = FALSE, las = 2, col = cols, at = ats,
                axes = FALSE, ylab = "Mean depth coverage",
                cex = 1.3, cex.axis = 1.3, cex.lab = 1.3)
  axis(2, cex = 1.3)
  axis(1, at = pair_mid, labels = labels, las = 2, cex = 1.3)

  # Annotation goes 3 units above the higher upper whisker of each pair.
  ymaxs <- pmax(bp$stats[5, te_idx], bp$stats[5, ies_idx])
  text(pair_mid, ymaxs + 3, perc_covered_genome[labels, 1])

  legend("topleft", bty = "n", legend = c("TE", "IES"), fill = cols, cex = 1.3)
}, finally = dev.off())

# Summary table, one row per label. The values are medians, although the
# output file name says "mean".
depth_median <- vapply(lst, median, numeric(1))
cov <- cbind(
  depth_median[seq(1, length(labels) * 2, 2)],
  depth_median[seq(2, length(labels) * 2, 2)]
)
rownames(cov) <- labels
colnames(cov) <- c("TE", "IES")

write.table(cov, paste0(base_img_dir, "mean_depth_coverage.tsv"),
            sep = "\t", quote = FALSE)





# aphi time course
########################

# One TE and one IES depth vector per label, interleaved:
# <label>_TE, <label>_IES, ...
lst <- list()
for (p in seq_along(aphi_prefixes)) {
  lst[[paste0(aphi_labels[p], "_TE")]] <- data_te[, aphi_labels[p]]
  lst[[paste0(aphi_labels[p], "_IES")]] <- data_ies[, aphi_labels[p]]
}

# Box positions 1,2, 4,5, 7,8, ...: every third slot is left empty so that
# each TE/IES pair is visually separated from the next one.
n_slots <- length(lst) * 2 - length(aphi_labels)
ats <- seq(1, n_slots, 1)[-seq(3, n_slots, 3)]
te_idx <- seq(1, length(ats), 2)   # boxes holding the TE vectors
ies_idx <- seq(2, length(ats), 2)  # boxes holding the IES vectors
pair_mid <- ats[te_idx] + 0.5      # x centre of each TE/IES pair

pdf(paste0(base_img_dir, "mean_depth_coverage_aphi.pdf"), width = 5, height = 5)
# The device is closed even if a plotting call fails, so an error does not
# leave the PDF open and capture later plots.
tryCatch({
  cols <- c("darkgrey", "white")
  par(mar = c(7, 4, 2, 2), xpd = TRUE)
  bp <- boxplot(lst, outline = FALSE, las = 2, col = cols, at = ats,
                axes = FALSE, ylab = "Mean depth coverage",
                cex = 1.3, cex.axis = 1.3, cex.lab = 1.3)
  axis(2, cex = 1.3)
  axis(1, at = pair_mid, labels = aphi_labels, las = 2, cex = 1.3)

  # Annotation goes 3 units above the higher upper whisker of each pair.
  ymaxs <- pmax(bp$stats[5, te_idx], bp$stats[5, ies_idx])
  text(pair_mid, ymaxs + 3, perc_covered_genome[aphi_labels, 1])

  legend("topleft", bty = "n", legend = c("TE", "IES"), fill = cols, cex = 1.3)
}, finally = dev.off())



# Ezl


# This section reads the count tables of the mic2 TE annotation and indexes
# them with the IDs in `te`, so the reference is set back to mic2 here.
# Without this, ref_pref and base_genome would still hold the pt_51_with_ies
# values assigned for the IES section, and both the log lookup and the table
# file names would be built for the wrong reference.
# NOTE(review): inferred from the paths used in this section -- confirm
# against the files on disk.
ref_pref <- "mic2"
base_genome <- "ptetraurelia_mic2"

seqtype <- "DNAseq"
mapping_base_dir <- paste0(
  "/data/PARAMECIUM/MAPPING/tetraurelia/", base_genome, "/", seqtype, "/"
)


mapper <- "BOWTIE"


# RNAi samples: file-name prefixes and the matching display labels.
rnai_prefixes <- c("Ezl174-2_RNAi_r1", "DCL2_3_RNAi_r1_HBJ-1")
rnai_labels <- c("EZL1", "DCL23")

# Mapped-read count per RNAi sample, parsed from the sample's mapping log
# (<prefix>.<mapper>.<ref_pref>.log, searched for under mapping_base_dir).
# A sample whose log or MAPPED_READS= line cannot be found keeps NA in its
# slot (with a warning), so the vector always lines up with rnai_labels.
nb_mapped_reads <- rep(NA_real_, length(rnai_prefixes))
for (p in seq_along(rnai_prefixes)) {
  prefix <- rnai_prefixes[p]
  log_name <- paste0(prefix, ".", mapper, ".", ref_pref, ".log")

  # First path reported by find; character(0) when nothing matches or the
  # command cannot be run.
  log_file <- tryCatch(
    system(
      paste0("find ", shQuote(mapping_base_dir), " -name ", shQuote(log_name),
             " | head -1"),
      intern = TRUE
    ),
    error = function(e) character(0)
  )
  print(paste(prefix, log_file))

  if (length(log_file) == 0 || !file.exists(log_file[1])) {
    warning("No mapping log found for ", prefix, call. = FALSE)
    next
  }

  # Only the first 10 lines of the log are inspected (what `head` returns).
  log_head <- readLines(log_file[1], n = 10, warn = FALSE)
  hits <- grep("MAPPED_READS=", log_head, fixed = TRUE, value = TRUE)
  if (length(hits) == 0) {
    warning("No MAPPED_READS= line in ", log_file[1], call. = FALSE)
    next
  }
  nb_mapped_reads[p] <- as.numeric(
    sub("MAPPED_READS=", "", hits[1], fixed = TRUE)
  )
}
names(nb_mapped_reads) <- rnai_labels
nb_mapped_reads

# Per-TE read depth for the RNAi samples, normalised by sequencing effort.
# Result: data_rnai, one row per TE ID (row names), one column per label.
result_dir <- "data/ptetraurelia_mic2/ptetraurelia_mic2_TE_annotation_v1.0/"

data <- data.frame()
for (p in seq_along(rnai_prefixes)) {
  prefix <- rnai_prefixes[p]
  label <- rnai_labels[p]

  tab <- read.table(
    paste0(result_dir, "/", prefix, ".", mapper, ".", ref_pref,
           ".MIN_QUAL_30.count_reads_DEPTH.tab"),
    sep = "\t", header = TRUE
  )
  rownames(tab) <- tab$ID

  # Keep only the TEs retained in `te`; the inner join also drops the NA rows
  # created for IDs missing from this table and sorts the result by ID.
  tab <- tab[te$ID, ]
  tab <- merge(tab, te[, "ID", drop = FALSE], by = "ID")

  # Scale relative to the sample with the fewest mapped reads.
  norm_fact <- nb_mapped_reads[label] / min(nb_mapped_reads)
  tab$DEPTH <- tab$RESULT / norm_fact

  # Decide "first sample" on the loop index, not on the row count of `data`,
  # so an empty first table cannot make the next sample overwrite it.
  sample_depth <- data.frame(ID = tab$ID, VALUE = tab$DEPTH)
  if (p == 1) {
    data <- sample_depth
  } else {
    data <- merge(data, sample_depth, by = "ID")
  }
  colnames(data)[ncol(data)] <- label
}
rownames(data) <- data$ID
# drop = FALSE keeps a data frame (and its row names) with a single sample.
data <- data[, -1, drop = FALSE]

data_rnai <- data


# RNAi figure: one box per column of data_rnai, with the first column of
# perc_covered_genome printed 3 units above each upper whisker.
pdf(paste0(base_img_dir, "mean_depth_coverage_RNAi.pdf"), width = 3, height = 7)
# The device is closed even if a plotting call fails, so an error does not
# leave the PDF open and capture later plots.
tryCatch({
  par(xpd = TRUE)
  bp <- boxplot(data_rnai, outline = FALSE, axes = FALSE,
                ylab = "Mean depth coverage",
                cex = 1.3, cex.axis = 1.3, cex.lab = 1.3)
  text(seq_along(rnai_labels), bp$stats[5, ] + 3,
       perc_covered_genome[rnai_labels, 1])
  axis(2, cex = 1.3)
  axis(1, at = seq_along(rnai_labels), labels = rnai_labels, tick = FALSE)
}, finally = dev.off())


