
# Never auto-convert strings to factors when building data frames
# (already the default from R 4.0 on; harmless on newer interpreters).
options(stringsAsFactors = FALSE)
# `cfg` is not defined in this script, so it is presumably provided by this
# shared header.
source("../headers.R")

# Directory holding the result files read further down.
data_dir <- "results/"
# Key of the experiment to look up in `cfg`.
exp <- "TC3"
print(exp)

# Configuration entry of the selected experiment, looked up once.
exp_cfg <- cfg[[exp]]

# NOTE(review): the configured KEEP mask is read and then immediately replaced
# by an all-TRUE mask, so every prefix is retained. Confirm that this override
# is intentional before relying on KEEP.
keep <- exp_cfg[["KEEP"]]
keep <- rep(TRUE, length(exp_cfg[["PREFIXES"]]))

# Configuration vectors of the experiment, all subset with the same mask
# (presumably parallel vectors, one entry per prefix -- verify in headers.R).
exp_prefixes  <- exp_cfg[["PREFIXES"]][keep]
exp_nucleus   <- exp_cfg[["NUCLEUS"]][keep]
exp_times     <- exp_cfg[["TIME"]][keep]
exp_seq_dates <- exp_cfg[["SEQ_DATE"]][keep]
exp_ploidy    <- exp_cfg[["PLOIDY"]][keep]

# SNP results ----
# For every prefix, read its findSNP table, pick the row at position `snp_pos`
# and derive a transgene/endogene read ratio (CPHG) from COVERAGE and PERCENT.
# Produces:
#   results  - character matrix, one row per prefix (columns in `snp_cols`)
#   cphg_snp - numeric vector of the CPHG ratio, one value per prefix

# Position of the SNP looked up in each table.
snp_pos <- 596316
snp_cols <- c("PREFIX", "COV", "PERCENT", "TRANSGENE", "ENDOGENE", "CPHG")

# Preallocate instead of growing with rbind(); this also yields a valid
# zero-row matrix (rather than an error) when there are no prefixes.
results <- matrix(NA_character_,
                  nrow = length(exp_prefixes), ncol = length(snp_cols),
                  dimnames = list(NULL, snp_cols))
# Kept as numbers directly, so the ratio is not round-tripped through the
# character matrix (which would truncate it to the printed precision).
cphg_snp <- rep(NA_real_, length(exp_prefixes))

# seq_along() iterates zero times on an empty vector, unlike 1:length().
for (p in seq_along(exp_prefixes)) {
    file <- paste0(data_dir, "/", exp_prefixes[p], ".BOWTIE.pt_51.pe.sorted.bam.findSNP.tab")
    print(file)
    tab <- read.delim(file, header = TRUE, sep = "\t", comment.char = "#")

    # which() ignores NA positions, so a table with missing POS values no
    # longer makes the `if` condition NA (which is an error).
    # NOTE(review): the match is on POS only; if a table can contain several
    # sequences sharing this coordinate the SNP is treated as absent.
    hit <- which(tab$POS == snp_pos)
    if (length(hit) == 1) {
        cov <- tab$COVERAGE[hit]
        percent <- tab$PERCENT[hit]
    } else {
        # Position absent (or ambiguous): propagate NA through the arithmetic.
        cov <- NA
        percent <- NA
    }

    # NOTE(review): assumes PERCENT is a fraction in [0, 1], not 0-100 --
    # TODO confirm against the findSNP output format.
    nb_transgene <- round(cov * percent, 0)
    nb_endogene <- cov - nb_transgene
    # Inf when nb_endogene is 0; NaN when both counts are 0.
    cphg <- nb_transgene / nb_endogene

    # c() coerces the numbers to character, as the matrix is character-typed.
    results[p, ] <- c(exp_prefixes[p], cov, percent, nb_transgene, nb_endogene, cphg)
    cphg_snp[p] <- cphg
}

results

# Coverage-based estimate ----
# For every prefix, read its binned/smoothed bedgraph, normalise the signal by
# the mean of the bins located before `x`, and average the normalised signal
# over the window (nd7_start, nd7_end). The value stored is that average
# minus 1.

# Reference prefix embedded in the bedgraph file names.
ref_pref <- "pt_51"
# Bins whose midpoint is below `x` form the normalisation baseline.
x <- 594000
# Window bounds; only bins whose midpoint is strictly inside are averaged.
nd7_start <- 594873
nd7_end <- 596396

# Bin size and smoothing values embedded in the bedgraph file names.
binsize <- 20
smooth <- 50

# Preallocate instead of growing with c() on every iteration.
cphg_nd7 <- rep(NA_real_, length(exp_prefixes))

# seq_along() iterates zero times on an empty vector, unlike 1:length().
for (p in seq_along(exp_prefixes)) {
    prefix <- exp_prefixes[p]
    file <- paste0(data_dir, "/", prefix, ".BOWTIE.", ref_pref, ".bin", binsize, ".smooth", smooth, ".bedgraph")

    print(file)
    tab <- read.table(file, header = FALSE)
    # Bin midpoint from columns 2 and 3; column 4 carries the signal.
    # NOTE(review): column 1 is ignored, so this assumes the file covers a
    # single sequence -- TODO confirm.
    tab$POS <- rowMeans(tab[, c("V2", "V3")])
    # Baseline; NaN if no bin lies before `x`, which propagates to the result.
    fact_norm <- mean(tab[tab$POS < x, ]$V4)
    tab$FC <- tab$V4 / fact_norm

    cov_nd7 <- mean(tab[tab$POS > nd7_start & tab$POS < nd7_end, ]$FC)
    # Subtract the baseline level (1 after normalisation).
    cphg_nd7[p] <- cov_nd7 - 1
}

# Side-by-side summary of the SNP-based and coverage-based estimates.
data.frame(PREFIX = exp_prefixes, CPHG_SNP = cphg_snp, CPHG_ND7 = cphg_nd7)
                
