#!/usr/bin/env Rscript
# Command-line arguments (trailing only):
#   args[1]  path of the VCF handed to read.vcfR()
#   args[2]  accession matched against the RefSeq.Accn column of the genome table
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  # Fail early with a readable message instead of an obscure error further down.
  stop(
    "expected 2 arguments: <VCF file> <RefSeq accession>, got ", length(args),
    call. = FALSE
  )
}
library(vcfR)

# NOTE: every relative path used below (including args[1], if it is relative)
# is resolved against this directory, not the directory the script was
# launched from.
setwd("~/myproject/accessibleSitesWindows_arrayversion")

# Load the two inputs.
#   1. The VCF named by args[1]: only the matrix returned by extract.gt() is
#      kept (one row per VCF record); the parsed VCF object itself is never
#      bound to a name, so it does not linger in memory.
#   2. The genome table, which supplies the length of each chromosome
#      (columns RefSeq.Accn and Sequence.Length are used further down).
benithos_gt_num <- extract.gt(read.vcfR(args[1]))
GenomeInfoOrenil2 <- read.delim("../inputdata/GenomeInfoOrenil2.txt")

# Window geometry: each window spans `window_size` positions and consecutive
# window starts are `window_jump` apart (equal values give back-to-back,
# non-overlapping windows). Coordinates start at `chr_start`.
chr_start <- 1
window_jump <- 10000
window_size <- 10000

# Accession of the chromosome to process (second command-line argument);
# it is matched against RefSeq.Accn and embedded in the output file name.
i <- args[2]
# Build the per-site table: genotype strings of the four samples plus the
# numeric position of every site.

# Site IDs are the row names of the genotype matrix and are expected to have
# the form <chromosome>_<position>, e.g. "NC_031965.2_12345".
site_ids <- rownames(benithos_gt_num)
if (is.null(site_ids)) {
  site_ids <- rep(NA_character_, nrow(benithos_gt_num))
}

# Use "|" as the allele separator everywhere. The substitution is done on the
# matrix itself (fixed = TRUE: "/" is a literal, not a regex) so the data is
# converted to a data frame only once.
benithos_gt_num[] <- gsub("/", "|", benithos_gt_num, fixed = TRUE)
count_callable_sites <- as.data.frame(benithos_gt_num)
colnames(count_callable_sites) <- c("WildP", "Mother", "child1", "child2")
rm(benithos_gt_num)

# The position is the run of digits after the LAST underscore of the ID. This
# does not depend on the accession's version suffix (".2", ".1", ...), unlike
# splitting on the regex '.2_'. IDs that do not end in _<digits> get POS = NA.
has_position <- grepl("_[0-9]+$", site_ids)
site_positions <- rep(NA_real_, length(site_ids))
site_positions[has_position] <- as.numeric(
  sub("^.*_", "", site_ids[has_position])
)
count_callable_sites$POS <- site_positions
if (!all(has_position)) {
  warning(
    sprintf(
      "%d of %d site IDs do not end in _<position>; their POS is NA",
      sum(!has_position), length(site_ids)
    ),
    call. = FALSE
  )
}
# Look up the requested chromosome in the genome table; its Sequence.Length is
# the last valid coordinate.
mychrom <- subset(GenomeInfoOrenil2, GenomeInfoOrenil2$RefSeq.Accn == i)
if (nrow(mychrom) != 1L) {
  stop(
    sprintf(
      "expected exactly one genome-table row with RefSeq.Accn == '%s', found %d",
      i, nrow(mychrom)
    ),
    call. = FALSE
  )
}
chr_end <- mychrom$Sequence.Length
if (!is.numeric(chr_end) || is.na(chr_end)) {
  stop("Sequence.Length of '", i, "' is not a usable number", call. = FALSE)
}

# Candidate window starts, one every `window_jump` positions.
window_start <- seq(from = chr_start, to = chr_end, by = window_jump)
# Inclusive end of each window.
window_stop <- window_start + window_size - 1
# Keep only windows that lie completely on the chromosome. The comparison is
# `<=` so that a full window ending exactly on the last base is retained; only
# the truncated final window (if any) is dropped.
window_fits <- window_stop <= chr_end
window_start <- window_start[window_fits]
window_stop <- window_stop[window_fits]
# Number of trailing positions not covered by any window (auto-printed when
# the script is run with Rscript).
chr_end - window_stop[length(window_stop)]
# One row per retained window.
windows <- data.frame(start = window_start, stop = window_stop)
# Count the sites that fall inside each window (both ends inclusive) and write
# one row per window: sumSNPs, start, stop, CHROM.
#
# The counts come from two binary searches over the sorted positions instead
# of subsetting the whole site table once per window:
#   (# positions <= stop) - (# positions < start)
# sort() drops NA positions, so sites without a parsed position are not
# counted. Windows without any site are kept with sumSNPs = 0; filter them out
# before averaging if they should not contribute.
sorted_site_pos <- sort(count_callable_sites$POS)
sites_upto_stop <- findInterval(windows$stop, sorted_site_pos)
sites_before_start <- findInterval(
  windows$start, sorted_site_pos,
  left.open = TRUE
)

# start/stop are copied unchanged from `windows` so that write.table() formats
# them exactly as before.
out_count_callable_sites_df <- data.frame(
  sumSNPs = sites_upto_stop - sites_before_start,
  start = windows$start,
  stop = windows$stop
)
# rep(..., length.out = ) also works when there are zero windows.
out_count_callable_sites_df$CHROM <- rep(
  mychrom$RefSeq.Accn,
  length.out = nrow(windows)
)
write.table(
  out_count_callable_sites_df,
  file = paste("mychrom", i, "callablesitesV2.txt", sep = "_")
)