require(GenomicRanges)
require(rtracklayer)

# Full command line as seen by R (interpreter name and its flags included).
# The script is launched as `R --vanilla ...`, so the first user-supplied
# value is expected at args[3].
args <- commandArgs()

# Project helper functions; presumably the source of
# read_and_bin_func_bigWig() used further down (not defined in this file).
source("/gpfs/gpfs1/home/broberts/STARRseq_trxn_paper/bin/starr_analysis_funcs.R")

# Working directory for this analysis. Edit the path to match the local
# file system; relative output paths below are resolved against it.
work_dir <- "/gpfs/gpfs1/home/broberts/STARRseq_trxn_paper/steensel_lab_mpra_data"
setwd(work_dir)

# Load the lifted-over hg19 Alu positions.
#
# The file is an R save() image that is expected to hold a single
# GenomicRanges object containing every Alu position in hg19. Edit the path
# to match the local file system.
liftedOver_hg19_Alu_GR_file <- "/gpfs/gpfs1/home/broberts/STARRseq_trxn_paper/steensel_lab_mpra_data/alu_pos_hg19_liftedOver_GR.Rvar"

# Load into a private environment rather than the global one: the saved
# object keeps whatever name it was saved under, so loading it globally could
# overwrite a variable of this script, and removing it afterwards by name
# would delete the result itself if the two names happened to coincide.
alu_load_env <- new.env(parent = emptyenv())
loaded_liftedOver_Alu_pos_GR <- load(liftedOver_hg19_Alu_GR_file,
                                     envir = alu_load_env)

if (length(loaded_liftedOver_Alu_pos_GR) != 1L) {
  stop("Expected exactly one object in ", liftedOver_hg19_Alu_GR_file,
       " but found ", length(loaded_liftedOver_Alu_pos_GR),
       call. = FALSE)
}

liftedOver_hg19_Alu_pos_GR <- get(loaded_liftedOver_Alu_pos_GR,
                                  envir = alu_load_env, inherits = FALSE)

# Drop the temporary environment so only one copy of the object stays
# referenced.
rm(alu_load_env)

# Path of the bigWig file to process; it is read from args[3].
bw_file <- args[3]

# Indexing past the end of args yields NA; fail here with a clear message
# instead of letting the bigWig reader fail later on.
if (is.na(bw_file) || !nzchar(bw_file)) {
  stop("No bigWig file supplied: expected its path as args[3]",
       call. = FALSE)
}

# Handling every position in one go builds a vector that is too large, so the
# positions are processed in consecutive chunks of at most chunk_size.
chunk_size <- 200000
num_alu_positions <- length(liftedOver_hg19_Alu_pos_GR)
num_chunks <- ceiling(num_alu_positions / chunk_size)

# One c(start, end) pair of 1-based, inclusive indices per chunk. The end of
# the last chunk is clamped to the number of positions. seq_len() is used so
# that an empty input gives an empty list rather than the bogus chunks
# produced by 1:0.
list_chunk_indices <- lapply(seq_len(num_chunks), function(num_chunk_x) {
  startx <- 1 + (num_chunk_x - 1) * chunk_size
  endx <- min(num_chunk_x * chunk_size, num_alu_positions)
  c(startx, endx)
})


# Bin the bigWig signal over the Alu positions, one chunk at a time, keeping
# only the bin_vals of each chunk's result.
# read_and_bin_func_bigWig() is defined outside this file; it is assumed to
# return one bin_vals entry per input range -- the length check below
# enforces that assumption.
list_bin_vals_by_chunk <- lapply(list_chunk_indices, function(chunk_index_x) {
  chunk_start <- chunk_index_x[1]
  chunk_end <- chunk_index_x[2]

  # format(..., scientific = FALSE) keeps round indices such as 200000 from
  # being printed as 2e+05 in the progress message.
  print(paste("overlapping", format(chunk_start, scientific = FALSE),
              "to", format(chunk_end, scientific = FALSE)))

  chunk_liftedOver_hg19_Alu_pos_GR_ref <-
    liftedOver_hg19_Alu_pos_GR[chunk_start:chunk_end]
  chunk_bin_GR_x <- read_and_bin_func_bigWig(
    chunk_liftedOver_hg19_Alu_pos_GR_ref,
    bigWig_path = bw_file,
    run_unstable = FALSE
  )
  chunk_bin_GR_x$bin_vals
})

# Chunks are consecutive and in order, so concatenating the per-chunk values
# lines them up with the original positions.
all_Alu_pos_vals <- unlist(list_bin_vals_by_chunk)

# Guard against a missing or short result, which would otherwise be dropped
# or recycled when attached as a metadata column.
if (length(all_Alu_pos_vals) != length(liftedOver_hg19_Alu_pos_GR)) {
  stop("Binning returned ", length(all_Alu_pos_vals), " values for ",
       length(liftedOver_hg19_Alu_pos_GR), " Alu positions",
       call. = FALSE)
}

# Copy of the Alu positions with the binned signal attached as `vals`.
steensel_all_hg19_alu_bin_GR <- liftedOver_hg19_Alu_pos_GR
steensel_all_hg19_alu_bin_GR$vals <- all_Alu_pos_vals

# Save the result under a name derived from the bigWig file name, inside
# out_GR_dir (relative to the current working directory).
out_GR_dir <- "hg19_Alu_pos_GRs"

# Create the directory if needed. The warning is suppressed and the outcome
# re-checked so that a directory created by another run in the meantime is
# not treated as an error.
if (!dir.exists(out_GR_dir)) {
  dir.create(out_GR_dir, showWarnings = FALSE)
}
if (!dir.exists(out_GR_dir)) {
  stop("Could not create output directory: ", out_GR_dir, call. = FALSE)
}

# Keep only the file name, then split it on "_" and ".".
bw_file <- basename(bw_file)
split_x <- unlist(strsplit(bw_file, split = "_|\\."))

# Names with exactly seven fields are labelled "nostrand"; otherwise the
# sixth field becomes the last component (presumably the strand -- confirm
# against the bigWig naming scheme).
if (length(split_x) == 7) {
  name_fields <- c(split_x[3], split_x[2], "nostrand")
} else {
  name_fields <- c(split_x[3], split_x[2], split_x[6])
}

# A file name with too few fields would otherwise yield "NA" components and
# an output file such as "NA_NA_NA.Rvar".
if (anyNA(name_fields)) {
  stop("Cannot derive an output name from bigWig file name '", bw_file,
       "': too few '_'/'.'-separated fields", call. = FALSE)
}
out_name <- paste(name_fields, collapse = "_")

# The object is saved under the derived name so that load() restores it
# with that name.
assign(out_name, steensel_all_hg19_alu_bin_GR)

save(list = out_name, file = file.path(out_GR_dir, paste0(out_name, ".Rvar")))

q()








