#-------- Make max_shuff table from single-line hamster_max_shuff_ data frames -------------


# 436 is number of batches (j) of 5e2 mixed mod calx that covers one shuffle of complete genome.
# 200 is number of complete genomes attempted by each run on Hoffman cluster.

# Read the per-batch shuffle output for every genome run into
# hamster_max_shuff_list: one data frame per run, with the successfully read
# batch files bound together column-wise.
#
# For run i, batch 1 is read first; if that fails the run is skipped and its
# slot in the list is left unset. Batches 2..436 are then read one at a time.
# A batch that cannot be read is reported and contributes no columns, so an
# incomplete run can be recognised afterwards by its column count.
hamster_max_shuff_list <- list()

for (i in 1:200) {
  print(i)

  first_batch <- tryCatch(
    read.table(
      paste0("/u/flashscratch/d/desmond/hamster_max_shuff_gam_", i, "_batch_1.txt"),
      header = FALSE, sep = "\t", stringsAsFactors = FALSE, row.names = "V1"
    ),
    error = function(e) {
      cat("Error on gam ", i, " and batch 1: ", conditionMessage(e), "\n")
      NULL
    }
  )
  if (is.null(first_batch)) {
    next
  }

  # Collect the batches in a preallocated list and bind them once at the end;
  # calling cbind() inside the loop re-copies the growing data frame each time.
  batch_frames <- vector("list", 436)
  batch_frames[[1]] <- first_batch
  for (j in 2:436) {
    # `[<-` with list(...) keeps a NULL placeholder for a failed batch
    # (`[[<-` with NULL would delete the slot instead).
    batch_frames[j] <- list(tryCatch(
      read.table(
        paste0("/u/flashscratch/d/desmond/hamster_max_shuff_gam_", i, "_batch_", j, ".txt"),
        header = FALSE, sep = "\t", stringsAsFactors = FALSE, row.names = "V1"
      ),
      error = function(e) {
        cat("Error on gam ", i, " and batch ", j, ": ", conditionMessage(e), "\n")
        NULL
      }
    ))
  }

  was_read <- !vapply(batch_frames, is.null, logical(1))
  tryCatch(
    hamster_max_shuff_list[[i]] <- do.call(cbind, batch_frames[was_read]),
    error = function(e) {
      # e.g. batches with incompatible row counts; the run is left unset.
      cat("Error on gam ", i, " while combining batches: ", conditionMessage(e), "\n")
    }
  )
}


# Hoffman frequently chokes on ~25% of 200 gam jobs for reasons unknown. Fix with ./rescue_hamster

# Keep only the genome runs that have exactly 436 columns, i.e. one per batch
# when each batch file contributes a single column. Slots that are NULL
# (runs that were never read) count as zero columns and are dropped.
batch_col_counts <- vapply(
  hamster_max_shuff_list,
  function(x) if (is.null(x)) 0L else ncol(x),
  integer(1)
)
hamster_max_shuff_list <- hamster_max_shuff_list[batch_col_counts == 436]

# Number of complete runs that survived the filter.
length(hamster_max_shuff_list)



# Replace each element of hamster_max_shuff_list by the per-row maximum taken
# across its columns (NAs ignored), giving one named vector per element.
#
# Iterate over the elements that actually exist: the list may hold fewer than
# 200 entries, and indexing past its end raises "subscript out of bounds".
for (i in seq_along(hamster_max_shuff_list)) {
  tryCatch(
    {
      print(i)
      # Row-wise max on the original layout is the same as transposing and
      # taking column-wise max, without storing the transposed copy.
      hamster_max_shuff_list[[i]] <- apply(
        as.matrix(hamster_max_shuff_list[[i]]), 1, max,
        na.rm = TRUE
      )
    },
    error = function(e) {
      cat("Error on line ", i, ": ", conditionMessage(e), "\n")
    }
  )
}

########################################################################
# change names to hamster_max_shuff_1, hamster_max_shuff_2 etc. as needed
# CAREFUL! DO NOT OVERWRITE PREVIOUS FILES!!
##################################################################

# Stack the elements of hamster_max_shuff_list into one table, one row per
# list element. do.call(rbind, ...) returns NULL when the list is empty.
hamster_max_shuff_temp_store <- do.call(rbind, hamster_max_shuff_list)

# accumulate enough hamster_max_shuff data frames so total number rows = 1e3
dim(hamster_max_shuff_temp_store)

# Output file for this run; change the numeric suffix for each new run.
hamster_max_shuff_out_file <- "/u/home/d/desmond/hamster_shuff_files/hamster_max_shuff_6.txt"

if (is.null(hamster_max_shuff_temp_store)) {
  # Nothing to save: do not create an empty file.
  warning(
    "No rows to write; skipping ", hamster_max_shuff_out_file,
    call. = FALSE, immediate. = TRUE
  )
} else if (file.exists(hamster_max_shuff_out_file)) {
  # Refuse to clobber the results of an earlier run.
  stop(
    "Refusing to overwrite existing file ", hamster_max_shuff_out_file,
    "; change the file name or remove the old file first.",
    call. = FALSE
  )
} else {
  write.table(
    hamster_max_shuff_temp_store,
    hamster_max_shuff_out_file,
    quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE
  )
}


# ---------- Combine hamster_max_shuff_n -------------

# Combine the numbered hamster_max_shuff_<n>.txt files into one table and
# write it to hamster_max_shuff.txt. File 1 must be readable; any later file
# that cannot be read or appended is reported and skipped.
hamster_max_shuff <- read.table("/u/home/d/desmond/hamster_shuff_files/hamster_max_shuff_1.txt",sep="\t",stringsAsFactors=FALSE,header=TRUE)

num_jobs <- 10 # how many jobs to reach > 1e3, eg 8, for hamster_max_shuff_1, hamster_max_shuff_2 etc.

# seq_len(num_jobs)[-1] is 2..num_jobs and is empty when num_jobs < 2,
# whereas 2:num_jobs would count down to c(2, 1) and append file 1 twice.
for (i in seq_len(num_jobs)[-1]) {
  tryCatch(
    {
      hamster_max_shuff_temp <- read.table(
        paste0("/u/home/d/desmond/hamster_shuff_files/hamster_max_shuff_", i, ".txt"),
        sep = "\t", stringsAsFactors = FALSE, header = TRUE
      )
      hamster_max_shuff <- rbind(hamster_max_shuff, hamster_max_shuff_temp)
    },
    error = function(e) {
      cat("Error on line ", i, ": ", conditionMessage(e), "\n")
    }
  )
}

write.table(
  hamster_max_shuff,
  "/u/home/d/desmond/hamster_shuff_files/hamster_max_shuff.txt",
  quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE
)


# 95th percentile of every column of hamster_max_shuff, saved as a
# tab-separated file with the column name first and the value second
# (no header line).
hamster_thresh_95 <- apply(hamster_max_shuff, 2, quantile, probs = 0.95)
write.table(
  unlist(hamster_thresh_95),
  file = "/u/home/d/desmond/hamster_shuff_files/hamster_thresh_95.txt",
  quote = FALSE,
  sep = "\t",
  row.names = TRUE,
  col.names = FALSE
)

# To read back in:
# hamster_thresh_95 <- read.table("hamster_thresh_95.txt",header=FALSE,sep="\t",stringsAsFactors=FALSE,row.names=1,col.names=c("","thresh"))


# 99th percentile of every column of hamster_max_shuff, saved as a
# tab-separated file with the column name first and the value second
# (no header line).
hamster_thresh_99 <- apply(hamster_max_shuff, 2, quantile, probs = 0.99)
write.table(
  unlist(hamster_thresh_99),
  file = "/u/home/d/desmond/hamster_shuff_files/hamster_thresh_99.txt",
  quote = FALSE,
  sep = "\t",
  row.names = TRUE,
  col.names = FALSE
)

# To read back in:
# hamster_thresh_99 <- read.table("hamster_thresh_99.txt",header=FALSE,sep="\t",stringsAsFactors=FALSE,row.names=1,col.names=c("","thresh"))















