#-------- Make max_shffl table from single line human_max_shuff_ data frames -------------


# 611 is the number of batches (j), each of 5e2 mixed mod calx, that together cover one shuffle of the complete genome.
# 200 is the number of complete genomes attempted by each run on the Hoffman cluster.



# Read the per-batch shuffle results for every genome ("gam") and assemble one
# data frame per genome with one column per batch.
#
# Each batch file is tab-separated with no header; its first column (V1)
# becomes the row names. A genome whose first batch cannot be read is skipped
# entirely. Any later batch that cannot be read is reported and left out, so an
# incomplete genome ends up with fewer than `n_batches` columns.
#
# Batches are collected in a preallocated list and bound once per genome,
# instead of growing the data frame with cbind() on every iteration.

shuff_dir <- "/u/flashscratch/d/desmond"
n_genomes <- 200L # complete genomes attempted per cluster run
n_batches <- 611L # batches needed to cover one shuffle of the genome

# Read one batch file for genome `gam`; errors propagate to the caller.
read_shuff_batch <- function(gam, batch) {
  read.table(
    file.path(
      shuff_dir,
      paste0("human_max_shuff_gam_", gam, "_batch_", batch, ".txt")
    ),
    header = FALSE,
    sep = "\t",
    stringsAsFactors = FALSE,
    row.names = "V1"
  )
}

human_max_shuff_list <- list()

for (i in seq_len(n_genomes)) {
  tryCatch(
    {
      print(i)
      batches <- vector("list", n_batches)

      # A failure here jumps to the outer handler and skips this genome.
      batches[[1]] <- read_shuff_batch(i, 1)

      for (j in seq_len(n_batches)[-1]) {
        tryCatch(
          {
            batches[[j]] <- read_shuff_batch(i, j)
          },
          error = function(e) {
            cat("Error on gam ", i, " and batch ", j, ": ", conditionMessage(e), "\n")
          }
        )
      }

      # Unreadable batches are still NULL; drop them and bind the rest in
      # batch order. If the bind itself fails (e.g. differing row counts) the
      # genome is left unassigned, which downstream code treats as incomplete.
      tryCatch(
        {
          human_max_shuff_list[[i]] <- do.call(
            cbind,
            Filter(Negate(is.null), batches)
          )
        },
        error = function(e) {
          cat("Error on gam ", i, " while combining batches: ", conditionMessage(e), "\n")
        }
      )
    },
    error = function(e) {
      cat("Error on gam ", i, " and batch 1: ", conditionMessage(e), "\n")
    }
  )
}



# Hoffman frequently chokes on ~25% of the 200 gam jobs for reasons unknown. Instead of laboriously repairing the missing jobs, collect only the complete jobs and re-run enough times to make up the deficit. Alternatively, use scripts like "rescue_human" and "human_test_rescue".



# Keep only genomes for which all 611 batch columns were read. Entries that
# were never filled (NULL) count as having zero columns.
batch_counts <- vapply(
  human_max_shuff_list,
  function(genome) {
    if (is.null(genome)) 0L else ncol(genome)
  },
  integer(1)
)
human_max_shuff_list <- human_max_shuff_list[batch_counts == 611]


# Number of complete genomes retained.
length(human_max_shuff_list)



# Collapse each genome's table to one value per row: the maximum across all of
# its batch columns, ignoring NAs. Each list element becomes a named vector
# (names are the original row names).
#
# Iterate over the elements actually present. After incomplete genomes are
# filtered out the list is shorter than 200, and indexing past its end only
# produced spurious "subscript out of bounds" error messages.
for (i in seq_along(human_max_shuff_list)) {
  tryCatch(
    {
      print(i)
      # Transpose so rows of the original table become columns, then take the
      # column-wise maximum.
      human_max_shuff_list[[i]] <- t(human_max_shuff_list[[i]])
      human_max_shuff_list[[i]] <- apply(human_max_shuff_list[[i]], 2, max, na.rm = TRUE)
    },
    error = function(e) {
      cat("Error on line ", i, ": ", conditionMessage(e), "\n")
    }
  )
}

########################################################################
# Change the output file name below to human_max_shuff_1, human_max_shuff_2, etc. for each new run.
# CAREFUL! DO NOT OVERWRITE PREVIOUS FILES!!
##################################################################

# Stack the per-genome maxima into one table (one row per complete genome) and
# store it as the next numbered human_max_shuff_<n> file.
human_max_shuff_temp_store <- do.call(rbind, human_max_shuff_list)

# accumulate enough human_max_shuff data frames so total number rows = 1e3
dim(human_max_shuff_temp_store)

human_max_shuff_out <- "/u/home/d/desmond/human_shuff_files/human_max_shuff_6.txt"

# do.call(rbind, list()) returns NULL; refuse to write a file with no data.
if (is.null(human_max_shuff_temp_store)) {
  stop("No complete genomes to write to ", human_max_shuff_out, call. = FALSE)
}

# Enforce the "do not overwrite previous files" rule instead of relying on the
# file name having been edited by hand before each run.
if (file.exists(human_max_shuff_out)) {
  stop(
    human_max_shuff_out,
    " already exists; change the file number before writing",
    call. = FALSE
  )
}

write.table(
  human_max_shuff_temp_store,
  human_max_shuff_out,
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)


# ---------- Combine human_max_shuff_n -------------

# Combine the numbered human_max_shuff_<n> files into a single table and write
# it out. File 1 must exist; any later file that cannot be read or appended is
# reported and skipped.
# NOTE(review): read.table(header = TRUE) applies check.names, so column names
# may differ from those written earlier if they are not syntactic R names.
human_max_shuff <- read.table(
  "/u/home/d/desmond/human_shuff_files/human_max_shuff_1.txt",
  sep = "\t",
  stringsAsFactors = FALSE,
  header = TRUE
)

num_jobs <- 10 # how many jobs to reach > 1e3, eg 10, for human_max_shuff_1, human_max_shuff_2 etc.

# seq_len(num_jobs)[-1] is empty when num_jobs is 1, whereas 2:num_jobs would
# count down (2, 1) and append file 1 a second time.
for (i in seq_len(num_jobs)[-1]) {
  tryCatch(
    {
      human_max_shuff_temp <- read.table(
        paste0("/u/home/d/desmond/human_shuff_files/human_max_shuff_", i, ".txt"),
        sep = "\t",
        stringsAsFactors = FALSE,
        header = TRUE
      )
      human_max_shuff <- rbind(human_max_shuff, human_max_shuff_temp)
    },
    error = function(e) {
      cat("Error on line ", i, ": ", conditionMessage(e), "\n")
    }
  )
}

write.table(
  human_max_shuff,
  "/u/home/d/desmond/human_shuff_files/human_max_shuff.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)


# 95th percentile of every column of the combined shuffle maxima, written as a
# two-column, header-less table (column name, threshold).
human_thresh_95 <- apply(
  human_max_shuff,
  MARGIN = 2,
  FUN = function(column) quantile(column, 0.95)
)
write.table(
  unlist(human_thresh_95),
  file = "/u/home/d/desmond/human_shuff_files/human_thresh_95.txt",
  quote = FALSE,
  sep = "\t",
  row.names = TRUE,
  col.names = FALSE
)

# To read back in:
# human_thresh_95 <- read.table("human_thresh_95.txt",header=FALSE,sep="\t",stringsAsFactors=FALSE,row.names=1,col.names=c("","thresh"))


# Same computation at the 99th percentile.
human_thresh_99 <- apply(
  human_max_shuff,
  MARGIN = 2,
  FUN = function(column) quantile(column, 0.99)
)
write.table(
  unlist(human_thresh_99),
  file = "/u/home/d/desmond/human_shuff_files/human_thresh_99.txt",
  quote = FALSE,
  sep = "\t",
  row.names = TRUE,
  col.names = FALSE
)

# To read back in:
# human_thresh_99 <- read.table("human_thresh_99.txt",header=FALSE,sep="\t",stringsAsFactors=FALSE,row.names=1,col.names=c("","thresh"))


























