# ------------- Combine -log10P files for human genome on Hoffman cluster ---------------


# Read the 62 numbered log10p_raw_sub_<i>.txt tables (tab-delimited, with a
# header row, strings kept as character) into a list.
# lapply() builds the whole list in one pass instead of growing it inside a
# for loop, and does not leave a loop index behind in the global environment.
logP_list <- lapply(
  1:62,
  function(i) {
    read.table(
      paste0("/u/flashscratch/d/desmond/log10p_raw_sub_", i, ".txt"),
      header = TRUE, sep = "\t", stringsAsFactors = FALSE
    )
  }
)

# Stack the per-file tables row-wise into a single data frame.
log10p_raw_sub <- do.call(rbind, logP_list)


# Load the RH_human table (tab-delimited, header row, strings kept as character).
RH_human <- read.table(
  "RH_human_gseq.txt",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)

# Sort:
# Order rows by chromosome (chr1..chr22, chrX, chrY) and then by pos.
# The sort key comes from match() instead of a factor -> character round-trip,
# so the Chromosome column itself is never rewritten: a label that is not in
# chrOrder simply sorts last rather than being turned into NA.
chrOrder <- paste0("chr", c(1:22, "X", "Y"))
RH_human <- RH_human[order(match(RH_human$Chromosome, chrOrder), RH_human$pos), ]

# get rid of chrY, because no chrY seq in hamster genome
# %in% never returns NA, so a row with a missing chromosome is kept as-is
# instead of being replaced by an all-NA row (which `!=` indexing would do).
RH_human <- RH_human[!(RH_human$Chromosome %in% "chrY"), ]



# Dimensions of the combined -log10P table (recorded output below).
dim(log10p_raw_sub)
# [1] 305391     28

# Dimensions of the RH_human table after sorting and chrY removal
# (recorded output below).
dim(RH_human)
# [1] 305391    119

# merge to make sure all rows present in final file, even if mix mod calc fails
# No `by` is given, so merge() joins on the column names shared by the two
# tables; all.x = TRUE keeps every row of the first four RH_human columns and
# fills the remaining columns with NA where log10p_raw_sub has no match.
log10p_raw_sub_2 <- merge(
  x = RH_human[, 1:4],
  y = log10p_raw_sub,
  all.x = TRUE
)

# Dimensions of the merged table (recorded output below).
dim(log10p_raw_sub_2)
# [1] 305391     28


# Sort:
# Order rows by chromosome (chr1..chr22, chrX) and then by pos.
# The sort key comes from match() instead of a factor -> character round-trip,
# so the Chromosome column is left untouched: a label that is not in chrOrder
# sorts last rather than being written to the output file as NA.
chrOrder <- paste0("chr", c(1:22, "X"))
log10p_raw_sub_2 <- log10p_raw_sub_2[
  order(match(log10p_raw_sub_2$Chromosome, chrOrder), log10p_raw_sub_2$pos),
]

# Drop the row names left over from merging/reordering so they are renumbered.
row.names(log10p_raw_sub_2) <- NULL

# Write the final table as tab-delimited text, unquoted and without row names.
write.table(
  log10p_raw_sub_2, "log10P_human.txt",
  quote = FALSE, sep = "\t", row.names = FALSE
)