# ------------- Combine -log10P files for hamster genome on Hoffman cluster ---------------


# Read the per-chunk -log10P tables (chunks 1..n_chunks) and stack them into a
# single data frame, log10p_raw_sub. Every chunk file is a tab-delimited table
# with a header row.
n_chunks <- 44
chunk_files <- paste0(
  "/u/flashscratch/d/desmond/log10p_raw_sub_", seq_len(n_chunks), ".txt"
)

# Fail up front, naming every missing chunk, instead of stopping at the first
# unreadable file part-way through the reads.
missing_files <- chunk_files[!file.exists(chunk_files)]
if (length(missing_files) > 0) {
  stop(
    "Missing -log10P chunk file(s): ",
    paste(missing_files, collapse = ", "),
    call. = FALSE
  )
}

# One data frame per chunk, in chunk order (no list growing inside a loop).
logP_list <- lapply(
  chunk_files,
  read.table,
  stringsAsFactors = FALSE,
  header = TRUE,
  sep = "\t"
)

# Row-bind all chunks; rbind requires the chunks to share the same columns.
log10p_raw_sub <- do.call(rbind, logP_list)


# Load the hamster RH table (tab-delimited, with a header row).
RH_hamster <- read.table(
  "RH_hamster_gseq.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# Sort rows by chromosome (chr1..chr10, chrX, chrY) and then by pos.
# Chromosome is made a factor only so that order() follows chrOrder instead of
# alphabetical order; it is converted back to character afterwards. Values not
# listed in chrOrder become NA in the process.
chrOrder <- paste0("chr", c(1:10, "X", "Y"))
RH_hamster$Chromosome <- factor(RH_hamster$Chromosome, levels = chrOrder)
sorted_rows <- order(RH_hamster$Chromosome, RH_hamster$pos)
RH_hamster <- RH_hamster[sorted_rows, ]
RH_hamster$Chromosome <- as.character(RH_hamster$Chromosome)

# get rid of chrY, because no chrY seq in hamster genome
is_chrY <- RH_hamster$Chromosome == "chrY"
RH_hamster <- RH_hamster[!is_chrY, ]


# Do not do this. Keep Contig_ID column! Needed for hamster genome browser.
# RH_hamster <- RH_hamster[,-c(1)]



dim(log10p_raw_sub)
# [1] 217580     28

dim(RH_hamster)
# [1] 217580    120


# Left-merge the first five RH_hamster columns with the combined -log10P table
# so that every RH_hamster row is present in the final file (rows without a
# -log10P result get NA), and to re-introduce Contig_ID.
anno_cols <- RH_hamster[, 1:5]

# Join keys are the columns the two tables share. This is what merge() would
# pick by default; naming them makes the join explicit and lets us verify it.
key_cols <- intersect(names(anno_cols), names(log10p_raw_sub))
if (length(key_cols) == 0) {
  # With no shared columns merge() would silently build a Cartesian product.
  stop(
    "RH_hamster[, 1:5] and log10p_raw_sub share no columns to merge on",
    call. = FALSE
  )
}

log10p_raw_sub_2 <- merge(
  anno_cols,
  log10p_raw_sub,
  by = key_cols,
  all.x = TRUE
)

# A left merge on unique keys keeps the row count of RH_hamster; a different
# count means duplicated key combinations in one of the tables.
if (nrow(log10p_raw_sub_2) != nrow(RH_hamster)) {
  warning(
    "merge changed the row count: ", nrow(RH_hamster), " -> ",
    nrow(log10p_raw_sub_2), "; check for duplicated key rows",
    call. = FALSE
  )
}

dim(log10p_raw_sub_2)
# [1] 217580     29

# Get Contig_ID as first column.
# merge() returns the key columns first, then the remaining annotation
# column(s), then the remaining -log10P columns. Reordering by name instead of
# by fixed position (5, 1:4, 6:ncol) stays correct if the number of key
# columns ever differs from four.
id_cols <- setdiff(names(anno_cols), key_cols)
other_cols <- setdiff(names(log10p_raw_sub_2), c(id_cols, key_cols))
log10p_raw_sub_2 <- log10p_raw_sub_2[
  , c(id_cols, key_cols, other_cols), drop = FALSE
]

dim(log10p_raw_sub_2)
# [1] 217580     29


# Order the merged table by chromosome (chr1..chr10, then chrX) and by pos.
# Chromosome is temporarily a factor so that order() follows chrOrder rather
# than alphabetical order; it is restored to character afterwards.
chrOrder <- paste0("chr", c(1:10, "X"))
log10p_raw_sub_2$Chromosome <- factor(
  log10p_raw_sub_2$Chromosome,
  levels = chrOrder
)
row_idx <- order(log10p_raw_sub_2$Chromosome, log10p_raw_sub_2$pos)
log10p_raw_sub_2 <- log10p_raw_sub_2[row_idx, ]
log10p_raw_sub_2$Chromosome <- as.character(log10p_raw_sub_2$Chromosome)

# Reset row names to the default 1..n sequence after the reordering.
rownames(log10p_raw_sub_2) <- NULL

# Write the final table as tab-delimited text, without quotes or row names.
write.table(
  log10p_raw_sub_2,
  file = "log10P_hamster.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)