# Standard error of the mean of `x`.
# NA values are ignored both in the variance and in the observation count.
sem <- function(x) {
  n_obs <- sum(!is.na(x))
  sqrt(var(x, na.rm = TRUE) / n_obs)
}

#----------------- Prepare human gseq data ---------------------


# Load the human gseq table: tab-separated with a header row; text columns
# are kept as character vectors rather than converted to factors.
RH_human <- read.table(
  "RH_human_gseq.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# The code below removes the up and down ramps for copy number changes.
# It should not be used here, where P-value analyses are being done.
# # Get rows at beginning of each chromosome:
# RH_human_start <- RH_human[RH_human$posS == 0 & RH_human$posE == 1e6,]

# # Get rid of ramp ups and ramp downs:
# RH_human <- RH_human[c(0,diff(RH_human$pos)) == 1e4,]

# # combine RH_human without ramps and RH_human_start:
# RH_human <- rbind(RH_human_start,RH_human)


# Sort rows by chromosome (chr1..chr22, chrX, chrY), then by pos.
# Chromosome is made a factor only so that order() follows chrOrder instead
# of alphabetical order; it is converted back to character afterwards.
# Any chromosome name not listed in chrOrder becomes NA in this round trip.
chrOrder <- paste0("chr", c(1:22, "X", "Y"))
RH_human$Chromosome <- factor(RH_human$Chromosome, levels = chrOrder)
RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]
RH_human$Chromosome <- as.character(RH_human$Chromosome)


# # Transform chr1 etc. to numbers
# RH_human$Chromosome <- gsub('chr', '', RH_human$Chromosome)
# RH_human[RH_human$Chromosome == "X","Chromosome"] <- 23
# RH_human[RH_human$Chromosome == "Y","Chromosome"] <- 24
# chrOrder<-c(1:24)
# RH_human$Chromosome <-factor(RH_human$Chromosome, levels=chrOrder)
# RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]
# RH_human$Chromosome <- as.numeric(RH_human$Chromosome)

# # Compute chromosome size
# gen_coord <- aggregate(pos~Chromosome,FUN=max,data=RH_human)
# colnames(gen_coord)[2] <- "chr_size"
# gen_coord$Chromosome <-factor(gen_coord$Chromosome, levels=chrOrder)
# gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
# gen_coord$Chromosome <- as.numeric(gen_coord$Chromosome)

# # Use cumsum to make genome coordinates
# gen_coord$coord <- c(0,cumsum(gen_coord$chr_size)[-24])

# # merge genome coordinates with RH_human
# RH_human <- merge(RH_human,gen_coord[,c("Chromosome","coord")])
# RH_human$Chromosome <-factor(RH_human$Chromosome, levels=chrOrder)
# RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]
# RH_human$Chromosome <- as.numeric(RH_human$Chromosome)

# RH_human$coord <- RH_human$pos + RH_human$coord

# Get rid of chrY, because there is no chrY sequence in the hamster genome.
# `%in%` is used rather than `!=` because it never returns NA: a row whose
# Chromosome is NA (any name not in chrOrder becomes NA in the sort step) is
# kept unchanged. With `!=` such a row gives an NA index, which R turns into
# a row of all-NA values and which would make every later colMeans() NA.
RH_human <- RH_human[!(RH_human$Chromosome %in% "chrY"), ]

# # Get rid of unneeded coord column at end of RH_human
# RH_human <- RH_human[,-ncol(RH_human)]





#----------------- Prepare hamster gseq data ---------------------


# Load the hamster gseq table: tab-separated with a header row; text columns
# are kept as character vectors rather than converted to factors.
RH_hamster <- read.table(
  "RH_hamster_gseq.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# The code below removes the up and down ramps for copy number changes.
# It should not be used here, where P-value analyses are being done.
# # Get rows at beginning of each chromosome:
# RH_hamster_start <- RH_hamster[RH_hamster$posS == 0 & RH_hamster$posE == 1e6,]

# # Get rid of ramp ups and ramp downs:
# RH_hamster <- RH_hamster[c(0,diff(RH_hamster$pos)) == 1e4,]

# # combine RH_hamster without ramps and RH_hamster_start:
# RH_hamster <- rbind(RH_hamster_start,RH_hamster)


# Sort rows by chromosome (chr1..chr10, chrX, chrY), then by pos.
# Chromosome is made a factor only so that order() follows chrOrder instead
# of alphabetical order; it is converted back to character afterwards.
# Any chromosome name not listed in chrOrder becomes NA in this round trip.
chrOrder <- paste0("chr", c(1:10, "X", "Y"))
RH_hamster$Chromosome <- factor(RH_hamster$Chromosome, levels = chrOrder)
RH_hamster <- RH_hamster[order(RH_hamster$Chromosome, RH_hamster$pos), ]
RH_hamster$Chromosome <- as.character(RH_hamster$Chromosome)


# # Transform chr1 etc. to numbers
# RH_hamster$Chromosome <- gsub('chr', '', RH_hamster$Chromosome)
# RH_hamster[RH_hamster$Chromosome == "X","Chromosome"] <- 23
# RH_hamster[RH_hamster$Chromosome == "Y","Chromosome"] <- 24
# chrOrder<-c(1:24)
# RH_hamster$Chromosome <-factor(RH_hamster$Chromosome, levels=chrOrder)
# RH_hamster <- RH_hamster[order(RH_hamster$Chromosome, RH_hamster$pos), ]
# RH_hamster$Chromosome <- as.numeric(RH_hamster$Chromosome)

# # Compute chromosome size
# gen_coord <- aggregate(pos~Chromosome,FUN=max,data=RH_hamster)
# colnames(gen_coord)[2] <- "chr_size"
# gen_coord$Chromosome <-factor(gen_coord$Chromosome, levels=chrOrder)
# gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
# gen_coord$Chromosome <- as.numeric(gen_coord$Chromosome)

# # Use cumsum to make genome coordinates
# gen_coord$coord <- c(0,cumsum(gen_coord$chr_size)[-24])

# # merge genome coordinates with RH_hamster
# RH_hamster <- merge(RH_hamster,gen_coord[,c("Chromosome","coord")])
# RH_hamster$Chromosome <-factor(RH_hamster$Chromosome, levels=chrOrder)
# RH_hamster <- RH_hamster[order(RH_hamster$Chromosome, RH_hamster$pos), ]
# RH_hamster$Chromosome <- as.numeric(RH_hamster$Chromosome)

# RH_hamster$coord <- RH_hamster$pos + RH_hamster$coord

# Get rid of chrY, because there is no chrY sequence in the hamster genome.
# `%in%` is used rather than `!=` because it never returns NA: a row whose
# Chromosome is NA (any name not in chrOrder becomes NA in the sort step) is
# kept unchanged. With `!=` such a row gives an NA index, which R turns into
# a row of all-NA values and which would make every later colMeans() NA.
RH_hamster <- RH_hamster[!(RH_hamster$Chromosome %in% "chrY"), ]


# Get rid of the Contig_ID column (the first column)
RH_hamster <- RH_hamster[, -1]





# ----------------- Human read calx -------------------

# colMeans() averages each column from the fifth onward over the selected
# rows; mean() and sem() then summarise those column means.
# The "# [1] ..." lines are the recorded outputs of the call above them.

# Overlapping: all rows
mean(colMeans(RH_human[, 5:ncol(RH_human)]))
# [1] 237.1299

sem(colMeans(RH_human[, 5:ncol(RH_human)]))
# [1] 11.61357

# Used non-overlapping in paper (more accurate sem)

# Non-overlapping: every 100th row (makes hardly any difference)
mean(colMeans(
  RH_human[seq(1, nrow(RH_human), by = 100), 5:ncol(RH_human)]
))
# [1] 237.1227 <<<<<<<<<<<<< use in paper

sem(colMeans(
  RH_human[seq(1, nrow(RH_human), by = 100), 5:ncol(RH_human)]
))
# [1] 11.61316 <<<<<<<<<<<<< use in paper



# ----------------- hamster read calx -------------------


# colMeans() averages each column from the fifth onward over the selected
# rows; mean() and sem() then summarise those column means.
# The "# [1] ..." lines are the recorded outputs of the call above them.

# Overlapping: all rows
mean(colMeans(RH_hamster[, 5:ncol(RH_hamster)]))
# [1] 13788.79

sem(colMeans(RH_hamster[, 5:ncol(RH_hamster)]))
# [1] 234.0124

# Used non-overlapping in paper (more accurate sem)

# Non-overlapping: every 100th row (makes hardly any difference)
mean(colMeans(
  RH_hamster[seq(1, nrow(RH_hamster), by = 100), 5:ncol(RH_hamster)]
))
# [1] 13798.44 <<<<<<<<<<<< use in paper

sem(colMeans(
  RH_hamster[seq(1, nrow(RH_hamster), by = 100), 5:ncol(RH_hamster)]
))
# [1] 234.1414 <<<<<<<<<<<< use in paper























