## Manhattan plots for hum_to_hum and ham_to_ham alignments in HEK, A23 and pool-averaged RH cells.
## Also find correlations between HEK293 and human DNA in RH pools, as well as correlations between A23 and hamster DNA in RH pools. However, we decided not to include these scatterplots in the paper, as they did not add clarity.

#install.packages("ggplot2")
library(ggplot2)
library(cowplot) #used with plot_grid 

#----------------Aesthetics ---------------------------


# Shared ggplot2 theme for the figure panels: no panel background or grid,
# thin black axis lines and ticks, and no legend unless a plot re-enables it.
theme2 <- theme(
  plot.margin = unit(c(t = 1.2, r = 0.4, b = 1.2, l = 0.4), "cm"),
  # blank canvas
  panel.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  legend.position = "none",
  # axis lines and tick marks
  axis.line.x = element_line(colour = "black", size = 0.1),
  axis.line.y = element_line(colour = "black", size = 0.1),
  axis.ticks = element_line(colour = "black", size = 0.1),
  # numbers on tick marks of x and y axes
  axis.text = element_text(size = 12),
  # titles of x and y axes
  axis.title = element_text(size = 14),
  # margin() takes top, right, bottom, left: 13 units of space to the right of the y axis title
  axis.title.y = element_text(margin = margin(t = 0, r = 13, b = 0, l = 0)),
  # 10 units of space above the x axis title
  axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)),
  # can provide "A", "B" by ggtitle, but plot_grid was used as it can shift further left
  plot.title = element_text(size = 32, face = "bold", hjust = -0.14),
  # hjust = 0.5 centres the subtitle
  plot.subtitle = element_text(size = 14, face = "plain", hjust = 0.5)
)



# darkest two hues from 3-class PuBuGn in color brewer
# cb1<-rep(c("#1c9099", "#a6bddb"), 12)

# # darkest two hues from 3-class PuBu in color brewer
# cb1<-rep(c("#2b8cbe", "#a6bddb"), 12)


# #attractive pinks, greys
# cb1<-c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#999999", "#E69F00", "#56B4E9", "#E69F00", "#009E73", "#F0E442", "#0072B2", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#999999", "#D55E00", "#CC79A7")

# cb1_rev <- c("#CC79A7", "#D55E00", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00","#999999", "#CC79A7", "#D55E00", "#0072B2", "#D55E00", "#F0E442", "#009E73", "#56B4E9", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00","#999999", "#CC79A7", "#E69F00","#999999")

# #'4-class RdBu'
# cb2 <- c('#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#f4a582','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#92c5de','#0571b0')

# #'4-class RdYlBu'
# cb3 <- c('#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#fdae61','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#abd9e9','#2c7bb6')
	
	
# Default sizes for plotted points and horizontal reference lines.
size_hline <- 0.1
size_point <- 0.3

# If desired, modify balloon code. Probably not a good idea in this context, though.
# balloon_scale <- 0.8 # inflation factor for significant points	
# # scale significant points beginning with 0.8 pt
# size_point <- 0.8*(1 + balloon_scale*(bleed$A23_T_HUM_ratio_norm/max(bleed$A23_T_HUM_ratio_norm, na.rm=TRUE)))


# --------------- color -------------------------------

# Return n evenly spaced HCL colours (luminance 65, chroma 100).
#   n: number of colours wanted (a non-negative whole number).
# Returns a character vector of n hex colour strings; character(0) when n is 0.
gg_color_hue <- function(n) {
  # n + 1 hues are generated between 15 and 375 degrees; the two end points are
  # 360 degrees apart, so the last one is dropped below.
  hues <- seq(15, 375, length.out = n + 1)
  # seq_len() rather than 1:n, so that n = 0 yields no colours
  # (1:0 is c(1, 0) and would return one colour).
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}



#----------------- A23 align to hamster ---------------------


# Load the A23 read counts, drop ramp bins, order by chromosome, convert reads
# to ratios of the overall mean, and add a cumulative genome coordinate.

# read in A23 sequence reads:
A23 <- read.table("A23_gseq.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# Get rows at beginning of each chromosome:
A23_start <- A23[A23$posS == 0 & A23$posE == 1e6, ]

# Get rid of ramp ups and ramp downs (though note that hamster has ramp downs, not ramp ups):
# only rows whose pos is exactly 1e4 above the previous row's pos are kept.
A23 <- A23[c(0, diff(A23$pos)) == 1e4, ]

# combine A23 without ramps and A23_start:
A23 <- rbind(A23_start, A23)

# # get rid of contigs with only one entry. Has no action, since A23_gseq.txt already has smallest 5% of contigs removed. cf Create_A23_gseq.R
# A23 <- A23[!(A23$Contig_ID %in% aggregate(pos ~ Contig_ID, 
          # data = A23, 
          # FUN = function(x){NROW(x)})[aggregate(pos ~ Contig_ID, 
          # data = A23, 
          # FUN = function(x){NROW(x)})$pos==1,"Contig_ID"]),]


# Sort (the factor is only used to impose chromosome order, then dropped again):
chrOrder <- paste0("chr", c(1:10, "X"))
A23$Chromosome <- factor(A23$Chromosome, levels = chrOrder)
A23 <- A23[order(A23$Chromosome, A23$pos), ]
A23$Chromosome <- as.character(A23$Chromosome)


# Transform reads into mean ratios
A23$read_ratio <- A23$reads / mean(A23$reads)


# # Transform chr1 etc. to numbers
# A23$Chromosome <- gsub('chr', '', A23$Chromosome)
# A23[A23$Chromosome == "X","Chromosome"] <- 11
# chrOrder<-c(1:11)
# A23$Chromosome <-factor(A23$Chromosome, levels=chrOrder)
# A23 <- A23[order(A23$Chromosome, A23$pos), ]
# A23$Chromosome <- as.numeric(A23$Chromosome)

# Compute chromosome size (largest pos seen on each chromosome)
gen_coord <- aggregate(pos ~ Chromosome, FUN = max, data = A23)
colnames(gen_coord)[2] <- "chr_size"
gen_coord$Chromosome <- factor(gen_coord$Chromosome, levels = chrOrder)
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
# gen_coord$Chromosome <- as.numeric(gen_coord$Chromosome)

# Use cumsum to make genome coordinates: each chromosome starts where the
# previous ones end. The last cumulative sum is dropped with head(., -1)
# rather than a hard-coded index, so this works for any number of chromosomes
# present in the data.
gen_coord$coord <- c(0, head(cumsum(gen_coord$chr_size), -1))

# merge genome coordinates with A23
A23 <- merge(A23, gen_coord[, c("Chromosome", "coord")])
A23$Chromosome <- factor(A23$Chromosome, levels = chrOrder)
A23 <- A23[order(A23$Chromosome, A23$pos), ]
# A23$Chromosome <- as.numeric(A23$Chromosome)

# genome-wide coordinate = position within chromosome + chromosome offset
A23$coord <- A23$pos + A23$coord






#----------------- HAMSTER retain mean RH pools ---------------------


# Load the hamster-aligned RH read counts, drop ramp bins, order by chromosome,
# compute the mean per-pool read ratio, and add a cumulative genome coordinate.
RH_hamster <- read.table("RH_hamster_gseq.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# Get rows at beginning of each chromosome:
RH_hamster_start <- RH_hamster[RH_hamster$posS == 0 & RH_hamster$posE == 1e6, ]

# Get rid of ramp ups and ramp downs (though note that hamster has ramp downs, not ramp ups):
# only rows whose pos is exactly 1e4 above the previous row's pos are kept.
RH_hamster <- RH_hamster[c(0, diff(RH_hamster$pos)) == 1e4, ]

# combine RH_hamster without ramps and RH_hamster_start:
RH_hamster <- rbind(RH_hamster_start, RH_hamster)


# # get rid of contigs with only one entry. Has no action, since RH_hamster_gseq.txt already has smallest 5% of contigs removed. cf Create_RH_hamster_gseq.R
# RH_hamster <- RH_hamster[!(RH_hamster$Contig_ID %in% aggregate(pos ~ Contig_ID, 
          # data = RH_hamster, 
          # FUN = function(x){NROW(x)})[aggregate(pos ~ Contig_ID, 
          # data = RH_hamster, 
          # FUN = function(x){NROW(x)})$pos==1,"Contig_ID"]),]

# Sort (the factor is only used to impose chromosome order, then dropped again):
chrOrder <- paste0("chr", c(1:10, "X"))
RH_hamster$Chromosome <- factor(RH_hamster$Chromosome, levels = chrOrder)
RH_hamster <- RH_hamster[order(RH_hamster$Chromosome, RH_hamster$pos), ]
RH_hamster$Chromosome <- as.character(RH_hamster$Chromosome)


# Transform reads into mean ratios for RH pools only (columns named *_w0_d0):
# each pool column is divided by its own mean, then the pools are averaged per row.
pool_cols <- grep("_w0_d0$", colnames(RH_hamster))
if (length(pool_cols) == 0) {
  stop("No RH pool columns (names ending in '_w0_d0') found in RH_hamster_gseq.txt", call. = FALSE)
}
# drop = FALSE keeps a data frame even when only one pool column matches
RH_hamster$read_ratio <- rowMeans(
  apply(RH_hamster[, pool_cols, drop = FALSE], 2, FUN = function(x) x / mean(x))
)


# # Transform chr1 etc. to numbers
# RH_hamster$Chromosome <- gsub('chr', '', RH_hamster$Chromosome)
# RH_hamster[RH_hamster$Chromosome == "X","Chromosome"] <- 11
# chrOrder<-c(1:11)
# RH_hamster$Chromosome <-factor(RH_hamster$Chromosome, levels=chrOrder)
# RH_hamster <- RH_hamster[order(RH_hamster$Chromosome, RH_hamster$pos), ]
# RH_hamster$Chromosome <- as.numeric(RH_hamster$Chromosome)

# Compute chromosome size (largest pos seen on each chromosome)
gen_coord <- aggregate(pos ~ Chromosome, FUN = max, data = RH_hamster)
colnames(gen_coord)[2] <- "chr_size"
gen_coord$Chromosome <- factor(gen_coord$Chromosome, levels = chrOrder)
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
# gen_coord$Chromosome <- as.numeric(gen_coord$Chromosome)

# Use cumsum to make genome coordinates: each chromosome starts where the
# previous ones end. The last cumulative sum is dropped with head(., -1)
# rather than a hard-coded index, so this works for any number of chromosomes
# present in the data.
gen_coord$coord <- c(0, head(cumsum(gen_coord$chr_size), -1))

# merge genome coordinates with RH_hamster
RH_hamster <- merge(RH_hamster, gen_coord[, c("Chromosome", "coord")])
RH_hamster$Chromosome <- factor(RH_hamster$Chromosome, levels = chrOrder)
RH_hamster <- RH_hamster[order(RH_hamster$Chromosome, RH_hamster$pos), ]
# RH_hamster$Chromosome <- as.numeric(RH_hamster$Chromosome)

# genome-wide coordinate = position within chromosome + chromosome offset
RH_hamster$coord <- RH_hamster$pos + RH_hamster$coord


#----------------- HEK align to human ---------------------

# Load the HEK293 read counts, drop ramp bins, order by chromosome, convert
# reads to ratios of the overall mean, add a cumulative genome coordinate, and
# finally remove chrY.

# read in HEK293 sequence reads:
HEK <- read.table("HEK293_gseq.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# Get rows at beginning of each chromosome:
HEK_start <- HEK[HEK$posS == 0 & HEK$posE == 1e6, ]

# Get rid of ramp ups and ramp downs:
# only rows whose pos is exactly 1e4 above the previous row's pos are kept.
HEK <- HEK[c(0, diff(HEK$pos)) == 1e4, ]

# combine HEK without ramps and HEK_start:
HEK <- rbind(HEK_start, HEK)

# Sort (the factor is only used to impose chromosome order, then dropped again):
chrOrder <- paste0("chr", c(1:22, "X", "Y"))
HEK$Chromosome <- factor(HEK$Chromosome, levels = chrOrder)
HEK <- HEK[order(HEK$Chromosome, HEK$pos), ]
HEK$Chromosome <- as.character(HEK$Chromosome)


# Transform reads into mean ratios
HEK$read_ratio <- HEK$reads / mean(HEK$reads)


# # Transform chr1 etc. to numbers
# HEK$Chromosome <- gsub('chr', '', HEK$Chromosome)
# HEK[HEK$Chromosome == "X","Chromosome"] <- 23
# HEK[HEK$Chromosome == "Y","Chromosome"] <- 24
# chrOrder<-c(1:24)
# HEK$Chromosome <-factor(HEK$Chromosome, levels=chrOrder)
# HEK <- HEK[order(HEK$Chromosome, HEK$pos), ]
# HEK$Chromosome <- as.numeric(HEK$Chromosome)

# Compute chromosome size (largest pos seen on each chromosome)
gen_coord <- aggregate(pos ~ Chromosome, FUN = max, data = HEK)
colnames(gen_coord)[2] <- "chr_size"
gen_coord$Chromosome <- factor(gen_coord$Chromosome, levels = chrOrder)
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
# gen_coord$Chromosome <- as.numeric(gen_coord$Chromosome)

# Use cumsum to make genome coordinates: each chromosome starts where the
# previous ones end. The last cumulative sum is dropped with head(., -1)
# rather than a hard-coded index, so this also works when a chromosome
# (e.g. chrY) has no rows in the data.
gen_coord$coord <- c(0, head(cumsum(gen_coord$chr_size), -1))

# merge genome coordinates with HEK
HEK <- merge(HEK, gen_coord[, c("Chromosome", "coord")])
HEK$Chromosome <- factor(HEK$Chromosome, levels = chrOrder)
HEK <- HEK[order(HEK$Chromosome, HEK$pos), ]
# HEK$Chromosome <- as.numeric(HEK$Chromosome)

# genome-wide coordinate = position within chromosome + chromosome offset
HEK$coord <- HEK$pos + HEK$coord

# get rid of chrY (the "chrY" factor level itself is kept)
HEK <- HEK[HEK$Chromosome != "chrY", ]



#-----------------HUMAN retain mean RH pools---------------------

# Used seq reads for retention graphs to compare on equal footing with hamster and HEK293 "retentions"

# Load the human-aligned RH read counts, drop ramp bins, order by chromosome,
# compute the mean per-pool read ratio, normalise it by the peak near TK1,
# add a cumulative genome coordinate, and finally remove chrY.

# read in RH human sequence reads:
RH_human <- read.table("RH_human_gseq.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# Get rows at beginning of each chromosome:
RH_human_start <- RH_human[RH_human$posS == 0 & RH_human$posE == 1e6, ]

# Get rid of ramp ups and ramp downs:
# only rows whose pos is exactly 1e4 above the previous row's pos are kept.
RH_human <- RH_human[c(0, diff(RH_human$pos)) == 1e4, ]

# combine RH_human without ramps and RH_human_start:
RH_human <- rbind(RH_human_start, RH_human)

# Sort (the factor is only used to impose chromosome order, then dropped again):
chrOrder <- paste0("chr", c(1:22, "X", "Y"))
RH_human$Chromosome <- factor(RH_human$Chromosome, levels = chrOrder)
RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]
RH_human$Chromosome <- as.character(RH_human$Chromosome)



# Mean human reads for RH pools (columns named *_w0_d0):
# each pool column is divided by its own mean, then the pools are averaged per row.
pool_cols <- grep("_w0_d0$", colnames(RH_human))
if (length(pool_cols) == 0) {
  stop("No RH pool columns (names ending in '_w0_d0') found in RH_human_gseq.txt", call. = FALSE)
}
# drop = FALSE keeps a data frame even when only one pool column matches
RH_human$read_ratio <- rowMeans(
  apply(RH_human[, pool_cols, drop = FALSE], 2, FUN = function(x) x / mean(x))
)



#  find middle TK1

gencode_gtf_ensembl_ucsc <- read.delim("gencode_gtf_ensembl_ucsc_v31.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE, check.names = FALSE)


TK1_rows <- gencode_gtf_ensembl_ucsc[gencode_gtf_ensembl_ucsc$geneSymbol == "TK1", ]
if (nrow(TK1_rows) == 0) {
  stop("TK1 not found in gencode_gtf_ensembl_ucsc_v31.txt", call. = FALSE)
}
TK1_rows
      # geneSymbol           tx_id Chromosome    geneS    geneE strand         gene_id      gene_type geneLength txLength cdsLength                                       gene_description    5utrS    5utrE
# 54647        TK1 ENST00000588734      chr17 78174075 78187233      - ENSG00000167900 protein_coding      13159     1681       804 thymidine kinase 1 [Source:HGNC Symbol;Acc:HGNC:11830] 78186995 78187233
      # 5utrDiff    3utrS    3utrE 3utrDiff exonCount
# 54647      239 78174121 78174758      638         6


# Midpoint of the gene: the start and end must be combined with c() before
# averaging. mean(geneS, geneE) would pass geneE as the 'trim' argument and
# return geneS (the gene start) instead of the middle.
TK1_coord <- mean(c(TK1_rows[["geneS"]], TK1_rows[["geneE"]]))



#  find peak TK1 reads 

# For a more accurate search of the TK1 peak, restrict the search to within delta_1 of TK1, because CEN and TEL become larger than TK1 in some samples. (Though not RH pools; other higher peaks usually occur with later time and higher conc samples.) In fact, in RH pools TK1 is the highest peak on the whole of chr17. But for generality use this approach with delta_1.

delta_1 <- 1e6

near_TK1 <- RH_human$Chromosome == "chr17" &
  RH_human$pos >= TK1_coord - delta_1 &
  RH_human$pos <= TK1_coord + delta_1
TK1_max <- max(RH_human[near_TK1, "read_ratio"])


TK1_max
# [1] 10.58925
# NOTE(review): value above was recorded before TK1_coord was changed to the gene midpoint; re-confirm on the next run.


# Normalize using peak copy number at TK1:
RH_human$read_ratio <- RH_human$read_ratio / TK1_max


# If desired can normalize using simpler approach of finding max seq reads on chr17:
# RH_human$read_ratio <- RH_human$read_ratio/max(RH_human[RH_human$Chromosome == "chr17","read_ratio"])




# # Transform chr1 etc. to numbers
# RH_human$Chromosome <- gsub('chr', '', RH_human$Chromosome)
# RH_human[RH_human$Chromosome == "X","Chromosome"] <- 23
# RH_human[RH_human$Chromosome == "Y","Chromosome"] <- 24
# chrOrder<-c(1:24)
# RH_human$Chromosome <-factor(RH_human$Chromosome, levels=chrOrder)
# RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]
# RH_human$Chromosome <- as.numeric(RH_human$Chromosome)

# Compute chromosome size (largest pos seen on each chromosome)
gen_coord <- aggregate(pos ~ Chromosome, FUN = max, data = RH_human)
colnames(gen_coord)[2] <- "chr_size"
gen_coord$Chromosome <- factor(gen_coord$Chromosome, levels = chrOrder)
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
# gen_coord$Chromosome <- as.numeric(gen_coord$Chromosome)

# Use cumsum to make genome coordinates: each chromosome starts where the
# previous ones end. The last cumulative sum is dropped with head(., -1)
# rather than a hard-coded index, so this also works when a chromosome
# (e.g. chrY) has no rows in the data.
gen_coord$coord <- c(0, head(cumsum(gen_coord$chr_size), -1))

# merge genome coordinates with RH_human
RH_human <- merge(RH_human, gen_coord[, c("Chromosome", "coord")])
RH_human$Chromosome <- factor(RH_human$Chromosome, levels = chrOrder)
RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]
# RH_human$Chromosome <- as.numeric(RH_human$Chromosome)

# genome-wide coordinate = position within chromosome + chromosome offset
RH_human$coord <- RH_human$pos + RH_human$coord

# get rid of chrY (the "chrY" factor level itself is kept)
RH_human <- RH_human[RH_human$Chromosome != "chrY", ]






# ~~~~~~~~~~~~~~~~ Figure of correlation plots ~~~~~~~~~~~~~~~~~~~~~~~


# -------------- Hamster genome in RH pools vs A23 cells ------------------


# R and P vals from calx below

# Thin both hamster tables to every 100th row for plotting, reduce them to
# their shared key columns plus one read-ratio column each, and merge them.
A23 <- A23[seq(1, nrow(A23), by = 1e2), ]
RH_hamster <- RH_hamster[seq(1, nrow(RH_hamster), by = 1e2), ]


dim(RH_hamster)
# [1] 2043  122

dim(A23)
# [1] 2043    8

# Drop column 6 of A23, after which read_ratio sits in position 6.
# NOTE(review): column 6 is presumably the raw reads column - confirm against A23_gseq.txt.
A23 <- A23[, c(1:5, 7:8)]
colnames(A23)[6] <- "A23_read_ratio"

hamster_keep <- c("Chromosome", "Contig_ID", "posS", "posE", "pos", "read_ratio", "coord")
RH_hamster <- RH_hamster[, hamster_keep]
colnames(RH_hamster)[6] <- "RH_hamster_read_ratio"

# merge() joins on every column name the two tables share
RH_hamster_A23 <- merge(A23, RH_hamster)

dim(RH_hamster_A23)
# [1] 2043    8

# One colour per chromosome, named by the chromosome factor levels
colores_1 <- setNames(
  gg_color_hue(length(unique(RH_hamster_A23$Chromosome))),
  levels(RH_hamster_A23$Chromosome)
)




# Scatterplot of hamster copy number: A23 cells (x) against the RH pool mean
# (y), points coloured by chromosome, with one linear fit across all points.
p1 <- ggplot(
    data = RH_hamster_A23,
    mapping = aes(x = A23_read_ratio, y = RH_hamster_read_ratio)
  ) +
  geom_point(aes(color = as.factor(Chromosome)), size = 0.5, stroke = 0) +
  # group = 1 so a single regression line is fitted rather than one per chromosome
  stat_smooth(
    aes(group = 1),
    method = "lm",
    formula = y ~ x,
    se = TRUE,
    colour = "pink",
    size = 0.5,
    fill = "grey"
  ) +
  # R and P values come from the cor.test() calls at the end of the script
  annotate("text", x = 0.66, y = 4.15, label = "italic('R') == 0.98", parse = TRUE, size = 3) +
  annotate("text", x = 1.1, y = 3.85, label = "italic('P') < 2.2%*%10^-16", parse = TRUE, size = 3) +
  # annotate("text", x = 0.2, y = 4.0, label= expression(atop(italic('R') == 0.98,      italic('P') < 2.2%*%10^-16)), parse=TRUE, size=3, hjust = "left") + ## could not get to left justify
  # ggtitle("") + 
  # labs(subtitle="") #+
  scale_color_manual(
    name = "Chromosomes",
    values = colores_1,
    labels = c(1:10, "X")
  ) +
  scale_x_continuous(breaks = c(0, 2, 4), labels = c(0, 2, 4), expand = c(0, 0), limits = c(-0.2, NA)) +
  scale_y_continuous(breaks = c(0, 2, 4), labels = c(0, 2, 4), expand = c(0, 0), limits = c(-0.2, NA)) +
  xlab("A23 copy number") +
  ylab("Hamster DNA copy number \nin RH pools") +
  guides(
    colour = guide_legend(
      override.aes = list(size = 1),
      ncol = 1,
      byrow = FALSE
    )
  ) +
  theme2 +
  # overrides of theme2 for this panel: visible, compact legend on the right
  theme(
    plot.margin = unit(c(t = 1.7, r = 0.6, b = 1, l = 0.6), "cm"),
    axis.title.y = element_text(size = 12),
    # axis.text=element_text(size=9)) +
    legend.position = "right",
    legend.title = element_text(size = 9),
    legend.title.align = 0.7,
    legend.text = element_text(size = 8),
    legend.margin = margin(t = 0, r = 0, b = 0, l = -5, unit = "pt"),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = -5, unit = "pt"),
    legend.key = element_rect(fill = NA),
    legend.key.height = unit(0.2, "cm"),
    legend.key.width = unit(0.3, "cm"),
    legend.spacing.x = unit(0.1, "cm"),
    legend.spacing.y = unit(0.1, "cm")
  )
print(p1)






# -------------- Human genome in RH pools vs HEK293 cells -------------------


# R and P vals from calx below


# Thin both human tables to every 100th row for plotting, give each read-ratio
# column a distinct name, and merge the two tables.
HEK <- HEK[seq(1, nrow(HEK), by = 1e2), ]
RH_human <- RH_human[seq(1, nrow(RH_human), by = 1e2), ]


# NOTE(review): the two recorded dims below look swapped relative to the hamster section - confirm on the next run.
dim(HEK)
# [1] 3009  122

dim(RH_human)
# [1] 3009    8


# NOTE(review): position 6 of HEK is presumably read_ratio - confirm against HEK293_gseq.txt.
colnames(HEK)[6] <- "HEK_read_ratio"

human_keep <- c("Chromosome", "posS", "posE", "pos", "read_ratio", "coord")
RH_human <- RH_human[, human_keep]
colnames(RH_human)[5] <- "RH_human_read_ratio"


# merge() joins on every column name the two tables share
RH_human_HEK <- merge(HEK, RH_human)

dim(RH_human_HEK)
# [1] 3009    8


# One colour per chromosome, named by the chromosome factor levels;
# the 24th level is dropped from the names.
colores_1 <- setNames(
  gg_color_hue(length(unique(RH_human_HEK$Chromosome))),
  levels(RH_human_HEK$Chromosome)[-24]
)






# Scatterplot of human copy number: HEK293 cells (x) against the RH pool mean
# (y), points coloured by chromosome, with one linear fit across all points.
p2 <- ggplot(
    data = RH_human_HEK,
    mapping = aes(x = HEK_read_ratio, y = RH_human_read_ratio)
  ) +
  geom_point(aes(color = as.factor(Chromosome)), size = 0.5, stroke = 0) +
  # group = 1 so a single regression line is fitted rather than one per chromosome
  stat_smooth(
    aes(group = 1),
    method = "lm",
    formula = y ~ x,
    se = TRUE,
    colour = "pink",
    size = 0.5,
    fill = "grey"
  ) +
  # R and P values come from the cor.test() calls at the end of the script
  annotate("text", x = 0.9, y = 1.34, label = "italic('R') == 0.47", parse = TRUE, size = 3) +
  annotate("text", x = 1.5, y = 1.25, label = "italic('P') < 2.2%*%10^-16", parse = TRUE, size = 3) +
  # ggtitle("") + 
  # labs(subtitle="") +
  scale_color_manual(
    name = "Chromosomes",
    values = colores_1,
    labels = c(1:22, "X")
  ) +
  scale_x_continuous(breaks = c(0, 2, 4, 6), labels = c(0, 2, 4, 6), expand = c(0, 0), limits = c(-0.2, 6)) +
  scale_y_continuous(breaks = c(0, 1), labels = c(0, 1), expand = c(0, 0), limits = c(-0.2 / 4, NA)) +
  xlab("HEK293 copy number") +
  ylab("Human DNA copy number \nin RH pools") +
  guides(
    colour = guide_legend(
      override.aes = list(size = 1),
      ncol = 2,
      byrow = FALSE
    )
  ) +
  theme2 +
  # overrides of theme2 for this panel: visible, compact legend on the right
  theme(
    plot.margin = unit(c(t = 1.7, r = 0.6, b = 1, l = 0.6), "cm"),
    axis.title.y = element_text(size = 12),
    # axis.text=element_text(size=9)) +
    legend.position = "right",
    legend.title = element_text(size = 9),
    legend.title.align = 0.7,
    legend.text = element_text(size = 8),
    legend.margin = margin(t = 0, r = 0, b = 0, l = -5, unit = "pt"),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = -5, unit = "pt"),
    legend.key = element_rect(fill = NA),
    legend.key.height = unit(0.2, "cm"),
    legend.key.width = unit(0.3, "cm"),
    legend.spacing.x = unit(0.1, "cm"),
    legend.spacing.y = unit(0.1, "cm")
  )
print(p2)





#------------------Make file --------------------------



# Write both scatterplots side by side (panels A and B) to a single PDF.
# The grid is print()ed explicitly, as p1 and p2 are: a bare plot_grid() call
# is only drawn through top-level auto-printing, which does not happen when the
# script is run via source() with default settings and would leave the PDF empty.
pdf("pools_vs_donors.pdf", width = 7.5, height = 3.33, useDingbats = FALSE)
print(plot_grid(p1, p2, labels = c("A", "B"), ncol = 2, nrow = 1, label_size = 16, align = "hv", axis = "lb"))
dev.off()


# png("pools_vs_donors.png",width=7.5,height=3.33,units="in",res=300)
# plot_grid(p1, p2, labels=c("A", "B"), ncol = 2, nrow = 1, label_size = 16)
# dev.off()




# ~~~~~~~~~~~~~~ Corr between A23 copy number and hamster DNA in RH pools ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


# Data used in legend of p1 scatterplot above 


# Pearson correlation between A23 and RH-pool hamster read ratios, computed
# once and reused for the printed summary and the exact p-value.
hamster_cor <- cor.test(RH_hamster_A23$A23_read_ratio, RH_hamster_A23$RH_hamster_read_ratio)
hamster_cor

	# Pearson's product-moment correlation

# data:  RH_hamster_A23$A23_read_ratio and RH_hamster_A23$RH_hamster_read_ratio
# t = 252.63, df = 2041, p-value < 2.2e-16
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
 # 0.9829798 0.9856725
# sample estimates:
      # cor 
# 0.9843836 <<<<<<<<<<< use in paper


hamster_cor$p.value
# [1] 0 <<<<<<<<<<< use in paper



# ~~~~~~~~~~~~~~ Corr between HEK293 copy number and human DNA in RH pools ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


# Data used in legend of p2 scatterplot above 


# Pearson correlation between HEK293 and RH-pool human read ratios, computed
# once and reused for the printed summary and the exact p-value.
human_cor <- cor.test(RH_human_HEK$HEK_read_ratio, RH_human_HEK$RH_human_read_ratio)
human_cor

	# Pearson's product-moment correlation

# data:  RH_human_HEK$HEK_read_ratio and RH_human_HEK$RH_human_read_ratio
# t = 28.862, df = 3007, p-value < 2.2e-16
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
 # 0.4372972 0.4932759
# sample estimates:
      # cor 
# 0.4657524 <<<<<<<<<<< use in paper



human_cor$p.value
# [1] 6.718806e-162 <<<<<<<<<<< use in paper














