# Retention of human DNA in the six RH pools

#install.packages("ggplot2")
library(ggplot2)
library(cowplot) #used with plot_grid 

#----------------Aesthetics ---------------------------


# Shared look for every panel: blank background, no grid lines, no legend,
# hairline black axes and tick marks.
theme2 <- local({
	# One hairline stroke definition, reused for both axis lines and the ticks.
	# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
	# line elements -- confirm the installed version before switching.
	hairline <- element_line(colour = "black", size = 0.1)

	theme(
		# outer margin around each panel, in cm
		plot.margin = unit(c(t = 1.2, r = 0.4, b = 1.2, l = 0.4), "cm"),
		panel.grid.major = element_blank(),
		panel.grid.minor = element_blank(),
		panel.background = element_blank(),
		legend.position = "none",
		axis.line.x = hairline,
		axis.line.y = hairline,
		axis.ticks = hairline,
		# numbers on the tick marks of both axes
		axis.text = element_text(size = 12),
		# titles of both axes
		axis.title = element_text(size = 14),
		# space to the right of the y axis title, i.e. between title and axis
		axis.title.y = element_text(margin = margin(t = 0, r = 13, b = 0, l = 0)),
		# space above the x axis title, i.e. between axis and title
		axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)),
		# large bold title; the negative hjust moves it left. Panel letters
		# ("A", "B", ...) could be given via ggtitle(), but this script adds
		# them with plot_grid(labels = ...) instead.
		plot.title = element_text(size = 32, face = "bold", hjust = -0.14),
		# hjust = 0.5 centres the subtitle
		plot.subtitle = element_text(size = 14, face = "plain", hjust = 0.5)
	)
})




# darkest two hues from 3-class PuBuGn in color brewer
# cb1<-rep(c("#1c9099", "#a6bddb"), 12)

# # darkest two hues from 3-class PuBu in color brewer
# cb1<-rep(c("#2b8cbe", "#a6bddb"), 12)


# #attractive pinks, greys
# cb1<-c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#999999", "#E69F00", "#56B4E9", "#E69F00", "#009E73", "#F0E442", "#0072B2", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#999999", "#D55E00", "#CC79A7")

# cb1_rev <- c("#CC79A7", "#D55E00", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00","#999999", "#CC79A7", "#D55E00", "#0072B2", "#D55E00", "#F0E442", "#009E73", "#56B4E9", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00","#999999", "#CC79A7", "#E69F00","#999999")

# #'4-class RdBu'
# cb2 <- c('#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#f4a582','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#92c5de','#0571b0')

# #'4-class RdYlBu'
# cb3 <- c('#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#fdae61','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#abd9e9','#2c7bb6')
	
	
# Point size handed to geom_point() in the per-pool retention plots.
size_point <- 0.3
# Line width, presumably meant for horizontal reference lines; it is not
# referenced in the part of the script reviewed here -- TODO confirm it is still needed.
size_hline <- 0.1

# If desired, modify balloon code. Probably not a good idea in this context, though.
# balloon_scale <- 0.8 # inflation factor for significant points	
# # scale significant points beginning with 0.8 pt
# size_point <- 0.8*(1 + balloon_scale*(bleed$A23_T_HUM_ratio_norm/max(bleed$A23_T_HUM_ratio_norm, na.rm=TRUE)))



#----------------- Prepare human retain RH pools ---------------------


# Read the tab-separated table of per-window read counts (header row expected).
RH_human <- read.table(
	file = "RH_human_gseq.txt",
	header = TRUE,
	sep = "\t",
	stringsAsFactors = FALSE
)

# Rows at the beginning of each chromosome: window running from 0 to 1e6.
is_chr_start <- RH_human$posS == 0 & RH_human$posE == 1e6
RH_human_start <- RH_human[is_chr_start, ]

# Drop the ramp-up / ramp-down rows: keep only rows whose position advanced by
# exactly 1e4 relative to the previous row (the very first row is given a step
# of 0 and is therefore dropped here as well).
step_from_previous <- c(0, diff(RH_human$pos))

# Put the chromosome-start rows back in front of the ramp-free rows.
RH_human <- rbind(RH_human_start, RH_human[step_from_previous == 1e4, ])


# Order rows by chromosome (chr1..chr22, chrX, chrY) and then by position.
# Chromosome names outside this list become NA when converted to a factor.
chrOrder <- paste0("chr", c(1:22, "X", "Y"))
RH_human$Chromosome <- factor(RH_human$Chromosome, levels = chrOrder)
RH_human <- RH_human[with(RH_human, order(Chromosome, pos)), ]
RH_human$Chromosome <- as.character(RH_human$Chromosome)

# Recode chromosome names as numbers: strip the "chr" prefix, then X -> 23, Y -> 24.
RH_human$Chromosome <- sub("chr", "", RH_human$Chromosome, fixed = TRUE)
sex_chr_codes <- c(X = 23, Y = 24)
for (sex_chr in names(sex_chr_codes)) {
	RH_human[RH_human$Chromosome == sex_chr, "Chromosome"] <- sex_chr_codes[[sex_chr]]
}

# From here on chrOrder holds the numeric chromosome codes.
chrOrder <- 1:24
RH_human$Chromosome <- factor(RH_human$Chromosome, levels = chrOrder)
RH_human <- RH_human[with(RH_human, order(Chromosome, pos)), ]
RH_human$Chromosome <- as.numeric(RH_human$Chromosome)

# Compute chromosome size: the largest position seen on each chromosome.
# Chromosome is already a numeric code at this point, so a plain order() gives
# chromosome order.
gen_coord <- aggregate(pos ~ Chromosome, FUN = max, data = RH_human)
colnames(gen_coord)[2] <- "chr_size"
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]

# Genome-wide offset of each chromosome = summed size of all chromosomes before it.
#  * as.numeric() guards against integer overflow: if `pos` was read as integer,
#    the running total exceeds .Machine$integer.max part-way through the genome
#    and cumsum() would return NA from there on.
#  * head(., -1) drops the last running total whatever the number of
#    chromosomes present, instead of assuming there are exactly 24.
gen_coord$coord <- c(0, head(cumsum(as.numeric(gen_coord$chr_size)), -1))

# Attach the offsets to RH_human. merge() orders its result by the key only, so
# restore the position order within each chromosome afterwards.
RH_human <- merge(RH_human, gen_coord[, c("Chromosome", "coord")], by = "Chromosome")
RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]

# Genome coordinate = position within the chromosome + chromosome offset
RH_human$coord <- RH_human$pos + RH_human$coord

# get rid of chrY (numeric code 24)
RH_human <- RH_human[RH_human$Chromosome != 24, ]

# Midpoint between the smallest and largest value of x; used to place one axis
# break at the centre of each chromosome.
mid <- function(x) {
	extremes <- range(x)
	(extremes[2] + extremes[1]) / 2
}
# One row per chromosome holding the midpoint of its genome coordinates
chr_mid <- setNames(
	aggregate(coord ~ Chromosome, data = RH_human, FUN = mid),
	c("Chromosome", "mid")
)

# Put the rows in chromosome order (chrOrder) and keep Chromosome numeric
chr_mid$Chromosome <- factor(chr_mid$Chromosome, levels = chrOrder)
chr_mid <- chr_mid[order(chr_mid$Chromosome), ]
chr_mid$Chromosome <- as.numeric(chr_mid$Chromosome)

# x axis breaks sit at the chromosome midpoints
breaks <- chr_mid$mid


# Alternating grey / skyblue palette (24 entries), applied per chromosome by
# scale_color_manual() so that consecutive chromosomes alternate in colour.
cb1 <- rep_len(c("grey", "skyblue"), 24)

# x axis tick labels, one per plotted chromosome (1-22 and X); some of the
# labels from 10 onwards are left blank.
labels <- as.character(1:23)
labels[c(10, 12, 14, 15, 17, 18, 19, 21, 22)] <- ""
labels[23] <- "X"

#----------------- human retain RH pools 1-6 ---------------------

# Normalise the reads of one RH pool by that pool's largest read value on
# chromosome 17, so the chromosome 17 peak becomes 1. The plots label this
# peak "TK1" (assumes the chromosome 17 maximum is the TK1 locus -- confirm).
#   dat:      data frame with a numeric `Chromosome` column and the pool column
#   pool_col: exact name of the read column, e.g. "RH1_w0_d0"
# Returns a numeric vector with one normalised value per row of `dat`.
# Stops with an error if the column is missing or chromosome 17 has no reads.
normalise_to_chr17_peak <- function(dat, pool_col) {
	if (!pool_col %in% colnames(dat)) {
		stop("Column '", pool_col, "' not found in the read table", call. = FALSE)
	}
	reads <- dat[[pool_col]]
	chr17_reads <- reads[which(dat$Chromosome == 17)]
	if (length(chr17_reads) == 0 || all(is.na(chr17_reads))) {
		stop("No chromosome 17 reads available to normalise '", pool_col, "'", call. = FALSE)
	}
	reads / max(chr17_reads, na.rm = TRUE)
}

# Build the genome-wide retention plot for one pool.
#   dat:        data frame with `coord`, `read_ratio` and `Chromosome` columns
#   pool_label: subtitle shown above the panel, e.g. "RH pool 1"
# Reads the script-level objects size_point, cb1, theme2, breaks and labels.
# Returns a ggplot object.
plot_pool_retention <- function(dat, pool_label) {
	# Row with the highest normalised value on chromosome 17: anchor of the "TK1" label
	chr17 <- dat[which(dat$Chromosome == 17), ]
	peak_row <- chr17[which.max(chr17$read_ratio), ]

	ggplot(data = dat, aes(x = coord, y = read_ratio, color = as.factor(Chromosome))) +
		geom_point(size = size_point, stroke = 0) +
		geom_text(data = peak_row, aes(x = coord, y = read_ratio), label = "TK1", colour = "black", fontface = "italic", size = 4, nudge_y = 0.15) +
		scale_color_manual(values = cb1) +
		theme2 +
		scale_x_continuous(breaks = breaks, labels = labels) +
		xlab("Chromosome") +
		ylab("Copy") +
		labs(subtitle = pool_label) +
		# `limits` spelled out in full; the previous `limit` only worked
		# through partial argument matching
		scale_y_continuous(breaks = c(0, 1), limits = c(0, 1.5))
}

# One plot per pool. RH_human$read_ratio is overwritten on every pass, so after
# the loop it holds the values of the last pool (pool 6); each plot object keeps
# the data it was built with.
pool_ids <- 1:6
pool_plots <- vector("list", length(pool_ids))
for (i in seq_along(pool_ids)) {
	RH_human$read_ratio <- normalise_to_chr17_peak(RH_human, sprintf("RH%d_w0_d0", pool_ids[i]))
	pool_plots[[i]] <- plot_pool_retention(RH_human, paste("RH pool", pool_ids[i]))
	print(pool_plots[[i]])
}

# Individual names used when assembling the montage
p1 <- pool_plots[[1]]
p2 <- pool_plots[[2]]
p3 <- pool_plots[[3]]
p4 <- pool_plots[[4]]
p5 <- pool_plots[[5]]
p6 <- pool_plots[[6]]





#------------------Make files --------------------------


# Draw the six-panel montage (2 columns x 3 rows, panels lettered A-F) into one
# graphics device and close that device again.
#   device:   graphics device function, e.g. pdf, tiff or png
#   filename: output file, passed as the first argument of `device`
#   ...:      further arguments for `device` (e.g. units, res, useDingbats)
# Every file is 7.5 x 10 (inches). Returns `filename` invisibly.
write_montage <- function(device, filename, ...) {
	device(filename, width = 7.5, height = 10, ...)
	dev_id <- dev.cur()
	# Close this device even when drawing fails, so an error does not leave it open
	on.exit(dev.off(dev_id), add = TRUE)

	# Explicit print(): a bare plot_grid() call is only drawn through top-level
	# auto-printing, which does not happen when the script is run via source()
	# with its default settings -- the files would then come out empty.
	print(plot_grid(p1, p2, p3, p4, p5, p6, labels = c("A", "B", "C", "D", "E", "F"), ncol = 2, nrow = 3, label_size = 16))
	invisible(filename)
}

write_montage(pdf, "Hum_seq_cov_montage_1.pdf", useDingbats = FALSE)

write_montage(tiff, "Hum_seq_cov_montage_1.tif", units = "in", res = 300)

write_montage(png, "Hum_seq_cov_montage_1.png", units = "in", res = 300)

write_montage(png, "Hum_seq_cov_montage_hi_res_1.png", units = "in", res = 1200)





















