## Manhattan plots for hum_to_hum and ham_to_ham alignments in HEK293, A23 and pool-averaged RH cells.
## Also, find correlations between HEK293 and human DNA in RH pools, as well as correlations between A23 and hamster DNA in RH pools. However, we decided not to include these scatterplots in the paper, as they did not add clarity.

#install.packages("ggplot2")
library(ggplot2)
library(cowplot) #used with plot_grid 

#----------------Aesthetics ---------------------------


# Shared ggplot2 theme added to every panel below: no grid, no panel
# background, no legend, hairline black axes and 9 pt axis text.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line elements in favour
# of `linewidth`; `size` is kept so that older ggplot2 versions still work.
theme2 <- local({
	# one hairline element reused for both axis lines and the tick marks
	hairline <- element_line(colour = "black", size = 0.1)
	# one blank element reused for the grid lines and the panel background
	nothing <- element_blank()

	theme(
		plot.margin = unit(c(t=1.2,r=0.4,b=1.2,l=0.4), "cm"),
		panel.grid.major = nothing,
		panel.grid.minor = nothing,
		panel.background = nothing,
		legend.position = "none",
		axis.line.x = hairline,
		axis.line.y = hairline,
		axis.ticks = hairline,
		# numbers on tick marks of x and y axes
		axis.text = element_text(size=9),
		# titles of x and y axes
		axis.title = element_text(size=9),
		# margin() order is top, right, bottom, left: 13 pt to the right of the
		# y axis title pushes it away from the axis
		axis.title.y = element_text(margin=margin(0,13,0,0)),
		# 13 pt above the x axis title pushes it away from the axis
		axis.title.x = element_text(margin=margin(13,0,0,0)),
		# panel letters ("A", "B", ...) could be supplied via ggtitle, but
		# plot_grid is used instead because it can shift them further left
		plot.title = element_text(size=32, face="bold", hjust = -0.14),
		# hjust = 0.5 centres the subtitle (larger values shift it right)
		plot.subtitle = element_text(size=10, face="plain", hjust = 0.5)
	)
})




# darkest two hues from 3-class PuBuGn in color brewer
# cb1<-rep(c("#1c9099", "#a6bddb"), 12)

# # darkest two hues from 3-class PuBu in color brewer
# cb1<-rep(c("#2b8cbe", "#a6bddb"), 12)


# #attractive pinks, greys
# cb1<-c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#999999", "#E69F00", "#56B4E9", "#E69F00", "#009E73", "#F0E442", "#0072B2", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#999999", "#D55E00", "#CC79A7")

# cb1_rev <- c("#CC79A7", "#D55E00", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00","#999999", "#CC79A7", "#D55E00", "#0072B2", "#D55E00", "#F0E442", "#009E73", "#56B4E9", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00","#999999", "#CC79A7", "#E69F00","#999999")

# #'4-class RdBu'
# cb2 <- c('#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#f4a582','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#92c5de','#0571b0')

# #'4-class RdYlBu'
# cb3 <- c('#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#fdae61','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#abd9e9','#2c7bb6')
	
	
# Point size passed to geom_point() in every panel below
size_point <- 0.3
# Line width for horizontal lines; not referenced in the visible part of this script
size_hline <- 0.1

# If desired, modify balloon code. Probably not a good idea in this context, though.
# balloon_scale <- 0.8 # inflation factor for significant points	
# # scale significant points beginning with 0.8 pt
# size_point <- 0.8*(1 + balloon_scale*(bleed$A23_T_HUM_ratio_norm/max(bleed$A23_T_HUM_ratio_norm, na.rm=TRUE)))



#----------------- A23 align to hamster (1) ---------------------


# Read in A23 sequence reads:
A23 <- read.table("A23_gseq.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE)

# Get rows at beginning of each chromosome (posS == 0 and posE == 1e6):
A23_start <- A23[A23$posS == 0 & A23$posE == 1e6,]

# Get rid of ramp ups and ramp downs (though note that hamster has ramp downs,
# not ramp ups): keep only rows whose pos is exactly 1e4 above the previous row.
A23 <- A23[c(0,diff(A23$pos)) == 1e4,]

# Combine A23 without ramps and A23_start:
A23 <- rbind(A23_start,A23)

# # get rid of contigs with only one entry. Has no action, since A23_gseq.txt already has smallest 5% of contigs removed. cf Create_A23_gseq.R
# A23 <- A23[!(A23$Contig_ID %in% aggregate(pos ~ Contig_ID, 
          # data = A23, 
          # FUN = function(x){NROW(x)})[aggregate(pos ~ Contig_ID, 
          # data = A23, 
          # FUN = function(x){NROW(x)})$pos==1,"Contig_ID"]),]


# Sort by chromosome (chr1..chr10, chrX) and position:
chrOrder <- paste0("chr", c(1:10,"X"))
A23$Chromosome <- factor(A23$Chromosome, levels=chrOrder)
A23 <- A23[order(A23$Chromosome, A23$pos), ]
A23$Chromosome <- as.character(A23$Chromosome)


# Transform reads into ratios relative to the mean over all retained rows
A23$read_ratio <- A23$reads/mean(A23$reads)


# Transform chr1 etc. to numbers (X becomes 11)
A23$Chromosome <- gsub('chr', '', A23$Chromosome)
A23[A23$Chromosome == "X","Chromosome"] <- 11
chrOrder <- c(1:11)
A23$Chromosome <- factor(A23$Chromosome, levels=chrOrder)
A23 <- A23[order(A23$Chromosome, A23$pos), ]
A23$Chromosome <- as.numeric(A23$Chromosome)

# Compute chromosome size (largest pos seen on each chromosome)
gen_coord <- aggregate(pos~Chromosome,FUN=max,data=A23)
colnames(gen_coord)[2] <- "chr_size"
gen_coord$Chromosome <- factor(gen_coord$Chromosome, levels=chrOrder)
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
gen_coord$Chromosome <- as.numeric(gen_coord$Chromosome)

# Use cumsum to make genome coordinates: each chromosome is offset by the
# summed sizes of all chromosomes before it (0 for the first one).
# head(., -1) drops the last cumulative sum however many chromosomes are
# present, instead of relying on a hard-coded index.
gen_coord$coord <- c(0, head(cumsum(gen_coord$chr_size), -1))

# Merge genome coordinates with A23 (merge reorders rows, so sort again)
A23 <- merge(A23,gen_coord[,c("Chromosome","coord")])
A23$Chromosome <- factor(A23$Chromosome, levels=chrOrder)
A23 <- A23[order(A23$Chromosome, A23$pos), ]
A23$Chromosome <- as.numeric(A23$Chromosome)

# Position on the concatenated genome axis = chromosome offset + pos
A23$coord <- A23$pos + A23$coord

# find midpoints of chromosomes for breaks in ggplot
# Midpoint of the range spanned by x: the average of its largest and smallest value
mid <- function(x) {
	lo_hi <- range(x)
	(lo_hi[2] + lo_hi[1]) / 2
}
# Midpoint of every chromosome on the concatenated genome axis
chr_mid <- aggregate(coord~Chromosome,FUN = mid,data=A23)
colnames(chr_mid)[2] <- "mid"
chr_mid$Chromosome <- factor(chr_mid$Chromosome, levels=chrOrder)
chr_mid <- chr_mid[order(chr_mid$Chromosome), ]
chr_mid$Chromosome <- as.numeric(chr_mid$Chromosome)

# Define breaks as mid-points chromosomes
breaks <- chr_mid$mid


# attractive grey and skyblue color scheme (alternates between chromosomes)
cb1 <- c(rep(c("grey", "skyblue"), 5),"grey")

# Tick labels: chromosomes 9 and 10 get an empty label, 11 is shown as "X".
# Indexing by chromosome number keeps breaks and labels the same length even
# if a chromosome is absent from the data.
labels <- as.character(c(1:8,"","","X"))[chr_mid$Chromosome]


# Manhattan-style plot of A23 read ratios along the genome
p1 <- ggplot(data = A23, aes(x = coord, y = read_ratio, color=as.factor(Chromosome))) + 
	geom_point(size= size_point,stroke=0) +
	scale_color_manual(values=cb1) +
	theme2 +
	scale_x_continuous(breaks = breaks, labels = labels) + 
	xlab("Chromosome") + 
	ylab("Copy") + 
	labs(subtitle="A23")+
	# argument name spelled out in full (was the partial match `limit`)
	scale_y_continuous(breaks=c(0,1,2),limits = c(0, 2))
print(p1)





#----------------- HAMSTER retain mean RH pools (2) ---------------------


# Read in hamster-aligned sequence reads of the RH samples:
RH_hamster <- read.table("RH_hamster_gseq.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE)

# Get rows at beginning of each chromosome (posS == 0 and posE == 1e6):
RH_hamster_start <- RH_hamster[RH_hamster$posS == 0 & RH_hamster$posE == 1e6,]

# Get rid of ramp ups and ramp downs (though note that hamster has ramp downs,
# not ramp ups): keep only rows whose pos is exactly 1e4 above the previous row.
RH_hamster <- RH_hamster[c(0,diff(RH_hamster$pos)) == 1e4,]

# Combine RH_hamster without ramps and RH_hamster_start:
RH_hamster <- rbind(RH_hamster_start,RH_hamster)


# # get rid of contigs with only one entry. Has no action, since RH_hamster_gseq.txt already has smallest 5% of contigs removed. cf Create_RH_hamster_gseq.R
# RH_hamster <- RH_hamster[!(RH_hamster$Contig_ID %in% aggregate(pos ~ Contig_ID, 
          # data = RH_hamster, 
          # FUN = function(x){NROW(x)})[aggregate(pos ~ Contig_ID, 
          # data = RH_hamster, 
          # FUN = function(x){NROW(x)})$pos==1,"Contig_ID"]),]

# Sort by chromosome (chr1..chr10, chrX) and position:
chrOrder <- paste0("chr", c(1:10,"X"))
RH_hamster$Chromosome <- factor(RH_hamster$Chromosome, levels=chrOrder)
RH_hamster <- RH_hamster[order(RH_hamster$Chromosome, RH_hamster$pos), ]
RH_hamster$Chromosome <- as.character(RH_hamster$Chromosome)


# Transform reads into mean ratios for RH pools only: every column whose name
# ends in "_w0_d0" is divided by its own mean, then the normalised columns are
# averaged row by row.
RH_hamster$read_ratio <- rowMeans(apply(RH_hamster[,c(grep("_w0_d0$",colnames(RH_hamster)))],2,FUN=function(x) {x/mean(x)}))


# Transform chr1 etc. to numbers (X becomes 11)
RH_hamster$Chromosome <- gsub('chr', '', RH_hamster$Chromosome)
RH_hamster[RH_hamster$Chromosome == "X","Chromosome"] <- 11
chrOrder <- c(1:11)
RH_hamster$Chromosome <- factor(RH_hamster$Chromosome, levels=chrOrder)
RH_hamster <- RH_hamster[order(RH_hamster$Chromosome, RH_hamster$pos), ]
RH_hamster$Chromosome <- as.numeric(RH_hamster$Chromosome)

# Compute chromosome size (largest pos seen on each chromosome)
gen_coord <- aggregate(pos~Chromosome,FUN=max,data=RH_hamster)
colnames(gen_coord)[2] <- "chr_size"
gen_coord$Chromosome <- factor(gen_coord$Chromosome, levels=chrOrder)
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
gen_coord$Chromosome <- as.numeric(gen_coord$Chromosome)

# Use cumsum to make genome coordinates: each chromosome is offset by the
# summed sizes of all chromosomes before it (0 for the first one).
# head(., -1) drops the last cumulative sum however many chromosomes are
# present, instead of relying on a hard-coded index.
gen_coord$coord <- c(0, head(cumsum(gen_coord$chr_size), -1))

# Merge genome coordinates with RH_hamster (merge reorders rows, so sort again)
RH_hamster <- merge(RH_hamster,gen_coord[,c("Chromosome","coord")])
RH_hamster$Chromosome <- factor(RH_hamster$Chromosome, levels=chrOrder)
RH_hamster <- RH_hamster[order(RH_hamster$Chromosome, RH_hamster$pos), ]
RH_hamster$Chromosome <- as.numeric(RH_hamster$Chromosome)

# Position on the concatenated genome axis = chromosome offset + pos
RH_hamster$coord <- RH_hamster$pos + RH_hamster$coord

# find midpoints of chromosomes for breaks in ggplot
# Midpoint of the range spanned by x: the average of its largest and smallest value
mid <- function(x) {
	lo_hi <- range(x)
	(lo_hi[2] + lo_hi[1]) / 2
}
# Midpoint of every chromosome on the concatenated genome axis
chr_mid <- aggregate(coord~Chromosome,FUN = mid,data=RH_hamster)
colnames(chr_mid)[2] <- "mid"
chr_mid$Chromosome <- factor(chr_mid$Chromosome, levels=chrOrder)
chr_mid <- chr_mid[order(chr_mid$Chromosome), ]
chr_mid$Chromosome <- as.numeric(chr_mid$Chromosome)

# Define breaks as mid-points chromosomes
breaks <- chr_mid$mid


# attractive grey and skyblue color scheme (alternates between chromosomes)
cb1 <- c(rep(c("grey", "skyblue"), 5),"grey")

# Tick labels: chromosomes 9 and 10 get an empty label, 11 is shown as "X".
# Indexing by chromosome number keeps breaks and labels the same length even
# if a chromosome is absent from the data.
labels <- as.character(c(1:8,"","","X"))[chr_mid$Chromosome]


# Manhattan-style plot of the pool-averaged hamster read ratios
p2 <- ggplot(data = RH_hamster, aes(x = coord, y = read_ratio, color=as.factor(Chromosome))) + 
	geom_point(size= size_point,stroke=0) +
	scale_color_manual(values=cb1) +
	theme2 +
	scale_x_continuous(breaks = breaks, labels = labels) + 
	xlab("Chromosome") + 
	ylab("Copy") + 
	labs(subtitle="Hamster genome in RH pools")+
	# argument name spelled out in full (was the partial match `limit`)
	scale_y_continuous(breaks=c(0,1,2),limits = c(0, 2))
print(p2)


#----------------- HEK align to human (3) ---------------------

# Read in HEK293 sequence reads:
HEK <- read.table("HEK293_gseq.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE)

# Get rows at beginning of each chromosome (posS == 0 and posE == 1e6):
HEK_start <- HEK[HEK$posS == 0 & HEK$posE == 1e6,]

# Get rid of ramp ups and ramp downs: keep only rows whose pos is exactly 1e4
# above the previous row.
HEK <- HEK[c(0,diff(HEK$pos)) == 1e4,]

# Combine HEK without ramps and HEK_start:
HEK <- rbind(HEK_start,HEK)

# Sort by chromosome (chr1..chr22, chrX, chrY) and position:
chrOrder <- paste0("chr", c(1:22,"X","Y"))
HEK$Chromosome <- factor(HEK$Chromosome, levels=chrOrder)
HEK <- HEK[order(HEK$Chromosome, HEK$pos), ]
HEK$Chromosome <- as.character(HEK$Chromosome)


# Transform reads into ratios relative to the mean over all retained rows
HEK$read_ratio <- HEK$reads/mean(HEK$reads)


# Transform chr1 etc. to numbers (X becomes 23, Y becomes 24)
HEK$Chromosome <- gsub('chr', '', HEK$Chromosome)
HEK[HEK$Chromosome == "X","Chromosome"] <- 23
HEK[HEK$Chromosome == "Y","Chromosome"] <- 24
chrOrder <- c(1:24)
HEK$Chromosome <- factor(HEK$Chromosome, levels=chrOrder)
HEK <- HEK[order(HEK$Chromosome, HEK$pos), ]
HEK$Chromosome <- as.numeric(HEK$Chromosome)

# Compute chromosome size (largest pos seen on each chromosome)
gen_coord <- aggregate(pos~Chromosome,FUN=max,data=HEK)
colnames(gen_coord)[2] <- "chr_size"
gen_coord$Chromosome <- factor(gen_coord$Chromosome, levels=chrOrder)
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
gen_coord$Chromosome <- as.numeric(gen_coord$Chromosome)

# Use cumsum to make genome coordinates: each chromosome is offset by the
# summed sizes of all chromosomes before it (0 for the first one).
# head(., -1) drops the last cumulative sum however many chromosomes are
# present, instead of relying on a hard-coded index.
gen_coord$coord <- c(0, head(cumsum(gen_coord$chr_size), -1))

# Merge genome coordinates with HEK (merge reorders rows, so sort again)
HEK <- merge(HEK,gen_coord[,c("Chromosome","coord")])
HEK$Chromosome <- factor(HEK$Chromosome, levels=chrOrder)
HEK <- HEK[order(HEK$Chromosome, HEK$pos), ]
HEK$Chromosome <- as.numeric(HEK$Chromosome)

# Position on the concatenated genome axis = chromosome offset + pos
HEK$coord <- HEK$pos + HEK$coord

# Get rid of chrY (chromosome number 24)
HEK <- HEK[HEK$Chromosome != 24,]

# find midpoints of chromosomes for breaks in ggplot
# Midpoint of the range spanned by x: the average of its largest and smallest value
mid <- function(x) {
	lo_hi <- range(x)
	(lo_hi[2] + lo_hi[1]) / 2
}
# Midpoint of every chromosome on the concatenated genome axis
chr_mid <- aggregate(coord~Chromosome,FUN = mid,data=HEK)
colnames(chr_mid)[2] <- "mid"
chr_mid$Chromosome <- factor(chr_mid$Chromosome, levels=chrOrder)
chr_mid <- chr_mid[order(chr_mid$Chromosome), ]
chr_mid$Chromosome <- as.numeric(chr_mid$Chromosome)

# Define breaks as mid-points chromosomes
breaks <- chr_mid$mid


# attractive grey and skyblue color scheme (alternates between chromosomes)
cb1 <- rep(c("grey", "skyblue"), 12)

# Tick labels for chromosomes 1..22 and X (23); several chromosomes get an
# empty label. Indexing by chromosome number keeps breaks and labels the same
# length even if a chromosome is absent from the data.
labels <- as.character(c(1:9,"",11,"",13,"","",16,"","","",20,"","","X"))[chr_mid$Chromosome]


# Manhattan-style plot of HEK293 read ratios along the genome
p3 <- ggplot(data = HEK, aes(x = coord, y = read_ratio, color=as.factor(Chromosome))) + 
	geom_point(size= size_point,stroke=0) +
	scale_color_manual(values=cb1) +
	theme2 +
	scale_x_continuous(breaks = breaks, labels = labels) + 
	xlab("Chromosome") + 
	ylab("Copy") + 
	labs(subtitle="HEK293")+
	# argument name spelled out in full (was the partial match `limit`)
	scale_y_continuous(breaks=c(0,2,4),limits = c(0, 5))
print(p3)


#-----------------HUMAN retain mean RH pools (4) ---------------------

# Used seq reads for retention graphs to compare on equal footing with hamster and HEK293 "retentions"

# Prepare human retain RH pools 

# Read in human-aligned sequence reads of the RH samples:
RH_human_TK1 <- read.table("RH_human_gseq.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE)

# Get rows at beginning of each chromosome (posS == 0 and posE == 1e6):
RH_human_TK1_start <- RH_human_TK1[RH_human_TK1$posS == 0 & RH_human_TK1$posE == 1e6,]

# Get rid of ramp ups and ramp downs: keep only rows whose pos is exactly 1e4
# above the previous row.
RH_human_TK1 <- RH_human_TK1[c(0,diff(RH_human_TK1$pos)) == 1e4,]

# Combine RH_human_TK1 without ramps and RH_human_TK1_start:
RH_human_TK1 <- rbind(RH_human_TK1_start,RH_human_TK1)


# Sort by chromosome (chr1..chr22, chrX, chrY) and position:
chrOrder <- paste0("chr", c(1:22,"X","Y"))
RH_human_TK1$Chromosome <- factor(RH_human_TK1$Chromosome, levels=chrOrder)
RH_human_TK1 <- RH_human_TK1[order(RH_human_TK1$Chromosome, RH_human_TK1$pos), ]
RH_human_TK1$Chromosome <- as.character(RH_human_TK1$Chromosome)


# Transform chr1 etc. to numbers (X becomes 23, Y becomes 24)
RH_human_TK1$Chromosome <- gsub('chr', '', RH_human_TK1$Chromosome)
RH_human_TK1[RH_human_TK1$Chromosome == "X","Chromosome"] <- 23
RH_human_TK1[RH_human_TK1$Chromosome == "Y","Chromosome"] <- 24
chrOrder <- c(1:24)
RH_human_TK1$Chromosome <- factor(RH_human_TK1$Chromosome, levels=chrOrder)
RH_human_TK1 <- RH_human_TK1[order(RH_human_TK1$Chromosome, RH_human_TK1$pos), ]
RH_human_TK1$Chromosome <- as.numeric(RH_human_TK1$Chromosome)

# Compute chromosome size (largest pos seen on each chromosome)
gen_coord <- aggregate(pos~Chromosome,FUN=max,data=RH_human_TK1)
colnames(gen_coord)[2] <- "chr_size"
gen_coord$Chromosome <- factor(gen_coord$Chromosome, levels=chrOrder)
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
gen_coord$Chromosome <- as.numeric(gen_coord$Chromosome)

# Use cumsum to make genome coordinates: each chromosome is offset by the
# summed sizes of all chromosomes before it (0 for the first one).
# head(., -1) drops the last cumulative sum however many chromosomes are
# present, instead of relying on a hard-coded index.
gen_coord$coord <- c(0, head(cumsum(gen_coord$chr_size), -1))

# Merge genome coordinates with RH_human_TK1 (merge reorders rows, so sort again)
RH_human_TK1 <- merge(RH_human_TK1,gen_coord[,c("Chromosome","coord")])
RH_human_TK1$Chromosome <- factor(RH_human_TK1$Chromosome, levels=chrOrder)
RH_human_TK1 <- RH_human_TK1[order(RH_human_TK1$Chromosome, RH_human_TK1$pos), ]
RH_human_TK1$Chromosome <- as.numeric(RH_human_TK1$Chromosome)

# Position on the concatenated genome axis = chromosome offset + pos
RH_human_TK1$coord <- RH_human_TK1$pos + RH_human_TK1$coord

# Decided to get rid of chrY (cf below), because even though its reads shd contribute to chrX, large segments chrY are non-pseudoautosomal, so artifactually decreases genome median.
# get rid of chrY, because no chrY seq in hamster genome
RH_human_TK1 <- RH_human_TK1[RH_human_TK1$Chromosome != 24,]



# Keep the first four columns, the genome coordinate (last column, appended by
# the merge above) and every sample column whose name ends in "_w0_d0".
# After this step coord is column 5 and the samples start at column 6.
RH_human_TK1 <- RH_human_TK1[,c(1:4, ncol(RH_human_TK1), grep("_w0_d0$",colnames(RH_human_TK1)))]


#  find middle TK1 

# Gene annotation table; check.names=FALSE keeps column names such as "5utrS" unchanged
gencode_gtf_ensembl_ucsc <- read.delim("gencode_gtf_ensembl_ucsc_v31.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)


# Show the TK1 entry (sample output kept below for reference)
gencode_gtf_ensembl_ucsc[gencode_gtf_ensembl_ucsc$geneSymbol=="TK1",]
     # Chromosome         gene_id           tx_id geneSymbol strand    geneS    geneE geneLength txLength cdsLength    5utrS    5utrE 5utrDiff    3utrS    3utrE 3utrDiff
# 49401      chr17 ENSG00000167900 ENST00000588734        TK1      - 78174091 78187233      13143     1681       804 78186995 78187233      239 78174121 78174758      638
      # exonCount      gene_type                                       gene_description
# 49401         6 protein_coding thymidine kinase 1 [Source:HGNC Symbol;Acc:HGNC:11830]


# Midpoint of the TK1 gene. The start and end must be combined with c():
# mean(geneS, geneE) would pass geneE as the `trim` argument of mean() and
# return geneS instead of the midpoint.
TK1_coord <- mean(c(
	gencode_gtf_ensembl_ucsc[gencode_gtf_ensembl_ucsc$geneSymbol=="TK1","geneS"],
	gencode_gtf_ensembl_ucsc[gencode_gtf_ensembl_ucsc$geneSymbol=="TK1","geneE"]
))






#  find peak TK1 reads 

# For TK1 retention, the peak TK1 reads are computed here as a vector with one
# value per individual sample, unlike for seq retention, where the peak TK1 from
# the average across the six RH pools is used.


# For a more accurate search of the TK1 peak, restrict the search to within
# delta_1 of TK1, because CEN and TEL become larger than TK1 in some samples
delta_1 <- 1e6

RH_human_TK1_subset <- RH_human_TK1[RH_human_TK1$Chromosome==17 & RH_human_TK1$pos >= TK1_coord-delta_1 & RH_human_TK1$pos <= TK1_coord+delta_1,]

# Sample columns are those whose names end in "_w0_d0" (columns 6 onwards;
# column 5 is coord). Both statistics below must use the same columns: the
# previous 5:(ncol - 1) range for the peak position included coord and left
# out the last sample.
TK1_sample_cols <- grep("_w0_d0$", colnames(RH_human_TK1_subset))

# Peak read count of each sample inside the TK1 window
TK1_max <- apply(RH_human_TK1_subset[, TK1_sample_cols, drop = FALSE], 2, max)

# Genome coordinate at which each sample reaches that peak
TK1_max_coord <- apply(RH_human_TK1_subset[, TK1_sample_cols, drop = FALSE], 2, FUN = function(x) {RH_human_TK1_subset[,"coord"][which.max(x)]})




#  calculate retention assuming TK1 is 100% retention 

# Divide every sample column by that sample's own TK1 peak
for (i in names(TK1_max)) {
	RH_human_TK1[[i]] <- RH_human_TK1[[i]] / TK1_max[[i]]
}

# Row-wise mean retention across the six RH pools
RH_human_TK1$mean_retent <- rowMeans(
	RH_human_TK1[, c(
		"RH1_w0_d0", "RH2_w0_d0", "RH3_w0_d0",
		"RH4_w0_d0", "RH5_w0_d0", "RH6_w0_d0"
	)]
)

# Standard error of the mean of x; NA values are ignored both in the variance
# and in the count of observations
sem <- function(x) {
	n_obs <- sum(!is.na(x))
	sqrt(var(x, na.rm = TRUE) / n_obs)
}


# Row-wise standard error of the retention across the six RH pools
RH_human_TK1$sem_retent <- apply(
	RH_human_TK1[, c(
		"RH1_w0_d0", "RH2_w0_d0", "RH3_w0_d0",
		"RH4_w0_d0", "RH5_w0_d0", "RH6_w0_d0"
	)],
	1,
	sem
)


# Renormalize the TK1 peak of the mean to 1, to account for slight differences
# in the TK1 peak position among the six RH pools.
# The peak search is again restricted to within delta_1 of TK1, because CEN and
# TEL become larger than TK1 in some samples.
delta_1 <- 1e6

near_TK1 <- with(
	RH_human_TK1,
	Chromosome == 17 & pos >= TK1_coord - delta_1 & pos <= TK1_coord + delta_1
)
RH_human_TK1_subset <- RH_human_TK1[near_TK1, ]

# Height and genome coordinate of the mean-retention peak inside the window
TK1_max <- max(RH_human_TK1_subset$mean_retent)
TK1_max_coord <- RH_human_TK1_subset$coord[which.max(RH_human_TK1_subset$mean_retent)]

# Scale mean and SEM by the same peak height
RH_human_TK1[c("mean_retent", "sem_retent")] <-
	RH_human_TK1[c("mean_retent", "sem_retent")] / TK1_max


# RH_human_TK1$mean_retent_upper <- RH_human_TK1$mean_retent + RH_human_TK1$sem_retent
# RH_human_TK1$mean_retent_lower <- RH_human_TK1$mean_retent - RH_human_TK1$sem_retent






# balloon_scale <- 0.5 # inflation factor for significant points	
# size_point <- 0.1*(1 + balloon_scale*(logP[logP$Chromosome==paste0("chr",i),"log10p_g_0nM"]/max(logP[logP$Chromosome==paste0("chr",i),"log10p_g_0nM"]))) # scale significant points



# If desired, modify balloon code. Probably not a good idea in this context, though.
# balloon_scale <- 0.8 # inflation factor for significant points	
# # scale significant points beginning with 0.8 pt
# size_point <- 0.8*(1 + balloon_scale*(bleed$A23_T_HUM_ratio_norm/max(bleed$A23_T_HUM_ratio_norm, na.rm=TRUE)))


# find midpoints of chromosomes for breaks in ggplot. Not necessary for single chr
# Midpoint of the range spanned by x: the average of its largest and smallest value
mid <- function(x) {
	lo_hi <- range(x)
	(lo_hi[2] + lo_hi[1]) / 2
}
# Midpoint of every chromosome on the concatenated genome axis
chr_mid <- aggregate(coord~Chromosome,FUN = mid,data= RH_human_TK1)
colnames(chr_mid)[2] <- "mid"
chr_mid$Chromosome <- factor(chr_mid$Chromosome, levels=chrOrder)
chr_mid <- chr_mid[order(chr_mid$Chromosome), ]
chr_mid$Chromosome <- as.numeric(chr_mid$Chromosome)

# Define breaks as mid-points chromosomes
breaks <- chr_mid$mid


# attractive grey and skyblue color scheme (alternates between chromosomes)
cb1 <- rep(c("grey", "skyblue"), 12)

# Tick labels for chromosomes 1..22 and X (23); several chromosomes get an
# empty label. Indexing by chromosome number keeps breaks and labels the same
# length even if a chromosome is absent from the data.
labels <- as.character(c(1:9,"",11,"",13,"","",16,"","","",20,"","","X"))[chr_mid$Chromosome]





# Genome-wide mean retention (x axis in Mb), with a "TK1" label placed above
# the highest mean retention found on chromosome 17
p4 <- ggplot() + 
		geom_point(
			data = RH_human_TK1, 
			aes(x = coord/1e6, 
				y = mean_retent,
				color=as.factor(Chromosome)
				),
			size= size_point,
			stroke=0
			) +
		# geom_ribbon(
			# data=RH_human_TK1,
			# aes(
				# x= coord/1e6, 
				# ymin = mean_retent-sem_retent, 
				# ymax = mean_retent+sem_retent
				# ),
			# lwd=0.2,
			# fill="grey50",
		    # alpha=0.3,
			# show.legend=FALSE
			# ) +
		geom_text(
			data = RH_human_TK1[RH_human_TK1$Chromosome==17,][which.max(RH_human_TK1[RH_human_TK1$Chromosome==17,"mean_retent"]),], 
			aes(
				x = coord/1e6, 
				y = mean_retent
				), 
			label="TK1", 
			colour = "black", 
			size = 3, 
			nudge_y = 0.1
			) +
	scale_color_manual(values=cb1) +
	theme2 +
	# breaks are divided by 1e6 to match the Mb scaling of x
	scale_x_continuous(breaks = breaks/1e6, labels = labels) + 
	xlab("Chromosome") + 
	ylab("Copy") + 
	labs(subtitle="Human genome in RH pools") +
	# argument name spelled out in full (was the partial match `limit`)
	scale_y_continuous(breaks=c(0,1),limits = c(0, 1.5))
print(p4)





# ------------------------ plot chr2 retent +/- sem (5) -------------------------------





# Centromere table; the columns name, chromStart and chromEnd are used below
cen <- read.table("hg38_centromere.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE)







# Chromosome 2: mean retention (points) with a +/- SEM ribbon. The entry named
# "GJ211860.1" in cen (presumably the chromosome 2 centromere; verify against
# hg38_centromere.txt) is drawn as a red bar at y = -0.02, with a "CEN" label
# above its midpoint at y = 0.5.
p5 <- ggplot() + 
		geom_point(
			data = RH_human_TK1[RH_human_TK1$Chromosome == 2,], 
			aes(x = pos/1e6, 
				y = mean_retent
				),
			size= size_point,
			color="blue",
			stroke=0
			) +
		geom_ribbon(
			data=RH_human_TK1[RH_human_TK1$Chromosome == 2,],
			aes(
				x= pos/1e6, 
				ymin = mean_retent-sem_retent, 
				ymax = mean_retent+sem_retent
				),
			lwd=0.2,
			fill="grey50",
		    alpha=0.3,
			show.legend=FALSE
			) +
		geom_text(
			data = data.frame(y = 0.5, pos = mean(c(cen[cen$name=="GJ211860.1","chromStart"],cen[cen$name=="GJ211860.1","chromEnd"]))), 
			aes(
				x = pos/1e6, 
				y = y
				), 
			label="CEN", 
			colour = "black", 
			size = 3, 
			nudge_y = 0
			) +
		geom_segment(
			aes(
				x = cen[cen$name=="GJ211860.1","chromStart"]/1e6, 
				y = -0.02, 
				xend = cen[cen$name=="GJ211860.1","chromEnd"]/1e6, 
				yend = -0.02),
			color = "red",
			lwd = 2
			) +
	theme2 +
	# scale_x_continuous(breaks = breaks, labels = labels) + 
	scale_x_continuous() + 
	xlab("Chromosome 2 (Mb)") + 
	ylab("Copy") + 
	labs(subtitle="Human genome in RH pools") +
	# argument name spelled out in full (was the partial match `limit`)
	scale_y_continuous(breaks=c(0,0.5),limits = c(-0.02, 0.55))
print(p5)








# -------------------- plot chr17 retent +/- sem (6) -------------------------------------------







# Chromosome 17: mean retention (points) with a +/- SEM ribbon and a "TK1"
# label above the highest mean retention on the chromosome. The red bar at
# y = -0.02 runs from the start of the cen entry "GJ212053.1" to the end of the
# entry "GJ212055.1" (presumably the chromosome 17 centromere; verify against
# hg38_centromere.txt); "CEN" is written above its midpoint at y = 0.59.
p6 <- ggplot() + 
		geom_point(
			data = RH_human_TK1[RH_human_TK1$Chromosome == 17,], 
			aes(x = pos/1e6, 
				y = mean_retent
				),
			size= size_point,
			color="blue",
			stroke=0
			) +
		geom_ribbon(
			data=RH_human_TK1[RH_human_TK1$Chromosome == 17,],
			aes(
				x= pos/1e6, 
				ymin = mean_retent-sem_retent, 
				ymax = mean_retent+sem_retent
				),
			lwd=0.2,
			fill="grey50",
		    alpha=0.3,
			show.legend=FALSE
			) +
		geom_text(
			data = RH_human_TK1[RH_human_TK1$Chromosome==17,][which.max(RH_human_TK1[RH_human_TK1$Chromosome==17,"mean_retent"]),], 
			aes(
				x = pos/1e6, 
				y = mean_retent
				), 
			label="TK1", 
			colour = "black", 
			size = 3, 
			nudge_y = 0.05
			) +
		geom_text(
			data = data.frame(y = 0.59, pos = mean(c(cen[cen$name=="GJ212053.1","chromStart"],cen[cen$name=="GJ212055.1","chromEnd"]))), 
			aes(
				x = pos/1e6, 
				y = y
				), 
			label="CEN", 
			colour = "black", 
			size = 3, 
			nudge_y = 0
			) +
		geom_segment(
			aes(
				x = cen[cen$name=="GJ212053.1","chromStart"]/1e6, 
				y = -0.02, 
				xend = cen[cen$name=="GJ212055.1","chromEnd"]/1e6, 
				yend = -0.02),
			color="red",
			lwd = 2
			) +
		# Alternative kept for reference: draw the two cen entries as separate bars
		# geom_segment(
			# aes(
				# x = cen[cen$name=="GJ212053.1","chromStart"]/1e6, 
				# y = -0.02, 
				# xend = cen[cen$name=="GJ212053.1","chromEnd"]/1e6, 
				# yend = -0.02),
			# color="red"
			# ) +
		# geom_segment(
			# aes(
				# x = cen[cen$name=="GJ212055.1","chromStart"]/1e6, 
				# y = -0.03, 
				# xend = cen[cen$name=="GJ212055.1","chromEnd"]/1e6, 
				# yend = -0.03),
			# color="red"
			# ) +
	theme2 +
	# scale_x_continuous(breaks = breaks, labels = labels) + 
	scale_x_continuous() + 
	xlab("Chromosome 17 (Mb)") + 
	ylab("Copy") + 
	labs(subtitle="Human genome in RH pools")+
	# argument name spelled out in full (was the partial match `limit`)
	scale_y_continuous(breaks=c(0,1),limits = c(-0.02, 1.1))
print(p6)






#------------------Make file --------------------------



# Write the six panels, labelled A-F and arranged in 2 columns x 3 rows, to a
# 7.5 x 10 inch PNG at 300 dpi.
png("HEK_A23_align_retain_sem.png",width=7.5,height=10,units="in",res=300)
# print() explicitly, as for p1..p6: a bare plot_grid(...) expression is only
# drawn by top-level auto-printing, which does not happen when this script is
# run through source() with default arguments, leaving an empty PNG.
print(plot_grid(p1, p2, p3, p4, p5, p6, labels=c("A", "B", "C", "D", "E", "F"), ncol = 2, nrow = 3, label_size = 14))
dev.off()


















