# Plot relative changes in human DNA copy number. Note: these are relative changes on a log2 scale, not retention; retention shows a systematic decrease with time (and, to a lesser extent, with drug).

library(cowplot) # for plot_grid
library(reshape2) # for melt
library(ggplot2) # for ggplot

# Read the tab-delimited table. Columns 1-4 are treated as window
# annotation (Chromosome, posS, posE and pos are referenced below) and
# columns 5 onwards as per-sample values.
RH_human <- read.table(
  "RH_human_gseq.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# First window of each chromosome (posS == 0 and posE == 1e6).
RH_human_start <- with(RH_human, RH_human[posS == 0 & posE == 1e6, ])

# Author's note: ramp ups and ramp downs are removed because, unlike the
# mixed model, nothing here compensates for low sequence reads; in any event
# it makes little difference. (RH_human has ramp downs, not ramp ups.)
# Only rows whose `pos` is exactly 1e4 beyond the previous row are kept.
RH_human <- RH_human[c(0, diff(RH_human[["pos"]])) == 1e4, ]

# Put the chromosome-start windows back in front of the filtered rows.
RH_human <- rbind(RH_human_start, RH_human)

# Sort by chromosome (chr1..chr22, chrX, chrY) and then by position.
# Chromosome labels outside chrOrder become NA and sort last.
chrOrder <- paste0("chr", c(1:22, "X", "Y"))
RH_human <- RH_human[
  order(match(RH_human$Chromosome, chrOrder), RH_human$pos),
]
RH_human$Chromosome <- chrOrder[match(RH_human$Chromosome, chrOrder)]

# Start from the full table ...
RH_human_log <- RH_human

# ... then downsample to every 100th row. Author's note: downsampling helps
# remove window autocorrelation and is more accurate; results with
# overlapping windows are closer to the copy-number-change vs genome
# coordinate figures, but it makes little difference either way.
# To keep the overlapping 1 Mb windows, comment out the following statement:
RH_human_log <- RH_human[seq(1, nrow(RH_human), by = 100), ]

# log2-transform every sample column, then mean-centre it. The mean skips
# infinite values (log2(0) is -Inf) but not NA/NaN.
RH_human_log[5:ncol(RH_human_log)] <- lapply(
  RH_human_log[5:ncol(RH_human_log)],
  function(values) {
    logged <- log2(values)
    logged - mean(logged[!is.infinite(logged)])
  }
)


# ---------- differences in log2 copy number as a result of growth ---------------

# Build the growth deltas: within each pool (RH1-RH6) and each `d` condition
# (d0, d8, d25, d75), subtract one baseline sample from every sample of that
# pool and condition, whatever its `w` value.
# NOTE(review): presumably `w` is weeks of growth and `d` the paclitaxel
# dose -- confirm against the sample sheet.
RH_human_log_delta_g <- local({
  sample_names <- colnames(RH_human_log)
  pools <- paste0("RH", 1:6)
  conditions <- c("d0", "d8", "d25", "d75")

  # Baseline `w` per pool (rows) and condition (columns): w0 for d0 and w1 for
  # the other conditions, except that RH5 and RH6 use w3 for d75.
  baseline_week <- matrix(
    c(0, 1, 1, 1),
    nrow = length(pools), ncol = length(conditions), byrow = TRUE,
    dimnames = list(pools, conditions)
  )
  baseline_week[c("RH5", "RH6"), "d75"] <- 3

  deltas <- vector("list", length(pools) * length(conditions))
  slot <- 0L
  for (pool in pools) {
    for (condition in conditions) {
      baseline <- paste0(
        pool, "_w", baseline_week[pool, condition], "_", condition
      )
      # Fail loudly instead of silently subtracting NULL.
      if (!baseline %in% sample_names) {
        stop("Baseline column not found in RH_human_log: ", baseline,
             call. = FALSE)
      }
      # Same (unanchored) pattern as before, e.g. "RH1_w._d0".
      same_group <- grepl(paste0(pool, "_w._", condition), sample_names)
      slot <- slot + 1L
      # drop = FALSE keeps a data frame (and the column name) even when only
      # one sample matches.
      deltas[[slot]] <-
        RH_human_log[, same_group, drop = FALSE] - RH_human_log[[baseline]]
    }
  }
  delta <- do.call(cbind, deltas)

  # transform -Inf/Inf values into NA
  is.na(delta) <- do.call(cbind, lapply(delta, is.infinite))

  # Remove columns that sum to zero: these are the baselines subtracted from
  # themselves (and any column with no usable values at all).
  delta[, colSums(delta, na.rm = TRUE) != 0, drop = FALSE]
})


# ----------------------------- ggplot ---------------------------------



# from https://stackoverflow.com/questions/8197559/emulate-ggplot2-default-color-palette
# function for default ggplot2 colors

# Return `n` colours evenly spaced around the HCL hue wheel (hues from 15 to
# 375 degrees, luminance 65, chroma 100), as hex strings.
#
# n: number of colours wanted (non-negative integer).
# Returns a character vector of length n; character(0) when n is 0.
gg_color_hue <- function(n) {
  # n + 1 points are generated because 15 and 375 are the same hue; the
  # duplicate end point is discarded below.
  hues <- seq(15, 375, length.out = n + 1)
  # seq_len(n) rather than 1:n, so that n = 0 yields no colours (1:0 is
  # c(1, 0) and would return one colour).
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}

	
# Base theme shared by all four panels: blank panel, thin black axes, and
# fixed text sizes. Individual plots override parts of it with theme().
theme2 <- theme(
  plot.margin = unit(c(t = 1.2, r = 0.4, b = 1.2, l = 0.4), "cm"),

  # No grid lines and no panel background.
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),

  # Thin black axis lines and tick marks.
  axis.line.x = element_line(colour = "black", size = 0.1),
  axis.line.y = element_line(colour = "black", size = 0.1),
  axis.ticks = element_line(colour = "black", size = 0.1),

  # Tick labels and axis titles.
  axis.text = element_text(size = 12),
  axis.title = element_text(size = 14),
  # margin() takes top, right, bottom, left: 13 units to the right of the
  # y-axis title and 10 units above the x-axis title.
  axis.title.y = element_text(margin = margin(0, 13, 0, 0)),
  axis.title.x = element_text(margin = margin(10, 0, 0, 0)),

  # Panel letters ("A", "B", ...) could be supplied through ggtitle(), but
  # plot_grid() is used instead because it can shift them further left.
  plot.title = element_text(size = 32, face = "bold", hjust = -0.14),
  # hjust = 0.5 centres the subtitle.
  plot.subtitle = element_text(size = 14, face = "plain", hjust = 0.5)
)

# Point size and horizontal-line width constants.
size_point <- 0.3
size_hline <- 0.1
	
# --------- Plot growth copy changes -------------------------


# Low and high ends of the fill gradient used for the correlation heatmap.
colores_1 <- c("#deebf7", "#3182bd")

# Pairwise-complete correlations between all growth delta columns, melted to
# long format (Var1, Var2, value). Var2's levels are reversed, which flips
# the order of the y axis relative to the x axis.
data_g <- melt(cor(RH_human_log_delta_g, use = "pairwise.complete.obs"))
data_g$Var2 <- factor(data_g$Var2, levels = rev(levels(data_g$Var2)))

p1 <- ggplot(data = data_g, mapping = aes(x = Var1, y = Var2)) +
  geom_tile(mapping = aes(fill = value)) +
  scale_fill_gradient(
    name = "R",
    low = colores_1[1],
    high = colores_1[2],
    limits = c(-1, 1),
    breaks = c(-1, 0, 1),
    labels = c(-1, 0, 1)
  ) +
  coord_fixed() +
  theme2 +
  theme(
    plot.margin = unit(c(1.0, 0.2, 1.0, 1.8), "cm"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    axis.line.x = element_blank(),
    axis.line.y = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(
      size = 12, angle = 45, margin = margin(-1, 0, 0, 0),
      vjust = 0.5, hjust = 0.5
    ),
    legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = -2, unit = "pt"),
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(0.15, "cm"),
    legend.spacing.y = unit(0.15, "cm"),
    legend.spacing.x = unit(0.15, "cm"),
    legend.title = element_text(size = 11, margin = margin(0, 0, 3, 0)),
    legend.text = element_text(size = 10),
    legend.title.align = 0.0
  ) +
  # Show one tick label per pool, placed at a single sample of that pool.
  scale_x_discrete(
    breaks = c(
      "RH1_w6_d8", "RH2_w4_d8", "RH3_w4_d8",
      "RH4_w4_d8", "RH5_w3_d8", "RH6_w3_d8"
    ),
    labels = c("RH1", "RH2", "RH3", "RH4", "RH5", "RH6")
  ) +
  scale_y_discrete(
    breaks = c(
      "RH1_w6_d8", "RH2_w4_d8", "RH3_w4_d8",
      "RH4_w4_d8", "RH5_w3_d8", "RH6_w3_d8"
    ),
    labels = c("RH1", "RH2", "RH3", "RH4", "RH5", "RH6")
  ) +
  # No axis titles; the subtitle names the panel.
  labs(x = element_blank(), y = element_blank(), subtitle = "Growth")
print(p1)




# ---------- differences in log2 copy number as a result of paclitaxel ---------------

# Build the paclitaxel deltas: within each pool (RH1-RH6) and each `w` value
# (1, 2, 3, 4, 6), subtract that pool's d0 sample from every `d` sample with
# the same `w`.
# NOTE(review): presumably `w` is weeks of growth and `d` the paclitaxel
# dose -- confirm against the sample sheet.
RH_human_log_delta_d <- local({
  sample_names <- colnames(RH_human_log)
  pools <- paste0("RH", 1:6)
  weeks <- c(1, 2, 3, 4, 6)

  deltas <- vector("list", length(pools) * length(weeks))
  slot <- 0L
  for (pool in pools) {
    for (week in weeks) {
      stem <- paste0(pool, "_w", week, "_d")
      baseline <- paste0(stem, "0")
      # Fail loudly instead of silently subtracting NULL.
      if (!baseline %in% sample_names) {
        stop("Baseline column not found in RH_human_log: ", baseline,
             call. = FALSE)
      }
      # Explicit anchored regex, e.g. "^RH1_w1_d\\d+$". This is exactly what
      # glob2rx('RH1_w1_d\\d+') produced, but glob2rx() is meant for
      # wildcard patterns, not regular expressions.
      same_week <- grepl(paste0("^", stem, "\\d+$"), sample_names)
      slot <- slot + 1L
      # drop = FALSE keeps a data frame (and the column name) even when only
      # one sample matches.
      deltas[[slot]] <-
        RH_human_log[, same_week, drop = FALSE] - RH_human_log[[baseline]]
    }
  }
  delta <- do.call(cbind, deltas)

  # transform -Inf/Inf values into NA
  is.na(delta) <- do.call(cbind, lapply(delta, is.infinite))

  # Remove columns that sum to zero: these are the d0 baselines subtracted
  # from themselves (and any column with no usable values at all).
  delta[, colSums(delta, na.rm = TRUE) != 0, drop = FALSE]
})

# --------- Plot paclitaxel copy changes -------------------------

# Low and high ends of the fill gradient used for the correlation heatmap.
colores_1 <- c("#deebf7", "#3182bd")

# Pairwise-complete correlations between all paclitaxel delta columns, melted
# to long format (Var1, Var2, value). Var2's levels are reversed, which flips
# the order of the y axis relative to the x axis.
data_d <- melt(cor(RH_human_log_delta_d, use = "pairwise.complete.obs"))
data_d$Var2 <- factor(data_d$Var2, levels = rev(levels(data_d$Var2)))

p2 <- ggplot(data = data_d, mapping = aes(x = Var1, y = Var2)) +
  geom_tile(mapping = aes(fill = value)) +
  scale_fill_gradient(
    name = "R",
    low = colores_1[1],
    high = colores_1[2],
    limits = c(-1, 1),
    breaks = c(-1, 0, 1),
    labels = c(-1, 0, 1)
  ) +
  coord_fixed() +
  theme2 +
  theme(
    plot.margin = unit(c(1.0, 0.2, 1.0, 1.8), "cm"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    axis.line.x = element_blank(),
    axis.line.y = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(
      size = 12, angle = 45, margin = margin(-1, 0, 0, 0),
      vjust = 0.5, hjust = 0.5
    ),
    legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = -2, unit = "pt"),
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(0.15, "cm"),
    legend.spacing.y = unit(0.15, "cm"),
    legend.spacing.x = unit(0.15, "cm"),
    legend.title = element_text(size = 11, margin = margin(0, 0, 3, 0)),
    legend.text = element_text(size = 10),
    legend.title.align = 0.0
  ) +
  # Show one tick label per pool, placed at a single sample of that pool.
  scale_x_discrete(
    breaks = c(
      "RH1_w3_d25", "RH2_w3_d25", "RH3_w3_d25",
      "RH4_w3_d25", "RH5_w3_d25", "RH6_w3_d25"
    ),
    labels = c("RH1", "RH2", "RH3", "RH4", "RH5", "RH6")
  ) +
  scale_y_discrete(
    breaks = c(
      "RH1_w3_d25", "RH2_w3_d25", "RH3_w3_d25",
      "RH4_w3_d25", "RH5_w3_d25", "RH6_w3_d25"
    ),
    labels = c("RH1", "RH2", "RH3", "RH4", "RH5", "RH6")
  ) +
  # No axis titles; the subtitle names the panel.
  labs(x = element_blank(), y = element_blank(), subtitle = "Paclitaxel")
print(p2)


# ~~~~~~~~~~~~~~ PCA growth ~~~~~~~~~~~~~~~~~~~~~

# Use EVERY growth delta column. RH_human_log_delta_g is assembled from
# grepl-selected sample columns only, so it has no leading annotation
# columns; the previous `[, 5:ncol(...)]` subset silently dropped the first
# four samples from the PCA.
pool_svd_g <- as.matrix(RH_human_log_delta_g)

# Replace any remaining Inf/-Inf with NA.
pool_svd_g[is.infinite(pool_svd_g)] <- NA

# Replace NAs with the mean of their row (per the author, zero or
# global-mean imputation give similar results; see the alternatives below).
k <- which(is.na(pool_svd_g), arr.ind = TRUE)
pool_svd_g[k] <- rowMeans(pool_svd_g, na.rm = TRUE)[k[, 1]]
pool_svd_g <- as.data.frame(pool_svd_g)

# Remove rows that contain only NAs (~4% per the author), which cannot be
# filled in by row means.
pool_svd_g <- na.omit(pool_svd_g)

# # or replace NAs with 0. Gives similar results
# pool_svd_g[is.na(pool_svd_g)] <- 0

# # or replace NAs with global means, gives similar results.
# pool_svd_g <- as.matrix(pool_svd_g)
# pool_svd_g[is.na(pool_svd_g)] <- mean(pool_svd_g, na.rm=TRUE)
# pool_svd_g <- as.data.frame(pool_svd_g)

# SVD on mean-centred (and preferably scaled) data is equivalent to PCA, cf.
# http://genomicsclass.github.io/book/pages/pca_svd.html
# https://stats.stackexchange.com/questions/134282/relationship-between-svd-and-pca-how-to-use-svd-to-perform-pca
# svd() gives similar results to prcomp(), but with different scaling.

# Each row (genomic window) is centred and scaled across samples. A row with
# zero standard deviation would become 0/0 = NaN and make svd() fail, so such
# rows are dropped first.
standev_g <- apply(pool_svd_g, 1, sd)
pool_svd_g <- pool_svd_g[standev_g > 0, , drop = FALSE]
standev_g <- standev_g[standev_g > 0]
svd_g <- svd((pool_svd_g - rowMeans(pool_svd_g)) / standev_g)

# Fraction of variance due to PC1 and PC2. Wrapped in print() so the values
# are also shown when the script is run through source().
# NOTE(review): the values previously recorded here for the paper
# (PC1 0.1573744, PC2 0.1481887) came from the run that excluded the first
# four delta columns; re-run and update them.
print(svd_g$d[1]^2 / sum(svd_g$d^2))
print(svd_g$d[2]^2 / sum(svd_g$d^2))

# One row per sample: its loadings on the first two right singular vectors.
svd_g_data <- as.data.frame(svd_g$v[, 1:2])
colnames(svd_g_data) <- c("PC1", "PC2")

# Collapse each sample name to its pool label ("RH1" ... "RH6").
svd_g_data$RH_pool <- sub("^(RH[1-6]).*$", "\\1", colnames(pool_svd_g))

# One default-ggplot2-style colour per pool.
n <- length(unique(svd_g_data$RH_pool))
colores_1 <- gg_color_hue(n)

p3 <- ggplot() +
  theme2 +
  theme(legend.key = element_blank()) +
  geom_point(
    data = svd_g_data,
    mapping = aes(x = PC1, y = PC2, colour = RH_pool),
    shape = 1,
    stroke = 0.4,
    size = 1.0
  ) +
  scale_color_manual(
    values = colores_1,
    name = "Pool",
    labels = c(1, 2, 3, 4, 5, 6)
  ) +
  guides(
    colour = guide_legend(
      override.aes = list(fill = NA, shape = 1, size = 1),
      ncol = 1,
      byrow = TRUE
    )
  ) +
  theme(
    plot.margin = unit(c(1.0, 0.5, 1.0, 1.0), "cm"),
    legend.position = "right",
    legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.key.height = unit(0.25, "cm"),
    legend.key.width = unit(0.1, "cm"),
    legend.spacing.y = unit(0.07, "cm"),
    legend.spacing.x = unit(0.07, "cm"),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.title.align = 0.0,
    # margin() takes top, right, bottom, left: space to the right of the
    # y-axis title and above the x-axis title.
    axis.title.y = element_text(margin = margin(0, 13, 0, 0)),
    axis.title.x = element_text(margin = margin(10, 0, 0, 0))
  ) +
  xlab("PC1") +
  ylab(expression("PC2")) +
  labs(subtitle = "Growth PCA")
print(p3)


# ~~~~~~~~~~~~~~ PCA paclitaxel ~~~~~~~~~~~~~~~~~~~~~

# Use EVERY paclitaxel delta column. RH_human_log_delta_d is assembled from
# grepl-selected sample columns only, so it has no leading annotation
# columns; the previous `[, 5:ncol(...)]` subset silently dropped the first
# four samples from the PCA.
pool_svd_d <- as.matrix(RH_human_log_delta_d)

# Replace any remaining Inf/-Inf with NA.
pool_svd_d[is.infinite(pool_svd_d)] <- NA

# Replace NAs with the mean of their row (per the author, zero or
# global-mean imputation give similar results; see the alternatives below).
k <- which(is.na(pool_svd_d), arr.ind = TRUE)
pool_svd_d[k] <- rowMeans(pool_svd_d, na.rm = TRUE)[k[, 1]]
pool_svd_d <- as.data.frame(pool_svd_d)

# Remove rows that contain only NAs (~4% per the author), which cannot be
# filled in by row means.
pool_svd_d <- na.omit(pool_svd_d)

# # or replace NAs with 0. Gives similar results
# pool_svd_d[is.na(pool_svd_d)] <- 0

# # or replace NAs with global means, gives similar results.
# pool_svd_d <- as.matrix(pool_svd_d)
# pool_svd_d[is.na(pool_svd_d)] <- mean(pool_svd_d, na.rm=TRUE)
# pool_svd_d <- as.data.frame(pool_svd_d)

# SVD on mean-centred (and preferably scaled) data is equivalent to PCA, cf.
# http://genomicsclass.github.io/book/pages/pca_svd.html
# https://stats.stackexchange.com/questions/134282/relationship-between-svd-and-pca-how-to-use-svd-to-perform-pca
# svd() gives similar results to prcomp(), but with different scaling.

# Each row (genomic window) is centred and scaled across samples. A row with
# zero standard deviation would become 0/0 = NaN and make svd() fail, so such
# rows are dropped first.
standev_d <- apply(pool_svd_d, 1, sd)
pool_svd_d <- pool_svd_d[standev_d > 0, , drop = FALSE]
standev_d <- standev_d[standev_d > 0]
svd_d <- svd((pool_svd_d - rowMeans(pool_svd_d)) / standev_d)

# Fraction of variance due to PC1 and PC2. Wrapped in print() so the values
# are also shown when the script is run through source().
# NOTE(review): the values previously recorded here for the paper
# (PC1 0.23016, PC2 0.1466023) came from the run that excluded the first
# four delta columns; re-run and update them.
print(svd_d$d[1]^2 / sum(svd_d$d^2))
print(svd_d$d[2]^2 / sum(svd_d$d^2))

# One row per sample: its loadings on the first two right singular vectors.
svd_d_data <- as.data.frame(svd_d$v[, 1:2])
colnames(svd_d_data) <- c("PC1", "PC2")

# Collapse each sample name to its pool label ("RH1" ... "RH6").
svd_d_data$RH_pool <- sub("^(RH[1-6]).*$", "\\1", colnames(pool_svd_d))

# One default-ggplot2-style colour per pool.
n <- length(unique(svd_d_data$RH_pool))
colores_1 <- gg_color_hue(n)

p4 <- ggplot() +
  theme2 +
  theme(legend.key = element_blank()) +
  geom_point(
    data = svd_d_data,
    mapping = aes(x = PC1, y = PC2, colour = RH_pool),
    shape = 1,
    stroke = 0.4,
    size = 1.0
  ) +
  scale_color_manual(
    values = colores_1,
    name = "Pool",
    labels = c(1, 2, 3, 4, 5, 6)
  ) +
  guides(
    colour = guide_legend(
      override.aes = list(fill = NA, shape = 1, size = 1),
      ncol = 1,
      byrow = TRUE
    )
  ) +
  theme(
    plot.margin = unit(c(1.0, 0.5, 1.0, 1.0), "cm"),
    legend.position = "right",
    legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.key.height = unit(0.25, "cm"),
    legend.key.width = unit(0.1, "cm"),
    legend.spacing.y = unit(0.07, "cm"),
    legend.spacing.x = unit(0.07, "cm"),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.title.align = 0.0,
    # margin() takes top, right, bottom, left: space to the right of the
    # y-axis title and above the x-axis title.
    axis.title.y = element_text(margin = margin(0, 13, 0, 0)),
    axis.title.x = element_text(margin = margin(10, 0, 0, 0))
  ) +
  xlab("PC1") +
  ylab(expression("PC2")) +
  labs(subtitle = "Paclitaxel PCA")
print(p4)



# ------------- Make file --------------------------


# Write the four panels to a single 2 x 2 PDF figure.
pdf("RH_batch_1.pdf", width = 7.5, height = 6.67, useDingbats = FALSE)
# plot_grid() only builds the figure; it is drawn when printed. Top-level
# values are not auto-printed when the script is run through source(), so
# print explicitly (as is already done for p1-p4) or the PDF ends up empty.
print(
  plot_grid(
    p1, p2, p3, p4,
    labels = c("A", "B", "C", "D"),
    ncol = 2, nrow = 2,
    label_size = 16
  )
)
dev.off()


























