# Chromosome level Manhattan plot for growth. Plot of copy number changes

#install.packages("ggplot2")
library(ggplot2)
library(cowplot) #used with plot_grid 

# for summarySE()
library(Rmisc)
library(mgcv)

library(pdftools)
library(magick)



#----------------Aesthetics ---------------------------


# Shared ggplot2 theme applied to every panel: no grid or panel background,
# hairline black axes and ticks, and the legend hidden unless a panel
# re-enables it afterwards.
theme2 <- local({
	hairline <- element_line(colour = "black", size = 0.1) # common style for both axis lines and the ticks
	theme(
		plot.margin = unit(c(t=1.2,r=0.4,b=1.2,l=0.4), "cm"),
		panel.grid.major = element_blank(),
		panel.grid.minor = element_blank(),
		panel.background = element_blank(),
		legend.position = "none",
		axis.line.x = hairline,
		axis.line.y = hairline,
		axis.ticks = hairline,
		axis.text = element_text(size=12), # tick-mark numbers on both axes
		axis.title = element_text(size=14), # axis titles
		axis.title.y = element_text(margin=margin(0,13,0,0)), # margin() order is t, r, b, l: pads the right side of the y title
		axis.title.x = element_text(margin=margin(10,0,0,0)), # pads the top of the x title
		plot.title = element_text(size=32, face="bold", hjust = -0.14), # would style a ggtitle() panel letter; the combined figure uses plot_grid labels instead
		plot.subtitle = element_text(size=14, face="plain", hjust = 0.5) # horizontal justification of the subtitle; 0.5 centres it
		)
})




# size_point <- 0.3
# line width passed to geom_hline() for the red threshold line in p1, p2 and p3
size_hline <- 0.2


# darkest two hues from 3-class PuBuGn in color brewer
# cb1<-rep(c("#1c9099", "#a6bddb"), 12)

# # darkest two hues from 3-class PuBu in color brewer
# cb1<-rep(c("#2b8cbe", "#a6bddb"), 12)


# #attractive pinks, greys
# cb1<-c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#999999", "#E69F00", "#56B4E9", "#E69F00", "#009E73", "#F0E442", "#0072B2", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#999999", "#D55E00", "#CC79A7")

# cb1_rev <- c("#CC79A7", "#D55E00", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00","#999999", "#CC79A7", "#D55E00", "#0072B2", "#D55E00", "#F0E442", "#009E73", "#56B4E9", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00","#999999", "#CC79A7", "#E69F00","#999999")

# #'4-class RdBu'
# cb2 <- c('#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#f4a582','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#92c5de','#0571b0')

# #'4-class RdYlBu'
# cb3 <- c('#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#fdae61','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#abd9e9','#2c7bb6')
	
	




#----------------- Prepare human logP ---------------------


# Per-position -log10 P table and the threshold table (row names taken from
# the first column, single value column named "thresh").
logP <- read.table(
	"log10P_human.txt",
	header = TRUE,
	sep = "\t",
	stringsAsFactors = FALSE
	)
human_thresh_95 <- read.table(
	"human_thresh_95.txt",
	header = FALSE,
	sep = "\t",
	stringsAsFactors = FALSE,
	row.names = 1,
	col.names = c("","thresh")
	)



# Sort rows by chromosome (chr1..chr22, chrX) and then by position. The factor
# is only a temporary device for getting that chromosome order; the column is
# turned back into character afterwards.
chrOrder <- paste0("chr", c(1:22,"X"))
logP$Chromosome <- factor(logP$Chromosome, levels = chrOrder)
logP <- logP[with(logP, order(Chromosome, pos)), ]
logP$Chromosome <- as.character(logP$Chromosome)



# # Transform chr1 etc. to numbers
# logP$Chromosome <- gsub('chr', '', logP$Chromosome)
# logP[logP$Chromosome == "X","Chromosome"] <- 23
# chrOrder<-c(1:23)
# logP$Chromosome <- factor(logP$Chromosome, levels=chrOrder)
# logP <- logP[order(logP$Chromosome, logP$pos), ]
# logP$Chromosome <- as.numeric(logP$Chromosome)

# # Compute chromosome size
# gen_coord <- aggregate(pos~Chromosome,FUN=max,data=logP)
# colnames(gen_coord)[2] <- "chr_size"
# gen_coord$Chromosome <-factor(gen_coord$Chromosome, levels=chrOrder)
# gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
# gen_coord$Chromosome <- as.numeric(gen_coord$Chromosome)

# # Use cumsum to make genome coordinates
# gen_coord$coord <- c(0,cumsum(gen_coord$chr_size)[-23])

# # merge genome coordinates with logP
# logP <- merge(logP,gen_coord[,c("Chromosome","coord")])
# logP$Chromosome <-factor(logP$Chromosome, levels=chrOrder)
# logP <- logP[order(logP$Chromosome, logP$pos), ]
# logP$Chromosome <- as.numeric(logP$Chromosome)

# logP$coord <- logP$pos + logP$coord


# # find midpoints of chromosomes for breaks in ggplot
# mid <- function(x) {(max(x)+min(x))/2}
# chr_mid <- aggregate(coord~Chromosome,FUN = mid,data=logP)
# colnames(chr_mid)[2] <- "mid"
# chr_mid$Chromosome <-factor(chr_mid$Chromosome, levels=chrOrder)
# chr_mid <- chr_mid[order(chr_mid$Chromosome), ]
# chr_mid$Chromosome <- as.numeric(chr_mid$Chromosome)

# # Define breaks as mid-points chromosomes
# breaks <- chr_mid$mid


# # attractive grey and skyblue color scheme
# cb1<-rep(c("grey", "skyblue"), 12)


# standard black color scheme
# all 24 palette slots are black, so every point is drawn in the same colour
cb1 <- rep("black", 24)



# labels <- as.character(c(1:9,"",11,"",13,"","",16,"","","",20,"","","X"))


# --------- (1) chr3, wk3 ----------------------------

# choose chr number
# Panel p1: -log10 P along one chromosome for column log10p_d_w3, with two
# hand-picked peaks labelled by gene symbol and the threshold drawn in red.

# choose chr number
i <- 3

# Loci table; supplies log10P and geneSymbol for the labelled peaks
d <- read.delim("paclitaxel_loci.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)

# Rows of logP on the chosen chromosome. These are the plotted points, and the
# hand-picked indices below are row numbers within this subset.
logP_chr <- logP[logP$Chromosome==paste0("chr",i),]

# # id points by hand (interactive)
# plot(logP_chr$pos, logP_chr$log10p_d_w3, cex=0.05)
# abline(h= human_thresh_95["log10p_d_w3",], col="red", lwd=0.1)
# points_p1 <- identify(logP_chr$pos, logP_chr$log10p_d_w3, cex=0.5)

# points_p1
# # [1]  2752 11647


# START HERE if do not wish to do hand picking again
points_p1 <- c(2752, 11647)


# Attach gene symbols to the picked positions and keep the wk 3 entries only
labels_p1 <- merge(logP_chr[points_p1,c("Chromosome","pos")],d[,c("Chromosome","pos","wk","log10P","geneSymbol")])
labels_p1 <- labels_p1[labels_p1$wk==3,]
labels_p1 <- labels_p1[order(labels_p1$pos),]
labels_p1$nudge_x <- 0
labels_p1$nudge_y <- 0.6
labels_p1[labels_p1$geneSymbol=="NEK10","nudge_x"] <- 2.2e-16 # cannot use 0 as 1st vector member because bug in ggplot2, cf https://github.com/tidyverse/ggplot2/issues/2977
labels_p1[labels_p1$geneSymbol=="LSAMP","nudge_x"] <- 0


# Point size grows linearly with -log10 P: 0.1 at zero, 0.1*(1 + balloon_scale)
# at the chromosome maximum.
balloon_scale <- 0.5 # inflation factor for significant points
size_point <- 0.1*(1 + balloon_scale*(logP_chr$log10p_d_w3/max(logP_chr$log10p_d_w3))) # scale significant points


p1 <- ggplot() +
		geom_point(
			data = logP_chr,
			mapping = aes(
				x = pos/1e6,
				y = log10p_d_w3,
				color = as.factor(Chromosome) # was a quoted string, which mapped a constant instead of the column
				),
			size = size_point,
			stroke = 0
			) +
		geom_text(
			data = labels_p1,
			mapping = aes(x = pos/1e6, y = log10P, label = geneSymbol, fontface = "italic"),
			nudge_x = labels_p1$nudge_x,
			nudge_y = labels_p1$nudge_y,
			colour = "black",
			size = 2.5
			) + # nudge_x and nudge_y gives warning, but seems to work
		scale_color_manual(values=cb1) +
		theme2 +
		# scale_x_continuous(breaks = breaks, labels = labels) +
		scale_x_continuous() +
		scale_y_continuous(breaks = c(0, 5, 10), labels = c("0", "5", "10"), limits=c(0,10)) +
		xlab(paste0("Chromosome ", i, " (Mb)")) +
		ylab(expression('-log'[10]*italic('P'))) +
		geom_hline(yintercept= human_thresh_95["log10p_d_w3",], linetype="solid", color = "red", size=size_hline) +
		labs(subtitle="Paclitaxel (wk 3)")
print(p1)


# --------- (2) chr7, wk4 ----------------------------

# choose chr number
# Panel p2: -log10 P along one chromosome for column log10p_d_w4, with two
# hand-picked peaks labelled by gene symbol and the threshold drawn in red.

# choose chr number
i <- 7

# Loci table; supplies log10P and geneSymbol for the labelled peaks
d <- read.delim("paclitaxel_loci.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)

# Rows of logP on the chosen chromosome. These are the plotted points, and the
# hand-picked indices below are row numbers within this subset.
logP_chr <- logP[logP$Chromosome==paste0("chr",i),]

# # id points by hand (interactive)
# plot(logP_chr$pos, logP_chr$log10p_d_w4, cex=0.05)
# abline(h= human_thresh_95["log10p_d_w4",], col="red", lwd=0.1)
# points_p2 <- identify(logP_chr$pos, logP_chr$log10p_d_w4, cex=0.5)

# points_p2
# # [1]   457 12454

# START HERE if do not wish to do hand picking again
points_p2 <- c(457,12454)


# Attach gene symbols to the picked positions and keep the wk 4 entries only
labels_p2 <- merge(logP_chr[points_p2,c("Chromosome","pos")],d[,c("Chromosome","pos","wk","log10P","geneSymbol")])
labels_p2 <- labels_p2[labels_p2$wk==4,]
labels_p2 <- labels_p2[order(labels_p2$pos),]
labels_p2$nudge_x <- 0
labels_p2$nudge_y <- 1.5
labels_p2[labels_p2$geneSymbol=="SDK1","nudge_x"] <- 2.2e-16 # cannot use 0 as 1st vector member because bug in ggplot2, cf https://github.com/tidyverse/ggplot2/issues/2977
labels_p2[labels_p2$geneSymbol=="AC006148.1","nudge_x"] <- 0


# Point size grows linearly with -log10 P: 0.1 at zero, 0.1*(1 + balloon_scale)
# at the chromosome maximum.
balloon_scale <- 0.5 # inflation factor for significant points
size_point <- 0.1*(1 + balloon_scale*(logP_chr$log10p_d_w4/max(logP_chr$log10p_d_w4))) # scale significant points


# NOTE(review): an earlier remark here said "Subset labels_p2 to ignore
# AC074389.2", but every row of labels_p2 is passed to geom_text - confirm
# whether a subset is still wanted.
p2 <- ggplot() +
		geom_point(
			data = logP_chr,
			mapping = aes(
				x = pos/1e6,
				y = log10p_d_w4,
				color = as.factor(Chromosome) # was a quoted string, which mapped a constant instead of the column
				),
			size = size_point,
			stroke = 0
			) +
		geom_text(
			data = labels_p2,
			mapping = aes(x = pos/1e6, y = log10P, label = geneSymbol, fontface = "italic"),
			nudge_x = labels_p2$nudge_x,
			nudge_y = labels_p2$nudge_y,
			colour = "black",
			size = 2.5
			) + # nudge_x and nudge_y gives warning, but seems to work
		scale_color_manual(values=cb1) +
		theme2 +
		# scale_x_continuous(breaks = breaks, labels = labels) +
		scale_x_continuous() +
		xlab(paste0("Chromosome ", i, " (Mb)")) +
		ylab(expression('-log'[10]*italic('P'))) +
		geom_hline(yintercept= human_thresh_95["log10p_d_w4",], linetype="solid", color = "red", size=size_hline) +
		labs(subtitle="Paclitaxel (wk 4)")
print(p2)





# --------- (3) chr19, wk 6 ----------------------------


# choose chr number
# Panel p3: -log10 P along one chromosome for column log10p_d_w6, with two
# hand-picked peaks labelled by gene symbol and the threshold drawn in red.

# choose chr number
i <- 19

# Loci table; supplies log10P and geneSymbol for the labelled peaks
d <- read.delim("paclitaxel_loci.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)

# Rows of logP on the chosen chromosome. These are the plotted points, and the
# hand-picked indices below are row numbers within this subset.
logP_chr <- logP[logP$Chromosome==paste0("chr",i),]

# # id points by hand (interactive)
# plot(logP_chr$pos, logP_chr$log10p_d_w6, cex=0.05)
# abline(h= human_thresh_95["log10p_d_w6",], col="red", lwd=0.1)
# points_p3 <- identify(logP_chr$pos, logP_chr$log10p_d_w6, cex=0.5)

# points_p3
# # [1] 1996 5888

# START HERE if do not wish to do hand picking again
points_p3 <- c(1996, 5888)


# Attach gene symbols to the picked positions and keep the wk 6 entries only
labels_p3 <- merge(logP_chr[points_p3,c("Chromosome","pos")],d[,c("Chromosome","pos","wk","log10P","geneSymbol")])
labels_p3 <- labels_p3[labels_p3$wk==6,]
labels_p3 <- labels_p3[order(labels_p3$pos),]
labels_p3$nudge_x <- 0
labels_p3$nudge_y <- 1.5
labels_p3[labels_p3$geneSymbol=="GATAD2A","nudge_x"] <- 2.2e-16 # cannot use 0 as 1st vector member because bug in ggplot2, cf https://github.com/tidyverse/ggplot2/issues/2977
labels_p3[labels_p3$geneSymbol=="AC020915.5","nudge_x"] <- -4


# Point size grows linearly with -log10 P: 0.1 at zero, 0.1*(1 + balloon_scale)
# at the chromosome maximum.
balloon_scale <- 0.5 # inflation factor for significant points
size_point <- 0.1*(1 + balloon_scale*(logP_chr$log10p_d_w6/max(logP_chr$log10p_d_w6))) # scale significant points


# NOTE(review): an earlier remark here said to omit the AC074389.2 and
# AC073332.1 labels via labels_p3[-c(1,3),], but every row of labels_p3 is
# passed to geom_text - confirm whether a subset is still wanted.
p3 <- ggplot() +
		geom_point(
			data = logP_chr,
			mapping = aes(
				x = pos/1e6,
				y = log10p_d_w6,
				color = as.factor(Chromosome) # was a quoted string, which mapped a constant instead of the column
				),
			size = size_point,
			stroke = 0
			) +
		geom_text(
			data = labels_p3,
			mapping = aes(x = pos/1e6, y = log10P, label = geneSymbol, fontface = "italic"),
			nudge_x = labels_p3$nudge_x,
			nudge_y = labels_p3$nudge_y,
			colour = "black",
			size = 2.5
			) + # nudge_x and nudge_y gives warning, but seems to work
		scale_color_manual(values=cb1) +
		theme2 +
		# scale_x_continuous(breaks = breaks, labels = labels) +
		scale_x_continuous() +
		xlab(paste0("Chromosome ", i, " (Mb)")) +
		ylab(expression('-log'[10]*italic('P'))) +
		geom_hline(yintercept= human_thresh_95["log10p_d_w6",], linetype="solid", color = "red", size=size_hline) +
		labs(subtitle="Paclitaxel (wk 6)")
print(p3)





# ------------- (4) line plot copy number change wk6 -----------------------

# cf plot_nb_graph_1.R


# Return n colours evenly spaced around the HCL hue wheel (hues starting at
# 15 degrees, luminance 65, chroma 100), as hex strings.
#
# n: number of colours wanted (non-negative whole number).
# Returns a character vector of length n; character(0) when n is 0.
gg_color_hue <- function(n) {
  # n + 1 hues are generated because 15 and 375 degrees are the same hue;
  # the duplicate end point is then dropped.
  hues <- seq(15, 375, length.out = n + 1)
  # seq_len() rather than 1:n so that n = 0 yields an empty result instead of
  # indexing with c(1, 0)
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}



# Loci tables (tab-delimited, column names kept exactly as in the files)
d <- read.delim("paclitaxel_loci.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE, check.names = FALSE)
g <- read.delim("growth_loci.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE, check.names = FALSE)
Ix <- read.delim("Ix_loci.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE, check.names = FALSE)

d_unique <- read.delim("paclitaxel_loci_unique.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE, check.names = FALSE)
g_unique <- read.delim("growth_loci_unique.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE, check.names = FALSE)


# copy number data
RH_human <- read.table("RH_human_gseq.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# Ancillary tables

cell <- read.table("cell_label_info.txt", sep = "\t", stringsAsFactors = FALSE, header = TRUE)
reads <- read.table("RH_pool_human_total_align.txt", sep = "\t", stringsAsFactors = FALSE, header = TRUE) # uses mapped human reads, cf human_AIC_1.R
sum_reads <- colSums(RH_human[, 5:ncol(RH_human)]) # column totals from the 5th column of RH_human onwards


# Split the paclitaxel loci by the value of their wk column

d_2wk <- d[d$wk == 2, ]
d_3wk <- d[d$wk == 3, ]
d_4wk <- d[d$wk == 4, ]
d_6wk <- d[d$wk == 6, ]
d_avg <- d[d$wk == "avg", ]





# select paclitaxel genes at wk 3. No genes overlap with growth or Ix genes. Sort of (kinda) nicely spaced.
# Rows are ordered by decreasing coef_d_w3 and then picked by position in that ordering.
locus_cols <- c("Chromosome", "pos", "wk", "log10P", "geneSymbol", "dist", "gene_type", "coef_d_w3")
wk3_by_coef <- d_3wk[order(-d_3wk$coef_d_w3), locus_cols]
wk3_by_coef[c(2, 4, 18, 21, 23, 25), ]
   # Chromosome       pos wk   log10P geneSymbol dist      gene_type    coef_d_w3
# 47      chr10  21790000  3 6.947158     DNAJC1    0 protein_coding  0.014248712
# 46       chr9 133350000  3 6.124421      RPL7A    0 protein_coding  0.012001199
# 35       chr7   4090000  3 8.440885       SDK1    0 protein_coding -0.009755609
# 30       chr6  90000000  3 7.467754      BACH2    0 protein_coding -0.010319905
# 73      chr15  42710000  3 6.144737     STARD9    0 protein_coding -0.011594913
# 85      chr17   4080000  3 6.745259      ZZEF1    0 protein_coding -0.014202923

# above is equivalent to
# d_3wk[order(-d_3wk$coef_d_w3),][d_3wk[order(-d_3wk$coef_d_w3),c("gene_type")]=="protein_coding",][,c("Chromosome","pos","wk","log10P","geneSymbol","dist","gene_type","coef_d_w3")][c(2,3,7,9,10,11),]



# Gene symbols shared between the growth and paclitaxel unique-loci tables
intersect(g_unique$geneSymbol, d_unique$geneSymbol)
# [1] "CEN"        "LSAMP"      "KHDRBS2"    "AC074389.2" "SEMA3D"     "TBC1D12"    "AK6P1"      "RN7SL584P"  "GATAD2A"  

# Gene symbols in Ix that also appear in either unique-loci table
intersect(Ix$geneSymbol, c(g_unique$geneSymbol, d_unique$geneSymbol))
 # [1] "SLC2A5"     "NBPF8"      "PDE4DIP"    "ALK"        "SLC44A4"    "KHDRBS2"    "AC074389.2" "RNF216"     "SEMA3D"     "WASL"       "SLC24A2"    "CEN"       
# [13] "AK6P1"      "RNU6-54P"   "AC092078.2" "CDH13"      "GATAD2A"    "RF00568"       

# Join the six selected loci to the copy number table on their shared columns
genes <- merge(wk3_by_coef[c(2, 4, 18, 21, 23, 25), ], RH_human)





# Prepare for mgcv::gam

# Long format: one row per (gene, RH sample). Column 5 of genes is kept as the
# gene identifier and columns 11 onwards are stacked into "copy", with the
# original column name recorded in RH_ID.
rh_cols <- colnames(genes)[11:ncol(genes)]
genes_l <- reshape(
  genes[, c(5, 11:ncol(genes))],
  direction = "long",
  varying = rh_cols,
  v.names = "copy",
  timevar = "RH_ID",
  times = rh_cols,
  new.row.names = 1:1e6
)

# Decode week, concentration and pool from tags embedded in RH_ID.
# Each column starts at 0 and is overwritten wherever the matching tag occurs.
genes_l$week <- 0
for (wk in c(0, 1, 2, 3, 4, 6)) {
  genes_l[grepl(paste0("_w", wk, "_"), genes_l$RH_ID), ]$week <- wk
}

genes_l$conc <- 0
for (dose in c(0, 8, 25, 75)) {
  genes_l[grepl(paste0("_d", dose), genes_l$RH_ID), ]$conc <- dose
}

genes_l$pool <- 0
for (pool_no in c(1, 2, 3, 4, 5, 6)) {
  genes_l[grepl(paste0("RH", pool_no, "_"), genes_l$RH_ID), ]$pool <- pool_no
}



# Attach the cell labels, the per-sample column totals, and the read counts
genes_l <- merge(genes_l, cell)
genes_l$sum_reads <- sum_reads[genes_l$RH_ID]
genes_l <- merge(genes_l, reads[, c(1:5, 9)])
colnames(genes_l)[10] <- "total_reads" # renames by position: the 10th column after the merges


# pool and cell become factors; geneSymbol is a factor whose level order is
# the order of the six selected wk 3 loci (decreasing coef_d_w3)
genes_l$pool <- as.factor(genes_l$pool)
genes_l$cell <- as.factor(genes_l$cell)
gene_levels <- d_3wk[order(-d_3wk$coef_d_w3), c("Chromosome", "pos", "wk", "log10P", "geneSymbol", "dist", "gene_type", "coef_d_w3")][c(2, 4, 18, 21, 23, 25), "geneSymbol"]
genes_l$geneSymbol <- factor(genes_l$geneSymbol, levels = gene_levels)



head(genes_l)
  # week conc pool     RH_ID cell geneSymbol copy id sum_reads total_reads
# 1    0    0    1 RH1_w0_d0    1     DNAJC1  101  1  29018449      298433
# 2    0    0    1 RH1_w0_d0    1     STARD9   86  2  29018449      298433
# 3    0    0    1 RH1_w0_d0    1      ZZEF1  144  3  29018449      298433
# 4    0    0    1 RH1_w0_d0    1      BACH2  121  4  29018449      298433
# 5    0    0    1 RH1_w0_d0    1       SDK1   80  5  29018449      298433
# 6    0    0    1 RH1_w0_d0    1      RPL7A   85  6  29018449      298433





# Per-gene negative binomial GAM of copy number on week * conc, with random
# effects for pool and cell and log(total_reads) as offset. For each gene the
# link-scale fit and its standard error are stored, then both the fit and the
# observed log copy number are centred on the conc == 0 mean of the same week.
genes_l$phat <- NA_real_
genes_l$phat_se <- NA_real_
genes_l$phat_center <- NA_real_
genes_l$log_copy <- NA_real_

# Iterate over the ids actually present rather than assuming they are 1..k
for (gene_id in unique(genes_l$id)) {
	is_gene <- genes_l$id == gene_id

	m1_nb <- gam(copy ~ week * conc + s(pool, bs = "re") + s(cell, bs = "re") + offset(log(total_reads)), data = genes_l[is_gene,], family = nb, method = "REML")

	# one predict() call provides both the fit and its standard error
	link_pred <- predict(m1_nb, se.fit=TRUE, type="link")
	genes_l$phat[is_gene] <- link_pred$fit
	genes_l$phat_se[is_gene] <- link_pred$se.fit

	# dedicated centers at each level of week for paclitaxel, since each line originates from a different week (1,2,3,4,6)
	for (wk in unique(genes_l$week)) {
		in_week <- is_gene & genes_l$week == wk
		is_control <- in_week & genes_l$conc == 0 # zero-concentration rows of this gene and week
		genes_l$phat_center[in_week] <- genes_l$phat[in_week] - mean(genes_l$phat[is_control])
		genes_l$log_copy[in_week] <- log(genes_l$copy[in_week]) - mean(log(genes_l$copy[is_control]))
	}
}


head(genes_l)
  # week conc pool     RH_ID cell geneSymbol copy id sum_reads total_reads     phat   phat_se phat_center   log_copy
# 1    0    0    1 RH1_w0_d0    1     DNAJC1  101  1  29018449      298433 4.536869 0.2850121 -0.82356566 -0.7512202
# 2    0    0    1 RH1_w0_d0    1     STARD9   86  2  29018449      298433 5.064757 0.2463605  0.11991586 -0.3915277
# 3    0    0    1 RH1_w0_d0    1      ZZEF1  144  3  29018449      298433 4.917039 0.2909496 -0.69338675 -0.6460108
# 4    0    0    1 RH1_w0_d0    1      BACH2  121  4  29018449      298433 5.334604 0.1908845 -0.08926342 -0.5972130
# 5    0    0    1 RH1_w0_d0    1       SDK1   80  5  29018449      298433 4.786515 0.1304580 -0.43733458 -0.8345611
# 6    0    0    1 RH1_w0_d0    1      RPL7A   85  6  29018449      298433 4.181455 0.2538130 -1.89001083 -1.5102718




# One colour per gene, named by gene symbol so the manual colour scale can
# match colours to genes.
n <- length(unique(genes_l$geneSymbol))
colores_1 <- gg_color_hue(n)
names(colores_1) <- gene_levels


# chose jitter of zero, as made for a tidier plot
jitter_factor <- 0
# Lookup table with one row per (gene, concentration) giving the x position
# to use for that combination; it is merged back onto genes_l below.
conc_levels <- unique(genes_l$conc)
conc_grid <- rep(conc_levels, n)
jitter <- data.frame(
	conc = conc_grid,
	geneSymbol = rep(gene_levels, each = length(conc_levels)),
	jitter = jitter(conc_grid, jitter_factor)
	)
genes_l <- merge(jitter, genes_l)


# Have to redo factor levels of genes_l$geneSymbol after merge, for some reason
genes_l$geneSymbol <- factor(genes_l$geneSymbol, levels = gene_levels)


# provide summary table if wish to adjust CI to sem using group number, N
summary_genes <- summarySE(
	genes_l,
	measurevar = "phat_center",
	groupvars = c("week","geneSymbol","conc")
	)








# use lm to provide line and CI

# Panel p4: week 3 rows of genes_l. Open circles are the centred observed log
# copy numbers; lines are through-the-origin linear fits (with 95% band) to
# the centred GAM predictions, one per gene. Both are rescaled from natural
# log to log2 by the factor log2(exp(1)).
p4 <- ggplot() + 
 	theme2 + 
	theme(legend.key=element_blank()) +
	geom_point(
		data=genes_l[genes_l$week==3,],
		shape=1,
		stroke=0.2,
		size=1.0,
		aes(
			x=jitter, 
			y=log2(exp(1))*log_copy, 
			colour=geneSymbol
			)
		) +
    geom_smooth(
	    data=genes_l[genes_l$week==3,], 
	    method = "lm",  
	    formula = y~0+x, # no intercept: the centred values are zero at conc 0
	    aes(
		    y=log2(exp(1))*phat_center,
		    x=conc,
		    group=geneSymbol,
		    colour=geneSymbol#,
		    # fill=geneSymbol
		    ),
	    se=TRUE,
	    level=0.95,
	    size=0.3,
	    fill="grey",
	    alpha=0.1
	    ) +
	scale_color_manual(
		values=colores_1,
		name =NULL, 
		labels=levels(genes_l[genes_l$week==3,"geneSymbol"])
		) +
	guides(
		# "none" is the supported way to suppress a guide; FALSE is deprecated
		shape="none",
		fill="none",
 		colour = guide_legend(
			 		override.aes = list(
			 		fill=NA,
			 		shape=NA,
			 		size=0.3
			 		),
		 		ncol=1,
		 		byrow=TRUE
		 		)
 		) +
	theme(
		legend.position = "right", # overrides legend.position="none" from theme2
 		legend.title = element_text(size = 9), 
 		legend.text = element_text(size = 8, face = "italic"),
 		legend.title.align=0.2,
 		legend.key.height = unit(0.1, 'lines'), 
 		legend.margin=margin(t = 0, r = -0.2, b = 0, l = -0.4, unit = "cm")
 		) +
	scale_x_continuous(breaks = c(0,8,25,75), labels = c(0,8,25,75)) +
	# ggtitle("") + 
	xlab("Paclitaxel (nM)") + 
	ylab(expression(Delta*log[2]~(Reads))) + 
	labs(subtitle="Paclitaxel (wk 3)")
print(p4)






# --------------- Provide wk2 paclitaxel to UCSC genome browser ---------------------



# ############## DO NOT DELETE #######################
# Important if want to reconstruct wk 2 paclitaxel data for UCSC genome browser


# d_w2 <- logP[,c("Chromosome","pos","log10p_d_w2")]
# d_w2$posS <- d_w2$pos + 1
# d_w2 <- d_w2[,c("Chromosome","pos","posS","log10p_d_w2")]
# d_w2$pos <- format(d_w2$pos,scientific=FALSE)
# d_w2$posS <- format(d_w2$posS,scientific=FALSE)


# head(d_w2)
  # # Chromosome       pos      posS  log10p_d_w2
# # 1       chr1      5000      5001 4.117471e-09
# # 2       chr1     10000     10001 8.683001e-01
# # 3       chr1     15000     15001 4.698029e-01
# # 4       chr1     20000     20001 5.136159e-01
# # 5       chr1     25000     25001 4.014862e-01
# # 6       chr1     30000     30001 5.275144e-01

# write.table(d_w2, "paclitaxel_wk2_log10P.txt",quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)


# # # Place following header at top of paclitaxel_wk2_log10P.txt (the file written by write.table above) and use in bedGraph format on UCSC genome browser
# # track type=bedGraph name="-log10P" description="use name of gene zoomed in on" visibility=full color=0,0,255 altColor=255,0,0 priority=20

# # Custom track settings
# # Display mode: full
# # Type of graph: points
# # Track height: 128 pixels
# # Data view scaling: auto-scale to data view
# # Always include zero: ON
# # Vertical viewing range:  min: 0; max: 1000  (range: 0 to 1000) (greyed out)
# # Transform function: Transform data points by: NONE
# # Windowing function: mean
# # Smoothing window: OFF
# # Negate values: not selected
# # Draw y indicator lines: 
# # at y = 0.0: ON at y = 3.69993734812617 ON (corresponds to human_thresh_95.txt, for log10p_d_w2)


# # Configure Image page on ucsc genome browser:
# # image width:	400	pixels
# # label area width:	10	characters	
# # text size: 12


# # Remember to get rid of splicoforms and nc genes by clicking GENCODE v31 bar on left of diagram, if desired.




# --------------- Provide wk3 paclitaxel to UCSC genome browser ---------------------



# ############## DO NOT DELETE #######################
# Important if want to reconstruct wk 3 paclitaxel data for UCSC genome browser


# d_w3 <- logP[,c("Chromosome","pos","log10p_d_w3")]
# d_w3$posS <- d_w3$pos + 1
# d_w3 <- d_w3[,c("Chromosome","pos","posS","log10p_d_w3")]
# d_w3$pos <- format(d_w3$pos,scientific=FALSE)
# d_w3$posS <- format(d_w3$posS,scientific=FALSE)


# head(d_w3)
  # # Chromosome       pos      posS  log10p_d_w3
# # 1       chr1      5000      5001 6.092092e-09
# # 2       chr1     10000     10001 7.656705e-01
# # 3       chr1     15000     15001 3.717152e-01
# # 4       chr1     20000     20001 5.098122e-01
# # 5       chr1     25000     25001 4.860051e-01
# # 6       chr1     30000     30001 8.003008e-01

# write.table(d_w3, "paclitaxel_wk3_log10P.txt",quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)


# # # Place following header at top of paclitaxel_wk3_log10P.txt (the file written by write.table above) and use in bedGraph format on UCSC genome browser
# # track type=bedGraph name="-log10P" description="use name of gene zoomed in on" visibility=full color=0,0,255 altColor=255,0,0 priority=20

# # Custom track settings
# # Display mode: full
# # Type of graph: points
# # Track height: 128 pixels
# # Data view scaling: auto-scale to data view
# # Always include zero: ON
# # Vertical viewing range:  min: 0; max: 1000  (range: 0 to 1000) (greyed out)
# # Transform function: Transform data points by: NONE
# # Windowing function: mean
# # Smoothing window: OFF
# # Negate values: not selected
# # Draw y indicator lines: 
# # at y = 0.0: ON at y = 6.06093189680259 ON (corresponds to human_thresh_95.txt, for log10p_d_w3)


# # Configure Image page on ucsc genome browser:
# # image width:	400	pixels
# # label area width:	10	characters	
# # text size: 12


# # Remember to get rid of splicoforms and nc genes by clicking GENCODE v31 bar on left of diagram, if desired.




# --------------- Provide wk4 paclitaxel to UCSC genome browser ---------------------



# ############## DO NOT DELETE #######################
# Important if want to reconstruct wk 4 paclitaxel data for UCSC genome browser


# d_w4 <- logP[,c("Chromosome","pos","log10p_d_w4")]
# d_w4$posS <- d_w4$pos + 1
# d_w4 <- d_w4[,c("Chromosome","pos","posS","log10p_d_w4")]
# d_w4$pos <- format(d_w4$pos,scientific=FALSE)
# d_w4$posS <- format(d_w4$posS,scientific=FALSE)


# head(d_w4)
  # # Chromosome       pos      posS  log10p_d_w4
# # 1       chr1      5000      5001 7.055740e-09
# # 2       chr1     10000     10001 5.256213e-01
# # 3       chr1     15000     15001 2.373012e-01
# # 4       chr1     20000     20001 4.505111e-01
# # 5       chr1     25000     25001 5.259548e-01
# # 6       chr1     30000     30001 1.024453e+00

# write.table(d_w4, "paclitaxel_wk4_log10P.txt",quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)


# # # Place following header at top of paclitaxel_wk4_log10P.txt (the file written by write.table above) and use in bedGraph format on UCSC genome browser
# # track type=bedGraph name="-log10P" description="use name of gene zoomed in on" visibility=full color=0,0,255 altColor=255,0,0 priority=20

# # Custom track settings
# # Display mode: full
# # Type of graph: points
# # Track height: 128 pixels
# # Data view scaling: auto-scale to data view
# # Always include zero: ON
# # Vertical viewing range:  min: 0; max: 1000  (range: 0 to 1000) (greyed out)
# # Transform function: Transform data points by: NONE
# # Windowing function: mean
# # Smoothing window: OFF
# # Negate values: not selected
# # Draw y indicator lines: 
# # at y = 0.0: ON at y = 12.4524630438516 ON (corresponds to human_thresh_95.txt, for log10p_d_w4)


# # Configure Image page on ucsc genome browser:
# # image width:	400	pixels
# # label area width:	10	characters	
# # text size: 12


# # Remember to get rid of splicoforms and nc genes by clicking GENCODE v31 bar on left of diagram, if desired.






# --------------- Provide wk6 paclitaxel to UCSC genome browser ---------------------



# ############## DO NOT DELETE #######################
# Important if want to reconstruct wk 6 paclitaxel data for UCSC genome browser


# d_w6 <- logP[,c("Chromosome","pos","log10p_d_w6")]
# d_w6$posS <- d_w6$pos + 1
# d_w6 <- d_w6[,c("Chromosome","pos","posS","log10p_d_w6")]
# d_w6$pos <- format(d_w6$pos,scientific=FALSE)
# d_w6$posS <- format(d_w6$posS,scientific=FALSE)


# head(d_w6)
  # # Chromosome       pos      posS  log10p_d_w6
# # 1       chr1      5000      5001 5.688579e-09
# # 2       chr1     10000     10001 1.206085e-01
# # 3       chr1     15000     15001 2.530579e-02
# # 4       chr1     20000     20001 2.680977e-01
# # 5       chr1     25000     25001 4.626788e-01
# # 6       chr1     30000     30001 1.088821e+00

# write.table(d_w6, "paclitaxel_wk6_log10P.txt",quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)


# # # Place following header at top of paclitaxel_wk6_log10P.txt (the file written by write.table above) and use in bedGraph format on UCSC genome browser
# # track type=bedGraph name="-log10P" description="use name of gene zoomed in on" visibility=full color=0,0,255 altColor=255,0,0 priority=20

# # Custom track settings
# # Display mode: full
# # Type of graph: points
# # Track height: 128 pixels
# # Data view scaling: auto-scale to data view
# # Always include zero: ON
# # Vertical viewing range:  min: 0; max: 1000  (range: 0 to 1000) (greyed out)
# # Transform function: Transform data points by: NONE
# # Windowing function: mean
# # Smoothing window: OFF
# # Negate values: not selected
# # Draw y indicator lines: 
# # at y = 0.0: ON at y = 16.4107630148017 ON (corresponds to human_thresh_95.txt, for log10p_d_w6)


# # Configure Image page on ucsc genome browser:
# # image width:	400	pixels
# # label area width:	10	characters	
# # text size: 12


# # Remember to get rid of splicoforms and nc genes by clicking GENCODE v31 bar on left of diagram, if desired.




# --------------- Provide avg paclitaxel to UCSC genome browser ---------------------



# ############## DO NOT DELETE #######################
# Important if want to reconstruct avg paclitaxel data for UCSC genome browser


# d_avg <- logP[,c("Chromosome","pos","log10p_d_avg")]
# d_avg$posS <- d_avg$pos + 1
# d_avg <- d_avg[,c("Chromosome","pos","posS","log10p_d_avg")]
# d_avg$pos <- format(d_avg$pos,scientific=FALSE)
# d_avg$posS <- format(d_avg$posS,scientific=FALSE)


# head(d_avg)
  # # Chromosome       pos      posS log10p_d_avg
# # 1       chr1      5000      5001 6.421780e-09
# # 2       chr1     10000     10001 7.259325e-01
# # 3       chr1     15000     15001 3.463619e-01
# # 4       chr1     20000     20001 5.022637e-01
# # 5       chr1     25000     25001 4.985943e-01
# # 6       chr1     30000     30001 8.532010e-01

# write.table(d_avg, "paclitaxel_avg_log10P.txt",quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)


# # # Place following header at top of paclitaxel_avg_log10P.txt (the file written by write.table above) and use in bedGraph format on UCSC genome browser
# # track type=bedGraph name="-log10P" description="use name of gene zoomed in on" visibility=full color=0,0,255 altColor=255,0,0 priority=20

# # Custom track settings
# # Display mode: full
# # Type of graph: points
# # Track height: 128 pixels
# # Data view scaling: auto-scale to data view
# # Always include zero: ON
# # Vertical viewing range:  min: 0; max: 1000  (range: 0 to 1000) (greyed out)
# # Transform function: Transform data points by: NONE
# # Windowing function: mean
# # Smoothing window: OFF
# # Negate values: not selected
# # Draw y indicator lines: 
# # at y = 0.0: ON at y = 7.22619420441411 ON (corresponds to human_thresh_95.txt, for log10p_d_avg)


# # Configure Image page on ucsc genome browser:
# # image width:	400	pixels
# # label area width:	10	characters	
# # text size: 12


# # Remember to get rid of splicoforms and nc genes by clicking GENCODE v31 bar on left of diagram, if desired.





# ------------------ (5) PDE4DIP_wk4 logP vs Gencode, UCSC genome browser ----------------



# Panel p5: the PDF is read as an image at 300 dpi and drawn enlarged
# (scale 1.4) on an empty canvas, with clipping off.
p5 <- ggdraw() +
	draw_image(
		magick::image_read_pdf("PDE4DIP_wk4.pdf", density = 300),
		scale = 1.4
		) +
	coord_cartesian(clip = "off")
	# optional annotations, currently unused:
	# + draw_label("Paclitaxel", fontface='plain', size=12, x=0.55,y=0.92) + draw_label("D", fontface='bold',x=0.05,y=0.98)

# apparis 1 and 4






# ------------------ (6) SDK1_wk3 logP vs Gencode, UCSC genome browser ----------------



# Panel p6: the PDF is read as an image at 300 dpi and drawn enlarged
# (scale 1.4) on an empty canvas, with clipping off.
# NOTE(review): the section header says SDK1_wk3 but the file read is
# SDK1_wk4.pdf - confirm which week is intended.
p6 <- ggdraw() +
	draw_image(
		magick::image_read_pdf("SDK1_wk4.pdf", density = 300),
		scale = 1.4
		) +
	coord_cartesian(clip = "off")
	# optional annotations, currently unused:
	# + draw_label("Paclitaxel", fontface='plain', size=12, x=0.55,y=0.92) + draw_label("D", fontface='bold',x=0.05,y=0.98)

# apparis 1 and 2








# --------------- Combine panels ---------------------

# warning msgs because of nudge_x, nudge_y in p1, p2, p3, but seem to be inconsequential
# File size ~ 2.8 Mb

# Assemble the six panels into a 2 x 3 grid labelled A-F. The panels are
# looked up when the function is called, so the higher-density p5 / p6
# defined further down are picked up by the later calls.
combine_panels <- function() {
	plot_grid(p1, p2, p3,  p4, p5, p6, ncol = 2, nrow = 3, labels=c("A", "B", "C", "D", "E","F"), label_size = 16, align="h")
}

# The grid is print()ed explicitly: a bare ggplot object is only drawn by
# top-level auto-printing, which does not happen under source(), and that
# would leave the output files empty.

pdf("d_loci_chr_1.pdf", width=7.5, height=10, useDingbats = FALSE)
print(combine_panels())
dev.off()




tiff("d_loci_chr_1.tif",width=7.5,height= 10,units="in",res=300)
print(combine_panels())
dev.off()




# if smaller file size required:
# File size ~0.798 Mb

png("d_loci_chr_1.png",width=7.5,height=10,units="in",res=300)
print(combine_panels())
dev.off()




# High-resolution versions: re-read the two PDF panels at 1200 dpi
p5 <- ggdraw() + draw_image(magick::image_read_pdf("PDE4DIP_wk4.pdf", density = 1200),scale=1.4) + coord_cartesian(clip = "off") # + draw_label("Paclitaxel", fontface='plain', size=12, x=0.55,y=0.92) + draw_label("D", fontface='bold',x=0.05,y=0.98)
p6 <- ggdraw() + draw_image(magick::image_read_pdf("SDK1_wk4.pdf", density = 1200),scale=1.4) + coord_cartesian(clip = "off") # + draw_label("Paclitaxel", fontface='plain', size=12, x=0.55,y=0.92) + draw_label("D", fontface='bold',x=0.05,y=0.98)


pdf("d_loci_chr_hi_res_1.pdf", width=7.5, height=10, useDingbats = FALSE)
print(combine_panels())
dev.off()



png("d_loci_chr_hi_res_1.png",width=7.5,height=10,units="in",res=1200)
print(combine_panels())
dev.off()















































