# Chromosome level Manhattan plot for growth. Plot of copy number changes

#install.packages("ggplot2")
library(ggplot2)
library(cowplot) #used with plot_grid 

# for summarySE()
library(Rmisc)
library(mgcv)

library(plot3D)


#----------------Aesthetics ---------------------------


theme2 <- theme(
	plot.margin = unit(c(t=1.2,r=0.4,b=1.2,l=0.4), "cm"),
	panel.grid.major = element_blank(), 
	panel.grid.minor = element_blank(), 
	panel.background = element_blank(), 
	legend.position="none", 
	axis.line.x = element_line(colour = "black", size = 0.1), 
	axis.line.y = element_line(colour = "black", size = 0.1), 
	axis.ticks = element_line(colour = "black", size = 0.1),
	axis.text=element_text(size=12), #numbers on tick marks of x and y axes
	axis.title=element_text(size=14), #titles of x and y axes
	axis.title.y=element_text(margin=margin(0,13,0,0)), #moves y axis title by adding margin space to bottom
	axis.title.x=element_text(margin=margin(10,0,0,0)),  #moves x axis title by adding margin space to top
	plot.title = element_text(size=32, face="bold", hjust = -0.14), #can provide "A","B", by ggtitle, but used plot_grid wch can shift more left
	plot.subtitle = element_text(size=14, face="plain", hjust = 0.5) #hjust shifts right
	)




# size_point <- 0.3
size_hline <- 0.2


# darkest two hues from 3-class PuBuGn in color brewer
# cb1<-rep(c("#1c9099", "#a6bddb"), 12)

# # darkest two hues from 3-class PuBu in color brewer
# cb1<-rep(c("#2b8cbe", "#a6bddb"), 12)


# #attractive pinks, greys
# cb1<-c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#999999", "#E69F00", "#56B4E9", "#E69F00", "#009E73", "#F0E442", "#0072B2", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#999999", "#D55E00", "#CC79A7")

# cb1_rev <- c("#CC79A7", "#D55E00", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00","#999999", "#CC79A7", "#D55E00", "#0072B2", "#D55E00", "#F0E442", "#009E73", "#56B4E9", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00","#999999", "#CC79A7", "#E69F00","#999999")

# #'4-class RdBu'
# cb2 <- c('#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#f4a582','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#92c5de','#0571b0')

# #'4-class RdYlBu'
# cb3 <- c('#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#fdae61','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#abd9e9','#2c7bb6')
	
	

# ---------------- Examine g_unique, d_unique, Ix ---------------------

g_unique <- read.delim("growth_loci_unique.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)
d_unique <- read.delim("paclitaxel_loci_unique.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)
Ix <- read.delim("Ix_loci.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)

# genes shared between Ix and g_unique or d_unique

intersect(Ix$geneSymbol,c(g_unique$geneSymbol, d_unique$geneSymbol))
 # [1] "SLC2A5"     "NBPF8"      "PDE4DIP"    "ALK"        "SLC44A4"    "KHDRBS2"    "AC074389.2" "RNF216"     "SEMA3D"     "WASL"       "SLC24A2"    "CEN"       
# [13] "AK6P1"      "RNU6-54P"   "AC092078.2" "CDH13"      "GATAD2A"    "RF00568"   





#----------------- Prepare human logP ---------------------


logP <- read.table("log10P_human.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE)
human_thresh_95 <- read.table("human_thresh_95.txt",header=FALSE,sep="\t",stringsAsFactors=FALSE,row.names=1,col.names=c("","thresh"))



# Sort:
chrOrder<-paste("chr",c(1:22,"X"),sep="")
logP$Chromosome <- factor(logP$Chromosome, levels=chrOrder)
logP <- logP[order(logP$Chromosome, logP$pos), ]
logP$Chromosome <- as.character(logP$Chromosome)



# # Transform chr1 etc. to numbers
# logP$Chromosome <- gsub('chr', '', logP$Chromosome)
# logP[logP$Chromosome == "X","Chromosome"] <- 23
# chrOrder<-c(1:23)
# logP$Chromosome <- factor(logP$Chromosome, levels=chrOrder)
# logP <- logP[order(logP$Chromosome, logP$pos), ]
# logP$Chromosome <- as.numeric(logP$Chromosome)

# # Compute chromosome size
# gen_coord <- aggregate(pos~Chromosome,FUN=max,data=logP)
# colnames(gen_coord)[2] <- "chr_size"
# gen_coord$Chromosome <-factor(gen_coord$Chromosome, levels=chrOrder)
# gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
# gen_coord$Chromosome <- as.numeric(gen_coord$Chromosome)

# # Use cumsum to make genome coordinates
# gen_coord$coord <- c(0,cumsum(gen_coord$chr_size)[-23])

# # merge genome coordinates with logP
# logP <- merge(logP,gen_coord[,c("Chromosome","coord")])
# logP$Chromosome <-factor(logP$Chromosome, levels=chrOrder)
# logP <- logP[order(logP$Chromosome, logP$pos), ]
# logP$Chromosome <- as.numeric(logP$Chromosome)

# logP$coord <- logP$pos + logP$coord


# # find midpoints of chromosomes for breaks in ggplot
# mid <- function(x) {(max(x)+min(x))/2}
# chr_mid <- aggregate(coord~Chromosome,FUN = mid,data=logP)
# colnames(chr_mid)[2] <- "mid"
# chr_mid$Chromosome <-factor(chr_mid$Chromosome, levels=chrOrder)
# chr_mid <- chr_mid[order(chr_mid$Chromosome), ]
# chr_mid$Chromosome <- as.numeric(chr_mid$Chromosome)

# # Define breaks as mid-points chromosomes
# breaks <- chr_mid$mid


# # attractive grey and skyblue color scheme
# cb1<-rep(c("grey", "skyblue"), 12)


# standard black color scheme
cb1 <- rep(c("black", "black"), 12)



# labels <- as.character(c(1:9,"",11,"",13,"","",16,"","","",20,"","","X"))


# --------- (1) chr1, Ix ----------------------------

# choose chr number
i <- 1


# id points by hand

Ix <- read.delim("Ix_loci.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)

# # id points by hand
# plot(logP[logP=="chr1","pos"], logP[logP=="chr1","log10p_g_d_Ix"], cex=0.05)
# abline(h= human_thresh_95["log10p_g_d_Ix",], col="red", lwd=0.1)
# points_p1 <- identify(logP[logP=="chr1","pos"], logP[logP=="chr1","log10p_g_d_Ix"], cex=0.5)

# points_p1
# [1]   955  3752 11529 12094 14951


# START HERE if do not wish to do hand picking again
points_p1 <- c(955, 3752, 11529, 12094, 14951)


labels_p1 <- merge(logP[logP$Chromosome=="chr1",][points_p1,c("Chromosome","pos")],Ix[,c("Chromosome","pos","wk_conc","log10P","geneSymbol")])
labels_p1 <- labels_p1[labels_p1$wk_conc=="Ix",]
labels_p1 <- labels_p1[order(labels_p1$pos),]
labels_p1$nudge_x <- 0
labels_p1$nudge_y <- 1.5
labels_p1[labels_p1$geneSymbol=="SLC2A5","nudge_x"] <- 2.2e-16 # cannot use 0 as 1st vector member because bug in ggplot2, cf https://github.com/tidyverse/ggplot2/issues/2977
labels_p1[labels_p1$geneSymbol=="GRIK3","nudge_x"] <- 0
labels_p1[labels_p1$geneSymbol=="SIKE1","nudge_x"] <- -11
labels_p1[labels_p1$geneSymbol=="NBPF8","nudge_x"] <- 0
labels_p1[labels_p1$geneSymbol=="PDE4DIP","nudge_x"] <- 0



balloon_scale <- 0.5 # inflation factor for significant points	
size_point <- 0.1*(1 + balloon_scale*(logP[logP$Chromosome==paste0("chr",i),"log10p_g_d_Ix"]/max(logP[logP$Chromosome==paste0("chr",i),"log10p_g_d_Ix"]))) # scale significant points



p1 <- ggplot() + 
		geom_point(
			data = logP[logP$Chromosome==paste0("chr",i),], 
			size= size_point,
			stroke=0, 
				aes(
					x = pos/1e6, 
					y = log10p_g_d_Ix, 
					color="as.factor(Chromosome)"
					)
				) +
		geom_text(data = labels_p1, aes(x = pos/1e6, y = log10P,label=geneSymbol, fontface = "italic"), nudge_x=labels_p1$nudge_x, nudge_y=labels_p1$nudge_y,  colour = "black", size = 2.5) + # nudge_x and nudge_y gives warning, but seems to work
		# geom_text( aes(x = 18990000/1e6, y = 44.74028), label="IFFO2", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
		# geom_text( aes(x = 51710000/1e6, y = 23.24700), label="OSBPL9", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
		# geom_text( aes(x = 103260000/1e6, y = 54.41426), label="COL11A1", colour = "black", size = 3, nudge_x=12, nudge_y=2) +
		# geom_text( aes(x = 159590000/1e6, y = 20.71389), label="APCS", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
		# geom_text( aes(x = 225540000/1e6, y = 18.84761), label="ENAH", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
		scale_color_manual(values=cb1) +
		theme2 +
		# scale_x_continuous(breaks = breaks, labels = labels) +
		scale_x_continuous() +
		xlab(paste0("Chromosome ", i, " (Mb)")) + 
		ylab(expression('-log'[10]*italic('P'))) +
		geom_hline(yintercept= human_thresh_95["log10p_g_d_Ix",], linetype="solid", color = "red", size=size_hline) +
		labs(subtitle="Ix") #+
		#scale_y_continuous(breaks=seq(0,120,20),limit = c(0, 120))
print(p1)


# --------- (2) chr7, Ix ----------------------------

# choose chr number
i <- 7


# id points by hand

Ix <- read.delim("Ix_loci.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)

# # id points by hand
# plot(logP[logP=="chr7","pos"], logP[logP=="chr7","log10p_g_d_Ix"], cex=0.05)
# abline(h= human_thresh_95["log10p_g_d_Ix",], col="red", lwd=0.1)
# points_p2 <- identify(logP[logP=="chr7","pos"], logP[logP=="chr7","log10p_g_d_Ix"], cex=0.5)

# points_p2
# # [1]  2750  8230  8560 12419



# START HERE if do not wish to do hand picking again
points_p2 <- c(2750, 8230, 8560, 12419)


labels_p2 <- merge(logP[logP$Chromosome=="chr7",][points_p2,c("Chromosome","pos")],Ix[,c("Chromosome","pos","wk_conc","log10P","geneSymbol")])
labels_p2 <- labels_p2[labels_p2$wk_conc=="Ix",]
labels_p2 <- labels_p2[order(labels_p2$pos),]
labels_p2$nudge_x <- 0
labels_p2$nudge_y <- 1.5
labels_p2[labels_p2$geneSymbol=="SKAP2","nudge_x"] <- 2.2e-16 # cannot use 0 as 1st vector member because bug in ggplot2, cf https://github.com/tidyverse/ggplot2/issues/2977
labels_p2[labels_p2$geneSymbol=="HGF","nudge_x"] <- -5
labels_p2[labels_p2$geneSymbol=="SEMA3D","nudge_x"] <- 0
labels_p2[labels_p2$geneSymbol=="WASL","nudge_x"] <- 0



balloon_scale <- 0.5 # inflation factor for significant points	
size_point <- 0.1*(1 + balloon_scale*(logP[logP$Chromosome==paste0("chr",i),"log10p_g_d_Ix"]/max(logP[logP$Chromosome==paste0("chr",i),"log10p_g_d_Ix"]))) # scale significant points



p2 <- ggplot() + 
		geom_point(
			data = logP[logP$Chromosome==paste0("chr",i),], 
			size= size_point,
			stroke=0, 
				aes(
					x = pos/1e6, 
					y = log10p_g_d_Ix, 
					color="as.factor(Chromosome)"
					)
				) +
		geom_text(data = labels_p2, aes(x = pos/1e6, y = log10P,label=geneSymbol, fontface = "italic"), nudge_x=labels_p2$nudge_x, nudge_y=labels_p2$nudge_y,  colour = "black", size = 2.5) + # nudge_x and nudge_y gives warning, but seems to work
		# geom_text( aes(x = 18990000/1e6, y = 44.74028), label="IFFO2", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
		# geom_text( aes(x = 51710000/1e6, y = 23.24700), label="OSBPL9", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
		# geom_text( aes(x = 103260000/1e6, y = 54.41426), label="COL11A1", colour = "black", size = 3, nudge_x=12, nudge_y=2) +
		# geom_text( aes(x = 159590000/1e6, y = 20.71389), label="APCS", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
		# geom_text( aes(x = 225540000/1e6, y = 18.84761), label="ENAH", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
		scale_color_manual(values=cb1) +
		theme2 +
		# scale_x_continuous(breaks = breaks, labels = labels) +
		scale_x_continuous() +
		xlab(paste0("Chromosome ", i, " (Mb)")) + 
		ylab(expression('-log'[10]*italic('P'))) +
		geom_hline(yintercept= human_thresh_95["log10p_g_d_Ix",], linetype="solid", color = "red", size=size_hline) +
		labs(subtitle="Ix") #+
		#scale_y_continuous(breaks=seq(0,120,20),limit = c(0, 120))
print(p2)





# --------------- (3) Provide Ix to UCSC genome browser ---------------------



# ############## DO NOT DELETE #######################
# Important if want to reconstruct Ix data for UCSC genome browser


# Ix_ucsc <- logP[,c("Chromosome","pos","log10p_g_d_Ix")]
# Ix_ucsc$posS <- Ix_ucsc$pos + 1
# Ix_ucsc <- Ix_ucsc[,c("Chromosome","pos","posS","log10p_g_d_Ix")]
# Ix_ucsc$pos <- format(Ix_ucsc$pos,scientific=FALSE)
# Ix_ucsc$posS <- format(Ix_ucsc$posS,scientific=FALSE)


# head(Ix_ucsc)
  # # Chromosome       pos      posS log10p_g_d_Ix
# # 1       chr1      5000      5001  2.060848e-09
# # 2       chr1     10000     10001  3.797878e-01
# # 3       chr1     15000     15001  3.445424e-01
# # 4       chr1     20000     20001  1.067595e-01
# # 5       chr1     25000     25001  8.801078e-02
# # 6       chr1     30000     30001  4.181827e-01

# write.table(Ix_ucsc, "Ix_log10P.txt",quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)


# # # Place following header at top of Ix_log10P.txt and use in bedGraph format on ucsc genome browser
# # track type=bedGraph name="-log10P" description="use name of gene zoomed in on" visibility=full color=0,0,255 altColor=255,0,0 priority=20

# # Custom track settings
# # Display mode: full
# # Type of graph: points
# # Track height: 128 pixels
# # Data view scaling: auto-scale to data view
# # Always include zero: ON
# # Vertical viewing range:  min: 0; max: 1000  (range: 0 to 1000) (greyed out)
# # Transform function: Transform data points by: NONE
# # Windowing function: mean
# # Smoothing window: OFF
# # Negate values: not selected
# # Draw y indicator lines: 
# # at y = 0.0: ON at y = 8.4804717939419 ON (corresponds to human_thresh_95.txt, for log10p_g_d_Ix)


# # Configure Image page on ucsc genome browser:
# # image width:	400	pixels
# # label area width:	10	characters	
# # text size: 12


# # Remember to get rid of splicoforms and nc genes by clicking GENCODE v31 bar on left of diagram, if desired.







# ------------------ KHDRBS2_Ix logP vs Gencode, UCSC genome browser ----------------



p3 <- ggdraw() + draw_image(magick::image_read_pdf("KHDRBS2_Ix.pdf", density = 300),scale=1.2) + coord_cartesian(clip = "off") # + draw_label("Paclitaxel", fontface='plain', size=12, x=0.55,y=0.92) + draw_label("D", fontface='bold',x=0.05,y=0.98)





# # ------------------ SLC24A2_Ix logP vs Gencode, UCSC genome browser ----------------



# p6 <- ggdraw() + draw_image(magick::image_read_pdf("SLC24A2_Ix.pdf", density = 300),scale=1.6) + coord_cartesian(clip = "off") # + draw_label("Paclitaxel", fontface='plain', size=12, x=0.55,y=0.92) + draw_label("D", fontface='bold',x=0.05,y=0.98)










# ------------- (4) and (5) Ix line plot copy number changes -----------------------

# cf plot_nb_graph_1.R


gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}



g_unique <- read.delim("growth_loci_unique.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)
d_unique <- read.delim("paclitaxel_loci_unique.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)
Ix <- read.delim("Ix_loci.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)

# genes shared between Ix and g_unique or d_unique

intersect(Ix$geneSymbol,c(g_unique$geneSymbol, d_unique$geneSymbol))
 # [1] "SLC2A5"     "NBPF8"      "PDE4DIP"    "ALK"        "SLC44A4"    "KHDRBS2"    "AC074389.2" "RNF216"     "SEMA3D"     "WASL"       "SLC24A2"    "CEN"       
# [13] "AK6P1"      "RNU6-54P"   "AC092078.2" "CDH13"      "GATAD2A"    "RF00568"


d <- read.delim("paclitaxel_loci.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)
g <- read.delim("growth_loci.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)


# copy number data
RH_human <- read.table("RH_human_gseq.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE)

# Read in and prepare ancillary tables 

cell <- read.table("cell_label_info.txt",sep="\t",stringsAsFactors=FALSE,header=TRUE)
sum_reads <- colSums(RH_human[,5:ncol(RH_human)])
reads <- read.table("RH_pool_human_total_align.txt",sep="\t",stringsAsFactors=FALSE,header=TRUE) # uses mapped human reads, cf human_AIC_1.R


# Selected following genes:



# select Ix genes using:
Ix[order(-Ix$coef_g_d_Ix),c("Chromosome","pos","wk_conc","log10P","geneSymbol","dist","gene_type","sig_coef","coef_g_d_Ix")]



Ix[c(35,3,55,21,30,2),c("Chromosome","pos","wk_conc","log10P","geneSymbol","dist","gene_type","sig_coef","coef_g_d_Ix")]
   # Chromosome      pos wk_conc    log10P geneSymbol dist      gene_type     sig_coef  coef_g_d_Ix
# 35      chr11 11030000      Ix  9.771507 AC111188.1    0         lncRNA  0.005713439  0.005713439
# 3        chr1 40310000      Ix  9.576563     COL9A2    0 protein_coding -0.005598677 -0.005598677
# 55      chr17 10450000      Ix  8.651159       MYH4    0 protein_coding -0.005955006 -0.005955006
# 21       chr6 89180000      Ix 12.063463     GABRR1    0 protein_coding -0.006224130 -0.006224130
# 30       chr9 19690000      Ix 12.363473    SLC24A2    0 protein_coding -0.006407033 -0.006407033
# 2        chr1 37020000      Ix 13.880037      GRIK3    0 protein_coding -0.008177810 -0.008177810

     

genes <- merge(Ix[c(35,3,55,21,30,2),c("Chromosome","pos","wk_conc","log10P","geneSymbol","dist","gene_type","sig_coef","coef_g_d_Ix")],RH_human)





# Prepare for mgcv::gam

genes_l <- reshape(genes[,c(5,11:ncol(genes))], 
  varying = c(colnames(genes[c(11:ncol(genes))])), 
  v.names = "copy",
  timevar = "RH_ID", 
  times = c(colnames(genes[c(11:ncol(genes))])), 
  new.row.names = 1:1e6,
  direction = "long")
  
genes_l$week <- 0
genes_l[grepl("_w0_",genes_l$RH_ID),]$week <- 0
genes_l[grepl("_w1_",genes_l$RH_ID),]$week <- 1
genes_l[grepl("_w2_",genes_l$RH_ID),]$week <- 2
genes_l[grepl("_w3_",genes_l$RH_ID),]$week <- 3
genes_l[grepl("_w4_",genes_l$RH_ID),]$week <- 4
genes_l[grepl("_w6_",genes_l$RH_ID),]$week <- 6

genes_l$conc <- 0
genes_l[grepl("_d0",genes_l$RH_ID),]$conc <- 0
genes_l[grepl("_d8",genes_l$RH_ID),]$conc <- 8
genes_l[grepl("_d25",genes_l$RH_ID),]$conc <- 25
genes_l[grepl("_d75",genes_l$RH_ID),]$conc <- 75

genes_l$pool <- 0
genes_l[grepl("RH1_",genes_l$RH_ID),]$pool <- 1
genes_l[grepl("RH2_",genes_l$RH_ID),]$pool <- 2
genes_l[grepl("RH3_",genes_l$RH_ID),]$pool <- 3
genes_l[grepl("RH4_",genes_l$RH_ID),]$pool <- 4
genes_l[grepl("RH5_",genes_l$RH_ID),]$pool <- 5
genes_l[grepl("RH6_",genes_l$RH_ID),]$pool <- 6



genes_l <- merge(genes_l,cell)
genes_l$sum_reads <- sum_reads[genes_l$RH_ID]
genes_l  <- merge(genes_l,reads[,c(1:5,9)])
colnames(genes_l)[10] <- "total_reads"


genes_l$pool <- as.factor(genes_l$pool)
genes_l$cell <- as.factor(genes_l$cell)

# base gene levels on lm below
# gene_levels <- Ix[c(35,3,55,21,30,2),c("Chromosome","pos","wk_conc","log10P","geneSymbol","dist","gene_type","sig_coef","coef_g_d_Ix")][,c("geneSymbol")]
# genes_l$geneSymbol <- factor(genes_l$geneSymbol,levels=gene_levels)



head(genes_l)
  # week conc pool     RH_ID cell geneSymbol copy id sum_reads total_reads
# 1    0    0    1 RH1_w0_d0    1      GRIK3   37  1  29018449      298433
# 2    0    0    1 RH1_w0_d0    1     COL9A2   50  2  29018449      298433
# 3    0    0    1 RH1_w0_d0    1 AC111188.1   55  3  29018449      298433
# 4    0    0    1 RH1_w0_d0    1       MYH4  174  4  29018449      298433
# 5    0    0    1 RH1_w0_d0    1     GABRR1  121  5  29018449      298433
# 6    0    0    1 RH1_w0_d0    1    SLC24A2   75  6  29018449      298433





genes_l$phat = as.numeric(NA)
genes_l$phat_se = as.numeric(NA)
genes_l$phat_center_g = as.numeric(NA)
genes_l$log_copy_g = as.numeric(NA)
genes_l$phat_center_d = as.numeric(NA)
genes_l$log_copy_d = as.numeric(NA)

for (i in c(1:length(unique(genes_l$id)))) {
m1_nb <- gam(copy ~ week * conc + s(pool, bs = "re") + s(cell, bs = "re") + offset(log(total_reads)), data = genes_l[genes_l$id == i,], family = nb, method = "REML")

genes_l[genes_l$id == i,]$phat <- predict(m1_nb, se.fit=TRUE,type="link")$fit
genes_l[genes_l$id == i,]$phat_se <- predict(m1_nb, se.fit=TRUE,type="link")$se.fit


# omnibus center for growth since all lines originate from week == 0, conc == 0
genes_l[genes_l$id == i,]$phat_center_g <- genes_l[genes_l$id == i,"phat"] - mean(genes_l[genes_l$id == i & genes_l$week == 0 & genes_l$conc == 0,"phat"])
genes_l[genes_l$id == i,]$log_copy_g <- log(genes_l[genes_l$id == i,]$copy) - mean(log(genes_l[genes_l$id == i & genes_l$week==0 & genes_l$conc==0,"copy"]))


# dedicated centers at each level of week for paclitaxel, since each line originates from a different week (1,2,3,4,6)
for (j in unique(genes_l$week)) {
genes_l[genes_l$id == i & genes_l$week == j,]$phat_center_d <- genes_l[genes_l$id == i & genes_l$week == j,"phat"] - mean(genes_l[genes_l$id == i & genes_l$week == j & genes_l$conc == 0,"phat"])
genes_l[genes_l$id == i & genes_l$week == j,]$log_copy_d <- log(genes_l[genes_l$id == i & genes_l$week == j,]$copy) - mean(log(genes_l[genes_l$id == i & genes_l$week == j & genes_l$conc == 0,"copy"]))
	}

}



# replace GENCODE v31 name with GENCODE v32 name
genes_l[genes_l$geneSymbol == "AC111188.1","geneSymbol"] <- c("LINC02752")



head(genes_l)
  # week conc pool     RH_ID cell geneSymbol copy id sum_reads total_reads     phat   phat_se phat_center_g log_copy_g phat_center_d log_copy_d
# 1    0    0    1 RH1_w0_d0    1      GRIK3   37  1  29018449      298433 3.349682 0.3936606    -1.8320905 -1.6088474    -1.8320905 -1.6088474
# 2    0    0    1 RH1_w0_d0    1     COL9A2   50  2  29018449      298433 4.159549 0.3269245    -0.8815671 -1.1269868    -0.8815671 -1.1269868
# 3    0    0    1 RH1_w0_d0    1  LINC02752   55  3  29018449      298433 4.066401 0.2693721    -0.7660599 -0.8335254    -0.7660599 -0.8335254
# 4    0    0    1 RH1_w0_d0    1       MYH4  174  4  29018449      298433 5.158065 0.3056002    -0.2434843 -0.3459674    -0.2434843 -0.3459674
# 5    0    0    1 RH1_w0_d0    1     GABRR1  121  5  29018449      298433 5.074319 0.2928100    -0.3186310 -0.5797884    -0.3186310 -0.5797884
# 6    0    0    1 RH1_w0_d0    1    SLC24A2   75  6  29018449      298433 4.434585 0.2979492    -0.6613996 -0.7792281    -0.6613996 -0.7792281







# calculate growth coefs at 0 nM and 75 nM, so can label graph in order of lm growth slopes.

gene_levels_df <- data.frame(geneSymbol=character(),slope=numeric(),stringsAsFactors=FALSE)


for (i in c(1:length(unique(genes_l$id)))) {
	gene_levels_df[i,"geneSymbol"] <- unique(as.character(genes_l[genes_l$conc==0 & genes_l$id == i,"geneSymbol"]))
	gene_levels_df[i,"slope"] <- coef(summary(lm(log2(exp(1))* phat_center_g ~ 0 + week,data=genes_l[genes_l$conc==0 & genes_l$id == i,])))[,c("Estimate")]	
}


gene_levels <- gene_levels_df[order(-gene_levels_df$slope),c("geneSymbol")]

genes_l$geneSymbol <- factor(genes_l$geneSymbol,levels=gene_levels)




# only define colores_1 once, so that genes have same color (though not same order) in all graphs.
n = length(unique(genes_l$geneSymbol))
colores_1 = gg_color_hue(n)
names(colores_1) <- gene_levels





# chose jitter of zero, as made for a tidier plot
jitter_factor_g <- 0
jitter_g <- data.frame(week=rep(unique(genes_l$week),n),geneSymbol=rep(gene_levels,each=length(unique(genes_l$week))),jitter_g=jitter(rep(unique(genes_l$week),n),jitter_factor_g))
genes_l <- merge(jitter_g,genes_l)



# chose jitter of zero, as made for a tidier plot
jitter_factor_d <- 0
jitter_d <- data.frame(conc=rep(unique(genes_l$conc),n),geneSymbol=rep(gene_levels,each=length(unique(genes_l$conc))),jitter_d=jitter(rep(unique(genes_l$conc),n),jitter_factor_d))
genes_l <- merge(jitter_d,genes_l)



# Use calculated growth coefs at 0 nM, so can label graph in order of lm growth slopes.
# Must do again after merge, which discards gene levels

genes_l$geneSymbol <- factor(genes_l$geneSymbol,levels=gene_levels)








# provide summary tables if wish to adjust CI to sem using group number, N
summary_genes_g <- summarySE(genes_l, measurevar="phat_center_g", groupvars=c("week","geneSymbol","conc"))
summary_genes_d <- summarySE(genes_l, measurevar="phat_center_d", groupvars=c("week","geneSymbol","conc"))

colnames(summary_genes_g)[5] <- "phat_center_g_mean"
colnames(summary_genes_d)[5] <- "phat_center_d_mean"






# use lm to provide line and CI

p4 <- ggplot() + 
			 	theme2 + 
				theme(legend.key=element_blank()) +
				geom_point(
					data=genes_l[genes_l$conc==0,], # combine wk==0,d==0 with d ==75
					shape=1,
					stroke=0.2,
					size=1.0,
					aes(
						x=jitter_g, 
						y=log2(exp(1))*log_copy_g, 
						colour=geneSymbol
						)
					) +
			    geom_smooth(
				    data=genes_l[genes_l$conc==0,], 
				    method = "lm",  
				    	formula=	 y~0+x,
				    aes(
					    	y=log2(exp(1))*phat_center_g,
					    	x=week,
					    group=geneSymbol,
					    colour=geneSymbol#,
					    # fill=geneSymbol
					    ),
				    se=TRUE,
				    level=0.95,
				    size=0.3,
				    fill="grey",
				    alpha=0.1
				    ) +
				scale_color_manual(
					values=colores_1,
					name =NULL, 
					labels=levels(genes_l[genes_l$conc==75,"geneSymbol"])
					) +
				guides(
					shape=FALSE,
					fill=FALSE,
			 		colour = guide_legend(
						 		override.aes = list(
						 		fill=NA,
						 		shape=NA,
						 		size=0.3
						 		),
					 		ncol=1,
					 		byrow=TRUE
					 		)
			 		) +
				theme(
					legend.position = "right", 
			 		legend.title = element_text(size = 9), 
			 		legend.text = element_text(size = 8, face = "italic"),
			 		legend.title.align=0.2,
			 		legend.key.height = unit(0.1, 'lines'), 
			 		legend.margin=margin(t = 0, r = -0.2, b = 0, l = -0.4, unit = "cm")
			 		) +
				scale_x_continuous(breaks = c(0,1,2,3,4,6), labels = c(0,1,2,3,4,6)) +
				# ggtitle("") + 
				xlab("Weeks") + 
				ylab(expression(Delta*log[2]~(Reads))) + 
				labs(subtitle="Ix (Growth 0 nM)")
print(p4)









# calculate growth coefs at 75 nM, so can label graph in order of lm growth slopes.

gene_levels_df <- data.frame(geneSymbol=character(),slope=numeric(),stringsAsFactors=FALSE)


for (i in c(1:length(unique(genes_l$id)))) {
	gene_levels_df[i,"geneSymbol"] <- unique(as.character(genes_l[(genes_l$week==0 | genes_l$conc==75) & genes_l$id == i,c("geneSymbol")]))
	gene_levels_df[i,"slope"] <- coef(summary(lm(log2(exp(1))* phat_center_g ~ 0 + week,data=genes_l[(genes_l$week==0 | genes_l$conc==75) & genes_l$id == i,])))[,c("Estimate")]	
}


gene_levels <- gene_levels_df[order(-gene_levels_df$slope),c("geneSymbol")]

genes_l$geneSymbol <- factor(genes_l$geneSymbol,levels=gene_levels)







# use lm to provide line and CI

p5 <- ggplot() + 
			 	theme2 + 
				theme(legend.key=element_blank()) +
				geom_point(
					data=rbind(genes_l[genes_l$week==0,],genes_l[genes_l$conc==75,]), # combine wk==0,d==0 with d ==75
					shape=1,
					stroke=0.2,
					size=1.0,
					aes(
						x=jitter_g, 
						y=log2(exp(1))*log_copy_g, 
						colour=geneSymbol
						)
					) +
			    geom_smooth(
				    data=rbind(genes_l[genes_l$week==0,],genes_l[genes_l$conc==75,]), 
				    method = "lm",  
				    	formula=	 y~0+x,
				    aes(
					    	y=log2(exp(1))*phat_center_g,
					    	x=week,
					    group=geneSymbol,
					    colour=geneSymbol#,
					    # fill=geneSymbol
					    ),
				    se=TRUE,
				    level=0.95,
				    size=0.3,
				    fill="grey",
				    alpha=0.1
				    ) +
				scale_color_manual(
					values=colores_1,
					name =NULL, 
					labels=levels(genes_l[genes_l$conc==75,"geneSymbol"])
					) +
				guides(
					shape=FALSE,
					fill=FALSE,
			 		colour = guide_legend(
						 		override.aes = list(
						 		fill=NA,
						 		shape=NA,
						 		size=0.3
						 		),
					 		ncol=1,
					 		byrow=TRUE
					 		)
			 		) +
				theme(
					legend.position = "right", 
			 		legend.title = element_text(size = 9), 
			 		legend.text = element_text(size = 8, face = "italic"),
			 		legend.title.align=0.2,
			 		legend.key.height = unit(0.1, 'lines'), 
			 		legend.margin=margin(t = 0, r = -0.2, b = 0, l = -0.4, unit = "cm")
			 		) +
				scale_x_continuous(breaks = c(0,1,2,3,4,6), labels = c(0,1,2,3,4,6)) +
				# ggtitle("") + 
				xlab("Weeks") + 
				ylab(expression(Delta*log[2]~(Reads))) + 
				labs(subtitle="Ix (Growth 75 nM)")
print(p5)







# # Same data as above, but plotting using reads vs paclitaxel conc at week 1 vs week 6.

# # calculate drug coefs at wk 1, so can label graph in order of lm growth slopes.

# gene_levels_df <- data.frame(geneSymbol=character(),slope=numeric(),stringsAsFactors=FALSE)


# for (i in c(1:length(unique(genes_l$id)))) {
	# gene_levels_df[i,"geneSymbol"] <- unique(as.character(genes_l[genes_l$week==1 & genes_l$id == i,c("geneSymbol")]))
	# gene_levels_df[i,"slope"] <- coef(summary(lm(log2(exp(1))* phat_center_d ~ 0 + conc,data=genes_l[genes_l$week==1 & genes_l$id == i,])))[,c("Estimate")]	
# }


# gene_levels <- gene_levels_df[order(-gene_levels_df$slope),c("geneSymbol")]

# genes_l$geneSymbol <- factor(genes_l$geneSymbol,levels=gene_levels)





# # use lm to provide line and CI

# p4_d_wk1 <- ggplot() + 
			 	# theme2 + 
				# theme(legend.key=element_blank()) +
				# geom_point(
					# data=genes_l[genes_l$week==1,],
					# shape=1,
					# stroke=0.2,
					# size=1.0,
					# aes(
						# x=jitter_d, 
						# y=log2(exp(1))*log_copy_d, 
						# colour=geneSymbol
						# )
					# ) +
			    # geom_smooth(
				    # data=genes_l[genes_l$week==1,], 
				    # method = "lm",  
				    	# formula=	 y~0+x,
				    # aes(
					    	# y=log2(exp(1))*phat_center_d,
					    	# x=conc,
					    # group=geneSymbol,
					    # colour=geneSymbol#,
					    # # fill=geneSymbol
					    # ),
				    # se=TRUE,
				    # level=0.95,
				    # size=0.3,
				    # fill="grey",
				    # alpha=0.1
				    # ) +
				# scale_color_manual(
					# values=colores_1,
					# name =NULL, 
					# labels=levels(genes_l[genes_l$week==6,"geneSymbol"])
					# ) +
				# guides(
					# shape=FALSE,
					# fill=FALSE,
			 		# colour = guide_legend(
						 		# override.aes = list(
						 		# fill=NA,
						 		# shape=NA,
						 		# size=0.3
						 		# ),
					 		# ncol=1,
					 		# byrow=TRUE
					 		# )
			 		# ) +
				# theme(
					# legend.position = "right", 
			 		# legend.title = element_text(size = 6), 
			 		# legend.text = element_text(size = 5),
			 		# legend.title.align=0.2,
			 		# legend.key.height = unit(0.1, 'lines'), 
			 		# legend.margin=margin(t = 0, r = -0.2, b = 0, l = -0.4, unit = "cm")
			 		# ) +
				# scale_x_continuous(breaks = c(0,8,25,75), labels = c(0,8,25,75)) +
				# # ggtitle("") + 
				# xlab("Paclitaxel (nM)") + 
				# ylab(expression(Delta*log[2]~(Reads))) + 
				# labs(subtitle="Ix (Paclitaxel wk 1)")
# print(p4_d_wk1)








# # calculate drug coefs at wk 6, so can label graph in order of lm growth slopes.

# gene_levels_df <- data.frame(geneSymbol=character(),slope=numeric(),stringsAsFactors=FALSE)


# for (i in c(1:length(unique(genes_l$id)))) {
	# gene_levels_df[i,"geneSymbol"] <- unique(as.character(genes_l[genes_l$week==6 & genes_l$id == i,c("geneSymbol")]))
	# gene_levels_df[i,"slope"] <- coef(summary(lm(log2(exp(1))* phat_center_d ~ 0 + conc,data=genes_l[genes_l$week==6 & genes_l$id == i,])))[,c("Estimate")]	
# }


# gene_levels <- gene_levels_df[order(-gene_levels_df$slope),c("geneSymbol")]

# genes_l$geneSymbol <- factor(genes_l$geneSymbol,levels=gene_levels)





# # use lm to provide line and CI

# p4_d_wk6 <- ggplot() + 
			 	# theme2 + 
				# theme(legend.key=element_blank()) +
				# geom_point(
					# data=genes_l[genes_l$week==6,],
					# shape=1,
					# stroke=0.2,
					# size=1.0,
					# aes(
						# x=jitter_d, 
						# y=log2(exp(1))*log_copy_d, 
						# colour=geneSymbol
						# )
					# ) +
			    # geom_smooth(
				    # data=genes_l[genes_l$week==6,], 
				    # method = "lm",  
				    	# formula=	 y~0+x,
				    # aes(
					    	# y=log2(exp(1))*phat_center_d,
					    	# x=conc,
					    # group=geneSymbol,
					    # colour=geneSymbol#,
					    # # fill=geneSymbol
					    # ),
				    # se=TRUE,
				    # level=0.95,
				    # size=0.3,
				    # fill="grey",
				    # alpha=0.1
				    # ) +
				# scale_color_manual(
					# values=colores_1,
					# name =NULL, 
					# labels=levels(genes_l[genes_l$week==6,"geneSymbol"])
					# ) +
				# guides(
					# shape=FALSE,
					# fill=FALSE,
			 		# colour = guide_legend(
						 		# override.aes = list(
						 		# fill=NA,
						 		# shape=NA,
						 		# size=0.3
						 		# ),
					 		# ncol=1,
					 		# byrow=TRUE
					 		# )
			 		# ) +
				# theme(
					# legend.position = "right", 
			 		# legend.title = element_text(size = 6), 
			 		# legend.text = element_text(size = 5),
			 		# legend.title.align=0.2,
			 		# legend.key.height = unit(0.1, 'lines'), 
			 		# legend.margin=margin(t = 0, r = -0.2, b = 0, l = -0.4, unit = "cm")
			 		# ) +
				# scale_x_continuous(breaks = c(0,8,25,75), labels = c(0,8,25,75)) +
				# # ggtitle("") + 
				# xlab("Paclitaxel (nM)") + 
				# ylab(expression(Delta*log[2]~(Reads))) + 
				# labs(subtitle="Paclitaxel (wk 6)")
# print(p4_d_wk6)




# ---------------------- (6) 3D plot ---------------------------



gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}

colores_1 <- gg_color_hue(100)		
		


# simplify to one gene


# gene_1 <- genes_l[genes_l$geneSymbol=="AC111188.1",]
gene_1 <- genes_l[genes_l$geneSymbol=="GRIK3",]







m1 <- lm(phat_center_g~0+week*conc,data=gene_1)



grid.lines = 26
x.pred <- seq(min(gene_1$week), max(gene_1$week), length.out = grid.lines)
y.pred <- seq(min(gene_1$conc), max(gene_1$conc), length.out = grid.lines)
xy <- expand.grid( week = x.pred, conc = y.pred)


z.pred <- matrix(predict(m1, newdata = xy), nrow = grid.lines, ncol = grid.lines)

fitpoints <- predict(m1)





# using phat_center_g instead of log_copy_g


pdf("p6.pdf", width=7, height=7, useDingbats = FALSE)
scatter3D(
			x = gene_1$week, 
			y = gene_1$conc,
			z = log2(exp(1))*gene_1$phat_center_g,
			# CI = CI,
			xlab="Weeks", 
			ylab="Paclitaxel (nM)",
			zlab="log2(Reads)",
			theta = 40, # default
			phi = 40, # default
			clab = c("Reads"),
			pch = 16, 
			ticktype = "detailed",
			type = "p", 
			cex = 0.6,
			cex.lab = 2,
			cex.axis=1.8,
			cex.main=3,
			# colvar = as.integer(factor(gene_1$geneSymbol,levels=c("BACH2","DNAJC1"))), 
			col = colores_1,
			# col = gg2.col(100),
			colkey = list(
				at = c(-8,-4,0,4), 
				cex.clab = 1.8,
				cex.axis=2,
				side = 4, 
				# labels = c("BACH2", "DNAJC1"),
				length = 0.3, 
				width = 0.3
				), 
			surf = list(
				x = x.pred, 
				y = y.pred, 
				z = log2(exp(1))*z.pred,
				facets = NA,
				fit = log2(exp(1))*fitpoints
				),
			main = "Ix (GRIK3)",
			bty = "b"
		)

dev.off()


# Change p6.pdf to GRIK3_Ix.pdf. Edit GRIK3_Ix.pdf by hand, if desired, to improve appearance of axis and scale numbers
p6 <- ggdraw() + draw_image(magick::image_read_pdf("GRIK3_Ix.pdf", density = 300),scale=2) + coord_cartesian(clip = "off") # + draw_label("Paclitaxel", fontface='plain', size=12, x=0.55,y=0.92) + draw_label("D", fontface='bold',x=0.05,y=0.98)




# --------------- Combine panels ---------------------


# File size ~ 2.2 Mb

pdf("Ix_loci_chr_1.pdf", width=7.5, height=10, useDingbats = FALSE)
plot_grid(p1, p2, p3,  p4, p5, p6, ncol = 2, nrow = 3, labels=c("A", "B", "C", "D", "E","F"), label_size = 16, align="h")
dev.off()




tiff("Ix_loci_chr_1.tif",width=7.5,height=10,units="in",res=300)
plot_grid(p1, p2, p3,  p4, p5, p6, ncol = 2, nrow = 3, labels=c("A", "B", "C", "D", "E","F"), label_size = 16, align="h")
dev.off()




# if smaller file size required:
# File size ~0.95 Mb

png("Ix_loci_chr_1.png",width=7.5,height=10,units="in",res=300)
plot_grid(p1, p2, p3,  p4, p5, p6, ncol = 2, nrow = 3, labels=c("A", "B", "C", "D", "E","F"), label_size = 16, align="h")
dev.off()





p3 <- ggdraw() + draw_image(magick::image_read_pdf("KHDRBS2_Ix.pdf", density = 1200),scale=1.2) + coord_cartesian(clip = "off") # + draw_label("Paclitaxel", fontface='plain', size=12, x=0.55,y=0.92) + draw_label("D", fontface='bold',x=0.05,y=0.98)
p6 <- ggdraw() + draw_image(magick::image_read_pdf("GRIK3_Ix.pdf", density = 1200),scale=2) + coord_cartesian(clip = "off") # + draw_label("Paclitaxel", fontface='plain', size=12, x=0.55,y=0.92) + draw_label("D", fontface='bold',x=0.05,y=0.98)


pdf("Ix_loci_chr_hi_res_1.pdf", width=7.5, height=10, useDingbats = FALSE)
plot_grid(p1, p2, p3,  p4, p5, p6, ncol = 2, nrow = 3, labels=c("A", "B", "C", "D", "E","F"), label_size = 16, align="h")
dev.off()




png("Ix_loci_chr_hi_res_1.png",width=7.5,height=10,units="in",res=1200)
plot_grid(p1, p2, p3,  p4, p5, p6, ncol = 2, nrow = 3, labels=c("A", "B", "C", "D", "E","F"), label_size = 16, align="h")
dev.off()










