library(ggplot2)
library(cowplot) #used with plot_grid 


#----------------Aesthetics ---------------------------


# Base theme shared by every panel: blank panel (no grid, no background),
# thin black axes and ticks, and no legend unless a plot switches it back on.
theme2 <- theme(
  # outer whitespace around the plot (top, right, bottom, left), in cm
  plot.margin = unit(c(t = 0.5, r = 1, b = 0.5, l = 1), "cm"),

  # strip the panel decorations
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),

  # legend is hidden here; individual plots override legend.position
  legend.position = "none",

  # axes and tick marks drawn as thin black lines
  axis.line.x = element_line(colour = "black", size = 0.1),
  axis.line.y = element_line(colour = "black", size = 0.1),
  axis.ticks = element_line(colour = "black", size = 0.1),

  # tick labels (numbers on the x and y axes)
  axis.text = element_text(size = 12),
  # axis titles
  axis.title = element_text(size = 14),
  # y axis title: 13 units of margin on its right side, towards the axis
  axis.title.y = element_text(margin = margin(t = 0, r = 13, b = 0, l = 0)),
  # x axis title: 10 units of margin on its top side, towards the axis
  axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)),

  # plot.title = element_text(size=32, face="bold", hjust = -0.14), #can provide "A","B", by ggtitle, but used plot_grid which can shift more left

  # centred subtitle (hjust = 0.5) with 13 units of margin below it
  plot.subtitle = element_text(
    size = 14,
    face = "plain",
    hjust = 0.5,
    margin = margin(t = 0, r = 0, b = 13, l = 0)
  )
)

# Return `n` colours evenly spaced around the HCL hue wheel (luminance 65,
# chroma 100), starting at hue 15.
#
# n: number of colours wanted (non-negative whole number).
# Returns a character vector of `n` hex colour strings; character(0) for n = 0.
gg_color_hue <- function(n) {
  # n + 1 hues are generated because 15 and 375 are the same hue on the wheel;
  # the duplicate end point is dropped below.
  hues <- seq(15, 375, length.out = n + 1)
  # seq_len(n) is empty for n = 0, whereas 1:n would be c(1, 0) and wrongly
  # return one colour.
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}

# Line width used for the horizontal reference lines (geom_hline) in every
# plot below, and for their legend keys.
size_hline <- 0.1


# Unused (kept for reference): scaling of point sizes for significant points.
# balloon_scale <- 5 # inflation factor for significant points	
# size_point <- 0.1*(1 + balloon_scale*(logP$log10p_g_avg/max(logP$log10p_g_avg))) # scale significant points



# y position of the nominal P = 0.05 horizontal line on the -log10(P) axis
p_line <- -log10(0.05)



#------------- sabbatini_crispr_KBM7_fish_ovlap (1) ------------

# crisp.txt is from Sabbatini supp data file aac7041_SM_Table_S3.xlsx

# Load the CRISPR scores and the list of RH growth loci genes.
crisp <- read.table("crisp.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
g_unique <- read.delim(
  "growth_loci_unique.txt",
  sep = "\t", stringsAsFactors = FALSE, header = TRUE, check.names = FALSE
)

# RH is 1 for genes whose symbol occurs in g_unique$geneSymbol, 0 otherwise.
# The flag is built as a whole column because `crisp[mask, ]$RH <- 1` stops
# with "replacement has 1 row, data has 0" when no gene matches.
crisp$RH <- as.numeric(crisp$Gene %in% g_unique$geneSymbol)

# Range of -CS over which the threshold is scanned (NA scores are ignored).
KBM7.CS_max <- max(-crisp$KBM7.CS, na.rm = TRUE)
KBM7.CS_min <- min(-crisp$KBM7.CS, na.rm = TRUE)

# Evenly spaced thresholds from the minimum to the maximum in steps of 1/100
# of the range.
vec <- seq(KBM7.CS_min, KBM7.CS_max, (KBM7.CS_max - KBM7.CS_min) / 100)

# Loop-invariant pieces of the row masks.
neg_cs <- -crisp$KBM7.CS
has_score <- !is.na(neg_cs)
in_rh <- crisp$RH == 1

# One row per threshold, preallocated instead of growing a data frame one
# cell at a time.
overlap_cols <- c(
  "minus_KBM7.CS_thres", # col 1: threshold on -CS
  "crisp_rh",            # col 2: -CS above threshold, RH gene
  "nocrisp_rh",          # col 3: -CS below threshold, RH gene
  "crisp_norh",          # col 4: -CS above threshold, not an RH gene
  "nocrisp_norh",        # col 5: -CS below threshold, not an RH gene
  "obs",                 # col 6: observed count in cell [1,1]
  "exp",                 # col 7: expected count in cell [1,1]
  "chi_sq",              # col 8
  "chi_df",              # col 9
  "P_chi",               # col 10
  "fish_OR",             # col 11
  "fish_conf_1",         # col 12
  "fish_conf_2",         # col 13
  "P_fish"               # col 14
)
overlap <- matrix(
  NA_real_,
  nrow = length(vec), ncol = length(overlap_cols),
  dimnames = list(NULL, overlap_cols)
)

for (i in seq_along(vec)) {
  # NOTE(review): both comparisons are strict, so genes whose -CS equals the
  # threshold exactly are counted in neither column; kept as in the original.
  above <- has_score & neg_cs > vec[i]
  below <- has_score & neg_cs < vec[i]

  counts <- c(
    sum(above & in_rh), sum(below & in_rh),
    sum(above & !in_rh), sum(below & !in_rh)
  )
  tab <- matrix(counts, 2, 2, byrow = TRUE)

  chi <- chisq.test(tab)
  fish <- fisher.test(tab)

  overlap[i, ] <- c(
    vec[i], counts,
    chi$observed[1, 1], chi$expected[1, 1],
    chi$statistic, chi$parameter[[1]], chi$p.value,
    fish$estimate[[1]], fish$conf.int[[1]], fish$conf.int[[2]], fish$p.value
  )
}

crisp_KBM7.CS_RH_overlap <- as.data.frame(overlap)

# Benjamini-Hochberg adjustment across the thresholds.
crisp_KBM7.CS_RH_overlap$q_fish <- p.adjust(crisp_KBM7.CS_RH_overlap$P_fish, method = "BH")

# Largest raw P value that still has q < 0.05; NA (no line drawn) when no
# threshold passes, instead of max() returning -Inf with a warning.
fdr_hits <- crisp_KBM7.CS_RH_overlap$P_fish[crisp_KBM7.CS_RH_overlap$q_fish < 0.05]
h_line_1 <- if (length(fdr_hits) > 0) max(fdr_hits) else NA_real_


p1 <- ggplot() +
  theme2 +
  theme(legend.key = element_blank()) +
  geom_line(
    data = crisp_KBM7.CS_RH_overlap,
    lwd = 0.2,
    colour = "black",
    show.legend = FALSE,
    # columns are referenced by bare name so they are looked up in the layer data
    aes(
      x = minus_KBM7.CS_thres,
      y = -log10(P_fish)
    )
  ) +
  # FDR = 0.05 reference line
  geom_hline(
    color = "red",
    size = size_hline,
    aes(
      yintercept = -log10(h_line_1),
      linetype = "a"
    )
  ) +
  # nominal P = 0.05 reference line
  geom_hline(
    color = "blue",
    size = size_hline,
    aes(
      yintercept = p_line,
      linetype = "b"
    )
  ) +
  # the linetype scale exists only to produce a legend for the two lines
  scale_linetype_manual(
    name = NULL,
    labels = c(expression(FDR == 0.05), expression(italic(P) == 0.05)),
    values = c("solid", "dashed"),
    guide = guide_legend(
      override.aes = list(
        color = c("red", "blue"),
        size = c(size_hline, size_hline)
      )
    )
  ) +
  theme(
    legend.position = c(0.8, 0.9),
    legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(0.4, "cm"),
    legend.spacing.y = unit(0.3, "cm"),
    legend.spacing.x = unit(0.2, "cm"),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.title.align = 0.7,
    legend.text.align = 0
  ) +
  labs(subtitle = "KBM7") +
  xlab("-CS") +
  # scale_x_continuous(breaks = c(0,1,2,3,4,6), labels = c(0,1,2,3,4,6)) +
  ylab(expression('-log'[10]*italic('P')))
print(p1)



#------------- sabbatini_crispr_Raji_fish_ovlap (2) ------------

# crisp.txt is from Sabbatini supp data file aac7041_SM_Table_S3.xlsx

# Load the CRISPR scores and the list of RH growth loci genes.
crisp <- read.table("crisp.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
g_unique <- read.delim(
  "growth_loci_unique.txt",
  sep = "\t", stringsAsFactors = FALSE, header = TRUE, check.names = FALSE
)

# RH is 1 for genes whose symbol occurs in g_unique$geneSymbol, 0 otherwise.
# The flag is built as a whole column because `crisp[mask, ]$RH <- 1` stops
# with "replacement has 1 row, data has 0" when no gene matches.
crisp$RH <- as.numeric(crisp$Gene %in% g_unique$geneSymbol)

# Range of -CS over which the threshold is scanned (NA scores are ignored).
Raji.CS_max <- max(-crisp$Raji.CS, na.rm = TRUE)
Raji.CS_min <- min(-crisp$Raji.CS, na.rm = TRUE)

# Evenly spaced thresholds from the minimum to the maximum in steps of 1/100
# of the range.
vec <- seq(Raji.CS_min, Raji.CS_max, (Raji.CS_max - Raji.CS_min) / 100)

# Loop-invariant pieces of the row masks.
neg_cs <- -crisp$Raji.CS
has_score <- !is.na(neg_cs)
in_rh <- crisp$RH == 1

# One row per threshold, preallocated instead of growing a data frame one
# cell at a time.
overlap_cols <- c(
  "minus_Raji.CS_thres", # col 1: threshold on -CS
  "crisp_rh",            # col 2: -CS above threshold, RH gene
  "nocrisp_rh",          # col 3: -CS below threshold, RH gene
  "crisp_norh",          # col 4: -CS above threshold, not an RH gene
  "nocrisp_norh",        # col 5: -CS below threshold, not an RH gene
  "obs",                 # col 6: observed count in cell [1,1]
  "exp",                 # col 7: expected count in cell [1,1]
  "chi_sq",              # col 8
  "chi_df",              # col 9
  "P_chi",               # col 10
  "fish_OR",             # col 11
  "fish_conf_1",         # col 12
  "fish_conf_2",         # col 13
  "P_fish"               # col 14
)
overlap <- matrix(
  NA_real_,
  nrow = length(vec), ncol = length(overlap_cols),
  dimnames = list(NULL, overlap_cols)
)

for (i in seq_along(vec)) {
  # NOTE(review): both comparisons are strict, so genes whose -CS equals the
  # threshold exactly are counted in neither column; kept as in the original.
  above <- has_score & neg_cs > vec[i]
  below <- has_score & neg_cs < vec[i]

  counts <- c(
    sum(above & in_rh), sum(below & in_rh),
    sum(above & !in_rh), sum(below & !in_rh)
  )
  tab <- matrix(counts, 2, 2, byrow = TRUE)

  chi <- chisq.test(tab)
  fish <- fisher.test(tab)

  overlap[i, ] <- c(
    vec[i], counts,
    chi$observed[1, 1], chi$expected[1, 1],
    chi$statistic, chi$parameter[[1]], chi$p.value,
    fish$estimate[[1]], fish$conf.int[[1]], fish$conf.int[[2]], fish$p.value
  )
}

crisp_Raji.CS_RH_overlap <- as.data.frame(overlap)

# Benjamini-Hochberg adjustment across the thresholds.
crisp_Raji.CS_RH_overlap$q_fish <- p.adjust(crisp_Raji.CS_RH_overlap$P_fish, method = "BH")

# Largest raw P value that still has q < 0.05; NA (no line drawn) when no
# threshold passes, instead of max() returning -Inf with a warning.
fdr_hits <- crisp_Raji.CS_RH_overlap$P_fish[crisp_Raji.CS_RH_overlap$q_fish < 0.05]
h_line_2 <- if (length(fdr_hits) > 0) max(fdr_hits) else NA_real_


p2 <- ggplot() +
  theme2 +
  theme(legend.key = element_blank()) +
  geom_line(
    data = crisp_Raji.CS_RH_overlap,
    lwd = 0.2,
    colour = "black",
    show.legend = FALSE,
    aes(
      x = minus_Raji.CS_thres,
      y = -log10(P_fish)
    )
  ) +
  # FDR = 0.05 reference line
  geom_hline(
    color = "red",
    size = size_hline,
    aes(
      yintercept = -log10(h_line_2),
      linetype = "a"
    )
  ) +
  # nominal P = 0.05 reference line
  geom_hline(
    color = "blue",
    size = size_hline,
    aes(
      yintercept = p_line,
      linetype = "b"
    )
  ) +
  # the linetype scale exists only to produce a legend for the two lines
  scale_linetype_manual(
    name = NULL,
    labels = c(expression(FDR == 0.05), expression(italic(P) == 0.05)),
    values = c("solid", "dashed"),
    guide = guide_legend(
      override.aes = list(
        color = c("red", "blue"),
        size = c(size_hline, size_hline)
      )
    )
  ) +
  theme(
    legend.position = c(0.8, 0.9),
    legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(0.4, "cm"),
    legend.spacing.y = unit(0.3, "cm"),
    legend.spacing.x = unit(0.2, "cm"),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.title.align = 0.7,
    legend.text.align = 0
  ) +
  labs(subtitle = "Raji") +
  xlab("-CS") +
  # scale_x_continuous(breaks = c(0,1,2,3,4,6), labels = c(0,1,2,3,4,6)) +
  ylab(expression('-log'[10]*italic('P')))
print(p2)



#------------- sabbatini_crispr_Jiyoye_fish_ovlap (3) ------------

# crisp.txt is from Sabbatini supp data file aac7041_SM_Table_S3.xlsx

# Load the CRISPR scores and the list of RH growth loci genes.
crisp <- read.table("crisp.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
g_unique <- read.delim(
  "growth_loci_unique.txt",
  sep = "\t", stringsAsFactors = FALSE, header = TRUE, check.names = FALSE
)

# RH is 1 for genes whose symbol occurs in g_unique$geneSymbol, 0 otherwise.
# The flag is built as a whole column because `crisp[mask, ]$RH <- 1` stops
# with "replacement has 1 row, data has 0" when no gene matches.
crisp$RH <- as.numeric(crisp$Gene %in% g_unique$geneSymbol)

# Range of -CS over which the threshold is scanned (NA scores are ignored).
Jiyoye.CS_max <- max(-crisp$Jiyoye.CS, na.rm = TRUE)
Jiyoye.CS_min <- min(-crisp$Jiyoye.CS, na.rm = TRUE)

# Evenly spaced thresholds from the minimum to the maximum in steps of 1/100
# of the range.
vec <- seq(Jiyoye.CS_min, Jiyoye.CS_max, (Jiyoye.CS_max - Jiyoye.CS_min) / 100)

# Loop-invariant pieces of the row masks.
neg_cs <- -crisp$Jiyoye.CS
has_score <- !is.na(neg_cs)
in_rh <- crisp$RH == 1

# One row per threshold, preallocated instead of growing a data frame one
# cell at a time.
overlap_cols <- c(
  "minus_Jiyoye.CS_thres", # col 1: threshold on -CS
  "crisp_rh",              # col 2: -CS above threshold, RH gene
  "nocrisp_rh",            # col 3: -CS below threshold, RH gene
  "crisp_norh",            # col 4: -CS above threshold, not an RH gene
  "nocrisp_norh",          # col 5: -CS below threshold, not an RH gene
  "obs",                   # col 6: observed count in cell [1,1]
  "exp",                   # col 7: expected count in cell [1,1]
  "chi_sq",                # col 8
  "chi_df",                # col 9
  "P_chi",                 # col 10
  "fish_OR",               # col 11
  "fish_conf_1",           # col 12
  "fish_conf_2",           # col 13
  "P_fish"                 # col 14
)
overlap <- matrix(
  NA_real_,
  nrow = length(vec), ncol = length(overlap_cols),
  dimnames = list(NULL, overlap_cols)
)

for (i in seq_along(vec)) {
  # NOTE(review): both comparisons are strict, so genes whose -CS equals the
  # threshold exactly are counted in neither column; kept as in the original.
  above <- has_score & neg_cs > vec[i]
  below <- has_score & neg_cs < vec[i]

  counts <- c(
    sum(above & in_rh), sum(below & in_rh),
    sum(above & !in_rh), sum(below & !in_rh)
  )
  tab <- matrix(counts, 2, 2, byrow = TRUE)

  chi <- chisq.test(tab)
  fish <- fisher.test(tab)

  overlap[i, ] <- c(
    vec[i], counts,
    chi$observed[1, 1], chi$expected[1, 1],
    chi$statistic, chi$parameter[[1]], chi$p.value,
    fish$estimate[[1]], fish$conf.int[[1]], fish$conf.int[[2]], fish$p.value
  )
}

crisp_Jiyoye.CS_RH_overlap <- as.data.frame(overlap)

# Benjamini-Hochberg adjustment across the thresholds.
crisp_Jiyoye.CS_RH_overlap$q_fish <- p.adjust(crisp_Jiyoye.CS_RH_overlap$P_fish, method = "BH")

# Largest raw P value that still has q < 0.05; NA (no line drawn) when no
# threshold passes, instead of max() returning -Inf with a warning.
fdr_hits <- crisp_Jiyoye.CS_RH_overlap$P_fish[crisp_Jiyoye.CS_RH_overlap$q_fish < 0.05]
h_line_3 <- if (length(fdr_hits) > 0) max(fdr_hits) else NA_real_


p3 <- ggplot() +
  theme2 +
  theme(legend.key = element_blank()) +
  geom_line(
    data = crisp_Jiyoye.CS_RH_overlap,
    lwd = 0.2,
    colour = "black",
    show.legend = FALSE,
    # columns are referenced by bare name so they are looked up in the layer data
    aes(
      x = minus_Jiyoye.CS_thres,
      y = -log10(P_fish)
    )
  ) +
  # FDR = 0.05 reference line
  geom_hline(
    color = "red",
    size = size_hline,
    aes(
      yintercept = -log10(h_line_3),
      linetype = "a"
    )
  ) +
  # nominal P = 0.05 reference line
  geom_hline(
    color = "blue",
    size = size_hline,
    aes(
      yintercept = p_line,
      linetype = "b"
    )
  ) +
  # the linetype scale exists only to produce a legend for the two lines
  scale_linetype_manual(
    name = NULL,
    labels = c(expression(FDR == 0.05), expression(italic(P) == 0.05)),
    values = c("solid", "dashed"),
    guide = guide_legend(
      override.aes = list(
        color = c("red", "blue"),
        size = c(size_hline, size_hline)
      )
    )
  ) +
  theme(
    legend.position = c(0.8, 0.9),
    legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(0.4, "cm"),
    legend.spacing.y = unit(0.3, "cm"),
    legend.spacing.x = unit(0.2, "cm"),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.title.align = 0.7,
    legend.text.align = 0
  ) +
  labs(subtitle = "Jiyoye") +
  xlab("-CS") +
  # scale_x_continuous(breaks = c(0,1,2,3,4,6), labels = c(0,1,2,3,4,6)) +
  ylab(expression('-log'[10]*italic('P')))
print(p3)



#------------- sabbatini_crispr_K562_fish_ovlap (4) ------------

# crisp.txt is from Sabbatini supp data file aac7041_SM_Table_S3.xlsx

# Load the CRISPR scores and the list of RH growth loci genes.
crisp <- read.table("crisp.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
g_unique <- read.delim(
  "growth_loci_unique.txt",
  sep = "\t", stringsAsFactors = FALSE, header = TRUE, check.names = FALSE
)

# RH is 1 for genes whose symbol occurs in g_unique$geneSymbol, 0 otherwise.
# The flag is built as a whole column because `crisp[mask, ]$RH <- 1` stops
# with "replacement has 1 row, data has 0" when no gene matches.
crisp$RH <- as.numeric(crisp$Gene %in% g_unique$geneSymbol)

# Range of -CS over which the threshold is scanned (NA scores are ignored).
K562.CS_max <- max(-crisp$K562.CS, na.rm = TRUE)
K562.CS_min <- min(-crisp$K562.CS, na.rm = TRUE)

# Evenly spaced thresholds from the minimum to the maximum in steps of 1/100
# of the range.
vec <- seq(K562.CS_min, K562.CS_max, (K562.CS_max - K562.CS_min) / 100)

# Loop-invariant pieces of the row masks.
neg_cs <- -crisp$K562.CS
has_score <- !is.na(neg_cs)
in_rh <- crisp$RH == 1

# One row per threshold, preallocated instead of growing a data frame one
# cell at a time.
overlap_cols <- c(
  "minus_K562.CS_thres", # col 1: threshold on -CS
  "crisp_rh",            # col 2: -CS above threshold, RH gene
  "nocrisp_rh",          # col 3: -CS below threshold, RH gene
  "crisp_norh",          # col 4: -CS above threshold, not an RH gene
  "nocrisp_norh",        # col 5: -CS below threshold, not an RH gene
  "obs",                 # col 6: observed count in cell [1,1]
  "exp",                 # col 7: expected count in cell [1,1]
  "chi_sq",              # col 8
  "chi_df",              # col 9
  "P_chi",               # col 10
  "fish_OR",             # col 11
  "fish_conf_1",         # col 12
  "fish_conf_2",         # col 13
  "P_fish"               # col 14
)
overlap <- matrix(
  NA_real_,
  nrow = length(vec), ncol = length(overlap_cols),
  dimnames = list(NULL, overlap_cols)
)

for (i in seq_along(vec)) {
  # NOTE(review): both comparisons are strict, so genes whose -CS equals the
  # threshold exactly are counted in neither column; kept as in the original.
  above <- has_score & neg_cs > vec[i]
  below <- has_score & neg_cs < vec[i]

  counts <- c(
    sum(above & in_rh), sum(below & in_rh),
    sum(above & !in_rh), sum(below & !in_rh)
  )
  tab <- matrix(counts, 2, 2, byrow = TRUE)

  chi <- chisq.test(tab)
  fish <- fisher.test(tab)

  overlap[i, ] <- c(
    vec[i], counts,
    chi$observed[1, 1], chi$expected[1, 1],
    chi$statistic, chi$parameter[[1]], chi$p.value,
    fish$estimate[[1]], fish$conf.int[[1]], fish$conf.int[[2]], fish$p.value
  )
}

crisp_K562.CS_RH_overlap <- as.data.frame(overlap)

# Benjamini-Hochberg adjustment across the thresholds.
crisp_K562.CS_RH_overlap$q_fish <- p.adjust(crisp_K562.CS_RH_overlap$P_fish, method = "BH")

# Largest raw P value that still has q < 0.05; NA (no line drawn) when no
# threshold passes, instead of max() returning -Inf with a warning.
fdr_hits <- crisp_K562.CS_RH_overlap$P_fish[crisp_K562.CS_RH_overlap$q_fish < 0.05]
h_line_4 <- if (length(fdr_hits) > 0) max(fdr_hits) else NA_real_


p4 <- ggplot() +
  theme2 +
  theme(legend.key = element_blank()) +
  geom_line(
    data = crisp_K562.CS_RH_overlap,
    lwd = 0.2,
    colour = "black",
    show.legend = FALSE,
    # columns are referenced by bare name so they are looked up in the layer data
    aes(
      x = minus_K562.CS_thres,
      y = -log10(P_fish)
    )
  ) +
  # FDR = 0.05 reference line
  geom_hline(
    color = "red",
    size = size_hline,
    aes(
      yintercept = -log10(h_line_4),
      linetype = "a"
    )
  ) +
  # nominal P = 0.05 reference line
  geom_hline(
    color = "blue",
    size = size_hline,
    aes(
      yintercept = p_line,
      linetype = "b"
    )
  ) +
  # the linetype scale exists only to produce a legend for the two lines
  scale_linetype_manual(
    name = NULL,
    labels = c(expression(FDR == 0.05), expression(italic(P) == 0.05)),
    values = c("solid", "dashed"),
    guide = guide_legend(
      override.aes = list(
        color = c("red", "blue"),
        size = c(size_hline, size_hline)
      )
    )
  ) +
  theme(
    legend.position = c(0.8, 0.9),
    legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(0.4, "cm"),
    legend.spacing.y = unit(0.3, "cm"),
    legend.spacing.x = unit(0.2, "cm"),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.title.align = 0.7,
    legend.text.align = 0
  ) +
  labs(subtitle = "K562") +
  xlab("-CS") +
  # scale_x_continuous(breaks = c(0,1,2,3,4,6), labels = c(0,1,2,3,4,6)) +
  ylab(expression('-log'[10]*italic('P')))
print(p4)


# #------------- sabbatini_gene_trap_KBM7_fish_ovlap (5) ------------

# # gene_trap.txt is from Sabbatini supp data file aac7041_SM_Table_S4.xlsx

# gt <- read.table("gene_trap.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE)
# gt <- gt[,c("Symbol","GTS")]

# # convert GTS for easier interpretation
# gt$KBM7.GTS <- 1-gt$GTS

# g_unique <- read.delim("growth_loci_unique.txt",sep="\t",stringsAsFactors=FALSE,header=TRUE,check.names=FALSE)
# gt$RH <- numeric(nrow(gt))
# gt[gt$Symbol %in% g_unique$geneSymbol,"RH"] <- 1



# gt_KBM7.GTS_RH_overlap <- data.frame(
# minus_KBM7.GTS_thres = numeric(), 	#col 1
# gt_rh=numeric(), 				#col 2
# nogt_rh=numeric(), 			#col 3
# gt_norh=numeric(), 			#col 4
# nogt_norh=numeric(), 			#col 5
# obs=numeric(), 					#col 6
# exp=numeric(), 					#col 7
# chi_sq=numeric(),				#col 8
# chi_df=numeric(),				#col 9
# P_chi=numeric(),				#col 10
# fish_OR=numeric(),				#col 11
# fish_conf_1=numeric(),			#col 12
# fish_conf_2=numeric(),			#col 13RH
# P_fish=numeric()				#col 14
# )


# KBM7.GTS_max <- gt[which.max(gt$KBM7.GTS),]$KBM7.GTS
# KBM7.GTS_min <- gt[which.min(gt$KBM7.GTS),]$KBM7.GTS

# vec <- seq(KBM7.GTS_min, KBM7.GTS_max,(KBM7.GTS_max-KBM7.GTS_min)/100)


# for(i in 1:101) {

# gt_KBM7.GTS_RH_overlap[i,1] <- vec[i]
# gt_KBM7.GTS_RH_overlap[i,2] <- dim(gt[gt$KBM7.GTS > vec[i] & !is.na(gt$KBM7.GTS) & gt$RH == 1,])[1]
# gt_KBM7.GTS_RH_overlap[i,3] <- dim(gt[gt$KBM7.GTS < vec[i] & !is.na(gt$KBM7.GTS) & gt$RH == 1,])[1]
# gt_KBM7.GTS_RH_overlap[i,4] <- dim(gt[gt$KBM7.GTS > vec[i] & !is.na(gt$KBM7.GTS) & gt$RH == 0,])[1]
# gt_KBM7.GTS_RH_overlap[i,5] <- dim(gt[gt$KBM7.GTS < vec[i] & !is.na(gt$KBM7.GTS) & gt$RH == 0,])[1]

# chi <- chisq.test(matrix(c(gt_KBM7.GTS_RH_overlap[i,2],gt_KBM7.GTS_RH_overlap[i,3],gt_KBM7.GTS_RH_overlap[i,4], gt_KBM7.GTS_RH_overlap[i,5]),2,2,byrow=TRUE))
# fish <- fisher.test(matrix(c(gt_KBM7.GTS_RH_overlap[i,2],gt_KBM7.GTS_RH_overlap[i,3],gt_KBM7.GTS_RH_overlap[i,4], gt_KBM7.GTS_RH_overlap[i,5]),2,2,byrow=TRUE))

# gt_KBM7.GTS_RH_overlap[i,6] <- chi$observed[1,1]
# gt_KBM7.GTS_RH_overlap[i,7] <- chi$expected[1,1]
# gt_KBM7.GTS_RH_overlap[i,8] <- chi$statistic
# gt_KBM7.GTS_RH_overlap[i,9] <- chi$parameter[[1]]
# gt_KBM7.GTS_RH_overlap[i,10] <- chi$p.value
# gt_KBM7.GTS_RH_overlap[i,11] <- fish$estimate[[1]]
# gt_KBM7.GTS_RH_overlap[i,12] <- fish$conf.int[[1]]
# gt_KBM7.GTS_RH_overlap[i,13] <- fish$conf.int[[2]]
# gt_KBM7.GTS_RH_overlap[i,14] <- fish$p.value

# }

# gt_KBM7.GTS_RH_overlap$q_fish <- p.adjust(gt_KBM7.GTS_RH_overlap$P_fish, method = "BH")
# h_line_5 <- max(gt_KBM7.GTS_RH_overlap[gt_KBM7.GTS_RH_overlap$q_fish < 0.05,]$P_fish)





# p5 <- ggplot() + 
		# theme2 + 
		# theme(legend.key=element_blank()) +
		# geom_line(
			# data=gt_KBM7.GTS_RH_overlap, 
			# lwd=0.2,
			# colour="black",
			# show.legend=FALSE,
			# aes(
				# x=minus_KBM7.GTS_thres, 
				# y= -log10(gt_KBM7.GTS_RH_overlap$P_fish)
				# )
			# ) +
		# geom_hline(
			# color = "red", 
			# size=size_hline,
			# aes(
				# yintercept = -log10(h_line_5), 
				# linetype = "FDR = 0.05"
				# )
			# ) +
		# scale_linetype_manual(
			# name = NULL, 
			# values = c(1), 
			# guide = guide_legend(
						# override.aes = list(
											# color = c("red"),
											# size=size_hline
											# )
							# )
			# ) +
		# theme(
		 	# plot.margin = unit(c(1.2,1.5,1.2,0.9), "cm"),
	 		# legend.position = c(0.8,0.9), 
			# legend.margin=margin(t=0,r=0,b=0,l=0,unit = "pt"),
		 	# legend.box.margin=margin(t=0,r=0,b=0,l=0,unit = "pt"),
		 	# legend.key.height = unit(0.6, "cm"),
			# legend.key.width = unit(0.3, "cm"),
		 	# legend.spacing.y = unit(0.6, 'cm'),
		 	# legend.spacing.x = unit(0.3, 'cm'),
		 	# legend.title = element_text(size = 11),  
		 	# legend.text = element_text(size = 10),
	 		# legend.title.align=0.7
 			# ) +
		# labs(subtitle="KBM7 gene trap") +
		# xlab("1-GTS") + 
		# # scale_x_continuous(breaks = c(0,1,2,3,4,6), labels = c(0,1,2,3,4,6)) + 
		# ylab(expression('-log'[10]*italic('P'))) 
# print(p5)



# --------------- CRISPRi overlap (6) -------------------

# crisprai.txt is from Weissman supp data file 1-s2.0-S0092867414011787-mmc3.xlsx

# Load the CRISPRi/a table (first line of the file skipped) and keep the gene
# column plus the two gamma columns (file columns 1, 2 and 4).
crisprai <- read.table(
  "crisprai.txt",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE, skip = 1, check.names = FALSE
)
crisprai <- crisprai[, c(1, 2, 4)]
colnames(crisprai) <- c("Gene", "i_gamma", "a_gamma")


# Show the row the original author recorded as the CTRL entry, which carries a
# non-numeric "err" value.
# NOTE(review): the recorded output below lists the columns in a different
# order from the names assigned above; confirm which column holds "err".
crisprai[3257, ]
     # Gene a_gamma    i_gamma
# 3257 CTRL     err 0.00700599

# get rid of CTRL:
crisprai <- crisprai[crisprai$Gene != "CTRL", ]

crisprai$i_gamma <- as.numeric(crisprai$i_gamma)

g_unique <- read.delim(
  "growth_loci_unique.txt",
  sep = "\t", stringsAsFactors = FALSE, header = TRUE, check.names = FALSE
)

# RH is 1 for genes whose symbol occurs in g_unique$geneSymbol, 0 otherwise.
crisprai$RH <- as.numeric(crisprai$Gene %in% g_unique$geneSymbol)

# Range of -gamma over which the threshold is scanned (NA values are ignored).
crispri_max <- max(-crisprai$i_gamma, na.rm = TRUE)
crispri_min <- min(-crisprai$i_gamma, na.rm = TRUE)

# Evenly spaced thresholds from the minimum to the maximum in steps of 1/100
# of the range.
vec <- seq(crispri_min, crispri_max, (crispri_max - crispri_min) / 100)

# Loop-invariant pieces of the row masks.
neg_gamma <- -crisprai$i_gamma
has_score <- !is.na(neg_gamma)
in_rh <- crisprai$RH == 1

# One row per threshold, preallocated instead of growing a data frame one
# cell at a time.
overlap_cols <- c(
  "minus_crispri_thres", # col 1: threshold on -gamma
  "crispri_rh",          # col 2: -gamma above threshold, RH gene
  "nocrispri_rh",        # col 3: -gamma below threshold, RH gene
  "crispri_norh",        # col 4: -gamma above threshold, not an RH gene
  "nocrispri_norh",      # col 5: -gamma below threshold, not an RH gene
  "obs",                 # col 6: observed count in cell [1,1]
  "exp",                 # col 7: expected count in cell [1,1]
  "chi_sq",              # col 8
  "chi_df",              # col 9
  "P_chi",               # col 10
  "fish_OR",             # col 11
  "fish_conf_1",         # col 12
  "fish_conf_2",         # col 13
  "P_fish"               # col 14
)
overlap <- matrix(
  NA_real_,
  nrow = length(vec), ncol = length(overlap_cols),
  dimnames = list(NULL, overlap_cols)
)

for (i in seq_along(vec)) {
  # NOTE(review): both comparisons are strict, so genes whose -gamma equals the
  # threshold exactly are counted in neither column; kept as in the original.
  above <- has_score & neg_gamma > vec[i]
  below <- has_score & neg_gamma < vec[i]

  counts <- c(
    sum(above & in_rh), sum(below & in_rh),
    sum(above & !in_rh), sum(below & !in_rh)
  )
  tab <- matrix(counts, 2, 2, byrow = TRUE)

  chi <- chisq.test(tab)
  fish <- fisher.test(tab)

  overlap[i, ] <- c(
    vec[i], counts,
    chi$observed[1, 1], chi$expected[1, 1],
    chi$statistic, chi$parameter[[1]], chi$p.value,
    fish$estimate[[1]], fish$conf.int[[1]], fish$conf.int[[2]], fish$p.value
  )
}

crispri_RH_overlap <- as.data.frame(overlap)

# Benjamini-Hochberg adjustment across the thresholds.
crispri_RH_overlap$q_fish <- p.adjust(crispri_RH_overlap$P_fish, method = "BH")

# Largest raw P value that still has q < 0.05; NA (no line drawn) when no
# threshold passes, instead of max() returning -Inf with a warning.
fdr_hits <- crispri_RH_overlap$P_fish[crispri_RH_overlap$q_fish < 0.05]
h_line_6 <- if (length(fdr_hits) > 0) max(fdr_hits) else NA_real_

# aes() expressions are evaluated each time the plot is built, not when it is
# defined. The FDR line position is therefore stored in the layer's own data so
# that a later reassignment of h_line_6 cannot move the line when p6 is rebuilt
# (for example inside plot_grid()).
fdr_line_6 <- data.frame(y = -log10(h_line_6))


p6 <- ggplot() +
  theme2 +
  theme(legend.key = element_blank()) +
  geom_line(
    data = crispri_RH_overlap,
    lwd = 0.2,
    colour = "black",
    show.legend = FALSE,
    # columns are referenced by bare name so they are looked up in the layer data
    aes(
      x = minus_crispri_thres,
      y = -log10(P_fish)
    )
  ) +
  # FDR = 0.05 reference line (position frozen in fdr_line_6)
  geom_hline(
    data = fdr_line_6,
    color = "red",
    size = size_hline,
    aes(
      yintercept = y,
      linetype = "a"
    )
  ) +
  # nominal P = 0.05 reference line
  geom_hline(
    color = "blue",
    size = size_hline,
    aes(
      yintercept = p_line,
      linetype = "b"
    )
  ) +
  # the linetype scale exists only to produce a legend for the two lines
  scale_linetype_manual(
    name = NULL,
    labels = c(expression(FDR == 0.05), expression(italic(P) == 0.05)),
    values = c("solid", "dashed"),
    guide = guide_legend(
      override.aes = list(
        color = c("red", "blue"),
        size = c(size_hline, size_hline)
      )
    )
  ) +
  theme(
    legend.position = c(0.8, 0.9),
    legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(0.4, "cm"),
    legend.spacing.y = unit(0.3, "cm"),
    legend.spacing.x = unit(0.2, "cm"),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.title.align = 0.7,
    legend.text.align = 0
  ) +
  labs(subtitle = "K562 (CRISPRi)") +
  xlab(expression(-gamma)) +
  # scale_x_continuous(breaks = c(0,1,2,3,4,6), labels = c(0,1,2,3,4,6)) +
  ylab(expression('-log'[10]*italic('P')))
print(p6)


# --------------- CRISPRa overlap (7) -------------------

# crisprai.txt is from Weissman supp data file 1-s2.0-S0092867414011787-mmc3.xlsx

# Load the CRISPRi/a table (first line of the file skipped) and keep the gene
# column plus the two gamma columns (file columns 1, 2 and 4).
crisprai <- read.table(
  "crisprai.txt",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE, skip = 1, check.names = FALSE
)
crisprai <- crisprai[, c(1, 2, 4)]
colnames(crisprai) <- c("Gene", "i_gamma", "a_gamma")


# Show the row the original author recorded as the CTRL entry, which carries a
# non-numeric "err" value.
# NOTE(review): the recorded output below lists the columns in a different
# order from the names assigned above; confirm which column holds "err".
crisprai[3257, ]
     # Gene a_gamma    i_gamma
# 3257 CTRL     err 0.00700599

# get rid of CTRL:
crisprai <- crisprai[crisprai$Gene != "CTRL", ]

crisprai$i_gamma <- as.numeric(crisprai$i_gamma)
# This section negates and compares a_gamma, so make sure it is numeric too;
# as.numeric() leaves the values unchanged if the column was already numeric.
crisprai$a_gamma <- as.numeric(crisprai$a_gamma)

g_unique <- read.delim(
  "growth_loci_unique.txt",
  sep = "\t", stringsAsFactors = FALSE, header = TRUE, check.names = FALSE
)

# RH is 1 for genes whose symbol occurs in g_unique$geneSymbol, 0 otherwise.
crisprai$RH <- as.numeric(crisprai$Gene %in% g_unique$geneSymbol)

# Range of -gamma over which the threshold is scanned (NA values are ignored).
crispra_max <- max(-crisprai$a_gamma, na.rm = TRUE)
crispra_min <- min(-crisprai$a_gamma, na.rm = TRUE)

# Evenly spaced thresholds from the minimum to the maximum in steps of 1/100
# of the range.
vec <- seq(crispra_min, crispra_max, (crispra_max - crispra_min) / 100)

# Loop-invariant pieces of the row masks.
neg_gamma <- -crisprai$a_gamma
has_score <- !is.na(neg_gamma)
in_rh <- crisprai$RH == 1

# One row per threshold, preallocated instead of growing a data frame one
# cell at a time.
overlap_cols <- c(
  "minus_crispra_thres", # col 1: threshold on -gamma
  "crispra_rh",          # col 2: -gamma above threshold, RH gene
  "nocrispra_rh",        # col 3: -gamma below threshold, RH gene
  "crispra_norh",        # col 4: -gamma above threshold, not an RH gene
  "nocrispra_norh",      # col 5: -gamma below threshold, not an RH gene
  "obs",                 # col 6: observed count in cell [1,1]
  "exp",                 # col 7: expected count in cell [1,1]
  "chi_sq",              # col 8
  "chi_df",              # col 9
  "P_chi",               # col 10
  "fish_OR",             # col 11
  "fish_conf_1",         # col 12
  "fish_conf_2",         # col 13
  "P_fish"               # col 14
)
overlap <- matrix(
  NA_real_,
  nrow = length(vec), ncol = length(overlap_cols),
  dimnames = list(NULL, overlap_cols)
)

for (i in seq_along(vec)) {
  # NOTE(review): both comparisons are strict, so genes whose -gamma equals the
  # threshold exactly are counted in neither column; kept as in the original.
  above <- has_score & neg_gamma > vec[i]
  below <- has_score & neg_gamma < vec[i]

  counts <- c(
    sum(above & in_rh), sum(below & in_rh),
    sum(above & !in_rh), sum(below & !in_rh)
  )
  tab <- matrix(counts, 2, 2, byrow = TRUE)

  chi <- chisq.test(tab)
  fish <- fisher.test(tab)

  overlap[i, ] <- c(
    vec[i], counts,
    chi$observed[1, 1], chi$expected[1, 1],
    chi$statistic, chi$parameter[[1]], chi$p.value,
    fish$estimate[[1]], fish$conf.int[[1]], fish$conf.int[[2]], fish$p.value
  )
}

crispra_RH_overlap <- as.data.frame(overlap)

# Benjamini-Hochberg adjustment across the thresholds.
crispra_RH_overlap$q_fish <- p.adjust(crispra_RH_overlap$P_fish, method = "BH")

# Largest raw P value that still has q < 0.05; NA (no line drawn) when no
# threshold passes, instead of max() returning -Inf with a warning.
# Stored under its own name: this section previously reused h_line_6, the
# variable that p6's aes() reads, so p6 picked up the CRISPRa threshold
# whenever it was rebuilt after this point (e.g. by plot_grid()).
fdr_hits <- crispra_RH_overlap$P_fish[crispra_RH_overlap$q_fish < 0.05]
h_line_7 <- if (length(fdr_hits) > 0) max(fdr_hits) else NA_real_


p7 <- ggplot() +
  theme2 +
  theme(legend.key = element_blank()) +
  geom_line(
    data = crispra_RH_overlap,
    lwd = 0.2,
    colour = "black",
    show.legend = FALSE,
    # columns are referenced by bare name so they are looked up in the layer data
    aes(
      x = minus_crispra_thres,
      y = -log10(P_fish)
    )
  ) +
  # FDR = 0.05 reference line
  geom_hline(
    color = "red",
    size = size_hline,
    aes(
      yintercept = -log10(h_line_7),
      linetype = "a"
    )
  ) +
  # nominal P = 0.05 reference line
  geom_hline(
    color = "blue",
    size = size_hline,
    aes(
      yintercept = p_line,
      linetype = "b"
    )
  ) +
  # the linetype scale exists only to produce a legend for the two lines
  scale_linetype_manual(
    name = NULL,
    labels = c(expression(FDR == 0.05), expression(italic(P) == 0.05)),
    values = c("solid", "dashed"),
    guide = guide_legend(
      override.aes = list(
        color = c("red", "blue"),
        size = c(size_hline, size_hline)
      )
    )
  ) +
  theme(
    legend.position = c(0.8, 0.8),
    legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(0.4, "cm"),
    legend.spacing.y = unit(0.3, "cm"),
    legend.spacing.x = unit(0.2, "cm"),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.title.align = 0.7,
    legend.text.align = 0
  ) +
  labs(subtitle = "K562 (CRISPRa)") +
  xlab(expression(-gamma)) +
  # scale_x_continuous(breaks = c(0,1,2,3,4,6), labels = c(0,1,2,3,4,6)) +
  ylab(expression('-log'[10]*italic('P')))
print(p7)





#------------------Make file --------------------------


# Arrange the six panels (p1-p4, p6, p7) in a 3 x 2 grid labelled A-F.
crisp_gt_ovlap_fish_2 <- plot_grid(
  p1, p2, p3, p4, p6, p7,
  labels = LETTERS[1:6], ncol = 2, nrow = 3, label_size = 16
)
# Draw on the current device. print() is explicit because top-level
# autoprinting does not happen when the script is run through source().
print(crisp_gt_ovlap_fish_2)

# Write the figure to PDF. The explicit print() is what actually draws into
# the file, and `finally` closes the device even if drawing fails.
pdf("crisp_gt_ovlap_fish_2.pdf", width = 7.5, height = 7.5, useDingbats = FALSE)
invisible(tryCatch(print(crisp_gt_ovlap_fish_2), finally = dev.off()))






