# ---------- start at START HERE if xValidate_list is available --------


# ---------------- libraries -----------------------------------------------

library(mgcv)
library(multcomp)
library(emmeans)

#----------------- Prepare human gseq data ---------------------


# Read the human gseq table (tab-delimited, with header). The code further
# down treats columns 1:4 as Chromosome, posS, posE, pos and columns 5+ as
# per-sample counts.
RH_human <- read.table(
  "RH_human_gseq.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# Code below is to get rid of up and down ramps for copy number changes.
# Should not be used here, where doing P val analyses.
# # Get rows at beginning of each chromosome:
# RH_human_start <- RH_human[RH_human$posS == 0 & RH_human$posE == 1e6,]

# # Get rid of ramp ups and ramp downs:
# RH_human <- RH_human[c(0,diff(RH_human$pos)) == 1e4,]

# # combine RH_human without ramps and RH_human_start:
# RH_human <- rbind(RH_human_start,RH_human)


# Recode chr1..chr22, chrX, chrY as the numbers 1..24 (X = 23, Y = 24).
# Any other chromosome name is rejected up front with an explicit message.
chr_names <- paste0("chr", c(1:22, "X", "Y"))
chr_index <- match(RH_human$Chromosome, chr_names)
if (anyNA(chr_index)) {
  stop(
    "Unexpected chromosome name(s) in RH_human_gseq.txt: ",
    paste(unique(RH_human$Chromosome[is.na(chr_index)]), collapse = ", "),
    call. = FALSE
  )
}
RH_human$Chromosome <- as.numeric(chr_index)
chrOrder <- c(1:24)

# Sort by chromosome, then by position within chromosome.
RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]

# Compute chromosome size (largest pos seen on each chromosome).
gen_coord <- aggregate(pos ~ Chromosome, FUN = max, data = RH_human)
colnames(gen_coord)[2] <- "chr_size"
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]

# Use cumsum to make genome coordinates: each chromosome starts where the
# previous ones end. head(..., -1) drops the final total, so this works for
# any number of chromosomes present (the original assumed exactly 24).
gen_coord$coord <- c(0, head(cumsum(gen_coord$chr_size), -1))

# Merge genome coordinates with RH_human (joined on Chromosome) and re-sort,
# because merge() does not keep the within-chromosome order.
RH_human <- merge(RH_human, gen_coord[, c("Chromosome", "coord")])
RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]

RH_human$coord <- RH_human$pos + RH_human$coord

# Get rid of chrY, because no chrY seq in hamster genome.
# Chromosome is numeric at this point (chrY == 24). The previous test against
# the string "chrY" was always TRUE, so chrY rows were silently kept.
# NOTE(review): removing chrY reduces the number of windows; any cached
# xValidate_list, and any code that hard-codes the window count, was produced
# with chrY still present and must be regenerated/updated together.
RH_human <- RH_human[RH_human$Chromosome != 24, ]

# Get rid of unneeded coord column (the last column after the merge).
RH_human$coord <- NULL



# ------------------ Read in and prepare ancillary tables -------------------------------

# Cell-label table (tab-delimited, with header); merged onto the long-format
# data later on.
cell <- read.table(
  "cell_label_info.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# Per-sample column totals over every window (sample columns are 5..last).
sum_reads <- colSums(RH_human[, 5:ncol(RH_human)])

# uses mapped human reads, cf human_AIC_1.R
reads <- read.table(
  "RH_pool_human_total_align.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)



# --------- Down-sample RH_human to prevent autocorrelation and prepare for gam ------------------------------

# Keep every 100th row: down sample to non-overlapping 1 Mb windows to
# prevent autocorrelation.
RH_human_sub <- RH_human[seq(from = 1, to = nrow(RH_human), by = 1e2), ]



# ---------------- permute ------------------------------

# All 20 ways of choosing 3 of the 6 pools, one choice per column.
# Column k and column 21 - k are complementary halves.
perm <- combn(1:6, 3)

perm
     # [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20]
# [1,]    1    1    1    1    1    1    1    1    1     1     2     2     2     2     2     2     3     3     3     4
# [2,]    2    2    2    2    3    3    3    4    4     5     3     3     3     4     4     5     4     4     5     5
# [3,]    3    4    5    6    4    5    6    5    6     6     4     5     6     5     6     6     5     6     6     6



# ------------------- xValidate ----------------------------

# One table of per-window -log10 P values and coefficients is produced for
# each column of perm (each choice of 3 pools) and stored in xValidate[[j]].
xValidate <- list()

for (j in seq_len(ncol(perm))) {
  # Pools listed in column j of perm keep their own number as label; the
  # remaining pools get NA.
  # NOTE(review): rows with an NA pool are presumably excluded from the fit by
  # gam's default NA handling, which is what restricts each fit to one half of
  # the pools -- confirm.
  random <- rep(NA, 6)
  random[perm[, j]] <- perm[, j]

  # Wide -> long: one row per (window id, sample), with the count in "copy"
  # and the sample column name in "RH_ID". Row names are sized from the data
  # instead of a fixed 1:1e6 cap.
  sample_cols <- colnames(RH_human_sub)[5:ncol(RH_human_sub)]
  RH_human_sub_l <- reshape(
    RH_human_sub,
    varying = sample_cols,
    v.names = "copy",
    timevar = "RH_ID",
    times = sample_cols,
    new.row.names = seq_len(nrow(RH_human_sub) * length(sample_cols)),
    direction = "long"
  )

  # Decode week, drug concentration and pool from the sample name. Indexing
  # the column (df$col[cond] <- v) is used rather than df[cond, ]$col <- v,
  # because the latter errors when no sample name matches a pattern.
  RH_human_sub_l$week <- 0
  for (wk in c(0, 1, 2, 3, 4, 6)) {
    RH_human_sub_l$week[grepl(paste0("_w", wk, "_"), RH_human_sub_l$RH_ID)] <- wk
  }

  RH_human_sub_l$conc <- 0
  for (dose in c(0, 8, 25, 75)) {
    RH_human_sub_l$conc[grepl(paste0("_d", dose), RH_human_sub_l$RH_ID)] <- dose
  }

  RH_human_sub_l$pool <- 0
  for (pool_no in 1:6) {
    RH_human_sub_l$pool[grepl(paste0("RH", pool_no, "_"), RH_human_sub_l$RH_ID)] <- random[pool_no]
  }

  # Attach cell labels, per-sample window totals and total aligned reads.
  RH_human_sub_l <- merge(RH_human_sub_l, cell)
  RH_human_sub_l$sum_reads <- sum_reads[RH_human_sub_l$RH_ID]
  RH_human_sub_l <- merge(RH_human_sub_l, reads[, c(1:5, 9)])
  # NOTE(review): column 13 is renamed by position; this depends on the exact
  # column layout of cell and reads -- confirm if either table changes.
  colnames(RH_human_sub_l)[13] <- "total_reads"

  RH_human_sub_l$pool <- as.factor(RH_human_sub_l$pool)
  RH_human_sub_l$cell <- as.factor(RH_human_sub_l$cell)

  # Empty result table: window coordinates, then -log10 P values, then
  # coefficients, for the growth, drug and interaction contrasts.
  log10p_raw_sub <- data.frame(
    Chromosome = as.character(),
    posS = integer(),
    posE = integer(),
    pos = numeric(),
    log10p_g_0nM = numeric(),
    log10p_g_8nM = numeric(),
    log10p_g_25nM = numeric(),
    log10p_g_75nM = numeric(),
    log10p_g_avg = numeric(),
    log10p_d_w1 = numeric(),
    log10p_d_w2 = numeric(),
    log10p_d_w3 = numeric(),
    log10p_d_w4 = numeric(),
    log10p_d_w6 = numeric(),
    log10p_d_avg = numeric(),
    log10p_g_d_Ix = numeric(),
    coef_g_0nM = numeric(),
    coef_g_8nM = numeric(),
    coef_g_25nM = numeric(),
    coef_g_75nM = numeric(),
    coef_g_avg = numeric(),
    coef_d_w1 = numeric(),
    coef_d_w2 = numeric(),
    coef_d_w3 = numeric(),
    coef_d_w4 = numeric(),
    coef_d_w6 = numeric(),
    coef_d_avg = numeric(),
    coef_g_d_Ix = numeric(),
    stringsAsFactors = FALSE
  )

  ngroup <- max(RH_human_sub_l$id)

  # Loop takes ~ 2 hours on laptop
  # In future read in xValidate_list (see below)
  # use glht to replicate model used for genome-scan

  for (i in seq_len(ngroup)) {
    cat("i = ", i, "/", ngroup, "\n")
    tryCatch({

      # Negative binomial GAM for window i, with pool and cell as random
      # effects and log(total_reads) as offset.
      m1 <- gam(copy ~ week * conc + s(pool, bs = "re") + s(cell, bs = "re") + offset(log(total_reads)), data = subset(RH_human_sub_l, RH_human_sub_l$id == i), family = nb, method = "REML")

      # Growth (week) effect at 0, 8, 25, 75 nM and at 27 (the "avg" column).
      glht_growth <- glht(m1, linfct = c(
        "week == 0",
        "week + 8*week:conc == 0",
        "week + 25*week:conc == 0",
        "week + 75*week:conc == 0",
        "week + (27)*week:conc == 0"))

      # Drug (conc) effect at weeks 1, 2, 3, 4, 6 and at 3.2 (the "avg" column).
      glht_drug <- glht(m1, linfct = c(
        "conc + 1*week:conc == 0",
        "conc + 2*week:conc == 0",
        "conc + 3*week:conc == 0",
        "conc + 4*week:conc == 0",
        "conc + 6*week:conc == 0",
        "conc + (3.2)*week:conc == 0"))

      glht_omni <- glht(m1)

      # Summarise each contrast set once (unadjusted tests) and reuse it for
      # both the statistics and the coefficients.
      growth_test <- summary(glht_growth, test = adjusted("none"))$test
      drug_test <- summary(glht_drug, test = adjusted("none"))$test
      omni_test <- summary(glht_omni, test = adjusted("none"))$test

      growth_stat <- growth_test$tstat
      drug_stat <- drug_test$tstat
      Ix_stat <- omni_test$tstat["week:conc"]
      growth_coef <- growth_test$coefficients
      drug_coef <- drug_test$coefficients
      Ix_coef <- omni_test$coefficients["week:conc"]

      # Row i: window coordinates, two-sided normal P values as -log10, then
      # the coefficients.
      log10p_raw_sub[i, ] <- c(
        RH_human_sub[i, 1:4],
        -log10(2 * pnorm(-abs(growth_stat))),
        -log10(2 * pnorm(-abs(drug_stat))),
        -log10(2 * pnorm(-abs(Ix_stat))),
        growth_coef,
        drug_coef,
        Ix_coef
      )

    }, error = function(e) {cat("Error on line ", i, ": ", conditionMessage(e), "\n")})
  }

  # If the last window(s) failed, the table is shorter than ngroup. Pad with
  # NA rows so every split has one row per window and rows stay aligned.
  if (nrow(log10p_raw_sub) < ngroup) {
    log10p_raw_sub[ngroup, ] <- NA
  }

  xValidate[[j]] <- log10p_raw_sub
}

# ---------- Save xValidate to list on disk ----------


# saveRDS(xValidate,file="xValidate_list")


# ---------- START HERE if xValidate_list is available --------

library(Rmisc)
library(lme4)
library(multcomp)
library(emmeans)

xValidate <- readRDS("xValidate_list")

# Columns 5:16 of each table are the twelve -log10 P value columns.
pval_cols <- 5:16

prototype <- xValidate[[1]]
prototype <- prototype[0, pval_cols]

# p vals of each individual comparison of halves using Fisher's
# ans_p calculated, but not used. ans_OR (below) used for plotting and
# calculations instead.
ans_p <- data.frame(compare = character(), fdr = numeric(), prototype, stringsAsFactors = FALSE)

# OR of each individual comparison of halves using Fisher's
ans_OR <- data.frame(compare = character(), fdr = numeric(), prototype, stringsAsFactors = FALSE)

# Split k is compared with split (n_splits + 1 - k); for the 20 splits this
# gives "1_20", "2_19", ..., "10_11".
n_splits <- length(xValidate)
half <- n_splits %/% 2
comparisons <- paste(seq_len(half), n_splits + 1 - seq_len(half), sep = "_")

thresh_fdr <- c(0.01, 0.05, 0.1, 0.25, 0.5, 0.9)

# BH-adjusted P values for each split. They do not depend on the FDR
# threshold, so compute them once instead of once per threshold.
fdr_adj <- lapply(xValidate, function(tab) {
  apply(tab[, pval_cols], 2, FUN = function(x) {
    p.adjust(10^(-x), method = "BH")
  })
})

for (j in seq_along(thresh_fdr)) {
  for (i in seq_along(comparisons)) {

    adj_A <- fdr_adj[[i]]
    adj_B <- fdr_adj[[n_splits - i + 1]]

    # Pair the two halves window by window. The row count comes from the data
    # (previously hard-coded as 3113 per half).
    if (nrow(adj_A) != nrow(adj_B)) {
      warning(
        "Splits ", comparisons[i], " have different numbers of windows; ",
        "using the first ", min(nrow(adj_A), nrow(adj_B)), " of each",
        call. = FALSE
      )
    }
    rows <- seq_len(min(nrow(adj_A), nrow(adj_B)))

    # 1 = significant at this FDR threshold, 0 = not, NA stays NA.
    A <- ifelse(adj_A[rows, , drop = FALSE] < thresh_fdr[j], 1, 0)
    B <- ifelse(adj_B[rows, , drop = FALSE] < thresh_fdr[j], 1, 0)

    out_row <- ((j - 1) * length(comparisons)) + i

    ans_p[out_row, 1] <- comparisons[i]
    ans_p[out_row, 2] <- thresh_fdr[j]
    ans_OR[out_row, 1] <- comparisons[i]
    ans_OR[out_row, 2] <- thresh_fdr[j]

    # loop and tryCatch rather than apply, because log10p_d_w1 and log10p_d_w2
    # can throw errors, so the error messages printed here can be ignored.
    # A failed column keeps NA in both ans_p and ans_OR.
    for (k in seq_len(ncol(A))) {
      tryCatch({
        # One Fisher test per column supplies both the P value and the OR.
        ft <- fisher.test(table(as.data.frame(cbind(A[, k], B[, k]))))
        ans_p[out_row, k + 2] <- ft$p.value
        ans_OR[out_row, k + 2] <- ft$estimate
      }, error = function(e) {cat("Error on line ", i, ": ", conditionMessage(e), "\n")})
    }

  }
}





# get rid of log10p_d_w1, log10p_d_w2

# Drop the two columns by name rather than by position.
ans_OR <- ans_OR[, !(colnames(ans_OR) %in% c("log10p_d_w1", "log10p_d_w2"))]

# The ten remaining contrasts, in display order.
expt_cols <- c(
  "log10p_g_0nM", "log10p_g_8nM", "log10p_g_25nM", "log10p_g_75nM", "log10p_g_avg",
  "log10p_d_w3", "log10p_d_w4", "log10p_d_w6", "log10p_d_avg", "log10p_g_d_Ix"
)

# Wide -> long: one row per (comparison, fdr, contrast), odds ratio in "OR".
# Row names are sized from the data instead of a fixed 1:1000.
ans_OR <- reshape(
  ans_OR,
  varying = expt_cols,
  v.names = "OR",
  timevar = "expt",
  times = expt_cols,
  new.row.names = seq_len(nrow(ans_OR) * length(expt_cols)),
  direction = "long"
)


# transform OR to log(OR), more robust statistics
ans_OR$log_OR <- log(ans_OR$OR)


ans_OR$expt <- factor(ans_OR$expt, levels = unique(ans_OR$expt))
ans_OR$expt_jitt <- jitter(as.numeric(ans_OR$expt), amount = 0.1)


# # reverse levels so that in correct order when coord_flip()
ans_OR$expt <- factor(ans_OR$expt, levels = rev(levels(ans_OR$expt)))

# Arrange x coords/jitter so that points are in correct CIs when coord_flip().
# Position 1 is left for the summary row, so contrast k of n goes to
# (n + 2) - k; with the ten contrasts this is the original 12 - k.
ans_OR$expt_jitt <- (nlevels(ans_OR$expt) + 2) - ans_OR$expt_jitt

# labels in reverse order, so correct for coord_flip()
labels <- rev(c("Growth (0 nM)", "Growth (8 nM)", "Growth (25 nM)", "Growth (75 nM)", "Growth (avg)", "Paclitaxel (3 wks)", "Paclitaxel (4 wks)", "Paclitaxel (6 wks)", "Paclitaxel (avg)", "Ix", "Summary"))


# Mean, sd, se and CI of log(OR) for each contrast at each FDR threshold.
summ_OR <- summarySE(data = ans_OR, measurevar = "log_OR", groupvars = c("expt", "fdr"))







# # Calculate grand summaries by simple pooling
# grand_summ <- summarySE(data=ans_OR, measurevar="log_OR",groupvars=c("fdr"))
# grand_summ$expt <- "summ"
# summ_OR <- rbind(summ_OR,grand_summ)


# summ_OR$CI_1 <- summ_OR$log_OR-summ_OR$ci
# summ_OR$CI_2 <- summ_OR$log_OR+summ_OR$ci









# # More appropriate to calculate grand summaries, 95% CIs, using random effects and kenward-roger with emmeans, rather than simple pooling (also consistent with rest of paper)

fdr_vec <- unique(ans_OR$fdr)
grand_summ <- summ_OR[0, ]
# "summ" must be a factor level before it can be stored in the expt column.
grand_summ$expt <- factor(grand_summ$expt, levels = c("summ", levels(grand_summ$expt)))


for (i in seq_along(fdr_vec)) {

  # Intercept-only mixed model with contrast and comparison as random effects.
  m1 <- lmer(log_OR ~ 1 + (1 | expt) + (1 | compare), data = ans_OR[ans_OR$fdr == fdr_vec[i], ])

  # Evaluate the grand mean once and reuse it (it was recomputed for every
  # field before).
  grand_mean <- test(emmeans(m1, lmer.df = "kenward-roger", specs = "1"))

  grand_summ[i, "expt"] <- "summ"
  grand_summ[i, "fdr"] <- fdr_vec[i]
  grand_summ[i, "N"] <- 10
  grand_summ[i, "log_OR"] <- grand_mean$emmean
  # grand_summ[i, "log_OR"] <- summary(glht(m1),test=adjusted("none"))$test$coefficients
  grand_summ[i, "sd"] <- NA
  grand_summ[i, "se"] <- grand_mean$SE
  # grand_summ[i, "se"] <- summary(glht(m1),test=adjusted("none"))$test$sigma
  # CI half-width from the normal quantile.
  grand_summ[i, "ci"] <- qnorm(0.975) * grand_mean$SE
  # grand_summ[i, "ci"] <- qnorm(0.975) * summary(glht(m1),test=adjusted("none"))$test$sigma
}

summ_OR <- rbind(summ_OR, grand_summ)
summ_OR$CI_1 <- summ_OR$log_OR - summ_OR$ci
summ_OR$CI_2 <- summ_OR$log_OR + summ_OR$ci








# # make data frame for polygon symbol in summary part of ggplot2
# polygon <- data.frame(
				# x = c(1.1, 1.0, 0.9, 1.0), 
				# y = unlist(c(as.numeric(summ_OR[summ_OR$expt == "summ" & summ_OR$fdr==0.01,c("log_OR","CI_2","log_OR","CI_1")]))),
				# type = factor("summ",levels=c("expt","summ")),
				# grouping = factor("first",levels=c("first"))
				# )









# Change levels so that in correct order when coord_flip()
summ_OR$expt <- factor(summ_OR$expt, levels = c("summ", levels(ans_OR$expt)))


# provide column to control color summary vs expts
# The column is indexed directly (df$col[cond] <- v); df[cond, ]$col <- v
# raises an error when no row matches.
summ_OR$type <- "expt"
summ_OR$type[summ_OR$expt == "summ"] <- "summ"
summ_OR$type <- factor(summ_OR$type, levels = c("expt", "summ"))


# provide column to control color consistency of ans_OR with summ_OR
ans_OR$type <- factor("expt", levels = c("expt", "summ"))



# get p vals for OR observations in each comparison of halves by comparing to expected log(OR) == 0:

# One-sample t-test of log(OR) against 0 within each fdr x expt cell.
# `field` names the t.test() result component to keep; `label` names the
# resulting column.
t_test_component <- function(field, label) {
  out <- aggregate(
    log_OR ~ fdr + expt,
    data = ans_OR,
    FUN = function(x) {
      t.test(x, mu = 0)[[field]]
    }
  )
  colnames(out)[3] <- c(label)
  out
}

OR_t_stat <- t_test_component("statistic", "t")
OR_df <- t_test_component("parameter", "df")
OR_pvals <- t_test_component("p.value", "p.value")


# Join the three pieces on their shared fdr/expt columns and tag the method.
OR_pvals <- merge(OR_t_stat, merge(OR_df, OR_pvals))
OR_pvals$test <- c("t_test")






# # Simple grand P vals by pooling data
# grand_OR_pvals <- aggregate(log_OR~fdr,data=ans_OR,FUN = function(x) {t.test(x,mu=0)$p.value})
# grand_OR_pvals$expt <- "summ"
# OR_pvals <- rbind(OR_pvals, grand_OR_pvals)







# # More appropriate to calculate grand P vals, df, t-stats, using random effects and kenward-roger estimates with emmeans, rather than simple pooling (also consistent with rest of paper)

fdr_vec <- unique(ans_OR$fdr)
grand_OR_pvals <- OR_pvals[0, ]
# "summ" must be a factor level before it can be stored in the expt column.
grand_OR_pvals$expt <- factor(grand_OR_pvals$expt, levels = c("summ", levels(grand_OR_pvals$expt)))


# "expt" has bigger ICC than "compare", so placed "expt" first

for (i in seq_along(fdr_vec)) {

  m1 <- lmer(log_OR ~ 1 + (1 | expt) + (1 | compare), data = ans_OR[ans_OR$fdr == fdr_vec[i], ])

  # Test the grand mean once and reuse it (it was recomputed for every field
  # before).
  grand_test <- test(emmeans(m1, lmer.df = "kenward-roger", specs = "1"))

  grand_OR_pvals[i, "fdr"] <- fdr_vec[i]
  grand_OR_pvals[i, "expt"] <- "summ"
  grand_OR_pvals[i, "t"] <- grand_test$t.ratio
  grand_OR_pvals[i, "df"] <- grand_test$df
  grand_OR_pvals[i, "p.value"] <- grand_test$p.value
  # grand_OR_pvals[i, "p.value"] <- summary(glht(m1),test=adjusted("none"))$test$pvalues
  grand_OR_pvals[i, "test"] <- c("kenward-roger")
}


OR_pvals <- rbind(OR_pvals, grand_OR_pvals)



# split p vals into mantissa and exponent for display
# Format once and split each value on its own "e". Taking pieces per element
# keeps rows aligned even if a value has no exponent (e.g. NA/NaN), which the
# earlier unlist()/every-other-element extraction did not.
p_sci <- strsplit(format(OR_pvals$p.value, scientific = TRUE), "e")
OR_pvals$mant <- vapply(p_sci, function(parts) parts[1], character(1))
OR_pvals$exp <- vapply(p_sci, function(parts) parts[2], character(1))
OR_pvals$exp <- gsub("-0", "-", OR_pvals$exp)

# alter levels so that in correct order when coord_flip()
OR_pvals$expt <- factor(OR_pvals$expt, levels = levels(summ_OR$expt))

# text labels (P values) depend on order of data frame, not factors
OR_pvals <- OR_pvals[order(OR_pvals$fdr, OR_pvals$expt), ]


OR_pvals$mant <- as.numeric(OR_pvals$mant)
OR_pvals$exp <- as.numeric(OR_pvals$exp)


# round mantissa to 2 sig figs
OR_pvals[, "mant"] <- round(OR_pvals[, "mant"], 1)

# Correct cases that have become eg 10 x 10^-5 (-> 1 x 10^-4).
# which() skips NA mantissas, which a logical index would reject.
is_ten <- which(OR_pvals$mant == 10.0)
if (length(is_ten) > 0) {
  OR_pvals[is_ten, "exp"] <- OR_pvals[is_ten, "exp"] + 1
  OR_pvals[is_ten, "mant"] <- 1
}




# ----------------------------- ggplot ---------------------------------


library(ggplot2)
library(cowplot)



# from https://stackoverflow.com/questions/8197559/emulate-ggplot2-default-color-palette
# function for default ggplot2 colors

# Return n colours as hex strings: hues evenly spaced from 15 degrees, with
# luminance 65 and chroma 100.
# n: number of colours wanted. n = 0 returns an empty vector (indexing with
# 1:n used to return one colour in that case).
gg_color_hue <- function(n) {
  # n + 1 points over 15..375; the last point (375 = 15 + 360) is dropped below
  hues <- seq(15, 375, length.out = n + 1)
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}

# Two colours, named after the levels of the "type" column: individual
# experiments vs. the summary row.
colores_1 <- setNames(gg_color_hue(2), c("expt", "summ"))

# Shared theme: no grid or panel background, thin black axes, legend hidden.
theme2 <- theme(
  plot.margin = unit(c(0.8, 0.8, 0.8, 0.8), "cm"),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),
  legend.position = "none",
  axis.line.x = element_line(colour = "black", size = 0.1),
  axis.line.y = element_line(colour = "black", size = 0.1),
  axis.ticks = element_line(colour = "black", size = 0.1),
  # numbers on tick marks of x and y axes
  axis.text = element_text(size = 8),
  # titles of x and y axes
  axis.title = element_text(size = 12),
  # margin order is t, r, b, l: right margin pushes the y axis title away
  axis.title.y = element_text(margin = margin(t = 0, r = 7, b = 0, l = 0), hjust = 0.5),
  # top margin pushes the x axis title away
  axis.title.x = element_text(margin = margin(t = 7, r = 0, b = 0, l = 0), hjust = 0.5),
  # can provide "A","B", by ggtitle, but used plot_grid wch can shift more left
  plot.title = element_text(size = 32, face = "bold", hjust = -0.14),
  # hjust shifts right
  plot.subtitle = element_text(size = 13, face = "plain", hjust = 0.5),
  legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
  legend.box.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
  legend.key.height = unit(0.05, "cm"),
  legend.key.width = unit(0.2, "cm"),
  legend.spacing.y = unit(0.05, 'cm'),
  legend.spacing.x = unit(0.1, 'cm'),
  legend.title = element_text(size = 11),
  legend.text = element_text(size = 10)
  # legend.title.align=0.0
)


size_point <- 0.3
size_hline <- 0.1



# # use loop. Employ grobs to get around lazy evaluation problem of ggplot2 and loops.

# One forest-style panel per FDR threshold; each panel is converted to a grob
# inside the loop and stored in p.
p <- list()

fdr_vec <- unique(ans_OR$fdr)

for (j in seq_along(fdr_vec)) {

  fdr_now <- fdr_vec[j]

  # Per-panel slices of the summary, raw points and P value tables.
  expt_summ <- summ_OR[summ_OR$expt != "summ" & summ_OR$fdr == fdr_now, ]
  grand_row <- summ_OR[summ_OR$expt == "summ" & summ_OR$fdr == fdr_now, ]
  points_now <- ans_OR[ans_OR$fdr == fdr_now, ]
  pvals_now <- OR_pvals[OR_pvals$fdr == fdr_now, ]
  top_log_OR <- max(points_now[, "log_OR"])

  # Plotmath labels "mant %*% 10^exp", plus an italic "P" heading as the last
  # (12th) entry.
  p_labels <- c(
    paste0(
      formatC(pvals_now[, "mant"], format = "f", digits = 1),
      "%*%", 10, "^", pvals_now[, "exp"]
    ),
    "italic('P')"
  )

  # Diamond for the summary row: mean at the left/right corners, CI limits at
  # the other two, drawn around x = 1.
  diamond <- data.frame(
    x = c(1.2, 1.0, 0.8, 1.0),
    y = as.numeric(grand_row[, c("log_OR", "CI_2", "log_OR", "CI_1")]),
    type = factor("summ", levels = c("expt", "summ")),
    grouping = factor("summ", levels = c("expt", "summ"))
  )

  # Blob sizes are equal even for summary, because 10 obs for each growth,
  # drug combo, and for summary with 10 conditions.

  chart <- ggplot() +
    theme2 +
    theme(legend.key = element_blank()) +
    # CI bars for the individual contrasts
    geom_linerange(
      data = expt_summ,
      aes(x = as.numeric(expt), ymin = CI_1, ymax = CI_2, colour = type),
      lwd = 0.2,
      show.legend = TRUE
    ) +
    # P value column to the right of the data
    geom_text(
      aes(label = p_labels, x = c(1:12), y = top_log_OR + 0.4),
      parse = TRUE,
      size = c(rep(2, 11), 2.5),
      hjust = 0
    ) +
    # reference line at log(OR) == 0
    geom_hline(
      yintercept = 0,
      linetype = "dashed",
      color = "grey",
      size = 0.2
    ) +
    scale_colour_manual(values = as.vector(colores_1), name = NULL) +
    scale_fill_manual(values = as.vector(colores_1), name = NULL) +
    # individual comparisons (jittered)
    geom_point(
      data = points_now,
      aes(y = log_OR, x = expt_jitt, colour = type),
      shape = 16,
      stroke = 0.2,
      size = 0.5,
      show.legend = TRUE
    ) +
    # per-contrast means (open squares)
    geom_point(
      data = expt_summ,
      aes(y = log_OR, x = as.numeric(expt), colour = type),
      shape = 0,
      stroke = 0.5,
      size = 2.0,
      show.legend = TRUE
    ) +
    geom_polygon(
      data = diamond,
      aes(x = x, y = y, group = grouping),
      fill = colores_1["summ"]
    ) +
    theme(
      plot.margin = unit(c(0.8, 3, 0.8, 0.8), "cm"),
      axis.title.y = element_blank(),
      axis.line.y = element_blank(),
      axis.ticks.y = element_blank(),
      axis.text.x = element_text(size = 10)
    ) +
    scale_x_continuous(
      breaks = c(1:11),
      labels = labels,
      expand = expand_scale(add = 1.1)
    ) +
    ylab(expression(log*'('*odds~ratio*')')) +
    coord_flip(ylim = c(-0.2, top_log_OR + 0.1), clip = 'off') +
    labs(subtitle = paste0("FDR = ", fdr_now))

  print(chart)

  p[[j]] <- ggplotGrob(chart)

}




#------------------Make file --------------------------



# Arrange the six panels (one per FDR threshold) in a 3 x 2 grid labelled A-F.
xValidate_2 <- plot_grid(plotlist = p[1:6], labels = LETTERS[1:6], ncol = 2, nrow = 3, label_size = 16)
print(xValidate_2)


# print() is explicit because auto-printing only happens at the top level:
# when this file is run with source(), a bare `xValidate_2` draws nothing and
# the PDF comes out empty.
pdf("xValidate_2.pdf", width = 7.5, height = 10, useDingbats = FALSE)
print(xValidate_2)
dev.off()




# -------------- Numbers to quote in paper -----------------------

# log_OR in table below is log(OR); CI_1 and CI_2 are log_OR -/+ ci.
# The commented tables in this section are output pasted from an earlier run.
summ_OR[summ_OR$expt == "summ",]
   # expt  fdr  N    log_OR sd         se        ci      CI_1      CI_2 type
# 61 summ 0.01 10 0.5584215 NA 0.09821899 0.1925057 0.3659158 0.7509271 summ <<<<<<< use in paper
# 62 summ 0.05 10 0.5715710 NA 0.14181495 0.2779522 0.2936188 0.8495232 summ
# 63 summ 0.10 10 0.6302276 NA 0.17153431 0.3362011 0.2940265 0.9664287 summ
# 64 summ 0.25 10 0.8146205 NA 0.22918994 0.4492040 0.3654164 1.2638245 summ
# 65 summ 0.50 10 1.2040593 NA 0.31618481 0.6197108 0.5843484 1.8237701 summ
# 66 summ 0.90 10 3.0785632 NA 0.51064368 1.0008432 2.0777200 4.0794064 summ


# "p.value" in table below is from the random effects model, tested with
# kenward-roger df (test == "kenward-roger")
OR_pvals[OR_pvals$expt == "summ",]
    # fdr expt        t        df      p.value          test mant exp
# 61 0.01 summ 5.685474 10.041750 0.0001991684 kenward-roger  2.0  -4 <<<<<<< use in paper
# 62 0.05 summ 4.030400  9.199212 0.0028411322 kenward-roger  2.8  -3
# 63 0.10 summ 3.674062  9.171909 0.0049602194 kenward-roger  5.0  -3
# 64 0.25 summ 3.554346  9.046336 0.0061234758 kenward-roger  6.1  -3
# 65 0.50 summ 3.808087  9.046221 0.0041264573 kenward-roger  4.1  -3
# 66 0.90 summ 6.028789  9.008650 0.0001947265 kenward-roger  1.9  -4


# rows with test == "t_test" in table below use a one-sample t-test to compare
# individual expts with expected log_OR == 0
# the test == "kenward-roger" row uses the mixed model to analyze summary data
# at each FDR
head(OR_pvals)
    # fdr          expt         t       df      p.value          test mant exp
# 61 0.01          summ  5.685474 10.04175 1.991684e-04 kenward-roger  2.0  -4
# 10 0.01 log10p_g_d_Ix  9.826160  9.00000 4.139800e-06        t_test  4.1  -6
# 1  0.01  log10p_d_avg  6.594578  9.00000 9.989690e-05        t_test  1.0  -4
# 4  0.01   log10p_d_w6 14.583379  9.00000 1.440417e-07        t_test  1.4  -7
# 3  0.01   log10p_d_w4  6.791773  9.00000 7.979077e-05        t_test  8.0  -5
# 2  0.01   log10p_d_w3  5.872326  9.00000 2.370778e-04        t_test  2.4  -4













