# Plots of genes that are permutation sub-threshold yet of interest. Plot of copy number changes

#install.packages("ggplot2")
library(ggplot2)
library(cowplot) #used with plot_grid 

# for summarySE()
library(Rmisc)
library(mgcv)


#----------------Aesthetics ---------------------------


# Theme used by the -log10 P panels (p1, p3): blank background, no grid,
# thin black axes and no legend.
theme_odd <- theme(
	# overall layout
	plot.margin = unit(c(t = 1.2, r = 1.2, b = 1.2, l = 0.8), "cm"),
	legend.position = "none",
	# strip grid and background
	panel.background = element_blank(),
	panel.grid.major = element_blank(),
	panel.grid.minor = element_blank(),
	# axes
	axis.line.x = element_line(colour = "black", size = 0.1),
	axis.line.y = element_line(colour = "black", size = 0.1),
	axis.ticks = element_line(colour = "black", size = 0.1),
	# numbers on tick marks of x and y axes
	axis.text = element_text(size = 12),
	# titles of x and y axes
	axis.title = element_text(size = 14),
	# right margin moves the y axis title away from the tick labels
	axis.title.y = element_text(margin = margin(t = 0, r = 13, b = 0, l = 0)),
	# top margin moves the x axis title away from the tick labels
	axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)),
	# panel letters ("A", "B", ...) can be supplied via ggtitle, but plot_grid
	# labels were used instead because they can be shifted further left
	plot.title = element_text(size = 32, face = "bold", hjust = -0.14),
	# hjust = 0.5 centres the subtitle (larger values shift it right)
	plot.subtitle = element_text(size = 14, face = "plain", hjust = 0.5)
)
	
	
# Theme used by the copy-number panel (p2): same bare look as theme_odd, but
# with a compact legend placed to the right of the panel.
theme_even <- theme(
	# overall layout
	plot.margin = unit(c(t = 1.2, r = 0.8, b = 1.2, l = 0.8), "cm"),
	# strip grid and background
	panel.background = element_blank(),
	panel.grid.major = element_blank(),
	panel.grid.minor = element_blank(),
	# axes
	axis.line.x = element_line(colour = "black", size = 0.1),
	axis.line.y = element_line(colour = "black", size = 0.1),
	axis.ticks = element_line(colour = "black", size = 0.1),
	# numbers on tick marks of x and y axes
	axis.text = element_text(size = 12),
	# right margin moves the y axis title away from the tick labels; also sets title size
	axis.title.y = element_text(margin = margin(t = 0, r = 13, b = 0, l = 0), size = 12),
	# top margin moves the x axis title away from the tick labels; also sets title size
	axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0), size = 14),
	# panel letters ("A", "B", ...) can be supplied via ggtitle, but plot_grid
	# labels were used instead because they can be shifted further left
	plot.title = element_text(size = 32, face = "bold", hjust = -0.14),
	# hjust = 0.5 centres the subtitle (larger values shift it right)
	plot.subtitle = element_text(size = 14, face = "plain", hjust = 0.5),
	# legend: right of the panel, no padding, small keys and tight spacing
	legend.position = "right",
	legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
	legend.box.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
	legend.key.height = unit(0.1, "cm"),
	legend.key.width = unit(0.1, "cm"),
	legend.spacing.y = unit(0.1, "cm"),
	legend.spacing.x = unit(0.1, "cm"),
	legend.title = element_text(size = 11),
	legend.text = element_text(size = 10),
	legend.title.align = 0.0
)



# Fixed point size, not used: size_point is computed per plot from -log10 P further down.
# size_point <- 0.3
# Line width of the red threshold line drawn with geom_hline() in the -log10 P plots.
size_hline <- 0.2


# darkest two hues from 3-class PuBuGn in color brewer
# cb1<-rep(c("#1c9099", "#a6bddb"), 12)

# # darkest two hues from 3-class PuBu in color brewer
# cb1<-rep(c("#2b8cbe", "#a6bddb"), 12)


# #attractive pinks, greys
# cb1<-c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#999999", "#E69F00", "#56B4E9", "#E69F00", "#009E73", "#F0E442", "#0072B2", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#999999", "#D55E00", "#CC79A7")

# cb1_rev <- c("#CC79A7", "#D55E00", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00","#999999", "#CC79A7", "#D55E00", "#0072B2", "#D55E00", "#F0E442", "#009E73", "#56B4E9", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00","#999999", "#CC79A7", "#E69F00","#999999")

# #'4-class RdBu'
# cb2 <- c('#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#f4a582','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#f4a582','#92c5de','#0571b0','#ca0020','#92c5de','#0571b0')

# #'4-class RdYlBu'
# cb3 <- c('#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#fdae61','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#fdae61','#abd9e9','#2c7bb6','#d7191c','#abd9e9','#2c7bb6')
	
	




#----------------- Prepare hamster mito logP ---------------------



# Load the -log10 P table (tab-delimited, with header row).
logP_ham_mito <- read.table(
	"log10P_human_plus_hamster_mito.txt",
	sep = "\t",
	header = TRUE,
	stringsAsFactors = FALSE
)

# Sort rows by chromosome (chr1 ... chr22, chrX) and then by position.
# The factor is only a sorting device; Chromosome is returned to character afterwards.
chrOrder <- paste0("chr", c(1:22, "X"))
logP_ham_mito$Chromosome <- factor(logP_ham_mito$Chromosome, levels = chrOrder)
logP_ham_mito <- logP_ham_mito[with(logP_ham_mito, order(Chromosome, pos)), ]
logP_ham_mito$Chromosome <- as.character(logP_ham_mito$Chromosome)




# # Transform chr1 etc. to numbers
# logP_ham_mito$Chromosome <- gsub('chr', '', logP_ham_mito$Chromosome)
# logP_ham_mito[logP_ham_mito$Chromosome == "X","Chromosome"] <- 23
# chrOrder<-c(1:23)
# logP_ham_mito$Chromosome <- factor(logP_ham_mito$Chromosome, levels=chrOrder)
# logP_ham_mito <- logP_ham_mito[order(logP_ham_mito$Chromosome, logP_ham_mito$pos), ]
# logP_ham_mito$Chromosome <- as.numeric(logP_ham_mito$Chromosome)

# Build genome-wide coordinates for logP_ham_mito.
#
# Steps:
#   1. chromosome "size" = largest pos observed on each chromosome;
#   2. per-chromosome offset = sum of the sizes of all preceding chromosomes
#      (in chrOrder order), so the first chromosome starts at 0;
#   3. merge the offsets back and add them to pos, giving column `coord`.
# After this block logP_ham_mito$Chromosome is a factor with levels chrOrder and
# the rows are sorted by chromosome, then position.

# Compute chromosome size
gen_coord <- aggregate(pos ~ Chromosome, FUN = max, data = logP_ham_mito)
colnames(gen_coord)[2] <- "chr_size"
gen_coord$Chromosome <- factor(gen_coord$Chromosome, levels = chrOrder)
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]


# Use cumsum to make genome coordinates.
# - as.numeric(): cumsum() on integer input returns NA (with a warning) once the
#   running total exceeds .Machine$integer.max, which a whole-genome sum does.
# - seq_len(nrow(gen_coord)) drops the final total without hard-coding the
#   number of chromosomes, so the code also works when some are absent.
gen_coord$coord <- cumsum(c(0, as.numeric(gen_coord$chr_size)))[seq_len(nrow(gen_coord))]

# merge genome coordinates with logP_ham_mito (joins on the shared Chromosome column)
logP_ham_mito <- merge(logP_ham_mito, gen_coord[, c("Chromosome", "coord")])
logP_ham_mito$Chromosome <- factor(logP_ham_mito$Chromosome, levels = chrOrder)
logP_ham_mito <- logP_ham_mito[order(logP_ham_mito$Chromosome, logP_ham_mito$pos), ]


# coord held the chromosome offset up to here; turn it into the genome-wide position
logP_ham_mito$coord <- logP_ham_mito$pos + logP_ham_mito$coord




# # find midpoints of chromosomes for breaks in ggplot
# mid <- function(x) {(max(x)+min(x))/2}
# chr_mid <- aggregate(coord~Chromosome,FUN = mid,data=logP)
# colnames(chr_mid)[2] <- "mid"
# chr_mid$Chromosome <-factor(chr_mid$Chromosome, levels=chrOrder)
# chr_mid <- chr_mid[order(chr_mid$Chromosome), ]
# chr_mid$Chromosome <- as.numeric(chr_mid$Chromosome)

# # Define breaks as mid-points chromosomes
# breaks <- chr_mid$mid


# # attractive grey and skyblue color scheme
# cb1<-rep(c("grey", "skyblue"), 12)


# Plain black palette: 24 entries, all "black", passed to scale_color_manual().
cb1 <- rep("black", times = 24)



# labels <- as.character(c(1:9,"",11,"",13,"","",16,"","","",20,"","","X"))


# # ------------------------------ fdr ----------------------------------------------

# # Add fdr column
# logP_ham_mito$log10q_hamster_mito <- -log10(p.adjust(10^{-logP_ham_mito$log10p_hamster_mito}))

# # find fdr threshold
# fdr_thresh_ham <- logP_ham_mito[logP_ham_mito$log10q_hamster_mito >= -log10(0.05),][which.min(logP_ham_mito[logP_ham_mito$log10q_hamster_mito >= -log10(0.05),"log10q_hamster_mito"]),"log10p_hamster_mito"]


# ---------------------- Permutation threshold ------------------------------------


# Thresholds table: headerless, tab-delimited, two columns. The first column
# becomes the row names and the second is named "thresh", so a threshold is
# looked up by row name, e.g. ["log10p_hamster_mito", ].
human_plus_hamster_mito_thresh_95 <- read.table(
	"human_plus_hamster_mito_thresh_95.txt",
	sep = "\t",
	header = FALSE,
	row.names = 1,
	col.names = c("", "thresh"),
	stringsAsFactors = FALSE
)




# --------- (1) chr1, logP_ham_mito ----------------------------

# choose chr number
# i selects the chromosome for this section: it is pasted into "chr<i>" to subset
# logP_ham_mito and into the x-axis label of the plot.
i <- 1


# id points by hand



## max genome-wide coord for hamster mito
# plot(logP_ham_mito[logP_ham_mito$Chromosome=="chr1","coord"],logP_ham_mito[logP_ham_mito$Chromosome=="chr1","log10p_hamster_mito"],cex=0.1)
#identify(logP_ham_mito[logP_ham_mito$Chromosome=="chr1","coord"],logP_ham_mito[logP_ham_mito$Chromosome=="chr1","log10p_hamster_mito"],cex=0.1)
# # [1] 17091 17436

# # logP_ham_mito[logP_ham_mito$Chromosome=="chr1",][c(17091, 17436),]
      # Chromosome      posS      posE       pos log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2 log10p_d_w3
# 17091       chr1 169910000 170910000 170410000     4.007810     6.480509     12.091352      6.580772    12.349398  0.07264087   0.1552471   0.5215804
# 17436       chr1 173360000 174360000 173860000     5.977352     7.830865      9.867266      2.670157     9.678041  0.12959537   0.1866028   0.2436041
      # log10p_d_w4 log10p_d_w6 log10p_d_avg log10p_g_d_Ix log10p_hamster_mito coef_g_0nM coef_g_8nM coef_g_25nM coef_g_75nM coef_g_avg     coef_d_w1
# 17091   0.9848498   1.7086538    0.6103546    1.72088522            8.563132 -0.1157277 -0.1321641  -0.1670915  -0.2698192 -0.1712006  0.0007341876
# 17436   0.2770580   0.2631539    0.2528515    0.09576568            8.700585 -0.1603405 -0.1622843  -0.1664147  -0.1785629 -0.1669006 -0.0012149768
         # coef_d_w2    coef_d_w3    coef_d_w4    coef_d_w6   coef_d_avg   coef_g_d_Ix coef_hamster_mito     coord
# 17091 -0.001320367 -0.003374921 -0.005429475 -0.009538583 -0.003785832 -0.0020545542      9.182385e-06 170410000
# 17436 -0.001457942 -0.001700907 -0.001943873 -0.002429803 -0.001749501 -0.0002429653      1.015891e-05 173860000


# Coefficients for both peaks are positive <<<<<<<<<< use in paper


# The peak at chr1:173860000 lies closer to DARS2 than to GAS5-AS1 (distances in bp):

# Distance between the 3' end of DARS2 and the hamster mito QTL:
173860000-173858400
# [1] 1600

# Distance between the hamster mito QTL and the 5' end of GAS5-AS1:
173863248-173860000
# [1] 3248





# START HERE to skip the interactive identify() step: the row indices below are
# the hand-picked chr1 peaks.
points_p1 <- c(17091, 17436)

# Picked rows, with the gene symbol assigned to each peak prepended as first column.
logP_ham_mito_sub <- cbind(
	geneSymbol = c("GORAB", "DARS2"),
	logP_ham_mito[points_p1, ]
)


# Label table for geom_text(): one row per peak, ordered by position, with
# per-label nudges (x in Mb, y in -log10 P units).
labels_p1 <- logP_ham_mito_sub[
	order(logP_ham_mito_sub$pos),
	c("Chromosome", "pos", "log10p_hamster_mito", "geneSymbol")
]
labels_p1$nudge_x <- 0
labels_p1$nudge_y <- 0.5
# nudge_x must not have 0 as its first element because of a ggplot2 bug,
# cf https://github.com/tidyverse/ggplot2/issues/2977
labels_p1$nudge_x[labels_p1$geneSymbol == "GORAB"] <- -23
labels_p1$nudge_x[labels_p1$geneSymbol == "DARS2"] <- 19


# Point sizes for chromosome i: base size 0.1, inflated by up to balloon_scale
# (50%) in proportion to -log10 P relative to the chromosome maximum.
balloon_scale <- 0.5 # inflation factor for significant points
size_point <- local({
	chr_logp <- logP_ham_mito[logP_ham_mito$Chromosome == paste0("chr", i), "log10p_hamster_mito"]
	0.1 * (1 + balloon_scale * (chr_logp / max(chr_logp)))
})



# p1: hamster-mitochondria -log10 P along chromosome i (x in Mb), with gene
# labels at the hand-picked peaks and the threshold as a red line.
# Note: the colour aesthetic is the literal string "as.factor(Chromosome)", so
# every point falls in one colour group and receives the first entry of cb1.
p1 <- ggplot() +
	geom_point(
		data = logP_ham_mito[logP_ham_mito$Chromosome == paste0("chr", i), ],
		mapping = aes(
			x = pos / 1e6,
			y = log10p_hamster_mito,
			color = "as.factor(Chromosome)"
		),
		size = size_point,
		stroke = 0
	) +
	# vector-valued nudge_x / nudge_y give a warning, but seem to work
	geom_text(
		data = labels_p1,
		mapping = aes(
			x = pos / 1e6,
			y = log10p_hamster_mito,
			label = geneSymbol,
			fontface = "italic"
		),
		nudge_x = labels_p1$nudge_x,
		nudge_y = labels_p1$nudge_y,
		colour = "black",
		size = 2.5
	) +
	# geom_text( aes(x = 18990000/1e6, y = 44.74028), label="IFFO2", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
	# geom_text( aes(x = 51710000/1e6, y = 23.24700), label="OSBPL9", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
	# geom_text( aes(x = 103260000/1e6, y = 54.41426), label="COL11A1", colour = "black", size = 3, nudge_x=12, nudge_y=2) +
	# geom_text( aes(x = 159590000/1e6, y = 20.71389), label="APCS", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
	# geom_text( aes(x = 225540000/1e6, y = 18.84761), label="ENAH", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
	geom_hline(
		yintercept = human_plus_hamster_mito_thresh_95["log10p_hamster_mito", ],
		linetype = "solid",
		color = "red",
		size = size_hline
	) +
	scale_color_manual(values = cb1) +
	scale_x_continuous() +
	scale_y_continuous(breaks = c(0, 2, 4, 6, 8), labels = c(0, 2, 4, 6, 8)) +
	labs(
		x = paste0("Chromosome ", i, " (Mb)"),
		y = expression('-log'[10]*italic('P')),
		subtitle = "Hamster mitochondria"
	) +
	theme_odd
print(p1)



# ------------- (2) line plot DARS2 copy number change vs hamster mitochondria copy number -----------------------

# cf g_loci_chr_1.R


# Return n colours with evenly spaced hues (HCL, luminance 65, chroma 100),
# the usual recipe for reproducing ggplot2's default discrete palette.
#   n: number of colours wanted (non-negative whole number).
# Returns a character vector of n hex colour strings; character(0) when n is 0.
gg_color_hue <- function(n) {
  # n + 1 hues from 15 to 375 degrees; the last one coincides with the first (15 == 375 mod 360)
  hues <- seq(15, 375, length.out = n + 1)
  # seq_len(n) rather than 1:n, so that n == 0 yields no colours instead of one
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}


# Copy number data (tab-delimited, with header row).
RH_human <- read.table(
	"RH_human_gseq.txt",
	sep = "\t",
	header = TRUE,
	stringsAsFactors = FALSE
)


# Sort rows by chromosome (chr1 ... chr22, chrX, chrY) and then by position.
# Note that chrOrder is redefined here to include chrY.
chrOrder <- paste0("chr", c(1:22, "X", "Y"))
RH_human$Chromosome <- factor(RH_human$Chromosome, levels = chrOrder)
RH_human <- RH_human[with(RH_human, order(Chromosome, pos)), ]
RH_human$Chromosome <- as.character(RH_human$Chromosome)


# Drop chrY, because there is no chrY sequence in the hamster genome.
RH_human <- RH_human[RH_human$Chromosome != "chrY", ]




# Ancillary tables used to annotate and normalise the copy-number data.

# cell label information
cell <- read.table(
	"cell_label_info.txt",
	sep = "\t",
	header = TRUE,
	stringsAsFactors = FALSE
)

# per-sample column totals of RH_human, from column 5 onwards (the sample columns)
human_sum_reads <- colSums(RH_human[, 5:ncol(RH_human)])

# uses mapped human reads, cf human_AIC_1.R
human_total_reads <- read.table(
	"RH_pool_human_total_align.txt",
	sep = "\t",
	header = TRUE,
	stringsAsFactors = FALSE
)

# hamster mitochondria table (supplies hamster_mito_reads per sample)
hamster_mito <- read.table(
	"hamster_mito_gseq.txt",
	sep = "\t",
	header = TRUE,
	stringsAsFactors = FALSE
)



# Row 17436 holds the maximum hamster-mitochondria -log10 P and corresponds to DARS2,
# cf hamster_mito_1.R and p1 above.
# Shown for inspection only; the expected output is reproduced in the comments below.
logP_ham_mito[17436,]
      # Chromosome      posS      posE       pos log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2 log10p_d_w3 log10p_d_w4 log10p_d_w6 log10p_d_avg log10p_g_d_Ix log10p_hamster_mito
# 17436          1 173360000 174360000 173860000     5.977352     7.830865      9.867266      2.670157     9.678041   0.1295954   0.1866028   0.2436041    0.277058   0.2631539    0.2528515    0.09576568            8.700585
      # coef_g_0nM coef_g_8nM coef_g_25nM coef_g_75nM coef_g_avg    coef_d_w1    coef_d_w2    coef_d_w3    coef_d_w4    coef_d_w6   coef_d_avg   coef_g_d_Ix coef_hamster_mito     coord
# 17436 -0.1603405 -0.1622843  -0.1664147  -0.1785629 -0.1669006 -0.001214977 -0.001457942 -0.001700907 -0.001943873 -0.002429803 -0.001749501 -0.0002429653      1.015891e-05 173860000




# Same row index in the copy-number table, shown for inspection only. The expected
# output below is the same chr1 window (pos 173860000) with per-sample read counts.
RH_human[17436,]
      # Chromosome      posS      posE       pos RH1_w0_d0 RH1_w1_d0 RH1_w1_d8 RH1_w1_d25 RH1_w1_d75 RH1_w2_d0 RH1_w2_d8 RH1_w2_d25 RH1_w3_d0 RH1_w3_d8 RH1_w3_d25 RH1_w3_d75 RH1_w4_d0 RH1_w4_d8 RH1_w4_d25 RH1_w4_d75
# 17436       chr1 173360000 174360000 173860000        46        56        61         45         54        51        42         34        45        38         38         41        50        44         42         44
      # RH1_w6_d0 RH1_w6_d8 RH1_w6_d25 RH1_w6_d75 RH2_w0_d0 RH2_w1_d0 RH2_w1_d8 RH2_w1_d25 RH2_w1_d75 RH2_w2_d0 RH2_w2_d8 RH2_w2_d25 RH2_w3_d0 RH2_w3_d8 RH2_w3_d25 RH2_w3_d75 RH2_w4_d0 RH2_w4_d8 RH2_w4_d25 RH2_w6_d0
# 17436        86       117        195         80       169       248       289        244        195       230       292         95       328       261        160        373       278       275        160       282
      # RH2_w6_d8 RH2_w6_d25 RH2_w6_d75 RH3_w0_d0 RH3_w1_d0 RH3_w1_d8 RH3_w1_d25 RH3_w1_d75 RH3_w2_d0 RH3_w2_d8 RH3_w2_d25 RH3_w3_d0 RH3_w3_d8 RH3_w3_d25 RH3_w3_d75 RH3_w4_d0 RH3_w4_d8 RH3_w4_d25 RH3_w4_d75 RH3_w6_d0
# 17436       282        337        367       155        68        40         59         63        36        33         27        23        39         11         29        20        18         15         13        20
      # RH3_w6_d8 RH3_w6_d25 RH3_w6_d75 RH4_w0_d0 RH4_w1_d0 RH4_w1_d8 RH4_w1_d25 RH4_w1_d75 RH4_w2_d0 RH4_w2_d8 RH4_w2_d25 RH4_w3_d0 RH4_w3_d8 RH4_w3_d25 RH4_w3_d75 RH4_w4_d0 RH4_w4_d8 RH4_w4_d25 RH4_w4_d75 RH4_w6_d0
# 17436        27         25         13       228       150       191        143        139       105        96         59        71        73         43         23        44        64         10         16        43
      # RH4_w6_d8 RH4_w6_d25 RH4_w6_d75 RH5_w0_d0 RH5_w1_d0 RH5_w1_d8 RH5_w1_d25 RH5_w2_d0 RH5_w2_d8 RH5_w2_d25 RH5_w3_d0 RH5_w3_d8 RH5_w3_d25 RH5_w3_d75 RH5_w4_d0 RH5_w4_d8 RH5_w4_d25 RH5_w6_d0 RH5_w6_d8 RH5_w6_d25
# 17436        39         17         20       114        96        80         70        77        58         60        71        46         69        149        40        40         31        55        18         21
      # RH5_w6_d75 RH6_w0_d0 RH6_w1_d0 RH6_w1_d8 RH6_w1_d25 RH6_w2_d0 RH6_w2_d8 RH6_w2_d25 RH6_w3_d0 RH6_w3_d8 RH6_w3_d25 RH6_w3_d75 RH6_w4_d0 RH6_w4_d8 RH6_w4_d25 RH6_w6_d0 RH6_w6_d8 RH6_w6_d25 RH6_w6_d75
# 17436         16        96        77       119         94        94        69         45        56        55         30         75        49        34         29        35        54         22         13




# Copy-number row(s) for the DARS2 window: match the window coordinates of
# logP_ham_mito row 17436 against RH_human, then prepend the gene symbol.
RH_human_sub <- cbind(
	geneSymbol = "DARS2",
	merge(
		logP_ham_mito[17436, c("Chromosome", "posS", "posE", "pos")],
		RH_human
	)
)


# Prepare for mgcv::gam: wide -> long.
# Keeps geneSymbol (column 1) and the sample columns (6 onwards); each sample
# column becomes one row, with the sample name in RH_ID and its value in copy.
# new.row.names just supplies plain integer row names (1, 2, ...).
RH_human_sub_l <- reshape(
	data = RH_human_sub[c(1, 6:ncol(RH_human_sub))],
	direction = "long",
	varying = colnames(RH_human_sub)[6:ncol(RH_human_sub)],
	times = colnames(RH_human_sub)[6:ncol(RH_human_sub)],
	v.names = "copy",
	timevar = "RH_ID",
	new.row.names = 1:1e6
)
  
# Decode week, drug concentration and pool from the sample label in RH_ID
# (labels look like "RH1_w3_d25"). Each column starts at 0 and is then filled
# in by pattern match on the label.
#
# Values are assigned through `df$col[mask]` rather than `df[mask, ]$col`: the
# latter stops with "replacement has 1 row, data has 0" whenever a label is
# absent from the data, whereas the former is simply a no-op in that case.

# week: "_w0_" samples keep the initial 0
RH_human_sub_l$week <- 0
RH_human_sub_l$week[grepl("_w1_", RH_human_sub_l$RH_ID)] <- 1
RH_human_sub_l$week[grepl("_w2_", RH_human_sub_l$RH_ID)] <- 2
RH_human_sub_l$week[grepl("_w3_", RH_human_sub_l$RH_ID)] <- 3
RH_human_sub_l$week[grepl("_w4_", RH_human_sub_l$RH_ID)] <- 4
RH_human_sub_l$week[grepl("_w6_", RH_human_sub_l$RH_ID)] <- 6

# conc: "_d0" samples keep the initial 0
RH_human_sub_l$conc <- 0
RH_human_sub_l$conc[grepl("_d8", RH_human_sub_l$RH_ID)] <- 8
RH_human_sub_l$conc[grepl("_d25", RH_human_sub_l$RH_ID)] <- 25
RH_human_sub_l$conc[grepl("_d75", RH_human_sub_l$RH_ID)] <- 75

# pool: 0 remains only for labels that match none of RH1_ ... RH6_
RH_human_sub_l$pool <- 0
RH_human_sub_l$pool[grepl("RH1_", RH_human_sub_l$RH_ID)] <- 1
RH_human_sub_l$pool[grepl("RH2_", RH_human_sub_l$RH_ID)] <- 2
RH_human_sub_l$pool[grepl("RH3_", RH_human_sub_l$RH_ID)] <- 3
RH_human_sub_l$pool[grepl("RH4_", RH_human_sub_l$RH_ID)] <- 4
RH_human_sub_l$pool[grepl("RH5_", RH_human_sub_l$RH_ID)] <- 5
RH_human_sub_l$pool[grepl("RH6_", RH_human_sub_l$RH_ID)] <- 6




# Attach per-sample annotation. merge() is called without `by`, so each join is
# on whatever column names the two tables share.
# NOTE(review): the columns of `cell` are not visible here; presumably it shares
# RH_ID / pool / conc / week with RH_human_sub_l and adds `cell` - confirm.
RH_human_sub_l <- merge(RH_human_sub_l,cell)
# Adds hamster_mito_reads for each sample.
RH_human_sub_l <- merge(RH_human_sub_l, hamster_mito[,c("RH_ID","pool","conc","week","cell","hamster_mito_reads")])
# human_sum_reads is a named vector (names = sample columns of RH_human), looked up by RH_ID.
RH_human_sub_l$human_sum_reads <- human_sum_reads[RH_human_sub_l$RH_ID]
# Columns 1:5 and 9 of human_total_reads are selected by position.
# NOTE(review): presumably 1:5 are the shared key columns and 9 is the total
# mapped human read count - verify against the file.
RH_human_sub_l  <- merge(RH_human_sub_l, human_total_reads[,c(1:5,9)])
# Positional rename: assumes the merged-in total lands in column 11. This breaks
# silently if any earlier step adds or removes a column.
colnames(RH_human_sub_l)[11] <- "human_total_reads"


# pool and cell are converted to factors; they enter the gam below as s(..., bs = "re") terms.
RH_human_sub_l$pool <- as.factor(RH_human_sub_l$pool)
RH_human_sub_l$cell <- as.factor(RH_human_sub_l$cell)


# Order gene symbols by increasing coef_hamster_mito and use that order as the
# factor levels of geneSymbol (useful with several genes; here there is only one).
gene_levels <- merge(
	RH_human_sub,
	logP_ham_mito[, c("Chromosome", "posS", "posE", "pos", "coef_hamster_mito")],
	all.x = TRUE
)
gene_levels <- gene_levels[order(gene_levels$coef_hamster_mito), ]
RH_human_sub_l$geneSymbol <- factor(
	RH_human_sub_l$geneSymbol,
	levels = gene_levels$geneSymbol
)



# Placeholder (numeric NA) columns that the model loop fills in.
RH_human_sub_l[c("phat", "phat_se", "phat_center", "log_copy_center")] <- NA_real_

# Get the effect of hamster mitochondria at the origin: week and conc are
# overwritten with 0 for every row before the model is fitted.
RH_human_sub_l[c("week", "conc")] <- 0



# Per-gene negative binomial fit. The loop form is kept because it is useful when
# colouring by (multiple) gene(s), cf g_loci_chr_1.R, d_loci_chr_1.R.
# Here there is only one gene, so the loop body runs once; colouring is by pool instead (see below).

for (i in c(1:1)) {

	# at week == 0 and conc == 0, model below equivalent to:
	# m1_nb <- gam(copy ~ hamster_mito_reads + s(pool, bs = "re") + s(cell, bs = "re") + offset(log(human_total_reads)), data = subset(RH_human_sub_l, RH_human_sub_l$id == i), family = nb, method = "REML")

	m1_nb <- gam(
		copy ~ week * conc + hamster_mito_reads + s(pool, bs = "re") + s(cell, bs = "re") + offset(log(human_total_reads)),
		data = subset(RH_human_sub_l, RH_human_sub_l$id == i),
		family = nb,
		method = "REML"
	)

	# fitted values and their standard errors on the link (natural log) scale
	link_pred <- predict(m1_nb, se.fit = TRUE, type = "link")
	RH_human_sub_l[RH_human_sub_l$id == i, ]$phat <- link_pred$fit
	RH_human_sub_l[RH_human_sub_l$id == i, ]$phat_se <- link_pred$se.fit

	# intercept of phat regressed on hamster_mito_reads; subtracting it centres
	# both phat and log(copy). phat is already on the natural log scale.
	phat_intercept <- lm(
		phat ~ hamster_mito_reads,
		data = RH_human_sub_l[RH_human_sub_l$id == i, ]
	)$coefficients["(Intercept)"]
	RH_human_sub_l[RH_human_sub_l$id == i, ]$phat_center <- RH_human_sub_l[RH_human_sub_l$id == i, ]$phat - phat_intercept
	RH_human_sub_l[RH_human_sub_l$id == i, ]$log_copy_center <- log(RH_human_sub_l[RH_human_sub_l$id == i, ]$copy) - phat_intercept

}



# # color by gene
# n = length(unique(RH_human_sub_l$geneSymbol))
# colores_1 = gg_color_hue(n)
# names(colores_1) <- gene_levels$geneSymbol



# Colour by pool: one hue per distinct pool, named by the pool factor levels so
# that scale_color_manual() can match colours to levels.
n <- length(unique(RH_human_sub_l$pool))
colores_1 <- setNames(gg_color_hue(n), levels(RH_human_sub_l$pool))




# p2: centred log2 DARS2 copy number against hamster mitochondria reads.
# Points are the observed values coloured by pool; the red line is a
# through-the-origin linear smooth of the centred model predictions with a grey
# 95% band. Natural-log values are converted to log2 by the factor log2(exp(1)).
p2 <- ggplot() +
	theme(legend.key = element_blank()) +
	geom_point(
		data = RH_human_sub_l,
		mapping = aes(
			x = hamster_mito_reads,
			y = log2(exp(1)) * log_copy_center,
			colour = pool
		),
		shape = 1,
		stroke = 0.4,
		size = 1.0
	) +
	# grouped (not coloured or filled) by geneSymbol; the line colour is fixed to red
	geom_smooth(
		data = RH_human_sub_l,
		mapping = aes(
			x = hamster_mito_reads,
			y = log2(exp(1)) * phat_center,
			group = geneSymbol
		),
		method = "lm",
		formula = y ~ 0 + x,
		fullrange = TRUE,
		se = TRUE,
		level = 0.95,
		colour = "red",
		fill = "grey",
		size = 0.3,
		alpha = 0.3
	) +
	scale_color_manual(
		values = colores_1,
		name = "Pool",
		labels = levels(RH_human_sub_l[, "pool"])
	) +
	# legend keys: open circles without the smooth's fill, in a single column
	guides(
		colour = guide_legend(
			override.aes = list(fill = NA, shape = 1, size = 1),
			ncol = 1,
			byrow = TRUE
		)
	) +
	theme_even +
	coord_cartesian(xlim = c(0, 3e5)) +
	# axis labelled in units of 10^5 reads
	scale_x_continuous(
		expand = expand_scale(add = .1e5),
		limits = c(0, 3e5),
		breaks = c(0, 1e5, 2e5, 3e5),
		labels = c(0, 1, 2, 3)
	) +
	labs(
		x = expression(Mito~reads~(""%*%~10^-5)),
		y = expression(Delta*log[2]~(Human~italic(DARS2)~reads)),
		subtitle = "Hamster mitochondria"
	)
print(p2)






# # ------------------ (5) DARS2 logP vs Gencode, UCSC genome browser ----------------

# # Not useful

# ## Restore logP_ham_mito Chromosome names to "chr1" etc. for benefit of UCSC genome browser
# # logP_ham_mito[logP_ham_mito$Chromosome==23,"Chromosome"] <- "X"
# # logP_ham_mito$Chromosome <- paste0("chr", logP_ham_mito$Chromosome)

# ## max genome-wide coord for hamster mito
# # plot(logP_ham_mito[,"coord"],logP_ham_mito[,"log10p_hamster_mito"],cex=0.1)
# # identify(logP_ham_mito[,"coord"],logP_ham_mito[,"log10p_hamster_mito"],cex=0.1)
# # [1] 17436

# # logP_ham_mito[17436,]
      # # Chromosome      posS      posE       pos log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2 log10p_d_w3 log10p_d_w4 log10p_d_w6 log10p_d_avg log10p_g_d_Ix log10p_hamster_mito
# # 17436       chr1 173360000 174360000 173860000     5.977352     7.830865      9.867266      2.670157     9.678041   0.1295954   0.1866028   0.2436041    0.277058   0.2631539    0.2528515    0.09576568            8.700585
      # # coef_g_0nM coef_g_8nM coef_g_25nM coef_g_75nM coef_g_avg    coef_d_w1    coef_d_w2    coef_d_w3    coef_d_w4    coef_d_w6   coef_d_avg   coef_g_d_Ix coef_hamster_mito     coord
# # 17436 -0.1603405 -0.1622843  -0.1664147  -0.1785629 -0.1669006 -0.001214977 -0.001457942 -0.001700907 -0.001943873 -0.002429803 -0.001749501 -0.0002429653      1.015891e-05 173860000




# # delta <- 4e6
# # DARS2 <- logP_ham_mito[logP_ham_mito$Chromosome == "chr1" & logP_ham_mito$pos >= 173860000 - delta & logP_ham_mito$pos <= 173860000 + delta,c("Chromosome","pos","log10p_hamster_mito")]

# # dim(DARS2)
# # # [1] 801   3

# # DARS2$posS <- DARS2$pos + 1
# # DARS2 <- DARS2[,c("Chromosome","pos","posS","log10p_hamster_mito")]

# # DARS2$pos <- format(DARS2$pos,scientific=FALSE)
# # DARS2$posS <- format(DARS2$posS,scientific=FALSE)

# # head(DARS2)
      # # Chromosome       pos      posS log10p_hamster_mito
# # 17036       chr1 169860000 169860001            6.292762
# # 17037       chr1 169870000 169870001            6.434747
# # 17038       chr1 169880000 169880001            6.436689
# # 17039       chr1 169890000 169890001            6.572945
# # 17040       chr1 169900000 169900001            6.712489
# # 17041       chr1 169910000 169910001            6.758370


# # write.table(DARS2, "DARS2.txt",quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)

# # # Place following header at top of DARS2.txt and use in bedGraph format on ucsc genome browser
# # browser position chr1:169860000-177860001
# # track type=bedGraph name="-log10P" description="DARS2" visibility=full color=0,0,255 altColor=255,0,0 priority=20

# # Custom track settings
# # Display mode: full
# # Type of graph: points
# # Track height: 128 pixels
# # Data view scaling: auto-scale to data view
# # Always include zero: ON
# # Vertical viewing range:  min: 0; max: 1000  (range: 0 to 1000) (greyed out)
# # Transform function: Transform data points by: NONE
# # Windowing function: mean
# # Smoothing window: OFF
# # Negate values: not selected
# # Draw y indicator lines: 
# # at y = 0.0: ON at y =12.1293 ON (corresponds to human_thresh_95.txt, for log10p_g_0nM)


# # Configure Image page on ucsc genome browser:
# # image width:	400	pixels
# # label area width:	7	characters	
# # text size: 12


# p5 <- ggdraw() + draw_image(magick::image_read_pdf("CTTNBP2_0nM.pdf", density = 300),scale=1.4) + coord_cartesian(clip = "off") # + draw_label("Paclitaxel", fontface='plain', size=12, x=0.55,y=0.92) + draw_label("D", fontface='bold',x=0.05,y=0.98)





# --------- (3) chr4, logP_ham_mito ----------------------------

# choose chr number
# i selects the chromosome for this section: it is pasted into "chr<i>" to subset
# logP_ham_mito and into the x-axis label of the plot.
i <- 4


# id points by hand



## 2nd highest genome-wide coord for hamster mito
# plot(logP_ham_mito[,"coord"],logP_ham_mito[,"log10p_hamster_mito"],cex=0.1)
# identify(logP_ham_mito[,"coord"],logP_ham_mito[,"log10p_hamster_mito"],cex=0.1)
# [1] 78592

# logP_ham_mito[78592,]
       # Chromosome     coord     posS     posE      pos log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2 log10p_d_w3
# 199896       chr4 782427755 92490000 93490000 92990000     3.632715     5.161592      7.623873      3.075413     7.612941   0.1155442   0.3077881   0.5773944
       # log10p_d_w4 log10p_d_w6 log10p_d_avg log10p_g_d_Ix log10p_hamster_mito coef_g_0nM coef_g_8nM coef_g_25nM coef_g_75nM coef_g_avg     coef_d_w1
# 199896   0.8212055   0.9506659    0.6329973     0.5275414            8.024453  -0.107954  -0.115248  -0.1307478  -0.1763353 -0.1325713 -0.0009322967
          # coef_d_w2    coef_d_w3    coef_d_w4    coef_d_w6   coef_d_avg   coef_g_d_Ix coef_hamster_mito
# 199896 -0.001844047 -0.002755798 -0.003667548 -0.005491049 -0.002938148 -0.0009117506      8.947832e-06


# Coefficient for this peak is positive <<<<<<<<<< use in paper



# START HERE to skip the interactive identify() step: the row index below is the
# hand-picked chr4 peak.
points_p3 <- 78592

# Picked row, with the gene symbol assigned to the peak prepended as first column.
logP_ham_mito_sub <- cbind(
	geneSymbol = "GRID2",
	logP_ham_mito[points_p3, ]
)


# Label table for geom_text(): one row per peak, ordered by position, with
# per-label nudges (x in Mb, y in -log10 P units).
labels_p3 <- logP_ham_mito_sub[
	order(logP_ham_mito_sub$pos),
	c("Chromosome", "pos", "log10p_hamster_mito", "geneSymbol")
]
labels_p3$nudge_x <- 0
labels_p3$nudge_y <- 0
# nudge_x must not have 0 as its first element because of a ggplot2 bug,
# cf https://github.com/tidyverse/ggplot2/issues/2977 ; a negligible non-zero
# value is used instead.
labels_p3$nudge_x[labels_p3$geneSymbol == "GRID2"] <- 2.2e-16
labels_p3$nudge_y[labels_p3$geneSymbol == "GRID2"] <- 0.5


# Point sizes for chromosome i: base size 0.1, inflated by up to balloon_scale
# (50%) in proportion to -log10 P relative to the chromosome maximum.
balloon_scale <- 0.5 # inflation factor for significant points
size_point <- local({
	chr_logp <- logP_ham_mito[logP_ham_mito$Chromosome == paste0("chr", i), "log10p_hamster_mito"]
	0.1 * (1 + balloon_scale * (chr_logp / max(chr_logp)))
})



# p3: hamster-mitochondria -log10 P along chromosome i (x in Mb), with the gene
# label at the hand-picked peak and the threshold as a red line.
# Note: the colour aesthetic is the literal string "as.factor(Chromosome)", so
# every point falls in one colour group and receives the first entry of cb1.
p3 <- ggplot() +
	geom_point(
		data = logP_ham_mito[logP_ham_mito$Chromosome == paste0("chr", i), ],
		mapping = aes(
			x = pos / 1e6,
			y = log10p_hamster_mito,
			color = "as.factor(Chromosome)"
		),
		size = size_point,
		stroke = 0
	) +
	# vector-valued nudge_x / nudge_y give a warning, but seem to work
	geom_text(
		data = labels_p3,
		mapping = aes(
			x = pos / 1e6,
			y = log10p_hamster_mito,
			label = geneSymbol,
			fontface = "italic"
		),
		nudge_x = labels_p3$nudge_x,
		nudge_y = labels_p3$nudge_y,
		colour = "black",
		size = 2.5
	) +
	# geom_text( aes(x = 18990000/1e6, y = 44.74028), label="IFFO2", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
	# geom_text( aes(x = 51710000/1e6, y = 23.24700), label="OSBPL9", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
	# geom_text( aes(x = 103260000/1e6, y = 54.41426), label="COL11A1", colour = "black", size = 3, nudge_x=12, nudge_y=2) +
	# geom_text( aes(x = 159590000/1e6, y = 20.71389), label="APCS", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
	# geom_text( aes(x = 225540000/1e6, y = 18.84761), label="ENAH", colour = "black", size = 3, nudge_x=0, nudge_y=2) +
	geom_hline(
		yintercept = human_plus_hamster_mito_thresh_95["log10p_hamster_mito", ],
		linetype = "solid",
		color = "red",
		size = size_hline
	) +
	scale_color_manual(values = cb1) +
	scale_x_continuous() +
	scale_y_continuous(breaks = c(0, 2, 4, 6, 8), labels = c(0, 2, 4, 6, 8)) +
	labs(
		x = paste0("Chromosome ", i, " (Mb)"),
		y = expression('-log'[10]*italic('P')),
		subtitle = "Hamster mitochondria"
	) +
	theme_odd
print(p3)






# ------------- (4) line plot GRID2 copy number change vs hamster mitochondria copy number -----------------------

# cf g_loci_chr_1.R


# Return n colours with evenly spaced hues (HCL, luminance 65, chroma 100),
# the usual recipe for reproducing ggplot2's default discrete palette.
#   n: number of colours wanted (non-negative whole number).
# Returns a character vector of n hex colour strings; character(0) when n is 0.
gg_color_hue <- function(n) {
  # n + 1 hues from 15 to 375 degrees; the last one coincides with the first (15 == 375 mod 360)
  hues <- seq(15, 375, length.out = n + 1)
  # seq_len(n) rather than 1:n, so that n == 0 yields no colours instead of one
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}


# Copy number data (tab-delimited, with header row).
RH_human <- read.table(
	"RH_human_gseq.txt",
	sep = "\t",
	header = TRUE,
	stringsAsFactors = FALSE
)


# Sort rows by chromosome (chr1 ... chr22, chrX, chrY) and then by position.
# Note that chrOrder is redefined here to include chrY.
chrOrder <- paste0("chr", c(1:22, "X", "Y"))
RH_human$Chromosome <- factor(RH_human$Chromosome, levels = chrOrder)
RH_human <- RH_human[with(RH_human, order(Chromosome, pos)), ]
RH_human$Chromosome <- as.character(RH_human$Chromosome)


# Drop chrY, because there is no chrY sequence in the hamster genome.
RH_human <- RH_human[RH_human$Chromosome != "chrY", ]




# Ancillary tables used to annotate and normalise the copy-number data.

# cell label information
cell <- read.table(
	"cell_label_info.txt",
	sep = "\t",
	header = TRUE,
	stringsAsFactors = FALSE
)

# per-sample column totals of RH_human, from column 5 onwards (the sample columns)
human_sum_reads <- colSums(RH_human[, 5:ncol(RH_human)])

# uses mapped human reads, cf human_AIC_1.R
human_total_reads <- read.table(
	"RH_pool_human_total_align.txt",
	sep = "\t",
	header = TRUE,
	stringsAsFactors = FALSE
)

# hamster mitochondria table (supplies hamster_mito_reads per sample)
hamster_mito <- read.table(
	"hamster_mito_gseq.txt",
	sep = "\t",
	header = TRUE,
	stringsAsFactors = FALSE
)



# Row 78592 holds the second-highest genome-wide hamster-mitochondria -log10 P peak
# (8.02, versus 8.70 for DARS2) and corresponds to GRID2, cf hamster_mito_1.R and p3 above.
# Shown for inspection only; the expected output is reproduced in the comments below.
logP_ham_mito[78592,]
        # Chromosome     coord     posS     posE      pos log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2 log10p_d_w3
# 199896       chr4 782427755 92490000 93490000 92990000     3.632715     5.161592      7.623873      3.075413     7.612941   0.1155442   0.3077881   0.5773944
       # log10p_d_w4 log10p_d_w6 log10p_d_avg log10p_g_d_Ix log10p_hamster_mito coef_g_0nM coef_g_8nM coef_g_25nM coef_g_75nM coef_g_avg     coef_d_w1
# 199896   0.8212055   0.9506659    0.6329973     0.5275414            8.024453  -0.107954  -0.115248  -0.1307478  -0.1763353 -0.1325713 -0.0009322967
          # coef_d_w2    coef_d_w3    coef_d_w4    coef_d_w6   coef_d_avg   coef_g_d_Ix coef_hamster_mito
# 199896 -0.001844047 -0.002755798 -0.003667548 -0.005491049 -0.002938148 -0.0009117506      8.947832e-06




# Same row index in the copy-number table, shown for inspection only. The expected
# output below is the same chr4 window (pos 92990000) with per-sample read counts.
RH_human[78592,]
      # Chromosome     posS     posE      pos RH1_w0_d0 RH1_w1_d0 RH1_w1_d8 RH1_w1_d25 RH1_w1_d75 RH1_w2_d0 RH1_w2_d8 RH1_w2_d25 RH1_w3_d0 RH1_w3_d8 RH1_w3_d25
# 78592       chr4 92490000 93490000 92990000        57        45        43         37         39        25        15         14        30        20         28
      # RH1_w3_d75 RH1_w4_d0 RH1_w4_d8 RH1_w4_d25 RH1_w4_d75 RH1_w6_d0 RH1_w6_d8 RH1_w6_d25 RH1_w6_d75 RH2_w0_d0 RH2_w1_d0 RH2_w1_d8 RH2_w1_d25 RH2_w1_d75
# 78592         12        49        19         31          5       104        81         62         11       131       117       104        101         81
      # RH2_w2_d0 RH2_w2_d8 RH2_w2_d25 RH2_w3_d0 RH2_w3_d8 RH2_w3_d25 RH2_w3_d75 RH2_w4_d0 RH2_w4_d8 RH2_w4_d25 RH2_w6_d0 RH2_w6_d8 RH2_w6_d25 RH2_w6_d75
# 78592        67        60         51        82        64        129        232        84        47        148       125        79        276        197
      # RH3_w0_d0 RH3_w1_d0 RH3_w1_d8 RH3_w1_d25 RH3_w1_d75 RH3_w2_d0 RH3_w2_d8 RH3_w2_d25 RH3_w3_d0 RH3_w3_d8 RH3_w3_d25 RH3_w3_d75 RH3_w4_d0 RH3_w4_d8
# 78592       151       111        93         78        103        87        97         84        67       119        114         54        95        94
      # RH3_w4_d25 RH3_w4_d75 RH3_w6_d0 RH3_w6_d8 RH3_w6_d25 RH3_w6_d75 RH4_w0_d0 RH4_w1_d0 RH4_w1_d8 RH4_w1_d25 RH4_w1_d75 RH4_w2_d0 RH4_w2_d8 RH4_w2_d25
# 78592        126         20        63        80         70          5       191       105       131        111        121        82        55         57
      # RH4_w3_d0 RH4_w3_d8 RH4_w3_d25 RH4_w3_d75 RH4_w4_d0 RH4_w4_d8 RH4_w4_d25 RH4_w4_d75 RH4_w6_d0 RH4_w6_d8 RH4_w6_d25 RH4_w6_d75 RH5_w0_d0 RH5_w1_d0
# 78592        30        42         27         52        25        16         21         56        16        14         15         41        67        38
      # RH5_w1_d8 RH5_w1_d25 RH5_w2_d0 RH5_w2_d8 RH5_w2_d25 RH5_w3_d0 RH5_w3_d8 RH5_w3_d25 RH5_w3_d75 RH5_w4_d0 RH5_w4_d8 RH5_w4_d25 RH5_w6_d0 RH5_w6_d8
# 78592        56         46        47        22         41        40        39         52         94        28        26         24        38        18
      # RH5_w6_d25 RH5_w6_d75 RH6_w0_d0 RH6_w1_d0 RH6_w1_d8 RH6_w1_d25 RH6_w2_d0 RH6_w2_d8 RH6_w2_d25 RH6_w3_d0 RH6_w3_d8 RH6_w3_d25 RH6_w3_d75 RH6_w4_d0
# 78592         33          7        59        48        43         15        43        41         21        15        25         18         34        16
      # RH6_w4_d8 RH6_w4_d25 RH6_w6_d0 RH6_w6_d8 RH6_w6_d25 RH6_w6_d75
# 78592        15         17        21        38         12         11





# Select the copy number row(s) whose coordinates match row 78592 of
# logP_ham_mito (merge joins on the shared columns) and prepend the gene symbol.
RH_human_sub <- cbind(
  geneSymbol = "GRID2",
  merge(
    logP_ham_mito[78592, c("Chromosome", "posS", "posE", "pos")],
    RH_human
  )
)


# Prepare for mgcv::gam

# Wide -> long: keep geneSymbol (column 1) plus every sample column (6 onward);
# each sample column becomes one row with its name in RH_ID and its value in copy.
RH_human_sub_l <- reshape(
  RH_human_sub[c(1, 6:ncol(RH_human_sub))],
  direction = "long",
  varying = colnames(RH_human_sub)[6:ncol(RH_human_sub)],
  times = colnames(RH_human_sub)[6:ncol(RH_human_sub)],
  timevar = "RH_ID",
  v.names = "copy",
  new.row.names = 1:1e6
)
  
# Decode week, concentration and pool from the sample labels held in RH_ID
# (labels look like "RH1_w1_d8"). Each field starts at 0 and is overwritten for
# rows whose label contains the corresponding tag.
#
# Assignment is done on the column vector (df$col[mask] <- value) rather than on
# a row subset (df[mask, ]$col <- value): the latter stops with
# "replacement has 1 row, data has 0" when a tag matches no row, whereas the
# vector form is simply a no-op in that case. Results are otherwise unchanged.

RH_human_sub_l$week <- 0
for (wk_val in c(0, 1, 2, 3, 4, 6)) {
  RH_human_sub_l$week[grepl(paste0("_w", wk_val, "_"), RH_human_sub_l$RH_ID)] <- wk_val
}

RH_human_sub_l$conc <- 0
for (conc_val in c(0, 8, 25, 75)) {
  # pattern is not anchored at the end, exactly as before ("_d0", "_d8", "_d25", "_d75")
  RH_human_sub_l$conc[grepl(paste0("_d", conc_val), RH_human_sub_l$RH_ID)] <- conc_val
}

RH_human_sub_l$pool <- 0
for (pool_val in 1:6) {
  RH_human_sub_l$pool[grepl(paste0("RH", pool_val, "_"), RH_human_sub_l$RH_ID)] <- pool_val
}




# Attach cell labels and hamster mitochondrial read counts; both merges join on
# whatever columns the tables have in common.
RH_human_sub_l <- merge(
  merge(RH_human_sub_l, cell),
  hamster_mito[, c("RH_ID", "pool", "conc", "week", "cell", "hamster_mito_reads")]
)

# Per-sample column sums, looked up by sample label.
RH_human_sub_l$human_sum_reads <- human_sum_reads[RH_human_sub_l$RH_ID]

# Bring in columns 1-5 and 9 of human_total_reads, then rename column 11 of the
# merged table.
# NOTE(review): the rename is positional -- presumably column 11 is the column
# that came from human_total_reads[, 9]; confirm if the inputs change.
RH_human_sub_l <- merge(RH_human_sub_l, human_total_reads[, c(1:5, 9)])
names(RH_human_sub_l)[11] <- "human_total_reads"


# pool and cell are used as random-effect terms in the model, so make them factors.
RH_human_sub_l[c("pool", "cell")] <- lapply(
  RH_human_sub_l[c("pool", "cell")],
  as.factor
)


# order by coef, useful for multiple genes, though here we only have one
gene_levels <- merge(
  RH_human_sub,
  logP_ham_mito[, c("Chromosome", "posS", "posE", "pos", "coef_hamster_mito")],
  all.x = TRUE
)
gene_levels <- gene_levels[order(gene_levels$coef_hamster_mito), ]
RH_human_sub_l$geneSymbol <- factor(
  RH_human_sub_l$geneSymbol,
  levels = gene_levels$geneSymbol
)



# Columns filled in by the model loop below; initialised to numeric NA.
RH_human_sub_l$phat <- NA_real_
RH_human_sub_l$phat_se <- NA_real_
RH_human_sub_l$phat_center <- NA_real_
RH_human_sub_l$log_copy_center <- NA_real_

# get effect of hamster mitochondria at origin
# week and conc are set to 0 on every row BEFORE the fit, so the week * conc
# terms below carry no information (see the equivalence noted inside the loop).
RH_human_sub_l$week <- 0
RH_human_sub_l$conc <- 0



# loop below useful when coloring by (multiple) gene(s). cf g_loci_chr_1.R, d_loci_chr_1.R
# here only one gene, hence loop acts once. Instead color by pool (see below).

for (i in c(1:1)) {

  # rows belonging to gene i (id is the record identifier in the long table)
  sel_i <- RH_human_sub_l$id == i

  # at week == 0 and conc == 0, model below equivalent to:
  # m1_nb <- gam(copy ~ hamster_mito_reads + s(pool, bs = "re") + s(cell, bs = "re") + offset(log(human_total_reads)), data = subset(RH_human_sub_l, RH_human_sub_l$id == i), family = nb, method = "REML")
  m1_nb <- gam(
    copy ~ week * conc + hamster_mito_reads + s(pool, bs = "re") + s(cell, bs = "re") + offset(log(human_total_reads)),
    data = subset(RH_human_sub_l, sel_i),
    family = nb,
    method = "REML"
  )

  # Predict once on the link scale and reuse both components
  # (previously predict() was evaluated separately for fit and se.fit).
  m1_pred <- predict(m1_nb, se.fit = TRUE, type = "link")
  RH_human_sub_l[sel_i, ]$phat <- m1_pred$fit
  RH_human_sub_l[sel_i, ]$phat_se <- m1_pred$se.fit

  # find intercept of phat using lm and use to center phat and log_copy_center. phat already on natural log scale.
  # The lm is fitted once; the same intercept is subtracted from both quantities.
  phat_intercept <- lm(
    phat ~ hamster_mito_reads,
    data = RH_human_sub_l[sel_i, ]
  )$coefficients["(Intercept)"]

  RH_human_sub_l[sel_i, ]$phat_center <- RH_human_sub_l[sel_i, ]$phat - phat_intercept
  RH_human_sub_l[sel_i, ]$log_copy_center <- log(RH_human_sub_l[sel_i, ]$copy) - phat_intercept

}



# # color by gene
# n = length(unique(RH_human_sub_l$geneSymbol))
# colores_1 = gg_color_hue(n)
# names(colores_1) <- gene_levels$geneSymbol



# color by pool
# One hue per pool, named by pool level so scale_colour_manual can match them.
n <- length(unique(RH_human_sub_l$pool))
colores_1 <- gg_color_hue(n)
names(colores_1) <- levels(RH_human_sub_l$pool)


# Panel: centered log2 copy number of the GRID2 bin against hamster
# mitochondrial reads. Open circles are the observations (coloured by pool);
# the red line is a through-origin linear fit to the centered model
# predictions, with a 95% band. Multiplying by log2(exp(1)) converts natural
# log to log2.
p4 <- ggplot() +
  theme(legend.key = element_blank()) +
  geom_point(
    data = RH_human_sub_l,
    mapping = aes(
      x = hamster_mito_reads,
      y = log2(exp(1)) * log_copy_center,
      colour = pool
    ),
    shape = 1,
    stroke = 0.4,
    size = 1.0
  ) +
  geom_smooth(
    data = RH_human_sub_l,
    # grouped by gene; colour/fill by gene are not used since there is only one gene here
    mapping = aes(
      x = hamster_mito_reads,
      y = log2(exp(1)) * phat_center,
      group = geneSymbol
    ),
    method = "lm",
    formula = y ~ 0 + x,
    fullrange = TRUE,
    se = TRUE,
    level = 0.95,
    colour = "red",
    fill = "grey",
    size = 0.3,
    alpha = 0.3
  ) +
  scale_colour_manual(
    name = "Pool",
    values = colores_1,
    labels = levels(RH_human_sub_l$pool)
  ) +
  guides(
    colour = guide_legend(
      ncol = 1,
      byrow = TRUE,
      override.aes = list(
        fill = NA,
        shape = 1,
        size = 1
      )
    )
  ) +
  theme_even +
  coord_cartesian(xlim = c(0, 3e5)) +
  # axis is labelled in units of 1e5 reads
  scale_x_continuous(
    limits = c(0, 3e5),
    breaks = c(0, 1e5, 2e5, 3e5),
    labels = c(0, 1, 2, 3),
    expand = expand_scale(add = .1e5)
  ) +
  labs(
    x = expression(Mito~reads~(""%*%~10^-5)),
    y = expression(Delta*log[2]~(Human~italic(GRID2)~reads)),
    subtitle = "Hamster mitochondria"
  )
print(p4)









#----------------- Prepare HUMAN mito logP ---------------------



logP_hum_mito <- read.table(
  "log10P_human_plus_human_mito.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)



# Sort:
# Chromosome is turned into a factor only to get chr1..chr22, chrX ordering,
# then converted back to character once the rows are ordered.
chrOrder <- paste0("chr", c(1:22, "X"))
logP_hum_mito$Chromosome <- factor(logP_hum_mito$Chromosome, levels = chrOrder)
logP_hum_mito <- logP_hum_mito[
  order(logP_hum_mito$Chromosome, logP_hum_mito$pos),
]
logP_hum_mito$Chromosome <- as.character(logP_hum_mito$Chromosome)




# # Transform chr1 etc. to numbers
# logP_hum_mito$Chromosome <- gsub('chr', '', logP_hum_mito$Chromosome)
# logP_hum_mito[logP_hum_mito$Chromosome == "X","Chromosome"] <- 23
# chrOrder<-c(1:23)
# logP_hum_mito$Chromosome <- factor(logP_hum_mito$Chromosome, levels=chrOrder)
# logP_hum_mito <- logP_hum_mito[order(logP_hum_mito$Chromosome, logP_hum_mito$pos), ]
# logP_hum_mito$Chromosome <- as.numeric(logP_hum_mito$Chromosome)

# Compute chromosome size
# (taken as the largest bin position observed on each chromosome)
gen_coord <- aggregate(pos ~ Chromosome, FUN = max, data = logP_hum_mito)
colnames(gen_coord)[2] <- "chr_size"
gen_coord$Chromosome <- factor(gen_coord$Chromosome, levels = chrOrder)
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]


# Use cumsum to make genome coordinates
# Each chromosome's offset is the summed size of all chromosomes before it, so
# the first offset is 0 and the last cumulative sum is dropped.
# - head(..., -1) drops the last element whatever the number of chromosomes
#   present (previously hard-coded as element 23).
# - as.numeric() keeps cumsum() from overflowing should the positions have been
#   read as integers (a whole-genome running total exceeds the integer range).
gen_coord$coord <- c(0, head(cumsum(as.numeric(gen_coord$chr_size)), -1))

# merge genome coordinates with logP_hum_mito
logP_hum_mito <- merge(logP_hum_mito, gen_coord[, c("Chromosome", "coord")])
logP_hum_mito$Chromosome <- factor(logP_hum_mito$Chromosome, levels = chrOrder)
logP_hum_mito <- logP_hum_mito[order(logP_hum_mito$Chromosome, logP_hum_mito$pos), ]


# genome-wide coordinate = position within chromosome + chromosome offset
logP_hum_mito$coord <- logP_hum_mito$pos + logP_hum_mito$coord




# # find midpoints of chromosomes for breaks in ggplot
# mid <- function(x) {(max(x)+min(x))/2}
# chr_mid <- aggregate(coord~Chromosome,FUN = mid,data=logP)
# colnames(chr_mid)[2] <- "mid"
# chr_mid$Chromosome <-factor(chr_mid$Chromosome, levels=chrOrder)
# chr_mid <- chr_mid[order(chr_mid$Chromosome), ]
# chr_mid$Chromosome <- as.numeric(chr_mid$Chromosome)

# # Define breaks as mid-points chromosomes
# breaks <- chr_mid$mid


# # attractive grey and skyblue color scheme
# cb1<-rep(c("grey", "skyblue"), 12)


# standard black color scheme: 24 entries, all black
cb1 <- rep("black", times = 24)



# labels <- as.character(c(1:9,"",11,"",13,"","",16,"","","",20,"","","X"))


# ------------------------------ fdr ----------------------------------------------

# Add adjusted P value column, stored as -log10(adjusted P).
# NOTE(review): p.adjust() is called without `method`, so R's default ("holm")
# is applied, although the column and threshold names refer to FDR/q values.
# If a Benjamini-Hochberg FDR is intended, pass method = "BH" -- confirm.
logP_hum_mito$log10q_human_mito <- -log10(
  p.adjust(10^(-logP_hum_mito$log10p_human_mito))
)

# find fdr threshold
# Among rows whose adjusted P is <= 0.05, take the one with the smallest
# -log10(adjusted P) and report its unadjusted -log10 P.
fdr_thresh_hum <- with(
  logP_hum_mito[logP_hum_mito$log10q_human_mito >= -log10(0.05), ],
  log10p_human_mito[which.min(log10q_human_mito)]
)



# --------- (5) chr3, logP_hum_mito ----------------------------

# choose chr number (used below to select rows and to build the x axis title)
i <- 3


# id points by hand

## max genome-wide coord for human mito
# plot(logP_hum_mito[,"coord"],logP_hum_mito[,"log10p_human_mito"],cex=0.1)
# identify(logP_hum_mito[,"coord"],logP_hum_mito[,"log10p_human_mito"],cex=0.1)
# [1] 50888

# logP_hum_mito[50888,]
      # Chromosome     posS     posE      pos log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2 log10p_d_w3 log10p_d_w4 log10p_d_w6 log10p_d_avg log10p_g_d_Ix log10p_human_mito
# 50888       chr3 14740000 15740000 15240000     9.966099     13.11474      15.32606      3.464472       14.815   0.4351296     0.61558   0.7880651   0.8060612   0.5503856    0.8075419    0.06951203          8.574216
      # coef_g_0nM coef_g_8nM coef_g_25nM coef_g_75nM coef_g_avg    coef_d_w1    coef_d_w2    coef_d_w3    coef_d_w4   coef_d_w6   coef_d_avg   coef_g_d_Ix coef_human_mito
# 50888 -0.1693058 -0.1705389  -0.1731593  -0.1808662 -0.1734676 -0.002450295 -0.002604434 -0.002758573 -0.002912712 -0.00322099 -0.002789401 -0.0001541389      0.00162057


# START HERE if do not wish to do hand picking again
points_p5 <- c(50888)

# Row(s) to annotate, with the gene symbol prepended.
logP_hum_mito_sub <- logP_hum_mito[points_p5, ]
logP_hum_mito_sub <- cbind(geneSymbol = "CAPN7/SH3BP5", logP_hum_mito_sub)



# Label table for geom_text: position, height and text, plus per-label nudges.
labels_p5 <- logP_hum_mito_sub[, c("Chromosome", "pos", "log10p_human_mito", "geneSymbol")]
labels_p5 <- labels_p5[order(labels_p5$pos), ]
labels_p5$nudge_x <- 0
labels_p5$nudge_y <- 0
# labels_p5[labels_p5$geneSymbol=="CAPN7/SH3BP5","nudge_x"] <- 2.2e-16 # cannot use 0 as 1st vector member because bug in ggplot2, cf https://github.com/tidyverse/ggplot2/issues/2977
labels_p5[labels_p5$geneSymbol == "CAPN7/SH3BP5", "nudge_x"] <- 15
labels_p5[labels_p5$geneSymbol == "CAPN7/SH3BP5", "nudge_y"] <- 0.5



balloon_scale <- 0.5 # inflation factor for significant points

# scale significant points: size grows from 0.1 up to 0.1 * (1 + balloon_scale)
# in proportion to -log10 P relative to the chromosome maximum.
# Sizes are computed from the HUMAN mitochondria table for chromosome i -- the
# same rows that p5 plots -- rather than from the hamster table used before,
# so sizes follow the plotted values and are guaranteed to match in length.
size_point <- with(
  logP_hum_mito[logP_hum_mito$Chromosome == paste0("chr", i), ],
  0.1 * (1 + balloon_scale * (log10p_human_mito / max(log10p_human_mito)))
)



# Panel: -log10 P for human mitochondria along chromosome i (x axis in Mb),
# with the hand-picked locus labelled and the threshold drawn as a red line.
p5 <- ggplot() +
  geom_point(
    data = logP_hum_mito[logP_hum_mito$Chromosome == paste0("chr", i), ],
    mapping = aes(
      x = pos / 1e6,
      y = log10p_human_mito,
      # NOTE(review): this is a quoted string, i.e. a single constant colour
      # group, not the Chromosome column; every entry of cb1 is black, so
      # points are black either way -- confirm whether the quotes are intended.
      color = "as.factor(Chromosome)"
    ),
    size = size_point,
    stroke = 0
  ) +
  # nudge_x and nudge_y gives warning, but seems to work
  geom_text(
    data = labels_p5,
    mapping = aes(
      x = pos / 1e6,
      y = log10p_human_mito,
      label = geneSymbol,
      fontface = "italic"
    ),
    nudge_x = labels_p5$nudge_x,
    nudge_y = labels_p5$nudge_y,
    colour = "black",
    size = 2.5
  ) +
  geom_hline(
    yintercept = fdr_thresh_hum,
    linetype = "solid",
    color = "red",
    size = size_hline
  ) +
  scale_colour_manual(values = cb1) +
  theme_odd +
  scale_x_continuous() +
  scale_y_continuous(
    breaks = c(0, 2, 4, 6, 8),
    labels = c(0, 2, 4, 6, 8)
  ) +
  coord_cartesian(ylim = c(0, 9)) +
  labs(
    x = paste0("Chromosome ", i, " (Mb)"),
    y = expression('-log'[10]*italic('P')),
    subtitle = "Human mitochondria"
  )
print(p5)



# ------------- (6) line plot copy number change vs human mitochondria copy number -----------------------

# cf g_loci_chr_1.R


# Return n colours evenly spaced around the HCL hue circle
# (hues start at 15 degrees; luminance 65, chroma 100).
#
# n: number of colours wanted (non-negative whole number).
# Returns a character vector of n hex colour strings; character(0) when n is 0.
gg_color_hue <- function(n) {
  # n + 1 equally spaced hues from 15 to 375; the last one coincides with the
  # first (375 = 15 + 360), so only the first n are kept.
  hues <- seq(15, 375, length.out = n + 1)
  # seq_len(n) rather than 1:n, so that n = 0 yields no colours instead of one
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}


# copy number data
RH_human <- read.table(
  "RH_human_gseq.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)


# Sort:
# Same factor round-trip as for the logP table, but with chrY included in the
# ordering. Note that this redefines chrOrder from here on.
chrOrder <- paste0("chr", c(1:22, "X", "Y"))
RH_human$Chromosome <- factor(RH_human$Chromosome, levels = chrOrder)
RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]
RH_human$Chromosome <- as.character(RH_human$Chromosome)


# get rid of chrY, because no chrY seq in human genome
RH_human <- RH_human[RH_human$Chromosome != "chrY", ]




# Read in and prepare ancillary tables
# (tab-delimited text with a header row; strings are kept as character)

cell <- read.table(
  "cell_label_info.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# Column sums of RH_human from the 5th column onward (one total per sample column).
human_sum_reads <- colSums(RH_human[, 5:ncol(RH_human)])

# uses mapped human reads, cf human_AIC_1.R
human_total_reads <- read.table(
  "RH_pool_human_total_align.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

human_mito <- read.table(
  "human_mito_gseq.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)



# maximum log10P human mitochondria is at row 50888 and corresponds to CAPN7/SH3BP5, cf human_mito_1.R and p5 above
# Show the row with the largest value; recorded output follows.
logP_hum_mito[which.max(logP_hum_mito$log10p_human_mito), ]
      # Chromosome     posS     posE      pos log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2 log10p_d_w3 log10p_d_w4 log10p_d_w6 log10p_d_avg log10p_g_d_Ix log10p_human_mito
# 50888       chr3 14740000 15740000 15240000     9.966099     13.11474      15.32606      3.464472       14.815   0.4351296     0.61558   0.7880651   0.8060612   0.5503856    0.8075419    0.06951203          8.574216
      # coef_g_0nM coef_g_8nM coef_g_25nM coef_g_75nM coef_g_avg    coef_d_w1    coef_d_w2    coef_d_w3    coef_d_w4   coef_d_w6   coef_d_avg   coef_g_d_Ix coef_human_mito     coord
# 50888 -0.1693058 -0.1705389  -0.1731593  -0.1808662 -0.1734676 -0.002450295 -0.002604434 -0.002758573 -0.002912712 -0.00322099 -0.002789401 -0.0001541389      0.00162057 506384975




# Show row 50888 of the copy number table; recorded output follows.
RH_human[50888, ]
      # Chromosome     posS     posE      pos RH1_w0_d0 RH1_w1_d0 RH1_w1_d8 RH1_w1_d25 RH1_w1_d75 RH1_w2_d0 RH1_w2_d8 RH1_w2_d25 RH1_w3_d0 RH1_w3_d8 RH1_w3_d25 RH1_w3_d75 RH1_w4_d0 RH1_w4_d8 RH1_w4_d25 RH1_w4_d75
# 50888       chr3 14740000 15740000 15240000        59        57        46         50         37        41        26         20        41        33         24         14        32        19         14          2
      # RH1_w6_d0 RH1_w6_d8 RH1_w6_d25 RH1_w6_d75 RH2_w0_d0 RH2_w1_d0 RH2_w1_d8 RH2_w1_d25 RH2_w1_d75 RH2_w2_d0 RH2_w2_d8 RH2_w2_d25 RH2_w3_d0 RH2_w3_d8 RH2_w3_d25 RH2_w3_d75 RH2_w4_d0 RH2_w4_d8 RH2_w4_d25 RH2_w6_d0
# 50888        38        43         22          5       317       492       478        364        256       367       423         94       514       349        327        325       236       134        110       126
      # RH2_w6_d8 RH2_w6_d25 RH2_w6_d75 RH3_w0_d0 RH3_w1_d0 RH3_w1_d8 RH3_w1_d25 RH3_w1_d75 RH3_w2_d0 RH3_w2_d8 RH3_w2_d25 RH3_w3_d0 RH3_w3_d8 RH3_w3_d25 RH3_w3_d75 RH3_w4_d0 RH3_w4_d8 RH3_w4_d25 RH3_w4_d75 RH3_w6_d0
# 50888        95         41         91       114        24        16         20         24        14        25         15        48        19         75         34        32        15         16         15        57
      # RH3_w6_d8 RH3_w6_d25 RH3_w6_d75 RH4_w0_d0 RH4_w1_d0 RH4_w1_d8 RH4_w1_d25 RH4_w1_d75 RH4_w2_d0 RH4_w2_d8 RH4_w2_d25 RH4_w3_d0 RH4_w3_d8 RH4_w3_d25 RH4_w3_d75 RH4_w4_d0 RH4_w4_d8 RH4_w4_d25 RH4_w4_d75 RH4_w6_d0
# 50888       173         15         11       168        53        63         65         50        50        33         42        89        88         73         58        29        39         26         19        66
      # RH4_w6_d8 RH4_w6_d25 RH4_w6_d75 RH5_w0_d0 RH5_w1_d0 RH5_w1_d8 RH5_w1_d25 RH5_w2_d0 RH5_w2_d8 RH5_w2_d25 RH5_w3_d0 RH5_w3_d8 RH5_w3_d25 RH5_w3_d75 RH5_w4_d0 RH5_w4_d8 RH5_w4_d25 RH5_w6_d0 RH5_w6_d8 RH5_w6_d25
# 50888       182         20         54       107        77        88         78        74        59         83        57        61         54         79        54        48         35        39        15         37
      # RH5_w6_d75 RH6_w0_d0 RH6_w1_d0 RH6_w1_d8 RH6_w1_d25 RH6_w2_d0 RH6_w2_d8 RH6_w2_d25 RH6_w3_d0 RH6_w3_d8 RH6_w3_d25 RH6_w3_d75 RH6_w4_d0 RH6_w4_d8 RH6_w4_d25 RH6_w6_d0 RH6_w6_d8 RH6_w6_d25 RH6_w6_d75
# 50888         10        77        67        94         65        85        81         61        58        92         61         55        56        72         78        73        84         33         40




# Select the copy number row(s) whose coordinates match row 50888 of the HUMAN
# mitochondria table (merge joins on the shared columns) and prepend the gene
# symbol. The coordinates are taken from logP_hum_mito -- the table in which
# row 50888 was identified above -- rather than from the hamster table, so this
# section no longer depends on the two tables sharing row order.
RH_human_sub <- merge(
  logP_hum_mito[50888, c("Chromosome", "posS", "posE", "pos")],
  RH_human
)
RH_human_sub <- cbind(geneSymbol = "CAPN7\nSH3BP5", RH_human_sub)


# Prepare for mgcv::gam

# Wide -> long: keep geneSymbol (column 1) plus every sample column (6 onward);
# each sample column becomes one row with its name in RH_ID and its value in copy.
RH_human_sub_l <- reshape(
  RH_human_sub[c(1, 6:ncol(RH_human_sub))],
  varying = colnames(RH_human_sub)[6:ncol(RH_human_sub)],
  v.names = "copy",
  timevar = "RH_ID",
  times = colnames(RH_human_sub)[6:ncol(RH_human_sub)],
  new.row.names = 1:1e6,
  direction = "long"
)
  
# Decode week, concentration and pool from the sample labels held in RH_ID
# (labels look like "RH1_w1_d8"). Each field starts at 0 and is overwritten for
# rows whose label contains the corresponding tag.
#
# Assignment is done on the column vector (df$col[mask] <- value) rather than on
# a row subset (df[mask, ]$col <- value): the latter stops with
# "replacement has 1 row, data has 0" when a tag matches no row, whereas the
# vector form is simply a no-op in that case. Results are otherwise unchanged.

RH_human_sub_l$week <- 0
for (wk_val in c(0, 1, 2, 3, 4, 6)) {
  RH_human_sub_l$week[grepl(paste0("_w", wk_val, "_"), RH_human_sub_l$RH_ID)] <- wk_val
}

RH_human_sub_l$conc <- 0
for (conc_val in c(0, 8, 25, 75)) {
  # pattern is not anchored at the end, exactly as before ("_d0", "_d8", "_d25", "_d75")
  RH_human_sub_l$conc[grepl(paste0("_d", conc_val), RH_human_sub_l$RH_ID)] <- conc_val
}

RH_human_sub_l$pool <- 0
for (pool_val in 1:6) {
  RH_human_sub_l$pool[grepl(paste0("RH", pool_val, "_"), RH_human_sub_l$RH_ID)] <- pool_val
}




# Attach cell labels and human mitochondrial read counts; both merges join on
# whatever columns the tables have in common.
RH_human_sub_l <- merge(
  merge(RH_human_sub_l, cell),
  human_mito[, c("RH_ID", "pool", "conc", "week", "cell", "human_mito_reads")]
)

# Per-sample column sums, looked up by sample label.
RH_human_sub_l$human_sum_reads <- human_sum_reads[RH_human_sub_l$RH_ID]

# Bring in columns 1-5 and 9 of human_total_reads, then rename column 11 of the
# merged table.
# NOTE(review): the rename is positional -- presumably column 11 is the column
# that came from human_total_reads[, 9]; confirm if the inputs change.
RH_human_sub_l <- merge(RH_human_sub_l, human_total_reads[, c(1:5, 9)])
names(RH_human_sub_l)[11] <- "human_total_reads"


# pool and cell are used as random-effect terms in the model, so make them factors.
RH_human_sub_l[c("pool", "cell")] <- lapply(
  RH_human_sub_l[c("pool", "cell")],
  as.factor
)


# order by coef, useful for multiple genes, though here we only have one
gene_levels <- merge(
  RH_human_sub,
  logP_hum_mito[, c("Chromosome", "posS", "posE", "pos", "coef_human_mito")],
  all.x = TRUE
)
gene_levels <- gene_levels[order(gene_levels$coef_human_mito), ]
RH_human_sub_l$geneSymbol <- factor(
  RH_human_sub_l$geneSymbol,
  levels = gene_levels$geneSymbol
)



# Columns filled in by the model loop below; initialised to numeric NA.
RH_human_sub_l$phat <- NA_real_
RH_human_sub_l$phat_se <- NA_real_
RH_human_sub_l$phat_center <- NA_real_
RH_human_sub_l$log_copy_center <- NA_real_

# get effect of human mitochondria at origin
# week and conc are set to 0 on every row BEFORE the fit, so the week * conc
# terms below carry no information (see the equivalence noted inside the loop).
RH_human_sub_l$week <- 0
RH_human_sub_l$conc <- 0



# loop below useful when coloring by (multiple) gene(s). cf g_loci_chr_1.R, d_loci_chr_1.R
# here only one gene, hence loop acts once. Instead color by pool (see below).

for (i in c(1:1)) {

  # rows belonging to gene i (id is the record identifier in the long table)
  sel_i <- RH_human_sub_l$id == i

  # at week == 0 and conc == 0, model below equivalent to:
  # m1_nb <- gam(copy ~ human_mito_reads + s(pool, bs = "re") + s(cell, bs = "re") + offset(log(human_total_reads)), data = subset(RH_human_sub_l, RH_human_sub_l$id == i), family = nb, method = "REML")
  m1_nb <- gam(
    copy ~ week * conc + human_mito_reads + s(pool, bs = "re") + s(cell, bs = "re") + offset(log(human_total_reads)),
    data = subset(RH_human_sub_l, sel_i),
    family = nb,
    method = "REML"
  )

  # Predict once on the link scale and reuse both components
  # (previously predict() was evaluated separately for fit and se.fit).
  m1_pred <- predict(m1_nb, se.fit = TRUE, type = "link")
  RH_human_sub_l[sel_i, ]$phat <- m1_pred$fit
  RH_human_sub_l[sel_i, ]$phat_se <- m1_pred$se.fit

  # find intercept of phat using lm and use to center phat and log_copy_center. phat already on natural log scale.
  # The lm is fitted once; the same intercept is subtracted from both quantities.
  phat_intercept <- lm(
    phat ~ human_mito_reads,
    data = RH_human_sub_l[sel_i, ]
  )$coefficients["(Intercept)"]

  RH_human_sub_l[sel_i, ]$phat_center <- RH_human_sub_l[sel_i, ]$phat - phat_intercept
  RH_human_sub_l[sel_i, ]$log_copy_center <- log(RH_human_sub_l[sel_i, ]$copy) - phat_intercept

}



# # color by gene
# n = length(unique(RH_human_sub_l$geneSymbol))
# colores_1 = gg_color_hue(n)
# names(colores_1) <- gene_levels$geneSymbol



# color by pool
# One hue per pool, named by pool level so scale_colour_manual can match them.
n <- length(unique(RH_human_sub_l$pool))
colores_1 <- gg_color_hue(n)
names(colores_1) <- levels(RH_human_sub_l$pool)


# Panel: centered log2 copy number of the CAPN7/SH3BP5 bin against human
# mitochondrial reads. Open circles are the observations (coloured by pool);
# the red line is a through-origin linear fit to the centered model
# predictions, with a 95% band. Multiplying by log2(exp(1)) converts natural
# log to log2. The x axis is left on its default scale (no custom limits or
# rescaled tick labels in this panel).
p6 <- ggplot() +
  geom_point(
    data = RH_human_sub_l,
    mapping = aes(
      x = human_mito_reads,
      y = log2(exp(1)) * log_copy_center,
      colour = pool
    ),
    shape = 1,
    stroke = 0.4,
    size = 1.0
  ) +
  geom_smooth(
    data = RH_human_sub_l,
    # grouped by gene; colour/fill by gene are not used since there is only one gene here
    mapping = aes(
      x = human_mito_reads,
      y = log2(exp(1)) * phat_center,
      group = geneSymbol
    ),
    method = "lm",
    formula = y ~ 0 + x,
    fullrange = TRUE,
    se = TRUE,
    level = 0.95,
    colour = "red",
    fill = "grey",
    size = 0.3,
    alpha = 0.3
  ) +
  scale_colour_manual(
    name = "Pool",
    values = colores_1,
    labels = levels(RH_human_sub_l$pool)
  ) +
  guides(
    colour = guide_legend(
      ncol = 1,
      byrow = TRUE,
      override.aes = list(
        fill = NA,
        shape = 1,
        size = 1
      )
    )
  ) +
  theme_even +
  labs(
    x = "Mito reads",
    y = expression(Delta*log[2]~(Human~italic(CAPN7)~reads)),
    subtitle = "Human mitochondria"
  )
print(p6)






# --------------- Combine panels ---------------------

# warning msgs because of nudge_x, nudge_y in p1, p2, p3, but are due to bug in ggplot 2 (https://github.com/tidyverse/ggplot2/issues/2977) and are inconsequential.



# Draw the six panels as a 2 x 3 grid (labelled A-F) on the currently open
# graphics device. The grid is built while the target device is open, as
# before. print() is explicit because a bare plot_grid() call is only rendered
# by top-level auto-printing, which does not happen when the script is run via
# source(); without it the output files would be empty in that case.
draw_mito_grid <- function() {
  print(
    plot_grid(
      p1, p2, p3, p4, p5, p6,
      ncol = 2,
      nrow = 3,
      labels = c("A", "B", "C", "D", "E", "F"),
      label_size = 16,
      align = "h"
    )
  )
}

pdf("mito_loci_1.pdf", width = 7.5, height = 10, useDingbats = FALSE)
draw_mito_grid()
dev.off()



tiff("mito_loci_1.tif", width = 7.5, height = 10, units = "in", res = 300)
draw_mito_grid()
dev.off()




# if smaller file size required:
# File size ~0.3 Mb
png("mito_loci_1.png", width = 7.5, height = 10, units = "in", res = 300)
draw_mito_grid()
dev.off()



png("mito_loci_hi_res_1.png", width = 7.5, height = 10, units = "in", res = 1200)
draw_mito_grid()
dev.off()


















