library(ggplot2)
library(cowplot)
library(magick)


#####################################################
################# DO NOT DELETE #####################
#####################################################

# Plot retention of CENs as zoom-in


#-----------------HUMAN retain mean RH pools---------------------

# # # If desired, and if RH_pool_retent_ratio.txt is available, begin from START HERE to save time.

# # read in RH human sequence reads:
# RH_human <- read.table("RH_human_gseq.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE)

# # Get rows at beginning of each chromosome:
# RH_human_start <- RH_human[RH_human$posS == 0 & RH_human$posE == 1e6,]

# # Get rid of ramp ups and ramp downs:
# RH_human <- RH_human[c(0,diff(RH_human$pos)) == 1e4,]

# # combine RH_human without ramps and RH_human_start:
# RH_human <- rbind(RH_human_start,RH_human)

# # Sort:
# chrOrder<-paste("chr",c(1:22,"X","Y"),sep="")
# RH_human$Chromosome <-factor(RH_human$Chromosome, levels=chrOrder)
# RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]
# RH_human$Chromosome <- as.character(RH_human$Chromosome)



# # Mean human reads for RH pools:
# RH_human$read_ratio <- rowMeans(apply(RH_human[,c(grep("_w0_d0$",colnames(RH_human)))],2,FUN=function(x) {x/mean(x)}))



# #  find middle TK1

# gencode_gtf_ensembl_ucsc <- read.delim("gencode_gtf_ensembl_ucsc_v31.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)


# gencode_gtf_ensembl_ucsc[gencode_gtf_ensembl_ucsc$geneSymbol=="TK1",]
      # # geneSymbol           tx_id Chromosome    geneS    geneE strand         gene_id      gene_type geneLength txLength cdsLength                                       gene_description    5utrS    5utrE
# # 54647        TK1 ENST00000588734      chr17 78174075 78187233      - ENSG00000167900 protein_coding      13159     1681       804 thymidine kinase 1 [Source:HGNC Symbol;Acc:HGNC:11830] 78186995 78187233
      # # 5utrDiff    3utrS    3utrE 3utrDiff exonCount
# # 54647      239 78174121 78174758      638         6


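# # NOTE(review): mean(x, y) treats its second positional argument as `trim`, not as a second value. With a single TK1 row and trim >= 0.5 this call returns median(geneS), i.e. geneS itself, not the gene midpoint. If the midpoint is intended, use mean(c(geneS, geneE)). The call is left as written because the values recorded below were presumably produced with it.
# TK1_coord <- mean(gencode_gtf_ensembl_ucsc[gencode_gtf_ensembl_ucsc$geneSymbol=="TK1","geneS"],gencode_gtf_ensembl_ucsc[gencode_gtf_ensembl_ucsc$geneSymbol=="TK1","geneE"])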



# #  find peak TK1 reads 

# # For a more accurate search of the TK1 peak, restrict the search to within delta_1 of TK1, because CEN and TEL become larger than TK1 in some samples. (Though not in RH pools; other higher peaks usually occur in later-time and higher-concentration samples.) In fact, in RH pools TK1 is the highest peak on the whole of chr17. But for generality use this approach with delta_1.

# delta_1 <- 1e6

# TK1_max <- max(RH_human[RH_human$Chromosome=="chr17" & RH_human$pos >= TK1_coord-delta_1 & RH_human$pos <= TK1_coord+delta_1,"read_ratio"])


# TK1_max
# # [1] 10.58925


# # Normalize using peak copy number at TK1:
# RH_human$read_ratio <- RH_human$read_ratio/TK1_max


# # If desired can normalize using simpler approach of finding max seq reads on chr17:
# # RH_human$read_ratio <- RH_human$read_ratio/max(RH_human[RH_human$Chromosome == "chr17","read_ratio"])




# # # Transform chr1 etc. to numbers
# # RH_human$Chromosome <- gsub('chr', '', RH_human$Chromosome)
# # RH_human[RH_human$Chromosome == "X","Chromosome"] <- 23
# # RH_human[RH_human$Chromosome == "Y","Chromosome"] <- 24

# chrOrder <- paste0("chr",c(1:22,"X","Y"))
# RH_human$Chromosome <-factor(RH_human$Chromosome, levels=chrOrder)
# RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]
# RH_human$Chromosome <- as.character(RH_human$Chromosome)

# # Compute chromosome size
# gen_coord <- aggregate(pos~Chromosome,FUN=max,data=RH_human)
# colnames(gen_coord)[2] <- "chr_size"
# gen_coord$Chromosome <-factor(gen_coord$Chromosome, levels=chrOrder)
# gen_coord <- gen_coord[order(gen_coord$Chromosome), ]
# gen_coord$Chromosome <- as.character(gen_coord$Chromosome)

# # Use cumsum to make genome coordinates
# gen_coord$coord <- c(0,cumsum(gen_coord$chr_size)[-24])

# # merge genome coordinates with RH_human
# RH_human <- merge(RH_human,gen_coord[,c("Chromosome","coord")])
# chrOrder <- paste0("chr",c(1:22,"X","Y"))
# RH_human$Chromosome <-factor(RH_human$Chromosome, levels=chrOrder)
# RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]
# RH_human$Chromosome <- as.character(RH_human$Chromosome)

# RH_human$coord <- RH_human$pos + RH_human$coord

# # get rid of chrY
# RH_human <- RH_human[RH_human$Chromosome != "chrY",]



# RH_pool_retent <- RH_human[,c("Chromosome","pos","read_ratio")]


# dim(RH_pool_retent)
# # [1] 300814    3

# RH_pool_retent$posS <- RH_pool_retent$pos + 1
# RH_pool_retent <- RH_pool_retent[,c("Chromosome","pos","posS","read_ratio")]

# RH_pool_retent$pos <- format(RH_pool_retent$pos,scientific=FALSE)
# RH_pool_retent$posS <- format(RH_pool_retent$posS,scientific=FALSE)
# # RH_pool_retent$read_ratio <- round(RH_pool_retent$read_ratio,4)

# head(RH_pool_retent)
  # # Chromosome       pos      posS read_ratio
# # 1       chr1    500000    500001 0.07535384
# # 2       chr1    510000    510001 0.07557400
# # 3       chr1    520000    520001 0.07256259
# # 4       chr1    530000    530001 0.07255724
# # 5       chr1    540000    540001 0.07253440
# # 6       chr1    550000    550001 0.07205277




# # write.table(RH_pool_retent, "RH_pool_retent_ratio.txt",quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)


# # # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# # browser position chr17:1-100000000
# # track type=bedGraph name="Copy" description="chr17" visibility=full color=0,0,255 altColor=255,0,0 priority=20





# --------- Custom UCSC track settings for all -----------------

# Custom track settings
# Display mode: full
# Type of graph: points
# Track height: 128 pixels
# Data view scaling: auto-scale to data view
# Always include zero: ON
# Vertical viewing range:  min: 0; max: 1000  (range: 0 to 1000) (greyed out)
# Transform function: Transform data points by: NONE
# Windowing function: mean
# Smoothing window: OFF
# Negate values: not selected
# Draw y indicator lines: 
# at y = 0.0: ON at y =0 OFF (no threshold for retention)


# Configure Image page on ucsc genome browser:
# image width:	400	pixels
# label area width:	10	characters	
# text size: 12

# # Remember to get rid of splice isoforms and non-coding (nc) genes, if desired, by clicking the GENCODE v31 bar on the left of the diagram. Then click on the wrench and select "Transcript class": coding, "Transcript Annotation Method": All, "Transcript Biotype": protein_coding, "Tag": appris_principal_1, "Support Level": All. Leave "Highlight items" unselected.

# # Make genes dense.

# # Add squished centromeres.







# --------------- CEN1 ---------------------


# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr1:120,905,001-127,655,000
# track type=bedGraph name="Copy" description="CEN1" visibility=full color=0,0,255 altColor=255,0,0 priority=20



# # --------------- CEN2 ---------------------


# # # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# # browser position chr2:92710000-92710001
# # track type=bedGraph name="Copy" description="CEN2" visibility=full color=0,0,255 altColor=255,0,0 priority=20



# --------------- CEN3 ---------------------


# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr3:88,423,751-97,536,250
# track type=bedGraph name="Copy" description="CEN3" visibility=full color=0,0,255 altColor=255,0,0 priority=20



# --------------- CEN4 ---------------------



# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr4:50200000-50200001
# track type=bedGraph name="Copy" description="CEN4" visibility=full color=0,0,255 altColor=255,0,0 priority=20




# --------------- CEN5 ---------------------



# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr5:44,107,501-54,232,500
# track type=bedGraph name="Copy" description="CEN5" visibility=full color=0,0,255 altColor=255,0,0 priority=20



# --------------- CEN6 ---------------------


# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr6:55,695,001-62,445,000
# track type=bedGraph name="Copy" description="CEN6" visibility=full color=0,0,255 altColor=255,0,0 priority=20



# ------------------ CEN7 -------------------------------------

# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr7:59610000-59610001
# track type=bedGraph name="Copy" description="CEN7" visibility=full color=0,0,255 altColor=255,0,0 priority=20


# ------------------ CEN8 -------------------------------------


# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr8:44580000-44580001
# track type=bedGraph name="Copy" description="CEN8" visibility=full color=0,0,255 altColor=255,0,0 priority=20


# ------------------ CEN9 -------------------------------------


# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr9:44100000-44100001
# track type=bedGraph name="Copy" description="CEN9" visibility=full color=0,0,255 altColor=255,0,0 priority=20


# ------------------ CEN10 -------------------------------------


# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr10:41080000-41080001
# track type=bedGraph name="Copy" description="CEN10" visibility=full color=0,0,255 altColor=255,0,0 priority=20


# ------------------ CEN11 -------------------------------------


# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr11:53270000-53270001
# track type=bedGraph name="Copy" description="CEN11" visibility=full color=0,0,255 altColor=255,0,0 priority=20


# ------------------ CEN12 -------------------------------------


# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr12:35400000-35400001
# track type=bedGraph name="Copy" description="CEN12" visibility=full color=0,0,255 altColor=255,0,0 priority=20


# ------------------ CEN13 -------------------------------------


# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr13:13,515,001-20,265,000
# track type=bedGraph name="Copy" description="CEN13" visibility=full color=0,0,255 altColor=255,0,0 priority=20



# ------------------ CEN14 -------------------------------------


# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr14:14,790,001-19,290,000
# track type=bedGraph name="Copy" description="CEN14" visibility=full color=0,0,255 altColor=255,0,0 priority=20


# ------------------ CEN15 -------------------------------------



# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr15:19930000-19930001
# track type=bedGraph name="Copy" description="CEN15" visibility=full color=0,0,255 altColor=255,0,0 priority=20





# ------------------ CEN16 -------------------------------------



# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr16:46710000-46710001
# track type=bedGraph name="Copy" description="CEN16" visibility=full color=0,0,255 altColor=255,0,0 priority=20



# ------------------ CEN18 -------------------------------------



# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr18:17440000-17440000
# track type=bedGraph name="Copy" description="CEN18" visibility=full color=0,0,255 altColor=255,0,0 priority=20




# ------------------ CEN19 -------------------------------------


# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr19:26690000-26690001
# track type=bedGraph name="Copy" description="CEN19" visibility=full color=0,0,255 altColor=255,0,0 priority=20


# ------------------ CEN21 -------------------------------------



# # Place following header at top of RH_pool_retent_ratio.txt and use in bedGraph format on ucsc genome browser
# browser position chr21:8700000-8700000
# track type=bedGraph name="Copy" description="CEN21" visibility=full color=0,0,255 altColor=255,0,0 priority=20








# ------------ Helpers for CEN zoom-in figures ------------------------

# Read one CEN panel PDF and wrap it in a cowplot drawing canvas.
#
# path:    PDF file to rasterise (presumably a UCSC genome browser export
#          made with the track settings described above -- confirm).
# density: rasterisation resolution handed to magick::image_read_pdf().
#
# Returns a ggplot object containing the image, with clipping turned off.
# Stops with a clear message when the file does not exist.
read_cen_panel <- function(path, density) {
  if (!file.exists(path)) {
    stop("CEN panel PDF not found: ", path, call. = FALSE)
  }
  ggdraw() +
    draw_image(magick::image_read_pdf(path, density = density), scale = 1.0) +
    coord_cartesian(clip = "off")
}

# Arrange six panels in a 2 x 3 grid labelled A-F and write them to a PDF.
#
# panels:   list of six plot objects, in label order A-F.
# out_file: path of the PDF to create (7.5 x 10 in).
#
# Returns out_file invisibly.
write_cen_grid <- function(panels, out_file) {
  pdf(out_file, width = 7.5, height = 10, useDingbats = FALSE)
  dev_id <- dev.cur()
  # Close this device even when drawing fails, so an error does not leave
  # the PDF device open.
  on.exit(dev.off(dev_id), add = TRUE)

  grid <- plot_grid(
    plotlist = panels,
    ncol = 2, nrow = 3,
    labels = c("A", "B", "C", "D", "E", "F"),
    label_size = 16,
    align = "h"
  )
  # Explicit print(): a bare plot object is only drawn when R auto-prints
  # top-level values, which does not happen under source() or inside a
  # function. Without it the output PDF would be empty.
  print(grid)

  invisible(out_file)
}


# ------------ Read pdfs ------------------------

# Optional annotation for p1, kept from the original script:
# + draw_label("Paclitaxel", fontface='plain', size=12, x=0.55,y=0.92) + draw_label("D", fontface='bold',x=0.05,y=0.98)
p1 <- read_cen_panel("CEN1.pdf", density = 300)
p2 <- read_cen_panel("CEN3.pdf", density = 300)
p3 <- read_cen_panel("CEN5.pdf", density = 300)
p4 <- read_cen_panel("CEN6.pdf", density = 300)
p5 <- read_cen_panel("CEN13.pdf", density = 300)
p6 <- read_cen_panel("CEN14.pdf", density = 300)


# -------------- Combine pdfs ------------------------------

# File size ~ 980 kb
write_cen_grid(list(p1, p2, p3, p4, p5, p6), "cen_zoom_1.pdf")

# if smaller file size required, but do not gain much, so use pdf:
# File size ~956 kb
# png("g_long_loci_chr_1.png",width=7.5,height=10,units="in",res=300)
# print(plot_grid(p1, p2, p3,  p4, p5, p6, ncol = 2, nrow = 3, labels=c("A", "B", "C", "D", "E","F"), label_size = 14, align="h"))
# dev.off()



# ------------------ hi res -------------------------------

# ------------ Read pdfs ------------------------

# Same panels as above, rasterised at 1200 instead of 300.
p1 <- read_cen_panel("CEN1.pdf", density = 1200)
p2 <- read_cen_panel("CEN3.pdf", density = 1200)
p3 <- read_cen_panel("CEN5.pdf", density = 1200)
p4 <- read_cen_panel("CEN6.pdf", density = 1200)
p5 <- read_cen_panel("CEN13.pdf", density = 1200)
p6 <- read_cen_panel("CEN14.pdf", density = 1200)


# -------------- Combine pdfs ------------------------------


write_cen_grid(list(p1, p2, p3, p4, p5, p6), "cen_zoom_hi_res_1.pdf")














































































