library(viridis) #The viridis color palettes

##### Add transcription (NET_seq) values to the corresponding nucleosomes #####

# Gene annotation table. The columns read from it in this script are
# chr, start, end, strand and NET_seq.
genes <- read.csv("~/genes_master_table.csv", stringsAsFactors = FALSE)

# One genomic range per row of `genes`, in the same row order, so that an
# index into genes.gr is also a valid row index into `genes`.
# NOTE(review): GRanges()/IRanges() are provided by GenomicRanges/IRanges;
# only viridis is attached in the visible part of this file -- confirm those
# packages are loaded before this point.
genes.gr <- GRanges(
  seqnames = genes$chr,
  ranges = IRanges(start = genes$start, end = genes$end),
  strand = genes$strand
)

# Represent every nucleosome as a 1-bp range located at its peak position.
peaks.gr <- GRanges(
  seqnames = chr_nuc_ratios.df[, "chr"],
  ranges = IRanges(start = chr_nuc_ratios.df[, "nuc_peaks"], width = 1)
)

# Find which nucleosome peaks fall inside which gene.
# The subject must be genes.gr: subjectHits is used below as a row index into
# `genes`, which is only valid when the subject ranges are parallel to that
# table. (The previous subject, chrtss.gr, is not built from `genes` anywhere
# in the visible code, and genes.gr was otherwise unused.)
# NOTE(review): if chrtss.gr was intentional, confirm it has one range per row
# of `genes`, in the same order.
overlaps.gr <- findOverlaps(peaks.gr, genes.gr)

# Two-column index table: queryHits (row of chr_nuc_ratios.df) and
# subjectHits (row of genes).
overlaps.df <- as.data.frame(overlaps.gr)

# A nucleosome can overlap two genes; keep only its first hit so that each
# nucleosome receives exactly one NET_seq value.
overlaps_ss.df <- overlaps.df[!duplicated(overlaps.df$queryHits), ]

# Nucleosomes without any overlapping gene keep NET_seq = 0.
chr_nuc_ratios.df$NET_seq <- 0

# Vectorised assignment; unlike `for (i in 1:nrow(...))` this is also correct
# when there are no overlaps at all (1:0 would iterate over c(1, 0)).
chr_nuc_ratios.df$NET_seq[overlaps_ss.df$queryHits] <-
  genes$NET_seq[overlaps_ss.df$subjectHits]

######## Split nucleosomes into genic and intergenic #######

# Logical mask over the rows of chr_nuc_ratios.df: TRUE when the nucleosome
# overlaps at least one feature. Using a mask instead of integer indexing
# fixes two problems:
#   * df[-idx, ] returns ZERO rows when idx is empty, so with no overlaps the
#     intergenic set would have been empty instead of containing everything;
#   * indexing with the raw queryHits repeated a nucleosome once per
#     overlapping gene, duplicating rows in genic.df.
is_genic <- seq_len(nrow(chr_nuc_ratios.df)) %in% overlaps.df$queryHits

intergenic.df <- chr_nuc_ratios.df[!is_genic, ]
genic.df <- chr_nuc_ratios.df[is_genic, ]

####### Pick out highly and poorly expressed genic nucleosomes #######

# log2(NET_seq + 1) for every genic nucleosome, computed once and reused.
genic_log_net <- log2(genic.df$NET_seq + 1)

# Decile cut points: su[1] = 0%, su[2] = 10%, ..., su[10] = 90%, su[11] = 100%.
su <- quantile(genic_log_net, seq(0, 1, 0.10))

# Top decile (>= 90% quantile) and bottom decile (<= 10% quantile).
# which() drops any NA comparisons instead of producing NA rows.
hg.df <- genic.df[which(genic_log_net >= su[10]), ]
lg.df <- genic.df[which(genic_log_net <= su[2]), ]


# Screen layout passed to split.screen(): one row per screen, each row giving
# the screen's (left, right, bottom, top) edges as fractions of the device.
dens.m <- matrix(
  c(
    0.05, 1.00, 0.10, 1.00, # screen 1: density plot
    0.05, 1.00, 0.08, 0.10, # screen 2: x-axis label strip
    0.02, 0.04, 0.10, 1.00  # screen 3: y-axis label strip
  ),
  ncol = 4,
  byrow = TRUE
)

# dens_plot.s holds the screen numbers created for the three rows above.
dens_plot.s <- split.screen(dens.m)

# Activate the main panel and set its margins.
screen(dens_plot.s[1])
par(mar = c(2, 2.1, 2, 2.1), cex = 1) # bottom, left, top, right

# NOTE(review): repr.plot.* look like notebook (repr/IRkernel) figure-size
# options -- confirm they are still needed when run outside a notebook.
options(repr.plot.width = 5, repr.plot.height = 5)

# Colour palette: cl[2] = low-transcription genic, cl[3] = intergenic,
# cl[5] = high-transcription genic.
# NOTE(review): brewer.pal() is provided by RColorBrewer; only viridis is
# attached in the visible part of this file -- confirm it is loaded.
cl <- brewer.pal(9, "Set1")

# Solid curves: log2 density of Mreads for each nucleosome class.
# The default x axis is suppressed and redrawn below in linear units.
plot(density(log2(lg.df$Mreads)),
  col = cl[2], xlim = c(3, 10), lwd = 3, xaxt = "n",
  main = "", xlab = "Occupancy"
)
lines(density(log2(intergenic.df$Mreads)), col = cl[3], lwd = 3)
lines(density(log2(hg.df$Mreads)), col = cl[5], lwd = 3)

# Dotted curves: log2 density of Nnormreads for the same three classes.
lines(density(log2(intergenic.df$Nnormreads)), col = cl[3], lty = "dotted", lwd = 3)
lines(density(log2(lg.df$Nnormreads)), col = cl[2], lty = "dotted", lwd = 3)
lines(density(log2(hg.df$Nnormreads)), col = cl[5], lty = "dotted", lwd = 3)

# x axis: ticks at log2 positions 2..10, labelled with the untransformed
# values (4, 8, ..., 1024).
log2_ticks <- seq(2, 10, 1)
axis(1, at = log2_ticks, labels = 2^log2_ticks)

# Legend: the first three entries are filled points keyed by colour (class);
# the last two are black line samples keyed by line type (dotted vs solid).
# pt.bg is given for all five entries explicitly instead of being recycled,
# and `horiz` is spelled out rather than relying on partial matching of `h`.
legend("topleft",
  legend = c(
    "Intergenic", "Genic/Low Transcription", "Genic/High Transcription",
    "Nascent", "Mature"
  ),
  pt.bg = c(cl[3], cl[2], cl[5], NA, NA),
  col = "black",
  pch = c(21, 21, 21, NA, NA),
  lty = c(NA, NA, NA, "dotted", "solid"),
  lwd = c(NA, NA, NA, 3, 3),
  pt.cex = 1.5, cex = 1, bty = "n", horiz = FALSE,
  y.intersp = 1.25, x.intersp = 1
)

# Second screen: margin-less strip holding the x-axis title.
screen(dens_plot.s[2])
par(mar = rep(0, 4))
mtext("Nucleosome Occupancy", side = 1, cex = 1)

# Third screen: margin-less strip holding the y-axis title.
screen(dens_plot.s[3])
par(mar = rep(0, 4))
mtext("Density Distribution", side = 2, cex = 1)

# Two-sample t-tests (t.test defaults) comparing Nnormreads with Mreads
# within each nucleosome class:
#   t1 = high-transcription genic, t2 = low-transcription genic,
#   t3 = intergenic.
# The argument expressions are kept parenthesised because t.test() records
# the deparsed expressions in the result's data.name field.
t1 <- t.test(x = (hg.df$Nnormreads), y = (hg.df$Mreads))
t2 <- t.test(x = (lg.df$Nnormreads), y = (lg.df$Mreads))
t3 <- t.test(x = (intergenic.df$Nnormreads), y = (intergenic.df$Mreads))

