library(viridis) #The viridis color palettes

##### Adding transcription (NET-seq) scores to the respective nucleosomes #####

# Gene master table; this script reads its chr, start, end, strand and
# NET_seq columns.
genes <- read.csv("~/genes_master_table.csv", stringsAsFactors = FALSE)

index_file <- genes
# Gene intervals. Both coordinates are coerced the same way so that a column
# read in as non-numeric is handled consistently for start and end.
chrtss.gr <- GRanges(
  seqnames = index_file$chr,
  ranges = IRanges(
    start = as.numeric(index_file$start),
    end = as.numeric(index_file$end)
  )
)

# One width-1 range per nucleosome, placed at its peak position.
peaks.gr <- GRanges(
  seqnames = chr_nuc_ratios.df[, "chr"],
  ranges = IRanges(start = chr_nuc_ratios.df[, "nuc_peaks"], width = 1)
)

# Overlaps between nucleosome peaks (query) and gene intervals (subject).
rangtss.gr <- findOverlaps(peaks.gr, chrtss.gr)

# Data-frame form of the hits: column 1 = queryHits (row of
# chr_nuc_ratios.df), column 2 = subjectHits (row of genes).
rangtss.df <- as.data.frame(rangtss.gr)

# A nucleosome can fall inside more than one gene; keep only its first hit so
# each nucleosome receives the score of a single gene.
rangtss_ss.df <- rangtss.df[!duplicated(rangtss.df$queryHits), ]

# Nucleosomes that overlap no gene keep a score of 0. The query indices are
# unique after the de-duplication above, so one vectorized assignment is
# equivalent to filling the column row by row (and is a no-op when there are
# no overlaps at all).
chr_nuc_ratios.df$NET_seq <- 0
chr_nuc_ratios.df$NET_seq[rangtss_ss.df[, 1]] <-
  genes$NET_seq[rangtss_ss.df[, 2]]

######## Splitting nucleosomes into genic and intergenic ########

cn.df <- chr_nuc_ratios.df
index_file <- genes
# Gene intervals, this time carrying strand information.
genes.gr <- GRanges(
  seqnames = index_file$chr,
  ranges = IRanges(start = index_file$start, end = index_file$end),
  strand = index_file$strand
)

# One width-1 range per nucleosome, placed at its peak position.
peaks.gr <- GRanges(
  seqnames = cn.df[, "chr"],
  ranges = IRanges(start = cn.df[, "nuc_peaks"], width = 1)
)

# Overlaps between nucleosome peaks (query) and gene intervals (subject).
rangtss.gr <- findOverlaps(peaks.gr, genes.gr)

# Data-frame form of the hits; column 1 indexes rows of cn.df.
gene_nucs.df <- as.data.frame(rangtss.gr)

# Split with a logical mask rather than positive/negative integer indices:
#  * cn.df[-integer(0), ] selects zero rows, so with no overlaps the negative
#    index would empty intergenic.df instead of keeping every nucleosome;
#  * a nucleosome overlapping several genes appears several times in the
#    hits, and integer indexing would duplicate its row in genic.df.
is_genic <- seq_len(nrow(cn.df)) %in% gene_nucs.df[, 1]
intergenic.df <- cn.df[!is_genic, ]
genic.df <- cn.df[is_genic, ]


####### Find which genic nucleosomes are highly / poorly expressed #######
# Log-transformed transcription score, computed once and reused below.
genic_log_expr <- log2(genic.df$NET_seq + 1)

# Decile cut points: su[1] is the 0% quantile, su[2] the 10%, ...,
# su[10] the 90% and su[11] the 100% quantile.
su <- quantile(genic_log_expr, probs = seq(0, 1, by = 0.10))

# Top decile (at or above the 90% cut) and bottom decile (at or below the
# 10% cut). which() drops any NA comparison results.
in_top_decile <- genic_log_expr >= su[10]
in_bottom_decile <- genic_log_expr <= su[2]

hg.df <- genic.df[which(in_top_decile), ]
lg.df <- genic.df[which(in_bottom_decile), ]


# Screen layout for split.screen(): one row per screen, columns are
# left, right, bottom, top in normalized device coordinates.
bp.m <- matrix(
  c(
    0.05, 1, 0.1, 1,     # main boxplot panel
    0.05, 1, 0.08, 0.1,  # xlab strip
    0.02, 0.04, 0.1, 1   # ylab strip
  ),
  ncol = 4, byrow = TRUE
)

bp_plot.s <- split.screen(bp.m)

# ---- Main panel: nascent vs. mature scores per nucleosome class ----
screen(bp_plot.s[1])
# mar order: bottom, left, top, right
par(mar = c(2, 2.1, 2, 2.1), cex = 1)
# NOTE(review): repr.plot.* presumably targets a notebook front end; confirm
# it is still needed here.
options(repr.plot.width = 5, repr.plot.height = 5)

# Namespace-qualified so the palette lookup does not depend on RColorBrewer
# having been attached elsewhere in the session.
cl <- RColorBrewer::brewer.pal(12, "Set3")

# Box order: intergenic, low-transcription genic, high-transcription genic;
# each class contributes an Nnuc_mdpAv box followed by an Mnuc_mdpAv box.
boxplot(
  intergenic.df$Nnuc_mdpAv, intergenic.df$Mnuc_mdpAv,
  lg.df$Nnuc_mdpAv, lg.df$Mnuc_mdpAv,
  hg.df$Nnuc_mdpAv, hg.df$Mnuc_mdpAv,
  outline = FALSE, cex.lab = 0.5,
  names = c("Nascent", "Mature", "Nascent", "Mature", "Nascent", "Mature"),
  main = "", notch = TRUE,
  col = c(cl[1], cl[1], cl[5], cl[5], cl[6], cl[6]),
  ylim = c(0, 60)
)

legend(
  "topleft",
  legend = c("Intergenic", "Genic/ Low Transcription",
             "Genic/ High Transcription"),
  pt.bg = c(cl[1], cl[5], cl[6]), col = "black",
  pch = c(21, 21, 21), pt.cex = 1.5, cex = 1, bty = "n",
  horiz = FALSE, y.intersp = 1.25, x.intersp = 1
)

# Print each group's mean (rounded to 2 digits) slightly above the mean's
# position on the corresponding box.
xx <- c(1, 2, 3, 4, 5, 6)
yy <- c(
  mean(intergenic.df$Nnuc_mdpAv, na.rm = TRUE),
  mean(intergenic.df$Mnuc_mdpAv, na.rm = TRUE),
  mean(lg.df$Nnuc_mdpAv, na.rm = TRUE),
  mean(lg.df$Mnuc_mdpAv, na.rm = TRUE),
  mean(hg.df$Nnuc_mdpAv, na.rm = TRUE),
  mean(hg.df$Mnuc_mdpAv, na.rm = TRUE)
)

text(xx, yy + 1.2, labels = round(yy, 2))

# ---- x-axis label strip (label currently disabled) ----
screen(bp_plot.s[2])
par(mar = c(0, 0, 0, 0))
#mtext('Positioning Score', side = 1, cex=1)

# ---- y-axis label strip ----
screen(bp_plot.s[3])
par(mar = c(0, 0, 0, 0))
mtext("Positioning Score", side = 2, cex = 1)

