# Named palette of hex colours, keyed by category label. Entries are looked up
# by name, e.g. nanoPARE_colors[['CAP']].
# NOTE(review): 'DIGEST' is an empty string rather than a hex code -- confirm
# whether a colour is missing here before using it in a plot.
nanoPARE_colors <- c(
  BODY   = "#808285",
  CAP    = "#1C75BC",
  UNCAP  = "#F7941E",
  DIGEST = "",
  XRN4   = "#00CC66",
  D234   = "#CC00FF"
)

# Input location, the samples to load, and the decorator token that is part of
# each BED file name (<sample>.<decorator>.5P.bed).
data_directory <- '../data_tables/'

sample_names <- c('fb')
decorator <- 'W'

# Per-sample containers, each keyed by sample name.
bed_files <- list()
total_reads <- list()
total_rpm <- list()

# For every sample read three tables from data_directory:
#   <sample>.<decorator>.5P.bed  -> bed_files    (no header, default row names)
#   <sample>.5P.counts.tsv       -> total_reads  (header, row names from col 1)
#   <sample>.5P.rpm.tsv          -> total_rpm    (header, row names from col 1)
# The loop variable `s` stays defined after the loop (holding the last sample
# name); later code may rely on it, so it is kept as a plain for-loop.
for (s in sample_names) {
  bed_files[[s]] <- read.table(
    paste0(data_directory, s, '.', decorator, '.5P.bed'),
    stringsAsFactors = FALSE
  )

  total_reads[[s]] <- read.table(
    paste0(data_directory, s, '.5P.counts.tsv'),
    header = TRUE, row.names = 1, stringsAsFactors = FALSE
  )

  total_rpm[[s]] <- read.table(
    paste0(data_directory, s, '.5P.rpm.tsv'),
    header = TRUE, row.names = 1, stringsAsFactors = FALSE
  )
}

# Sample whose tables feed every figure below.
picked_sample <- 'fb'

# BED table of the picked sample, re-indexed by the feature name in column 4.
allbed <- bed_files[[picked_sample]]
rownames(allbed) <- allbed[, 4]

# Column 10 of the BED table, named by feature and capped at 1.
# (Plotted further down as "Percent of reads with uuG".)
percent_ug <- allbed[, 10]
names(percent_ug) <- rownames(allbed)

percent_ug[percent_ug > 1] <- 1

# Strand of each feature (BED column 6), named by feature.
strand <- allbed[, 6]
names(strand) <- rownames(allbed)

plus_strand <- names(which(strand == '+'))
minus_strand <- names(which(strand == '-'))

# Build one strand-matched table from a table that has separate 'plus' and
# 'minus' columns: plus-strand features take their values from the 'plus'
# columns, minus-strand features from the 'minus' columns. The minus block is
# relabelled with the 'plus' column names so both halves can be row-bound.
#   tab       table with feature row names and 'plus'/'minus' column names
#   plus_ids  row names of plus-strand features
#   minus_ids row names of minus-strand features
# Returns the row-bound table (plus-strand rows first).
collapse_strands <- function(tab, plus_ids, minus_ids) {
  plus_cols <- grep('plus', colnames(tab))
  minus_cols <- grep('minus', colnames(tab))
  rbind(
    tab[plus_ids, plus_cols, drop = FALSE],
    setNames(
      tab[minus_ids, minus_cols, drop = FALSE],
      colnames(tab)[plus_cols]
    )
  )
}

# Index the per-sample lists with picked_sample. The previous code used the
# leftover loading-loop variable `s`, which only matched picked_sample while
# the last loaded sample happened to be the picked one.
dataset <- collapse_strands(total_reads[[picked_sample]], plus_strand, minus_strand)

dataset_rpm <- collapse_strands(total_rpm[[picked_sample]], plus_strand, minus_strand)


COUNTS_WT <- round(dataset)
# Mean RPM over the first three columns.
# NOTE(review): presumably the three WT replicates -- confirm column order.
MEAN_RPM <- rowMeans(dataset_rpm[, 1:3])

# Frequency table of percent_ug after rounding to two decimals. Within this
# section it is only referenced by the commented-out estimate below.
hvals <- table(round(percent_ug, digits = 2))
# min_ug = as.numeric(names(sort(hvals[names(hvals)<.2])[1]))
# Fixed uuG cutoff used in place of the data-driven estimate above.
min_ug <- 0.1

# Split features by the tag embedded in their names:
# '.noncapped.' -> low_uug, '.capped.' -> high_uug.
low_uug <- names(percent_ug)[grepl('\\.noncapped\\.', names(percent_ug))]
high_uug <- names(percent_ug)[grepl('\\.capped\\.', names(percent_ug))]

#################################################
# FIG2A. Top panel: histogram of the number of features binned in each uG
# percentile. Bottom panel: per-feature uuG fraction against mean log10(RPM).
pdf(paste0('FIG2A.', decorator, '.pdf'), useDingbats = FALSE, width = 1.55, height = 1.8, pointsize = 8)
# Four-row layout: row 1 holds the histogram, rows 2-4 hold the scatter plot.
graph_layout <- rbind(c(1, 1, 1), c(2, 2, 2), c(2, 2, 2), c(2, 2, 2))
layout(graph_layout)
par(lend = "square", ljoin = "mitre", mar = c(0, 4, 1, 1), lwd = .75)
valuetable <- table(round(percent_ug, 2))

# One bin per 0.01 step from 0.00 to 0.99. Values that round to exactly 1 have
# no pre-made bin; the name-based assignment below appends one for them.
percentiles <- rep(0, 100)
names(percentiles) <- seq(0, .99, .01)
percentiles[names(valuetable)] <- valuetable

# Colour the bins left of the min_ug cutoff as UNCAP and the rest as CAP.
# The split is derived from min_ug (it used to be a hard-coded 10) so the bar
# colours always agree with the dashed cutoff line drawn at min_ug * 100.
n_uncap_bins <- min(length(percentiles), max(0, round(min_ug * 100)))
bin_colors <- c(
  rep(nanoPARE_colors[['UNCAP']], n_uncap_bins),
  rep(nanoPARE_colors[['CAP']], length(percentiles) - n_uncap_bins)
)

barplot(
  percentiles,
  ylim = c(0, 1500),
  xlim = c(0, 100),
  axisnames = FALSE,
  axes = FALSE,
  # xlab="Percent of reads with uuG",
  ylab = "# features",
  # main="Distribution of upstream untemplated G (uuG)",
  border = NA,
  space = 0,  # bars are one unit wide, so bin i spans x = i-1 .. i
  col = bin_colors
)

# axis(1,seq(0,100,10),labels = seq(0,100,10))
axis(2, seq(0, 1500, 500), las = 1, lwd = .75)
abline(v = min_ug * 100, lty = 2, lwd = .75)
# legend('topleft',legend = ,bty='n')
# Legend reports the number of name-tagged noncapped / capped features.
legend('topright', legend = c(
  paste0('n = ', length(low_uug)),
  paste0('n = ', length(high_uug))
  ),
  fill = nanoPARE_colors[c('UNCAP', 'CAP')], border = NA,
  bty = 'n')
# dev.off()

# pdf('uG_scatter.pdf',useDingbats = F)
# Second panel shares the open device; only the margins change.
par(lend = "square", ljoin = "mitre", mar = c(4, 4, 0, 1))
plot(
  x = percent_ug[c(low_uug, high_uug)],
  y = log10(MEAN_RPM[c(low_uug, high_uug)]),
  cex = .1,
  las = 1,
  # main="Distribution of upstream untemplated G (uuG)",
  xlab = "Percent of reads with uuG",
  xaxt = 'n',
  yaxt = 'n',
  ylim = c(-1, 6),
  xlim = c(0, 1),
  ylab = "mean log10(RPM)",
  pch = 15,
  # Point colours follow the same order as the plotted features:
  # low_uug first, then high_uug.
  col = adjustcolor(
    c(rep(nanoPARE_colors[['UNCAP']], length(low_uug)), rep(nanoPARE_colors[['CAP']], length(high_uug))),
    alpha.f = 1
  )
)
abline(v = min_ug, lty = 2)
# x values are fractions (0-1); tick labels show them as percentages.
axis(1, seq(0, 1, .2), labels = seq(0, 100, 20), lwd = .75)
axis(2, c(0, 1, 2, 3, 4, 5), lwd = .75, las = 1)
dev.off()

######################################
# Integer-rounded, strand-collapsed counts used as DESeq2 input.
COUNT_VALUES <- round(dataset)

# Column indices of the two conditions in COUNT_VALUES.
condition_1 <- 1:3
condition_2 <- 4:6

##############
### DEseq2 ###
##############
# To install DESeq2 (biocLite has been replaced by BiocManager):
# install.packages("BiocManager")
# BiocManager::install("DESeq2")
library(DESeq2)

# The sample sheet below is hard-wired to three WT columns followed by three
# XRN columns; fail early with a clear message if the table has another shape.
stopifnot(ncol(COUNT_VALUES) == 6L)
columndata <- data.frame(
  condition = c('WT', 'WT', 'WT', 'XRN', 'XRN', 'XRN'),
  reps = c('1', '2', '3', '1', '2', '3'),
  row.names = colnames(COUNT_VALUES),
  stringsAsFactors = TRUE
)

dds <- DESeqDataSetFromMatrix(round(COUNT_VALUES, 0), colData = columndata, design = ~ condition)
# Make WT the reference level so fold changes are reported as XRN over WT.
dds$condition <- relevel(dds$condition, ref = "WT")
dds <- DESeq(dds)
# Pin the shrinkage estimator: `contrast` is only accepted by type 'normal'
# (and 'ashr'). Current DESeq2 defaults to type 'apeglm', which stops with an
# error when `contrast` is supplied.
res <- lfcShrink(dds, contrast = c('condition', 'XRN', 'WT'), type = 'normal')
# res <- results(dds,independentFiltering = F)
# Recompute BH-adjusted p-values from the raw p-values of all rows,
# overwriting the padj column returned by DESeq2.
res$padj <- p.adjust(res$pvalue, method = "BH")

# Named per-feature vectors used by the fold-change figures.
foldchange <- res$log2FoldChange
adjpvalue <- res$padj
names(foldchange) <- rownames(res)
names(adjpvalue) <- rownames(res)
# Fold changes are clamped to [-z, z] for plotting.
z <- 6

# Thresholds for calling a feature changed: |log2 fold change| above
# min_foldchange and -log10(adjusted p) above max_pval (i.e. padj < 0.05).
min_foldchange <- 1
max_pval <- -log10(0.05)

# FIG2B: fold-change histogram (top panel) and volcano plot (bottom panel) for
# the features listed in high_uug.
pdf(paste0('FIG2B.', decorator, '_capped_foldchange.pdf'), useDingbats = FALSE, width = 1.55, height = 1.8, pointsize = 8)
graph_layout <- rbind(c(1, 1, 1), c(2, 2, 2), c(2, 2, 2), c(2, 2, 2))
layout(graph_layout)
par(lend = "square", ljoin = "mitre", mar = c(0, 4, 1, 1), lwd = .75)

# Clamp fold changes to [-z, z] and floor adjusted p-values at 1e-20 so that
# the values fit the plotted ranges (xlim = -z..z, ylim = 0..20).
fc <- foldchange
fc[fc > z] <- z
fc[fc < (-z)] <- -z
pv <- adjpvalue
pv[-log10(pv) > 20] <- 10^-20

# Histogram bins of width 0.1 from -z to z, keyed by the rounded bin value.
fc_bins <- setNames(
  rep(0, length(seq(-z, z, .1))),
  as.character(round(seq(-z, z, .1), 1))
)
fc_table <- table(as.numeric(round(fc[high_uug], 1)))
fc_table <- fc_table[names(fc_table) %in% names(fc_bins)]
fc_bins[names(fc_table)] <- fc_table

barplot(
  fc_bins,
  ylim = c(0, 3000),
  axisnames = FALSE,
  axes = FALSE,
  # xlab="Percent of reads with uuG",
  ylab = "# features",
  # main="Distribution of upstream untemplated G (uuG)",
  border = NA,
  space = 0,
  col = nanoPARE_colors[['CAP']]
)
# Add a percentage in 4 regions: below -1, [-1, 0), [0, 1] and above 1.
hg <- fc[high_uug]
text(
  x = c(
    1,
    (length(fc_bins) / 2) - 5,
    (length(fc_bins) / 2) + 5,
    length(fc_bins)
  ),
  y = 1500,
  labels = paste0(
    round(
      c(
        sum(hg < (-1)),
        sum(hg >= (-1) & hg < (0)),
        sum(hg >= (0) & hg <= (1)),
        sum(hg > (1))
      ) / length(hg),
      2
    ) * 100,
    '%'
  )
)

# Guide lines at the centres of the 0 (solid) and -1 / 1 (dashed) bins.
abline(v = which(names(fc_bins) == 0) - 0.5, lty = 1)
abline(
  v = c(which(names(fc_bins) == (-1)), which(names(fc_bins) == (1))) - 0.5,
  lty = 2
)

axis(2, seq(0, 3000, 1000), las = 1, lwd = .75)

# Bottom panel: volcano plot on the same device, with a bottom margin for the
# x-axis label.
par(lend = "square", ljoin = "mitre", mar = c(4, 4, 0, 1))
plot(
  x = fc[high_uug],
  y = -log10(pv[high_uug]),
  xlim = c(-z, z),
  ylim = c(0, 20),
  xlab = 'log2 fold change (Xrn1/control)',
  ylab = "-log10 p-value (BH)",
  # main="XRN1 digestion: capped peaks",
  pch = 15,
  cex = .1,
  lwd = .75,
  col = 'gray30',
  las = 1,
  axes = FALSE,
  frame.plot = TRUE
)
axis(1, seq(-z, z, 2), lwd = .75)
axis(2, seq(0, 20, 5), lwd = .75, las = 1)
# Shaded regions: blue = significant decrease, red = significant increase,
# grey = below the significance threshold.
polygon(
  x = c(-min_foldchange, -20, -20, -min_foldchange),
  y = c(max_pval, max_pval, 30, 30),
  col = adjustcolor('blue', alpha.f = .1), border = NA
)
polygon(
  x = c(min_foldchange, 20, 20, min_foldchange),
  y = c(max_pval, max_pval, 30, 30),
  col = adjustcolor('red', alpha.f = .1), border = NA
)
polygon(
  x = c(-20, 20, 20, -20),
  y = c(0, 0, max_pval, max_pval),
  col = adjustcolor('black', alpha.f = .1), border = NA
)
# Features passing both thresholds, split by direction of change.
picked_plus <- names(which(
  fc[high_uug] > min_foldchange &
    -log10(pv[high_uug]) > max_pval &
    !is.nan(pv[high_uug])
))
picked_minus <- names(which(
  fc[high_uug] < (-min_foldchange) &
    -log10(pv[high_uug]) > max_pval &
    !is.nan(pv[high_uug])
))
# Redraw the picked features larger and in colour on top of the grey points.
points(x = fc[high_uug][picked_plus], y = -log10(pv[high_uug][picked_plus]), pch = 15, col = adjustcolor('firebrick2'), cex = .2)
points(x = fc[high_uug][picked_minus], y = -log10(pv[high_uug][picked_minus]), pch = 15, col = adjustcolor('blue'), cex = .2)

abline(h = max_pval, lty = 1, lwd = .75)
abline(v = 0, lty = 1, lwd = .75)
abline(v = c(-min_foldchange, min_foldchange), lty = 2, lwd = .75)
# Counts (and share of all high_uug features) annotated in the plot corners.
not_signif <- length(which(-log10(pv[high_uug]) < max_pval))
sigup <- length(picked_plus)
sigdown <- length(picked_minus)
text(x = -6, y = 19.8, labels = paste0(sigdown, ' (', round(sigdown / length(high_uug), 3) * 100, '%)'), pos = 4)
text(x = 6, y = 19.8, labels = paste0(sigup, ' (', round(sigup / length(high_uug), 3) * 100, '%)'), pos = 2)
text(-6, 0.2, labels = paste0(not_signif, ' (', round(not_signif / length(high_uug), 3) * 100, '%)'), pos = 4)
# points(x=0.191503554,y=.7178232,pch=20,col='black',cex=1)
# text(x=0.191503554,y=.7178232,labels = 'miR156d',pos = 4,pch=20,col='black',cex=1)
dev.off()

# Keep the capped picks; picked_plus / picked_minus are reused further down.
picked_plus_cap <- picked_plus
picked_minus_cap <- picked_minus
########################################
# FIG2C: fold-change histogram (top panel) and volcano plot (bottom panel) for
# the features listed in low_uug. Uses the clamped `fc` and `pv` vectors that
# were prepared for the previous figure.
pdf(paste0('FIG2C.', decorator, '_noncapped_foldchange.pdf'), useDingbats = FALSE, width = 1.55, height = 1.8, pointsize = 8)
graph_layout <- rbind(c(1, 1, 1), c(2, 2, 2), c(2, 2, 2), c(2, 2, 2))
layout(graph_layout)
par(lend = "square", ljoin = "mitre", mar = c(0, 4, 1, 1), lwd = .75)

# Histogram bins of width 0.1 from -z to z, keyed by the rounded bin value.
fc_bins <- setNames(
  rep(0, length(seq(-z, z, .1))),
  as.character(round(seq(-z, z, .1), 1))
)
fc_table <- table(as.numeric(round(fc[low_uug], 1)))
fc_table <- fc_table[names(fc_table) %in% names(fc_bins)]
fc_bins[names(fc_table)] <- fc_table

barplot(
  fc_bins,
  ylim = c(0, 200),
  axisnames = FALSE,
  axes = FALSE,
  # xlab="Percent of reads with uuG",
  ylab = "# features",
  # main="Distribution of upstream untemplated G (uuG)",
  border = NA,
  space = 0,
  col = nanoPARE_colors[['UNCAP']]
)
# Percentage of features in 4 regions: below -1, [-1, 0), [0, 1] and above 1.
lg <- fc[low_uug]
text(
  x = c(
    1,
    (length(fc_bins) / 2) - 5,
    (length(fc_bins) / 2) + 5,
    length(fc_bins)
  ),
  y = 75,
  labels = paste0(
    round(
      c(
        sum(lg < (-1)),
        sum(lg >= (-1) & lg < (0)),
        sum(lg >= (0) & lg <= (1)),
        sum(lg > (1))
      ) / length(lg),
      2
    ) * 100,
    '%'
  )
)

# Guide lines at the centres of the 0 (solid) and -1 / 1 (dashed) bins.
abline(v = which(names(fc_bins) == 0) - 0.5, lty = 1)
abline(
  v = c(which(names(fc_bins) == (-1)), which(names(fc_bins) == (1))) - 0.5,
  lty = 2
)

axis(2, seq(0, 200, 100), las = 1, lwd = .75)

# Bottom panel: volcano plot on the same device, with a bottom margin for the
# x-axis label.
par(lend = "square", ljoin = "mitre", mar = c(4, 4, 0, 1))
plot(
  x = fc[low_uug],
  y = -log10(pv[low_uug]),
  xlim = c(-z, z),
  ylim = c(0, 20),
  xlab = 'log2 fold change (Xrn1/control)',
  ylab = "-log10 p-value (BH)",
  # main="XRN1 digestion: capped peaks",
  pch = 15,
  cex = .1,
  col = 'gray30',
  las = 1,
  axes = FALSE,
  frame.plot = TRUE
)
axis(1, seq(-z, z, 2), lwd = .75)
axis(2, seq(0, 20, 5), lwd = .75, las = 1)
# Shaded regions: blue = significant decrease, red = significant increase,
# grey = below the significance threshold.
polygon(
  x = c(-min_foldchange, -20, -20, -min_foldchange),
  y = c(max_pval, max_pval, 30, 30),
  col = adjustcolor('blue', alpha.f = .1), border = NA
)
polygon(
  x = c(min_foldchange, 20, 20, min_foldchange),
  y = c(max_pval, max_pval, 30, 30),
  col = adjustcolor('red', alpha.f = .1), border = NA
)
polygon(
  x = c(-20, 20, 20, -20),
  y = c(0, 0, max_pval, max_pval),
  col = adjustcolor('black', alpha.f = .1), border = NA
)
# Features passing both thresholds, split by direction of change.
picked_plus <- names(which(
  fc[low_uug] > min_foldchange &
    -log10(pv[low_uug]) > max_pval &
    !is.nan(pv[low_uug])
))
picked_minus <- names(which(
  fc[low_uug] < (-min_foldchange) &
    -log10(pv[low_uug]) > max_pval &
    !is.nan(pv[low_uug])
))
# Redraw the picked features larger and in colour on top of the grey points.
points(x = fc[low_uug][picked_plus], y = -log10(pv[low_uug][picked_plus]), pch = 15, col = adjustcolor('firebrick2'), cex = .2)
points(x = fc[low_uug][picked_minus], y = -log10(pv[low_uug][picked_minus]), pch = 15, col = adjustcolor('blue'), cex = .2)

abline(h = max_pval, lty = 1, lwd = .75)
abline(v = 0, lty = 1, lwd = .75)
abline(v = c(-min_foldchange, min_foldchange), lty = 2, lwd = .75)
# Counts (and share of all low_uug features) annotated in the plot corners.
not_signif <- length(which(-log10(pv[low_uug]) < max_pval))
sigup <- length(picked_plus)
sigdown <- length(picked_minus)
text(x = -6, y = 19.8, labels = paste0(sigdown, ' (', round(sigdown / length(low_uug), 3) * 100, '%)'), pos = 4)
text(x = 6, y = 19.8, labels = paste0(sigup, ' (', round(sigup / length(low_uug), 3) * 100, '%)'), pos = 2)
text(-6, 0.2, labels = paste0(not_signif, ' (', round(not_signif / length(low_uug), 3) * 100, '%)'), pos = 4)
# points(x=-4.0259982,y=2.1522193,pch=20,col='black',cex=1)
# text(x=-4.0259982,y=2.1522193,labels = 'miR156d',pos = 2,pch=20,col='black',cex=1)
dev.off()

# Keep the noncapped picks under their own names.
picked_plus_noncap <- picked_plus
picked_minus_noncap <- picked_minus

#########################################
