# Registry of start-site tables, keyed by data set name. It starts empty and
# is filled in by the loading code further down the script.
start_sites <- list()

# Four-step blue palette, darkest first. Used below to colour the *_fb samples.
cap_color_gradient <- c("#0d304c", "#1967a5", "#2a90e1", "#56a7e8")

# Four-step orange palette, darkest first. Not referenced in the visible part
# of this script.
noncap_color_gradient <- c("#8d5108", "#d77a0b", "#f8ab50", "#fbc380")
################################################################################################
# As a positive control set of TSS features, PEAT features are used from Morton et al. 2014
# Directory (relative to the working directory) that holds the BED inputs.
data_directory <- "../data_tables/"

# Read the tab-separated PEAT peak table; strings stay as character vectors.
PEAT <- read.table(
  paste0(data_directory, "PEAT_peaks.bed"),
  sep = "\t",
  stringsAsFactors = FALSE
)

# Distinct values of the first column (sequence names).
chromosomes <- unique(PEAT[, 1])

# One position per peak: column 2 plus column 7.
# NOTE(review): presumably column 7 is the peak-summit offset from the BED
# start -- confirm against the PEAT file format.
starts <- PEAT[, 2] + PEAT[, 7]

# Positive-control table with three columns: PEAT column 1, PEAT column 6 and
# the derived position. cbind() on mixed inputs yields a character matrix, so
# consumers convert column 3 back with as.numeric().
start_sites[["pos_control"]] <- cbind(PEAT[, 1], PEAT[, 6], starts)

# Build one start-site table per reference annotation.
#
# Each '<reference_set>.gff' file is read from the working directory, reduced
# to transcript-level feature types, and converted to a three-column character
# matrix (column 1, column 7 = strand, start coordinate). The start coordinate
# is column 4 for '+' features and column 5 for '-' features. Rows whose strand
# is neither '+' nor '-' are dropped.
transcript_feature_types <- c(
  'mRNA',
  'ncRNA',
  'lnc_RNA',
  'pseudogenic_transcript',
  'miRNA_primary_transcript',
  'transposable_element_gene'
)
for (reference_set in c('TAIR10_ERCC', 'Araport11')) {
  reference <- paste0(reference_set, '.gff')
  # quote = "" disables quote processing. The attribute column of a GFF file is
  # free text; with read.table's default quote characters (" and ') a stray
  # apostrophe such as in "5' UTR" starts a quoted field that can swallow the
  # following lines and silently drop or corrupt rows.
  annotation <- read.table(
    reference,
    stringsAsFactors = FALSE,
    sep = '\t',
    quote = ""
  )
  annotation <- annotation[annotation[, 3] %in% transcript_feature_types, ]
  start_sites[[reference_set]] <- rbind(
    as.matrix(annotation[annotation[, 7] == '+', c(1, 7, 4)]),
    as.matrix(annotation[annotation[, 7] == '-', c(1, 7, 5)])
  )
}

# Load one start-site table per sample from
# '<data_directory><samplename>.<decorator>.5P.bed'.
decorator <- "W"
for (samplename in c("fb", "1ng_fb", "100pg_fb", "10pg_fb")) {
  reference_set <- samplename
  reference <- paste0(data_directory, samplename, ".", decorator, ".5P.bed")
  annotation <- read.table(reference, sep = "\t", stringsAsFactors = FALSE)

  # Keep only features whose name (column 4) contains '.capped.'.
  annotation <- annotation[grepl("\\.capped\\.", annotation[, 4]), ]

  # Report how many features survived the filter.
  cat(nrow(annotation), "\n")

  # Three columns: column 1, column 6, and a position computed as
  # column 2 + column 5 + 1.
  # NOTE(review): the '+ 1' presumably converts a 0-based BED start to a
  # 1-based coordinate -- confirm against the BED producer.
  start_sites[[reference_set]] <- cbind(
    annotation[, 1],
    annotation[, 6],
    annotation[, 2] + annotation[, 5] + 1
  )
  rownames(annotation) <- annotation[, 4]
}

# The 'fb' table is also exposed under the name '5ng_fb'.
start_sites[["5ng_fb"]] <- start_sites[["fb"]]
################################################################################################
# Testing the start/end tables

# Plot colour for every data set shown in the figures. The two reference
# annotations get fixed greens; the four *_fb samples take the four steps of
# cap_color_gradient in order.
annotation_colors <- c("#a8b642", "#009933", cap_color_gradient[1:4])
names(annotation_colors) <- c(
  "TAIR10_ERCC",
  "Araport11",
  "5ng_fb",
  "1ng_fb",
  "100pg_fb",
  "10pg_fb"
)

# Strand-aware signed distance between `pos` and the closest site of a table.
#
# Arguments:
#   chrom       value matched against column 1 of the site table.
#   strand      value matched against column 2; '+' selects the forward
#               formula, any other value the reverse one.
#   pos         numeric query position.
#   controlname name of the table inside the global `start_sites` list.
#
# Returns `pos - nearest` when strand is '+', otherwise `nearest - pos`, where
# `nearest` is the candidate position (column 3) with the smallest absolute
# distance to `pos`; the first candidate wins a tie. The result is NA when no
# candidate exists for that chrom/strand combination.
find_offset <- function(chrom, strand, pos, controlname = "pos_control") {
  sites <- start_sites[[controlname]]
  on_target <- sites[, 1] == chrom & sites[, 2] == strand
  possible <- as.numeric(sites[on_target, 3])

  nearest <- possible[which.min(abs(pos - possible))]
  if (length(nearest) == 0) {
    # No usable candidate: propagate a missing value instead of numeric(0).
    nearest <- NA_real_
  }

  if (strand == "+") {
    pos - nearest
  } else {
    nearest - pos
  }
}

# For every reference set, measure how far each positive-control site lies
# from the nearest site of that set. The sign of find_offset() is flipped
# before the offsets are stored in TSSdiffs.
TSSdiffs <- list()
transcript_test_set <- start_sites[["pos_control"]]

# Empty canvas; the loop below only draws a baseline on it.
plot(-10, -10, type = "l", xlim = c(-20, 20), log = "", ylim = c(0, 0.6), xlab = "Distance from true TSS")

for (reference_set in c("TAIR10_ERCC", "Araport11", "5ng_fb", "1ng_fb", "100pg_fb", "10pg_fb")) {
  # Each row of the control table is (column 1, column 2, position).
  TSSdiff <- apply(transcript_test_set, 1, function(site) {
    find_offset(site[1], site[2], as.numeric(site[3]), controlname = reference_set)
  })
  TSSdiffs[[reference_set]] <- -TSSdiff

  # Frequency of every observed offset; table() leaves out NA offsets.
  TSSdiff_densities <- table(TSSdiffs[[reference_set]])

  # Print the set name and the fraction of offsets that fall within -10..10.
  cat(reference_set, "\t")
  in_window <- names(TSSdiff_densities) %in% as.character(-10:10)
  cat(sum(TSSdiff_densities[in_window]) / sum(TSSdiff_densities), "\n")

  abline(h = 0)
}



# FIGURE 3 CONFIGURATION
# Figure 3A: one filled histogram per reference set showing how the stored
# offsets in TSSdiffs are distributed over -50..50. Seven stacked panels are
# drawn: six histograms and a final panel that carries only the shared x axis.
pdf('FIG3A.nanoPARE_TSS_poly.pdf', height = 4.8622047, width = 7.204724 / 3)
par(mfrow = c(7, 1), mar = c(0, 0, 0, 0), lend = 'butt', ljoin = 'mitre')
for (reference_set in c('TAIR10_ERCC', 'Araport11', '5ng_fb', '1ng_fb', '100pg_fb', '10pg_fb')) {
  offsets <- TSSdiffs[[reference_set]]

  # Count every offset in -50..50; offsets with no observations count as 0.
  # NA offsets and offsets outside the window do not match any level.
  xvals <- -50:50
  yvals <- as.vector(table(factor(offsets, levels = xvals)))

  plot(x = xvals, y = yvals, type = 'l', lwd = 0, col = annotation_colors[reference_set], xlim = c(-50, 50), axes = FALSE)
  abline(v = seq(-40, 40, 20), lty = 3, col = 'black')
  # Close the outline along y = 0 so the area under the curve is filled.
  polygon(
    x = c(xvals, rev(xvals)),
    y = c(yvals, rep(0, length(yvals))),
    col = annotation_colors[reference_set],
    border = NA
  )
  abline(v = c(-50, 50))
  abline(h = 0, lwd = 1, col = 'black')

  # Count and percentage of control sites within 50 of a site in this set.
  # Sites with an NA offset (no candidate found) count as misses, so the
  # percentage is always the displayed n over all control sites. Previously a
  # single NA offset turned the percentage into NA.
  n_within <- sum(abs(offsets) <= 50, na.rm = TRUE)
  pct_within <- round(n_within / length(offsets) * 100)
  legend(
    'topleft',
    legend = c(
      reference_set,
      paste0('n=', n_within, ' (', pct_within, '%)')
    ),
    border = NA, bg = 'white', box.col = NA, cex = 1
  )
}

# Seventh panel: empty plot that only provides the shared x axis.
par(mar = c(3, 0, 0, 0))
plot(-100, -100, xlim = c(-50, 50), axes = FALSE, xlab = 'Distance from true TSS')
axis(1, seq(-40, 40, 20))
dev.off()

# Figure 3B: cumulative frequency of the absolute offsets stored in TSSdiffs,
# one curve per reference set, restricted to offsets within `maxdist`.
pdf('FIG3B.nanoPARE_CFD_TSS.pdf', useDingbats = FALSE, width = 1.8, height = 2, pointsize = 8)
maxdist <- 200
par(mfrow = c(1, 1), mar = c(3, 3, 0, 1))
plot(-10, -10, xlim = c(0, maxdist), ylim = c(0, 1), ylab = 'Cumulative frequency', xlab = 'Distance from true TSS (nucleotides)', las = 1, xaxt = 'n', yaxt = 'n')
axis(1, seq(0, maxdist, 50), las = 1)
# The y axis is computed as a fraction (0..1) but labelled in percent.
axis(2, seq(0, 1, .25), seq(0, 100, 25), las = 1)

distance_bins <- 0:maxdist
for (reference_set in c('TAIR10_ERCC', 'Araport11', '5ng_fb', '1ng_fb', '100pg_fb', '10pg_fb')) {
  refdata <- TSSdiffs[[reference_set]]
  # Drop NA offsets explicitly: indexing with an NA condition would keep them
  # as NA entries and inflate the denominator below.
  refdata <- refdata[!is.na(refdata) & abs(refdata) <= maxdist]

  # Count every distance 0..maxdist, including distances never observed, so
  # each cumulative value is plotted at its true distance. Passing only the
  # cumulative vector to lines() would plot it against the element index
  # (1, 2, ...), shifting and compressing the curve.
  counts <- as.vector(table(factor(abs(refdata), levels = distance_bins)))
  lines(
    x = distance_bins,
    y = cumsum(counts) / length(refdata),
    type = 'l', lwd = 1, col = annotation_colors[reference_set]
  )
}
abline(h = c(0, 1))
legend('bottomright', legend = names(annotation_colors), fill = annotation_colors, border = NA, bty = 'n')
dev.off()


