## Barcode counts file from step 1 (extractValidR2reads_wCorrection.py).
## It is read as a tab-separated table with a header line; the first column
## supplies the row names (presumably the barcode sequences -- confirm against
## the step 1 output). The rest of this script reads the `count` column.
bcCountsFile <- 'ratIslet_bcCounts.txt'
## TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
bc <- read.table(file = bcCountsFile, header = TRUE, sep = '\t', row.names = 1)

## Open graphics windows until at least nWindows devices exist; devices that
## are already open count toward the total.
nWindows <- 3
repeat {
    if (length(dev.list()) >= nWindows) {
        break
    }
    ## On Windows machines, call windows() here instead of X11().
    X11()   # Linux machines (X11 display)
}
## Device numbers of all open devices; the plots below pick their target from this.
dList <- dev.list()

## First window: reads per barcode, in the order the barcodes appear in the
## input table (log-scaled y axis).
dev.set(dList[1])
plot(bc$count,
     log = 'y', pch = 20, cex = 0.5,
     xlab = 'BC index', ylab = 'reads per barcode')

## Threshold chosen by eye: it sits in the gap between the upper band of
## 'valid' barcodes and the remainder (~15000 for this data set).
bcThresh <- 15000
abline(h = bcThresh, col = 'red', lty = 3)

## Report how many barcodes lie above the threshold and what share of all
## reads those barcodes account for.
validIdx <- which(bc$count > bcThresh)   # positions of the 'valid' barcodes
nValidBc <- length(validIdx)
fractValid <- sum(bc$count[validIdx]) / sum(bc$count)

message(sprintf('Valid barcodes: %d, %4.1f%% of total reads', nValidBc, 100 * fractValid))
flush.console()

## Second method: sort the read counts in increasing order and look for the
## 'knee' in the resulting curve.
dev.set(dList[2])
bc.sort <- sort(bc$count)
plot(bc.sort,
     type = 'l', log = 'y',
     main = 'all barcodes',
     xlab = 'barcode count rank (low-to-high)',
     ylab = 'reads per barcode')
## Vertical marker at the rank of the last barcode at or below the threshold.
abline(v = sum(bc.sort <= bcThresh), col = 'red', lty = 3)

## Zoom in on the high-count end of the sorted curve to visually locate the 'knee'.
dev.set(dList[3])
nBc <- length(bc.sort)   # total # of barcodes
## Ranks of the barcodes with the highest number of reads. tail() over
## seq_len() gives exactly nTop ranks, or every rank when fewer than nTop
## barcodes exist, so the index never reaches 0 or goes negative.
nTop <- 5000
topRanks <- tail(seq_len(nBc), nTop)
plot(topRanks, bc.sort[topRanks], log = 'y', type = 'l',
     xlab = 'barcode count rank (low-to-high)', ylab = 'reads per barcode',
     main = sprintf('top %d barcodes', length(topRanks)))
## Same threshold marker as on the full curve: the rank of the last barcode
## at or below bcThresh.
abline(v = length(which(bc.sort <= bcThresh)), lty = 3, col = 'red')
