#source("http://bioconductor.org/biocLite.R")
library("GenomicFeatures")
library("Rsamtools")
library("GenomicAlignments")
library("systemPipeR")


# Input locations ----
# Gene annotation file that is loaded with makeTxDbFromGFF() further down.
medakaGene <- "../genome/Hd-rR/gene/Medaka-gene-ver2.2.4.gff_geneID"
# Directory that is scanned for the aligned BAM files.
bamDir <- "./"

# Output tables ----
# Destination of the tab-separated read-count table.
countFile <- "RNA_counts_rep1.txt"
# Destination of the tab-separated RPKM table.
rpkmFile <- "RNA_RPKM_rep1.txt"


# List the RNA-seq BAM files in bamDir whose names end in
# "_Aligned.out_q20_sort.bam".
# The dots of the suffix are escaped so that they only match a literal "."
# (an unescaped "." is a regex wildcard), and `full.names` is spelled out
# instead of relying on partial argument matching.
fls <- list.files(bamDir, pattern = "_Aligned\\.out_q20_sort\\.bam$",
                  full.names = TRUE)

# Stop with a clear message instead of carrying on with an empty file list.
if (length(fls) == 0L) {
  stop("No '*_Aligned.out_q20_sort.bam' files found in '", bamDir, "'",
       call. = FALSE)
}

# yieldSize limits how many records are read from a BAM file at a time.
bamLst <- BamFileList(fls, yieldSize = 100000)


# Load the GFF3 gene annotation into a transcript database, then collect
# the exons of that database grouped by gene.
txdb <- makeTxDbFromGFF(file = medakaGene, format = "gff3")
exonsByGene <- exonsBy(x = txdb, by = "gene")


# Count the reads of every BAM file that overlap the exons of each gene.
# Reads are handled as single-end and the strand is ignored.
ovlp <- summarizeOverlaps(exonsByGene, bamLst,
                          mode = "Union", singleEnd = TRUE,
                          ignore.strand = TRUE)

# Pull the "counts" assay out by its exact name.
counts <- assays(ovlp)[["counts"]]

# Save the count matrix as a tab-separated table with row and column names.
# TRUE/FALSE replace the reassignable T/F shorthands, and row.names/col.names
# are written in full rather than relying on partial argument matching.
write.table(counts, countFile, sep = "\t", append = FALSE, quote = FALSE,
            row.names = TRUE, col.names = TRUE)


# Compute RPKM for each stage: every column of `counts` is passed to
# returnRPKM() together with the per-gene exon ranges.
rpkm <- apply(counts, 2, function(stageCounts) {
  returnRPKM(counts = stageCounts, ranges = exonsByGene)
})

# Save the RPKM matrix as a tab-separated table with row and column names.
# TRUE/FALSE replace the reassignable T/F shorthands, and row.names/col.names
# are written in full rather than relying on partial argument matching.
write.table(rpkm, rpkmFile, sep = "\t", append = FALSE, quote = FALSE,
            row.names = TRUE, col.names = TRUE)
