#!/usr/bin/env Rscript

# Description text handed to the argument parser (passed as `description=`).
# In R source, "\\n" is a literal backslash followed by "n", and the trailing
# "\\" leaves a literal backslash in front of each embedded newline.
# NOTE(review): these escapes are presumably there so the text survives being
# forwarded to the parser backend together with RawTextHelpFormatter --
# confirm before reformatting this string.
docstring <- "This R script computes the total exonic gene length\\n\\
per genes in a sqlite transcript database. All transcript models for\\n\\
a given gene are collapsed into a union gene model and the total\\n\\
exonic length is summed."

suppressPackageStartupMessages(library("argparse"))
# create parser object
parser <- ArgumentParser(description=docstring,
                         formatter_class='argparse.RawTextHelpFormatter')

# Loaded with startup messages suppressed, like argparse above.
suppressPackageStartupMessages(library("GenomicFeatures"))

# required args
# Both options are mandatory. They are not declared as required to the
# parser; instead an omitted option is detected below via is.null().
parser$add_argument("-i", "--txdb",
 help="Input sqlite transcript database")
parser$add_argument("-o", "--out",
 help="Output gene lengths")

args <- parser$parse_args()

# Abort with a clear message when a mandatory option is missing.
# R has no exit() function; stop() signals an error, which terminates a
# non-interactive Rscript run with a non-zero status.
if (is.null(args$txdb)) {
    stop("Please provide --txdb", call. = FALSE)
}
if (is.null(args$out)) {
    stop("Please provide --out", call. = FALSE)
}

##################################################

# Load the transcript database from the sqlite file given via --txdb.
txdb <- loadDb(args$txdb)
# collect the exons per gene id
exons.list.per.gene <- exonsBy(txdb, by="gene")
# For each gene, reduce all the exons to a set of non-overlapping ranges,
# take their lengths (widths) and sum them. reduce(), width() and sum() all
# operate element-wise on the per-gene list, so no explicit loop over genes
# is needed; the result is a vector named by gene id.
exonic.gene.sizes <- sum(width(reduce(exons.list.per.gene)))

# Two-column table: gene id and its total union exonic length.
exonic.gene.sizes.df <- data.frame(gene=names(exonic.gene.sizes), length=as.numeric(exonic.gene.sizes))

# Write to the --out path. The input database (--txdb) must never be the
# target here, otherwise it would be overwritten with the result table.
write.table(exonic.gene.sizes.df, file=args$out, row.names=FALSE, sep='\t', quote=FALSE)