#!/usr/bin/env Rscript

suppressPackageStartupMessages(library("argparse"))
# Command-line interface.
# --mat and --out are mandatory, but that is enforced by hand after parsing
# rather than through argparse, so every option here is declared optional.
parser <- ArgumentParser()

# required args
parser$add_argument("-i", "--mat",
 help="Tab-separated count matrix with a header row; first column is row names, other columns are samples named GROUP_REPLICATE")
parser$add_argument("-o", "--out",
 help="Output RDS file holding the DGEList after dispersion estimation")

# optional args
parser$add_argument("-s", "--sv",
 help="Tab-separated, headerless table of surrogate variables (one row per sample) added to the design")
parser$add_argument("-l", "--lib_sizes",
 help="Tab-separated, headerless table: sample name, library size. Overrides the column sums of --mat")
parser$add_argument("--ctrl_lib_sizes",
 help="Tab-separated, headerless table: sample name, control library size. Required with --ncis")
parser$add_argument("-n", "--ncis",
 help="Tab-separated, headerless table: sample name, NCIS factor. Requires --lib_sizes and --ctrl_lib_sizes")
parser$add_argument("--without_intercept", action="store_true", default=FALSE,
 help="Build the design matrix without an intercept term")
parser$add_argument("--inverse_pi_naught", action="store_true", default=FALSE,
 help="With --ncis, use inverse pi-naught as the normalization factors")

args <- parser$parse_args()

# Validate the argument combination before any data is loaded.
# R has no exit() function, so a failed check is raised with stop(): under
# Rscript this prints the message to stderr and ends the run with a non-zero
# status, without a spurious "could not find function" error.
if (is.null(args$mat)) {
    stop("Please provide --mat", call. = FALSE)
}
if (is.null(args$out)) {
    stop("Please provide --out", call. = FALSE)
}
# --ncis is combined with both library-size tables later in the script, so it
# is rejected unless both of them were supplied too.
if (!is.null(args$ncis)) {
    if (is.null(args$lib_sizes)) {
        stop("If --ncis provided, please provide --lib_sizes", call. = FALSE)
    }
    if (is.null(args$ctrl_lib_sizes)) {
        stop("If --ncis provided, please provide --ctrl_lib_sizes", call. = FALSE)
    }
}

##################################################

suppressPackageStartupMessages(library("edgeR"))

# Load the count matrix: tab-separated, header row, first column as row names.
mat <- read.table(args$mat, header = TRUE, row.names = 1, sep = '\t')

# Derive each sample's group from its column name by dropping the last
# "_"-separated field (e.g. "a_b_1" -> "a_b"). A name without any "_" yields
# the empty string.
# Earlier variant, which kept only the first field:
# timepoint <- factor(sapply(strsplit(as.character(colnames(mat)), "_"), function(x) x[[1]]))
name_fields <- strsplit(as.character(colnames(mat)), "_")
timepoint <- factor(vapply(
    name_fields,
    # head(x, -1) states "all but the last field" directly; the previous
    # x[1:length(x)-1] relied on `:` binding tighter than `-` and on the
    # resulting zero index being silently dropped.
    function(x) paste(head(x, -1), collapse = "_"),
    character(1)
))

# Build the design matrix: timepoint, optionally plus surrogate variables,
# with or without an intercept depending on --without_intercept.
if (!is.null(args$sv)) {
    sv <- as.matrix(read.table(args$sv, header = FALSE, sep = '\t'))
    if (!args$without_intercept) {
        model_design <- model.matrix(~timepoint+sv)
    } else {
        model_design <- model.matrix(~0+timepoint+sv)
    }
} else {
    if (!args$without_intercept) {
        model_design <- model.matrix(~timepoint)
    } else {
        model_design <- model.matrix(~0+timepoint)
    }
}

# Report the size of the fit: rows of the design are observations (samples),
# its rank is the number of estimable parameters.
print(paste('Observations =', nrow(model_design)))
print(paste('Parameters to estimate =', qr(model_design)$rank))

####

# Wrap the counts in a DGEList, grouping samples by timepoint.
y <- DGEList(counts = mat, group = timepoint)

# Return the rows of a sample-keyed table in the column order of `mat`.
# A sample absent from the table would otherwise come back as NA and flow
# into the library sizes / normalization factors, so it is reported here.
per_sample <- function(tbl, option) {
    absent <- setdiff(colnames(mat), rownames(tbl))
    if (length(absent) > 0) {
        stop("Samples in --mat missing from ", option, ": ",
             paste(absent, collapse = ", "), call. = FALSE)
    }
    tbl[colnames(mat), ]
}

if (!is.null(args$lib_sizes)) {
    # User-supplied library sizes replace the ones DGEList computed.
    lib_sizes <- read.table(args$lib_sizes, header = FALSE, row.names = 1, sep = '\t')
    y$samples$lib.size <- per_sample(lib_sizes, "--lib_sizes")
    if (is.null(args$ncis)) {
        # use total library size as normalization factor
        y <- calcNormFactors(y, method = "none")
    } else {
        ctrl_lib_sizes <- read.table(args$ctrl_lib_sizes, header = FALSE, row.names = 1, sep = '\t')
        # NOTE(review): both tables are read without a header, so this compares
        # auto-generated column names, not sample names. Sample coverage is
        # checked by per_sample() instead.
        stopifnot(all(colnames(lib_sizes) == colnames(ctrl_lib_sizes)))
        ncis <- read.table(args$ncis, header = FALSE, row.names = 1, sep = '\t')

        # Control library size scaled by the per-sample NCIS factor.
        scaled_ctrl <- per_sample(ctrl_lib_sizes, "--ctrl_lib_sizes") *
            per_sample(ncis, "--ncis")

        # --inverse_pi_naught is a store_true flag defaulting to FALSE, so it is
        # never NULL; test its value, as is done for --without_intercept.
        # The default path is the one that previously always ran.
        if (!args$inverse_pi_naught) {
            norm.factors <- 1 / scaled_ctrl
            norm.factors <- norm.factors / max(norm.factors)
        } else {
            pi.naught <- scaled_ctrl / per_sample(lib_sizes, "--lib_sizes")
            inverse.pi.naught <- 1 / pi.naught
            norm.factors <- inverse.pi.naught / max(inverse.pi.naught)
        }
        # Either way the factors are rescaled so the largest equals 1.
        y$samples$norm.factors <- norm.factors
    }
} else {
    # No external library sizes: let edgeR compute normalization factors.
    # NOTE(review): `design` is passed through as before; confirm that
    # calcNormFactors actually makes use of it.
    y <- calcNormFactors(y, design = model_design)
}

# Estimate dispersions under the design, then persist the fitted object.
y <- estimateDisp(y, design = model_design)

saveRDS(y, args$out)