#!/usr/bin/env Rscript

## Kiley Graim
## March 2018
##
## Implementation of the PAGE differential expression analysis on the PEPs
##   PAGE: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1183189/

## PAGE enrichment of PAM50 subtypes, etc


###########################
###   Script setup
####################



if(!require('getopt')) {
  install.packages('getopt')
  library(getopt)
}


## Usage/help text. The script name is interpolated twice: once as the leading
## word of the description and once in the example command line.
argspec <- local({
  script.name <- get_Rscript_filename()

  usage.head <- paste(c(
    'calculates PAGE enrichment for a number of comparisons in the human data, using the PEP lists generated from the canine data.',
    '',
    '  Usage: ',
    '    '
  ), collapse='\n')

  usage.tail <- paste(c(
    ' -p <PEPs filename>',
    '',
    '  Options:',
    '    -d <data directory>       Directory containing any input files provided by FREYA',
    '    -w <data directory>       Directory containing user-created files (including files generated by FREYA)',
    '    -o <output directory>     Directory to write/store enrichment results',
    '        '
  ), collapse='\n')

  paste(script.name, usage.head, script.name, usage.tail)
})

args <- commandArgs(trailingOnly=TRUE)

## Running with no arguments at all is treated as a request for help
if( length(args) == 0 ) {
  args <- '--help'
}

## Print the usage text to stderr and exit when help was requested
if( any(c('--help', '-h') %in% args) ) {
  write(argspec, stderr())
  quit()
}

## Set up input specs, one row per option:
##   long flag, short flag, argument mask, argument type.
## The getopt argument mask describes the flag's ARGUMENT (0 = takes none,
## 1 = argument required, 2 = argument optional). It does not make the flag
## itself mandatory, so the presence of -p is checked explicitly below.
spec <- matrix( c(
       'datadir',      'd', 2, 'character',
       'workingdir',   'w', 2, 'character',
       'outdir',       'o', 2, 'character',
       'PEP',          'p', 1, 'character'
      ),
    ncol=4,
    byrow=TRUE
         )

opt <- getopt( spec=spec )

## -p has no default: fail early with a clear message instead of letting
## read.table() fail later on a NULL filename.
if( is.null(opt$PEP) ) {
  stop('Missing required option -p <PEPs filename>; run with --help for usage.', call.=FALSE)
}

# Set defaults for optional parameters
if( is.null(opt$outdir) ) { opt$outdir <- './results' }
if( is.null(opt$datadir) ) { opt$datadir <- '../data' }
if( is.null(opt$workingdir) ) { opt$workingdir <- '../user_data' }

## If the output directory doesn't exist, create it (including any missing
## parent directories). dir.create() is used instead of shelling out to
## `mkdir -p` so that paths containing spaces or shell metacharacters work and
## no POSIX shell is required; a failed creation stops the script immediately
## rather than failing later on the first write.
if(!dir.exists(opt$outdir)) {
  print(paste('Creating output directory',opt$outdir))
  if(!dir.create(opt$outdir, recursive=TRUE)) {
    stop(paste('Could not create output directory', opt$outdir), call.=FALSE)
  }
}



###########################
###### Functions 
###########

## Load the human expression matrix and the sample labels for one comparison.
##
## Arguments:
##   comps    Character vector with the two group names being compared.
##            If it contains 'Luminal', LumA and LumB labels are merged into
##            'Luminal' before filtering; otherwise, if it contains 'TUMOR',
##            the tumor-vs-normal label file is used; otherwise the PAM50
##            labels are filtered to the names in comps.
##   datadir  Directory holding the input files. Defaults to opt$datadir, so
##            existing calls of the form get.labs(comps) are unchanged.
##
## Returns an unnamed list of two elements:
##   [[1]] log2(x + 1) expression matrix (samples in rows, genes in columns),
##         restricted to samples that have a label and to genes listed in the
##         Hum_Symb column of Canine_Human_Gene_Conversion.txt.
##   [[2]] the labels for those samples, in the same row order.
get.labs <- function(comps, datadir=opt$datadir) {
  ## Load the expression data and log-transform it
  dat <- read.table(file.path(datadir, 'BRCA_rnaseq_paired_noMets.t.txt'), sep='\t', header=TRUE, row.names=1, check.names=FALSE)
  dat <- log(as.matrix(dat) + 1, base=2)
  ## Sample IDs are matched on their first 15 characters
  rownames(dat) <- substr(rownames(dat),1,15)

  use.tumor.normal <- !('Luminal' %in% comps) && ('TUMOR' %in% comps)

  if(use.tumor.normal) {
    ## Tumor vs normal: separate headerless label file, IDs trimmed like dat's
    labs <- read.table(file.path(datadir,'BRCA_tumorVSnormal_paired.txt'), header=FALSE, row.names=1, check.names=FALSE)
    rownames(labs) <- substr(rownames(labs),1,15)
  } else {
    labs <- read.table(file.path(datadir,'BRCA_PAM50_labels.csv'), sep=',', header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors=FALSE)
    if('Luminal' %in% comps) {
      ## Collapse LumA/LumB into a single 'Luminal' class, then filter
      labs[which(labs$PAM50 %in% c('LumA','LumB')),'PAM50'] <- 'Luminal'
      labs <- labs[ which(labs$PAM50 %in% comps),,drop=FALSE ]
    } else {
      labs <- labs[labs[,1] %in% comps,,drop=FALSE]
    }
  }

  ## Keep only samples present in both the labels and the expression matrix.
  ## drop=FALSE keeps dat a matrix even when a single sample remains.
  ids <- intersect( rownames(labs), rownames(dat) )
  dat <- dat[ids,,drop=FALSE]
  ## Deliberately NOT drop=FALSE: with a single label column this yields a
  ## plain vector, which the caller compares directly against group names.
  labs <- labs[ids,]

  ## Restrict to genes present in the canine-human conversion table;
  ## drop=FALSE keeps dat a matrix even when a single gene remains.
  genes.all <- read.table(file.path(datadir,'Canine_Human_Gene_Conversion.txt'), sep='\t', header=TRUE, stringsAsFactors=FALSE)
  dat <- dat[,colnames(dat) %in% genes.all$Hum_Symb,drop=FALSE]
  return( list(dat, labs) )
}

## Load the PEP table and turn it into a long (Gene, PEP) list.
##
## Arguments:
##   fn  Path to a comma-separated file with a header row containing the
##       columns HumanSymbol, Adenoma_Expression_Pattern,
##       Tumor_Expression_Pattern and Carcinoma_Expression_Pattern.
##
## Returns a data.frame with columns Gene and PEP. A gene is listed once for
## every pattern column in which its value is below 0.05 (so a gene can appear
## more than once), in the order Adenoma, Tumor, Carcinoma. Every row of a
## gene carries the same PEP label; when a gene passes in several columns the
## label is chosen with priority Tumor > Carcinoma > Adenoma.
##
## Stops with an error if any of the required columns is missing.
load.peps <- function(fn) {
  ## Load the PEPs
  peps <- read.table(fn, sep=',', header=TRUE, stringsAsFactors=FALSE)

  pattern.cols <- c(Adenoma='Adenoma_Expression_Pattern',
                    Tumor='Tumor_Expression_Pattern',
                    Carcinoma='Carcinoma_Expression_Pattern')
  missing.cols <- setdiff(c('HumanSymbol', pattern.cols), colnames(peps))
  if(length(missing.cols) > 0) {
    stop(paste('PEP file', fn, 'is missing required column(s):', paste(missing.cols, collapse=', ')), call.=FALSE)
  }

  ## Genes passing the 0.05 cutoff in each column. which() drops rows whose
  ## value is NA; plain logical indexing would return an NA gene for them.
  sig.genes <- lapply(pattern.cols, function(col) {
    peps[which(peps[[col]] < 0.05), 'HumanSymbol']
  })

  pep.genes <- unlist(sig.genes, use.names=FALSE)
  pep.lists <- data.frame(Gene=pep.genes, PEP=rep(NA_character_, length(pep.genes)), stringsAsFactors=FALSE)

  ## Later assignments overwrite earlier ones, which gives the label priority
  for( pep.name in c('Adenoma', 'Carcinoma', 'Tumor') ) {
    pep.lists[ pep.lists$Gene %in% sig.genes[[pep.name]], 'PEP'] <- pep.name
  }

  return(pep.lists)
}




#######################
### Code to run
##########

## Load the PEP gene lists. The -p value is used as given; it is not resolved
## relative to opt$workingdir.
peps <- load.peps(opt$PEP)

## Define the list of comparisons we want to make (first group vs second group)
comparisons <- list( c('TUMOR','NORMAL'), c('Basal','Luminal'), c('LumA','Normal'), c('LumB','Normal'), c('Luminal','Normal'), c('Basal','Normal') ) 

## All results are appended to a single text file in the output directory
out.file <- paste(opt$outdir,'PAGE.txt', sep='/')

## For each comparison & PEP pair, calculate PAGE score
write('PAGE scores',file=out.file)
for( comps in comparisons ) {

  ## Print the comparison we're currently doing
  write('', file=out.file, append=TRUE)
  write(paste(comps[1], comps[2]), file=out.file, append=TRUE)
  print(comps); flush.console() 

  ## Load expression data and labels ONCE per comparison: they do not depend
  ## on the PEP, so loading them inside the PEP loop only repeats file I/O.
  dat.list <- get.labs(comps)
  dat <- dat.list[[1]]
  labs <- dat.list[[2]]

  ## Per-comparison invariants: sample masks for the two groups, the standard
  ## deviation over all expression values, and the genes that are in no PEP.
  in.first <- labs==comps[1]
  in.second <- labs==comps[2]
  sd.genes <- sd(dat)
  non.pep.genes <- colnames(dat)[ !colnames(dat) %in% peps$Gene ]

  ## For each PEP, calculate PAGE
  for( pep.name in c('Adenoma', 'Carcinoma', 'Tumor') ) {

    ## Genes to score: the background (non-PEP) genes plus the current PEP's
    ## genes, limited to those present in the expression matrix.
    ## NOTE(review): peps can list a gene more than once (load.peps
    ## concatenates the per-pattern lists), so such genes are scored and
    ## counted more than once here; kept as-is to preserve existing results.
    pep.genes <- as.character(peps[ peps$PEP==pep.name, 'Gene' ])
    pep.size <- sum(peps$PEP==pep.name)
    genes <- c(non.pep.genes, pep.genes)
    genes <- genes[ genes %in% colnames(dat) ]

    ## Per-gene score: difference of group means, scaled by sqrt(PEP size)
    ## and divided by the overall standard deviation.
    res <- vapply(genes, function(gene) {
      ( (mean( dat[in.first,gene] ) - mean( dat[in.second,gene] ) ) * sqrt(pep.size) ) / sd.genes
    }, numeric(1))

    ## Compare PEP genes against background genes with a Wilcoxon rank-sum
    ## test; run it once and reuse the p-value for both the file and console.
    temp <- data.frame( PAGE=res, PEP=names(res) %in% pep.genes )
    p.val <- signif(wilcox.test( PAGE ~ PEP, data=temp )$p.value,digits=3)
    write(paste(pep.name,p.val, sep='\t'), file=out.file, append=TRUE)
    print( paste(pep.name,p.val) ); flush.console()
    
  } # End PEPs loop
} # End comps loop

print('Success!'); flush.console()

