#!/usr/bin/Rscript

# AUTHOR:	c.g.b 
#


# Execution environment switch: TRUE when the script runs on the cluster
# (codon), FALSE when it runs on a local machine.
cluster.status <- TRUE

# Select the package library, the packages to attach and the working directory
# for the chosen environment.
if (!cluster.status) {
  # Local run: packages come from R's default library paths.
  pack.lib <- NULL
  packages <- c("readr")
  mypath <- "/home/charlie/MelanomaProject/melanomaresistance/"
} else {
  pack.lib <- "/hps/software/users/petsalaki/Rpackages/4_0_3/"
  # "purrr" was previously misspelled as "purr", so map() was never attached.
  packages <- c("crayon", "BiocGenerics", "S4Vectors", "IRanges",
                "TCGAbiolinks", "GenomeInfoDb", "GenomicRanges",
                "MatrixGenerics", "SummarizedExperiment", "DESeq2", "dorothea",
                "org.Hs.eg.db", "readr", "viper", "purrr", "dplyr", "gprofiler2", "glmGamPoi")
  mypath <- "/nfs/research/petsalaki/users/cbarker/TCGA_BRCA/paper_days_to_death"
}

# Put the project library first on the search path so that the dependencies of
# the packages below resolve from it. This is skipped for local runs: writing
# NULL into the library path would leave R with no library to search at all.
if (!is.null(pack.lib)) {
  .libPaths(c(pack.lib, .libPaths()))
}

# Attach the packages. require() returns FALSE instead of failing, so the
# packages that could not be attached are reported by name in one warning.
loaded <- suppressPackageStartupMessages(
  vapply(packages, require, logical(1), lib.loc = pack.lib, character.only = TRUE)
)
if (!all(loaded)) {
  warning("Packages that failed to load: ",
          paste(packages[!loaded], collapse = ", "), call. = FALSE)
}

# All relative paths used later in the script are resolved against this
# directory.
setwd(mypath)

#code kindly sent by Giannis Kamzolas

#  ---TF Activities---
# Inputs:
# 1) Regulon file for the organism being analysed (mouse or human). The network
#    is already built and is the same for the analysis of all the models, so it
#    does not need to be created again.
# 2) D.E. Signature (file from DESeq2 (with gene names in the last column))


# Convert a TF-target interaction table into the nested list that is passed as
# the `regulon` argument of msviper().
#
# Arguments:
#   df  data frame with the columns `tf`, `target`, `mor` and `likelihood`,
#       one row per TF-target interaction.
#
# Returns:
#   A list named by the values of `tf`. Each element is
#   list(tfmode = <`mor` values named by `target`>, likelihood = <`likelihood`>).
#
# Only base R is used, so the function does not depend on the pipe or on
# purrr/dplyr being attached.
df2regulon <- function(df) {
  required_cols <- c("tf", "target", "mor", "likelihood")
  missing_cols <- setdiff(required_cols, names(df))
  if (length(missing_cols) > 0) {
    stop("df2regulon: missing column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # One sub-table per transcription factor, then one regulon entry per table.
  lapply(split(df, df$tf), function(dat) {
    list(tfmode = setNames(dat$mor, dat$target), likelihood = dat$likelihood)
  })
}



# Estimate transcription-factor activities with msviper() from a differential
# expression table and write them to a CSV file. The regulon set is the human
# network file plus one extra entry stored under the name "Rap1_Mod".
#
# Arguments:
#   mypath        Path of the differential-expression CSV (the DESeq2 output
#                 file, not a directory; the name shadows the global `mypath`).
#                 The table must contain the columns `X` (used as ENSEMBL
#                 keys), `log2FoldChange` and `padj`.
#   organism      Only "HUMAN" is implemented; anything else is an error.
#   outfile       Path of the CSV file the activity table is written to.
#   Rap1_Mod      Extra regulon entry appended to the network. Presumably a
#                 list(tfmode = ..., likelihood = ...) like the entries built
#                 by df2regulon() - verify against the caller.
#   regulon_path  Path of the human regulon CSV; it must contain the columns
#                 read by df2regulon() plus `confidence`. Defaults to the
#                 location that was previously hard-coded.
#
# Returns:
#   NULL, invisibly. The function is called for the file it writes.
my_TFA_plus_rap1 <- function(mypath, organism, outfile, Rap1_Mod,
                             regulon_path = "~/phenotype_networks/data/DOROTHEA/human_network.csv")
{
  # Fail early with a clear message. Previously a non-human organism skipped
  # the regulon read and died later with "object 'Regulon_file' not found".
  if (!isTRUE(organism == "HUMAN")) {
    stop("my_TFA_plus_rap1: unsupported organism '", toString(organism),
         "'; only \"HUMAN\" is implemented", call. = FALSE)
  }

  # Open the human regulon file and keep confidence levels A to D. %in% is
  # used because `==` turns rows with a missing confidence into all-NA rows.
  Regulon_file <- read.csv(regulon_path, header = TRUE)
  Regulon_file <- Regulon_file[Regulon_file$confidence %in% c("A", "B", "C", "D"), ]

  # Read the DEGs file and add gene symbols.
  DEsignature <- read.table(file = mypath, sep = ",", header = TRUE)
  ens2symbol <- AnnotationDbi::select(org.Hs.eg.db,
                                      keys = DEsignature$X,
                                      columns = "SYMBOL",
                                      keytype = "ENSEMBL")
  ens2symbol <- as_tibble(ens2symbol)
  DEsignature <- inner_join(DEsignature, ens2symbol, by = c("X" = "ENSEMBL"))

  # Exclude genes without an adjusted p-value and genes with an unknown or
  # duplicated gene symbol. The first row of each symbol is kept, so the
  # signature has unique, non-missing names.
  has_padj <- !(is.na(DEsignature$padj) | DEsignature$padj == "")
  has_symbol <- !(is.na(DEsignature$SYMBOL) | DEsignature$SYMBOL == "")
  DEsignature <- DEsignature[has_padj & has_symbol, ]
  DEsignature <- DEsignature[!duplicated(DEsignature$SYMBOL), ]
  if (nrow(DEsignature) == 0) {
    stop("my_TFA_plus_rap1: no genes left in '", mypath,
         "' after filtering on padj and gene symbol", call. = FALSE)
  }

  # Signed z-score per gene: the two-sided adjusted p-value is converted to a
  # normal quantile and given the sign of the fold change. The signature is
  # then sorted in decreasing order.
  mySignature <- setNames(
    qnorm(DEsignature$padj / 2, lower.tail = FALSE) * sign(DEsignature$log2FoldChange),
    DEsignature$SYMBOL
  )
  mySignature <- mySignature[order(mySignature, decreasing = TRUE)]

  # Estimate TF activities.
  reg_rap <- df2regulon(Regulon_file)
  reg_rap$Rap1_Mod <- Rap1_Mod
  mrs <- msviper(ges = mySignature, regulon = reg_rap, ges.filter = FALSE, minsize = 4)

  TF_activities <- data.frame(Regulon = names(mrs$es$nes),
                              Size = mrs$es$size[names(mrs$es$nes)],
                              NES = mrs$es$nes,
                              p.value = mrs$es$p.value,
                              FDR = p.adjust(mrs$es$p.value, method = "fdr"))
  TF_activities <- TF_activities[order(TF_activities$p.value), ]

  # Save results.
  write.csv(TF_activities, file = outfile)
  invisible(NULL)
}

#####MAIN####

# Load the data set. It is used below through colData() and DESeqDataSet().
data <- readRDS(file = "../tcga_data.RDS")

# Sample attribute (a colData column) to study.
variable <- "paper_BRCA_Subtype_PAM50"

# Remove samples for which the attribute is missing.
data <- data[, !is.na(colData(data)[[variable]])]

# Design formula built from `variable`, so that changing the attribute above
# is enough; the column name is no longer repeated in the DESeqDataSet() call.
design_formula <- reformulate(variable)

# One-vs-rest differential expression: each selected sample is compared
# against all other samples, which are relabelled "CONTROL".
# Only the first two samples are processed.
for (sample in head(colnames(data), 2)) {
  # Rebuild the column as a two-level factor with "CONTROL" first, so the
  # reference level does not depend on how the sample name sorts
  # alphabetically, and so that no value is assigned into an existing factor.
  colData(data)[[variable]] <- factor(
    ifelse(colnames(data) == sample, sample, "CONTROL"),
    levels = c("CONTROL", sample)
  )
  ddsSE <- DESeqDataSet(data, design = design_formula)
  # Keep genes with at least 10 reads in total across all samples.
  keep <- rowSums(counts(ddsSE)) >= 10
  ddsSE <- ddsSE[keep, ]
  ddsSE <- DESeq(ddsSE)
  results_names <- resultsNames(ddsSE)
  print(results_names)
}
 # print(results)
  #important 
#  results<-gsub("/", "_", results)
 # res <- results(ddsSE, name = results)
  #dea <- as.data.frame(res)
  #summary(res)
  #write.csv(dea, file = paste(results, ".csv", sep=""))







