library(gprofiler2)

# ---- Inputs ----------------------------------------------------------------
# Directory scanned further down for per-module TF result files (*.txt).
# The trailing slash is kept on purpose: code below may rely on the exact
# paths that list.files() builds from this string.
cell.shape.path <- "/home/charlie/cell_shapes/trrust/tfFORmodules/activateORinhibit_2/"

# TRRUST table, read without a header so the columns are named V1, V2, ...
# Later code uses V1 as the transcription factor and V2 as its target gene.
trrust_raw <- read.delim(
  "~/cell_shapes/trrust/tfFORmodules/trrust_rawdata.human.tsv",
  header = FALSE
)

# Provides get_wgcna().
source("~/cell_shapes/scripts/get_wgcna.R")

# Module membership; later code indexes this object by module name and
# treats each element as a vector of gene identifiers.
# NOTE(review): `edgelist` is not defined anywhere in this file -- it must be
# created by get_wgcna.R or already exist in the session. Confirm.
module_genes <- get_wgcna(
  path_wgcna = "~/cell_shapes/genesprmodule/ALLgenesprmodule.tab",
  path_correlations = "~/cell_shapes/genesprmodule/correlations.txt",
  network = edgelist,
  is.full = TRUE
)
# ---- Collect per-module TF enrichment results --------------------------------
# `pattern` is a regular expression, not a shell glob, so the extension is
# matched explicitly and anchored at the end of the file name.
modules <- list.files(path = cell.shape.path, pattern = "\\.txt$",
                      full.names = TRUE, recursive = FALSE)
if (length(modules) == 0) {
  stop("No .txt result files found in ", cell.shape.path, call. = FALSE)
}

# Read each file as a headerless tab-delimited table (columns X1, X2, ...) and
# tag every row with the path of the file it came from. The pieces are bound
# once at the end instead of growing a data frame inside a loop.
# read_delim is called through its namespace because this file never attaches
# readr itself.
per_module <- lapply(modules, function(module) {
  tf.file <- suppressMessages(
    readr::read_delim(module, delim = "\t", col_names = FALSE)
  )
  data.frame(tf.file, module = module)
})
table <- do.call(rbind, per_module)

# Keep rows whose third column is below 0.1. which() drops rows where X3 is
# NA; plain logical indexing would turn them into all-NA rows.
sig_table <- table[which(table$X3 < 0.1), ]

# Reduce the file path to the module name: take the file name, then everything
# before its first underscore. basename() is used instead of splitting on "//"
# so the result does not depend on list.files() doubling the path separator.
sig_table$module <- sub("_.*$", "", basename(sig_table$module))
# ---- Annotate each significant TF with its targets inside the module ---------
# X1 is treated as "<TF>_<sign>": the first "_"-separated piece is the TF name
# and the last piece is the sign. Split once and reuse the pieces.
tf_sign_parts <- strsplit(as.character(sig_table$X1), "_", fixed = TRUE)
tf_names <- vapply(tf_sign_parts, function(p) p[1], character(1))
tf_signs <- vapply(
  tf_sign_parts,
  function(p) if (length(p) > 0) p[length(p)] else NA_character_,
  character(1)
)

# For every row: genes that are both members of the row's module and listed as
# targets (V2) of the row's TF (V1) in TRRUST, collapsed into one string.
# seq_len() yields an empty sequence for an empty table, unlike 1:nrow().
targets <- vapply(seq_len(nrow(sig_table)), function(n) {
  tf_targets <- trrust_raw$V2[which(trrust_raw$V1 == tf_names[n])]
  module_members <- module_genes[sig_table$module[n]][[1]]
  paste(intersect(module_members, tf_targets), collapse = ", ")
}, character(1))

# Columns are appended in this exact order (targets, TF, sign) because the
# table is renamed by column position further down.
# NOTE: some TFs have only a single overlapping target -- possibly a gene
# identifier conversion problem; not verified.
sig_table$targets <- targets
sig_table$TF <- tf_names
sig_table$sign <- tf_signs
sig_table$X1 <- NULL
sig_table$X4 <- NULL
# ---- Collapse TFs reported with more than one sign ---------------------------
# If a TF appears with both an activation and an inhibition entry, that is
# because it is annotated as having an unknown sign in TRRUST. For such a TF
# (only when all of its rows belong to a single module) the sign is re-assigned
# to "Unknown" and one of the duplicate rows is removed: the one with the
# smallest FDR (column X3).
# NOTE(review): this drops the MORE significant of the duplicate rows and keeps
# the other one -- confirm that this is the intended choice.
index <- integer(0)
for (unique_tf in unique(sig_table$TF)) {
  tf_rows <- which(sig_table$TF == unique_tf)
  sig_tf <- sig_table[tf_rows, ]
  if (length(unique(sig_tf$module)) == 1 && length(unique(sig_tf$sign)) > 1) {
    sig_table$sign[tf_rows] <- "Unknown"
    # First row of this TF holding the minimum X3.
    index <- c(index, tf_rows[which.min(sig_tf$X3)])
  }
}

# Negative indexing with an empty index selects NO rows, so only drop rows when
# at least one duplicate was actually found.
sup_tf_table <- if (length(index) > 0) sig_table[-index, ] else sig_table
# ---- Final layout of supplementary table 1 -----------------------------------
# Replace the internal module identifiers (ME_names) with their display names.
newname <- read.csv("~/cell_shapes/data/module_names.csv")
sup_tf_table$module <- newname$new_name[
  match(sup_tf_table$module, newname$ME_names)
]

# Columns are renamed by position, so this relies on the column order produced
# by the preceding steps.
names(sup_tf_table) <- c(
  "P value", "adj P value", "Module Name",
  "TF targets", "TF", "Annotated Sign"
)

# Reorder the columns for presentation.
end_table <- sup_tf_table[
  c("Module Name", "TF", "TF targets", "Annotated Sign", "P value", "adj P value")
]

# Show the table sorted by adjusted P value (bare expression: auto-printed only
# where top-level values are printed).
end_table[order(end_table[["adj P value"]]), ]

# ---- Supplementary table 2: enrichment rows for each module's chosen db ------
full_module_enrich <- read.csv("~/cell_shapes/data/full_module_names.csv")
module_name_supp <- read.csv("~/cell_shapes/data/module_names.csv")

# For every row of module_name_supp, keep the enrichment rows whose `module`
# equals that row's ME_names and whose `top_db` equals that row's db.
# seq_len() is safe for an empty table, and the pieces are bound once instead
# of growing a data frame inside a loop.
per_module_hits <- lapply(seq_len(nrow(module_name_supp)), function(n) {
  keep <- which(
    full_module_enrich$module == module_name_supp$ME_names[n] &
      full_module_enrich$top_db == module_name_supp$db[n]
  )
  full_module_enrich[keep, ]
})

# Seeding with a zero-row slice guarantees a data frame with the right columns
# even when nothing matched.
out_sup <- do.call(rbind, c(list(full_module_enrich[0, ]), per_module_hits))

# Attach the display name of each module.
out_sup$NewName <- module_name_supp$new_name[
  match(out_sup$module, module_name_supp$ME_names)
]

# ---- Write the supplementary tables ------------------------------------------
# write_csv is called through the readr namespace because this file never
# attaches readr itself; the qualified call works whether or not another
# script has attached it.
# Table 1 is written sorted by adjusted P value; table 2 is written as built.
readr::write_csv(
  x = end_table[order(end_table$`adj P value`), ],
  file = "~/cell_shapes/manuscript/figures/cell shape figures/supplementary/STable1.csv"
)
readr::write_csv(
  x = out_sup,
  file = "~/cell_shapes/manuscript/figures/cell shape figures/supplementary/STable2.csv"
)

