      
      # Performs heat diffusion from activated transcription factors in different cell shapes using the derived regulatory network 
      # 
      # AUTHOR:	C.Barker
      # INPUT: 
      #          csn_semsim.txt,          network in question with semantic similarity as edge weights. 
      #          module_names.csv,        csv converting default module names from wgcna to ones with biological function
      #          basal.csv                logical - do you want the full list of wgna modules
      #
      # OUTPUT:
      #          heat diffusion plots     both on all the seeds at once and seperately 
      #          enrichment results       enrichment of terms in 'hot' nodes 
      
      
      library(diffusr)
      library(dnet)
      library(dils)
      library(readr)
      library(org.Hs.eg.db)
      library(dplyr)
      
      ####FUNCTIONS####
      
      #function to prep seeds 
      #input: raw seeds in named double variable and adjacency matrix from AdjacencyFromEdgelist()
      prep.seeds <- function(raw.seeds, adj) {
        raw.seeds<-sqrt(raw.seeds^2)
        heat.initial<-data.frame(adj$nodelist, 0)
        for (i in seq(1:nrow(heat.initial))) {
          if (heat.initial$adj.nodelist[i] %in% names(raw.seeds)) {
            heat.initial$X0[i]<-as.numeric(raw.seeds[as.character(heat.initial$adj.nodelist[i])])
          }
          else {
            heat.initial$X0[i] <- 0 
          }
        }
        heat.initial$X0[is.na(heat.initial$X0)] <- 0
        heat.out<-heat.initial$X0
        names(heat.out)<-adj$nodelist
        return(heat.out)
      }
      
      #function to shuffle graph, preserving degree distribution - used in calculating pvalue.
      #inputs graph as igraph object to be shuffled.
      #seeds in the form of two columns - first upregulated and one downregulated - both non-negative. 
      shuffle.graph <- function(graph, seeds) {    
        rewired.graph<-rewire(graph, with = keeping_degseq(niter = vcount(graph) * 10))
        PT <- suppressMessages(dRWR(rewired.graph, normalise="none", setSeeds=seeds,
                                    restart=0.95,parallel = FALSE,verbose = FALSE))
        up<-as.numeric(PT[,1])
        down<-as.numeric(PT[,2])
        shuffle.results<-data.frame(up, down)
        return(shuffle.results)
      }
      
      #function to give pvalues given our results using graph rewirng 
      #the disadvantage of this is that the rewiring doesnt take into account edge weight. 
      #inputs: results from heat diffusion, adjacency matrix from function
      # AdjacencyFromEdgelist(), igraph object and seeds. 
      pvalues.rewire.graph <- function(results, adj, graph, seeds, niter) {
        A <- as_adjacency_matrix(graph, type = "both", names = TRUE, sparse = FALSE) #prep graph. 
        #A[A != 0] <- 1
        graph.unweighted<-graph_from_adjacency_matrix(A,weighted = NULL)
        
        up.matrix<-matrix(nrow = length(adj$nodelist),ncol = niter) #set up empty matrix
        down.matrix<-matrix(nrow = length(adj$nodelist),ncol = niter) #set down empty matrix
        for (i in seq(1,niter)) { #there must be a faster way to do this 
          shuffle.results <- shuffle.graph(graph.unweighted, seeds)
          up.matrix[,i]<-as.numeric(shuffle.results[,1])
          down.matrix[,i]<-as.numeric(shuffle.results[,2])
          cat("\r", "iteration:", i)
        }
        node.names<-c()
        p.values.up<-c()
        p.values.down<-c()
        for (i in 1:nrow(results)) {
          node.names<-c(node.names, as.character(results$NODE[i]))
          p.values.up<-c(p.values.up, ((sum(up.matrix[i,] > results$UP[i],na.rm = TRUE))/niter))
          p.values.down<-c(p.values.down, ((sum(down.matrix[i,] > results$DOWN[i],na.rm = TRUE))/niter))
        }
        names(p.values.up)<-node.names
        names(p.values.down)<-node.names
        p.values<-data.frame(p.values.up, p.values.down)
        return(p.values)
      }
      
      #function to give pvalues given our results using seed randomisation  
      #the disadvantage of this is that will account for hubs, but not the seeds themselves. 
      #inputs: results from heat diffusion, adjacency matrix from function
      # AdjacencyFromEdgelist(), igraph object and seeds. 
      pvalues.seed.shuffle <- function(results, adj, graph, seeds, niter) {
        
        up.matrix<-matrix(nrow = length(adj$nodelist),ncol = niter) #set up empty matrix
        down.matrix<-matrix(nrow = length(adj$nodelist),ncol = niter) #set up empty matrix
        for (i in seq(1,niter)){
          seeds.shuffled<-seeds[sample(nrow(seeds)),]   #randomise seeds (keeping the same value)
          rownames(seeds.shuffled)<-rownames(seeds)     #reassign rownames, so different nodes get different seeds 
          PT <- suppressMessages(dRWR(graph, normalise="none", setSeeds=seeds.shuffled, #perform random walk with restart from shuffled seeds. 
                                      restart=0.95,parallel = FALSE,verbose = FALSE))
          up<-as.numeric(PT[,1]) 
          down<-as.numeric(PT[,2])
          shuffle.results<-data.frame(up, down)
          up.matrix[,i]<-as.numeric(shuffle.results[,1]) #assign random walks to corresponding matrix
          down.matrix[,i]<-as.numeric(shuffle.results[,2])
          cat("\r", "iteration:", i)
        }
        
        p.values.up<-c()
        p.values.down<-c()
        
        for (i in 1:nrow(results)) { #go through rows of results (by protein)
          p.values.up<-c(p.values.up, ((sum(up.matrix[i,] >= results$UP[i],na.rm = TRUE))/niter))
          p.values.down<-c(p.values.down, ((sum(down.matrix[i,] >= results$DOWN[i],na.rm = TRUE))/niter))
        }
        names(p.values.up)<-results$NODE
        names(p.values.down)<-results$NODE
        p.values<-data.frame(p.values.up, p.values.down)
        return(p.values)
      }
      
      
      #path of pathways to write
      mypath<-"/home/charlie/phenotype_networks/data/" 
      setwd(mypath)
      
      #path of pathway names to read
      edgelist<-data.frame(read_delim("../network_results/csn_semsim.txt", delim = "\t")) #get our graph. usually ./totalNetwork.txt 
      #replace old wgcna module names with new ones . 
      mod_name_convert<-read_csv("~/cell_shapes/data/module_names.csv",col_names = T)
      edgelist$from[edgelist$from %in% mod_name_convert$ME_names] <- mod_name_convert$new_name[match(edgelist$from[edgelist$from %in% mod_name_convert$ME_names],
                                                                                                     mod_name_convert$ME_names)]
      edgelist$to[edgelist$to %in% mod_name_convert$ME_names] <- mod_name_convert$new_name[match(edgelist$to[edgelist$to %in% mod_name_convert$ME_names],
                                                                                                 mod_name_convert$ME_names)]
      
      adj<-AdjacencyFromEdgelist(edgelist) #turn into adjacency
      
      
      #####DOROTHEA#### 
      tfa.file<-c("~/phenotype_networks/data/DOROTHEA/luminal.csv") #get condition 
      tfa.df<-suppressMessages(read_delim(tfa.file, delim = ",", col_names = TRUE)) #read
      
      
      #we do two seperate random walks, one for upregulated and one for down regulated 
      
      #upregulated
      
      nes.heat.up<-tfa.df$NES[tfa.df$FDR < 0.05 & tfa.df$NES > 0.1] #here you decide whether to look up heat diffusuion of up regulated genes or down regulated genes 
      names(nes.heat.up)<-tfa.df$Regulon[tfa.df$FDR < 0.05 & tfa.df$NES > 0.1] #here too (change the </>)
      nes.heat.up<-prep.seeds(nes.heat.up, adj)
      #downregulated
      
      nes.heat.down<-tfa.df$NES[tfa.df$FDR < 0.05 & tfa.df$NES < -0.1] #here you decide whether to look down heat diffusuion of down regulated genes or down regulated genes 
      names(nes.heat.down)<-tfa.df$Regulon[tfa.df$FDR < 0.05 & tfa.df$NES < -0.1] #here too (change the </>)
      nes.heat.down<-prep.seeds(nes.heat.down, adj)
      
      #create igraph object from edgelist 
      graph<-IgraphFromEdgelist(edgelist,directed = TRUE)
      #addnames
      V(graph)$name <- as.character(adj$nodelist)
      
      ####SEED MANAGEMENT####
      setSeeds.dorothea <- data.frame(nes.heat.up, nes.heat.down)
      seeds.used<-setSeeds.dorothea  #set seeds here  <---------
      #write.csv(x = seeds.used,file = "../data/seeds.csv")
      
      #our problem with dorothea has got to be with out probability. we should be filtering out these common results. 
      ####RANDOM WALK####
      PTmatrix <- dRWR(g=graph, normalise="none", setSeeds=seeds.used,
                       restart=0.95,parallel = FALSE,verbose = FALSE,normalise.affinity.matrix = 'none')
      
      dim(as.matrix(PTmatrix[PTmatrix != 0]))
      h1<-as.data.frame(as.matrix(PTmatrix))
      h1<-data.frame(adj$nodelist,h1)
      colnames(h1) <- c("NODE", "UP", "DOWN") #heat scores as probabilitiy 
      
      
      
      ####SIGNIFANCE TESTING####
      #compute pvalues by randomising graph, keeping an identical degree distribution
      niter<-1000
      pvalues.res<-pvalues.seed.shuffle(h1, adj, graph, seeds.used, niter)
      pvalues.res[seeds.used != 0]<-1 #get rid of heat from seed nodes 
      results<-data.frame(h1,pvalues.res, pvalues.res<0.1)
      for (i in c(1:nrow(results))) {
        if(results[i,]$p.values.up.1 == FALSE){ #If pvalue is not signifacnt set to zero for up
          results[i,]$UP<-0
        }
        if(results[i,]$p.values.down.1 == FALSE){ #for down
          results[i,]$DOWN<-0
        }
        #if node is terminal with no inputs (only outputs) remove (so long as its also not in the seeds )  
        if(length(incident(graph, results[i,]$NODE, mode = "in")) == 0){
          results[i,]$UP<-0
          results[i,]$DOWN<-0
        }
      }
      
      library(reshape2)
      library(ggplot2)
      bar.plot<-data.frame(results$NODE, results$UP, results$DOWN)
      colnames(bar.plot)<- c("NODE", "UP", "DOWN")
      bar.plot<-bar.plot[bar.plot$UP != 0 | bar.plot$DOWN != 0,]
      df2 <- reshape2::melt(bar.plot, id.vars='NODE')
      ggplot(df2, aes(x=reorder(NODE, -value), y=value, fill=variable)) +
        geom_bar(stat='identity', position='dodge') + theme(axis.text.x = element_text(angle = 90, hjust = 1))
      
      ####enrichment#####
      
      library(BBmisc)
      library(enrichR)
      dbs <- listEnrichrDbs()
      df3 <- df2[df2$value != 0,]
      dbs <- "Panther_2016"
      enriched_prizes<- enrichr(as.character(df3$NODE), dbs)
      bound_enriched<-bind_rows(enriched_prizes, .id = "libraryName")
      bound_enriched<-bound_enriched[bound_enriched$Adjusted.P.value < 0.01 & bound_enriched$Combined.Score > 100,]
      ggplot(bound_enriched, aes(reorder(Term, Combined.Score), Combined.Score), Combined.Score) +
        geom_col(aes(fill=Adjusted.P.value<0.01)) +
        coord_flip() +
        labs(x="Regulon", y="Normalized Enrichment Score",
             title="Regulons") + 
        theme_minimal()
      
      
      ##notes : Most obvious promising thing is the heat diffused to the expression module 
      
      ####one-by-one heat diffion####
      
      one.by.one<- data.frame(rownames(setSeeds.dorothea), setSeeds.dorothea$nes.heat.up, setSeeds.dorothea$nes.heat.down)
      m.one.by.one.down<-dcast(one.by.one, rownames.setSeeds.dorothea. ~ rownames.setSeeds.dorothea., value.var = "setSeeds.dorothea.nes.heat.down")
      m.one.by.one.up<-dcast(one.by.one, rownames.setSeeds.dorothea. ~ rownames.setSeeds.dorothea., value.var = "setSeeds.dorothea.nes.heat.up")
      m.one.by.one.down[is.na(m.one.by.one.down)]<-0
      m.one.by.one.up[is.na(m.one.by.one.up)]<-0
      rownames(m.one.by.one.up)<-m.one.by.one.up$rownames.setSeeds.dorothea.
      rownames(m.one.by.one.down)<-m.one.by.one.down$rownames.setSeeds.dorothea.
      
      m.one.by.one.up$rownames.setSeeds.dorothea.<-NULL
      m.one.by.one.down$rownames.setSeeds.dorothea.<-NULL
      
      m.one.by.one.up<-m.one.by.one.up[,as.logical(colSums(m.one.by.one.up != 0) != 0)]
      m.one.by.one.down<-m.one.by.one.down[,as.logical(colSums(m.one.by.one.down != 0) != 0)]
      
      
      PTmatrixup <- dRWR(g=graph, normalise="none", setSeeds=m.one.by.one.up,
                       restart=0.95,parallel = FALSE,verbose = FALSE)
      PTmatrixdown <- dRWR(g=graph, normalise="none", setSeeds=m.one.by.one.down,
                         restart=0.95,parallel = FALSE,verbose = FALSE)
      
      h1_up<-as.data.frame(as.matrix(PTmatrixup))
      colnames(h1_up)<-colnames(m.one.by.one.up)
      h1_down<-as.data.frame(as.matrix(PTmatrixdown))
      colnames(h1_down)<-colnames(m.one.by.one.down)
      
      h1_up$ID<-rownames(m.one.by.one.up)
      h1_down$ID<-rownames(m.one.by.one.down)
      
      sigs<-data.frame(node = results$NODE, up = results$p.values.up.1, down = results$p.values.down.1)
      h1_up<-merge(x = h1_up, y = sigs, by.x = "ID", by.y = "node")
      h1_up<-h1_up[h1_up$up,] #seeds in 
      h1_down<-merge(x = h1_down, y = sigs, by.x = "ID", by.y = "node")
      h1_down<-h1_down[h1_down$down,] #seeds in 
      
      h1_up$up<-NULL
      h1_up$down<-NULL
      h1_down$up<-NULL
      h1_down$down<-NULL
      
      sig.subset.up<-h1_up#[h1_up$ID %in% df3$NODE,]
      sig.subset.down<-h1_down#[h1_down$ID %in% df3$NODE,]
      melt.up<-reshape2::melt(sig.subset.up, id="ID") 
      melt.down<-reshape2::melt(sig.subset.down, id="ID") 
      melt.up$sign<-"up"
      melt.down$sign<-"down"
      melt.heat<-rbind(melt.up, melt.down)
      #put stars on the supernodes. 
      ggplot(data = melt.heat, aes(x=variable, y = reorder(ID,value), size = value, color = sign)) + 
        geom_point()  + scale_size(range = c(0, 5))
      E(graph)[ from("Rap1 signaling") ]
      E(graph)[ to("Rap1 signaling") ]
      graph_unweighted<-graph
      E(graph_unweighted)$weight<-sqrt((E(graph)$weight)^2)
      node_of_interest <- "Rap1 signaling"
      paths<-get.all.shortest.paths(graph = graph_unweighted,from = node_of_interest,
                             to = as.character(melt.heat[melt.heat$ID == node_of_interest & melt.heat$value != 0,]$variable))
      list_subgraphs<-list()
      for (n in c(1:length(paths$res))) {
        sub_graph<-induced_subgraph(graph, paths$res[[n]])
        list_subgraphs[[n]]<-sub_graph
      }
      all_paths_graph<-do.call(igraph::union, list_subgraphs)
      V(all_paths_graph)$NES<-0
      tf_NES<-tfa.df[tfa.df$Regulon %in% as_ids(V(all_paths_graph)),]
      V(all_paths_graph)$NES[match(tf_NES$Regulon, as_ids(V(all_paths_graph)))]<-tf_NES$NES
      plot(all_paths_graph)
      library(RCy3)
            
      createNetworkFromIgraph(all_paths_graph,"heat_sub_graph")
