  
  
  # library
  library(ggridges)
  library(ggplot2)
  library(purrr)
  # Libraries
  library(tidyverse)
  library(hrbrthemes)
  library(viridis)
  library(forcats)
  
  # Load the pathway-enrichment tables for the simulated modules
  # (sim_pathways.csv) and the real modules (real_pathways.csv).
  merge_list <- read.csv("~/phenotype_networks/sim_pathways.csv")
  filt.reactome.df <- read.csv("~/phenotype_networks/real_pathways.csv")

  # Tabulate the first "/"-separated field of Overlap for each table
  # (presumably the hit count in a "hits/pathway size" string -- TODO confirm
  # against the CSV). map_chr() guarantees exactly one string per row instead
  # of relying on as.numeric() to coerce a list.
  table(as.numeric(map_chr(strsplit(merge_list$Overlap, "/"), 1)))
  table(as.numeric(map_chr(strsplit(filt.reactome.df$Overlap, "/"), 1)))
  
  # Build one long table holding the real and the simulated pathway
  # enrichments, labelled by origin in T_F, for the plots and the per-pathway
  # comparison further down.
  real_module_pathways <- data.frame(
    Mod_name = filt.reactome.df$Mod_name,
    Term = filt.reactome.df$Term,
    ID = filt.reactome.df$ID,
    Overlap = filt.reactome.df$Overlap,
    P.value = filt.reactome.df$P.value,
    Adj.P.value = filt.reactome.df$Adj.P.value,
    Odds.Ratio = filt.reactome.df$Odds.Ratio,
    Genes = filt.reactome.df$Genes,
    T_F = "Real"
  )

  # Keep only the significant real enrichments. which() discards rows whose
  # adjusted p-value is NA; a bare logical index would instead return them as
  # all-NA rows.
  real_module_pathways <- real_module_pathways[
    which(real_module_pathways$Adj.P.value < 0.05),
  ]

  # Remove the X and REACTOME_ID columns from the simulated table before
  # binding (presumably so its columns line up with the real table -- rbind()
  # requires matching column names).
  merge_list$X <- NULL
  merge_list$REACTOME_ID <- NULL
  merge_list <- data.frame(merge_list, T_F = "Simulated")
  hypothesis_testing <- rbind(real_module_pathways, merge_list)

  # First "/"-separated field of Overlap, as a number.
  hypothesis_testing$No_of_TFs <- as.numeric(
    map_chr(strsplit(hypothesis_testing$Overlap, "/"), 1)
  )

  # Exclude the top-level "Signal Transduction" term from the comparison.
  hypothesis_testing <- hypothesis_testing[
    hypothesis_testing$Term != "Signal Transduction Homo sapiens R-HSA-162582",
  ]
  hypothesis_testing$minuslog2 <- -log2(hypothesis_testing$Adj.P.value)

  # Disabled filter, kept for reference:
  # hypothesis_testing <- hypothesis_testing[hypothesis_testing$Adj.P.value < 0.1, ]
  # Print the combined table for inspection.
  hypothesis_testing

  # Rows whose Term contains the literal string "NOTCH".
  NOTCH <- hypothesis_testing[
    grepl("NOTCH", hypothesis_testing$Term, fixed = TRUE),
  ]
  # Histogram of No_of_TFs over all pathways, with real and simulated rows
  # stacked and coloured by T_F.
  hypothesis_testing %>%
    ggplot(aes(x = No_of_TFs, color = T_F, fill = T_F)) +
    geom_histogram(alpha = 1, binwidth = 1) +
    scale_fill_viridis(discrete = TRUE) +
    scale_color_viridis(discrete = TRUE) +
    theme_ipsum() +
    # The plot has no facets, so the facet-only settings (panel.spacing,
    # strip.text.x) were dropped; only the legend suppression has an effect.
    theme(legend.position = "none") +
    xlab("") +
    # geom_histogram() draws bin counts, not probabilities; use the same
    # label as the faceted histogram of this variable.
    ylab("Frequency")
  
  # Per-pathway histograms of No_of_TFs: one panel per Term, real and
  # simulated rows distinguished by colour and fill.
  p <- ggplot(
    hypothesis_testing,
    aes(x = No_of_TFs, fill = T_F, color = T_F)
  ) +
    geom_histogram(binwidth = 1, alpha = 0.6) +
    scale_color_viridis(discrete = TRUE) +
    scale_fill_viridis(discrete = TRUE) +
    facet_wrap(~Term) +
    ylab("Frequency") +
    xlab("Overlap of TFs with pathways") +
    theme_ipsum()
  p

  # Per-pathway density of -log2(adjusted p-value), one curve per origin.
  hypothesis_testing %>%
    ggplot(aes(x = minuslog2, color = T_F)) +
    facet_wrap(~Term) +
    geom_density()
  
  # Per-pathway p-values based on the distribution of simulated enrichments.
  #
  # For each real pathway, count the simulated rows with the same Term whose
  # adjusted p-value is strictly larger than the real one, and divide by the
  # total number of perturbations.
  #
  # NOTE(review): the comparison counts simulations that are LESS significant
  # than the real result. A conventional empirical p-value counts simulations
  # at least as extreme as the observation -- confirm the intended direction.
  n_perturbations <- 1000 # total number of perturbations

  # vapply() over seq_len() yields an empty numeric vector when there are no
  # real pathways (1:nrow() would iterate over c(1, 0)) and avoids growing the
  # result with c() on every iteration.
  pathway_adj_p <- vapply(
    seq_len(nrow(real_module_pathways)),
    function(i) {
      same_term <- merge_list$Term == real_module_pathways$Term[i]
      simulated_adj_p <- merge_list$Adj.P.value[same_term]
      sum(real_module_pathways$Adj.P.value[i] < simulated_adj_p) /
        n_perturbations
    },
    numeric(1)
  )
  
  # Author note: Signal Transduction is the highest for adjusted p-values and
  # for odds ratio. The odds ratio looks odd because no cut-off on the number
  # of TFs has been introduced yet, which it should be.

  # Attach the simulation-based p-values to the real pathway table.
  pathway_adj_df <- real_module_pathways
  pathway_adj_df$pathway_adj_p <- pathway_adj_p

  # Print the table ordered from the largest to the smallest value.
  pathway_adj_df[order(pathway_adj_df$pathway_adj_p, decreasing = TRUE), ]

  # Correct the simulation-based p-values for multiple testing (FDR).
  pathway_adj_df$pathway_adj_p_ms_adj <- p.adjust(
    pathway_adj_df$pathway_adj_p,
    method = "fdr"
  )

  # Supplementary table: all rows are kept. Disabled filter, for reference:
  # sup_table <- pathway_adj_df[pathway_adj_df$pathway_adj_p_ms_adj < 0.05, ]
  sup_table <- pathway_adj_df
  sup_table$T_F <- NULL
  sup_table$ID <- NULL
  write_csv(
    sup_table[order(sup_table$pathway_adj_p_ms_adj), ],
    file = "~/barker_et_al_2021/REVISED_VERSION/SUPP_DATA/Supp_pathway_specific.csv"
  )
  # Scatter of the simulation-based p-value (x) against the enrichment
  # adjusted p-value (y) for each real pathway, labelled with the pathway term.
  library(ggrepel)
  ggplot(pathway_adj_df, aes(x = pathway_adj_p, y = Adj.P.value)) +
    # binaxis / stackdir / stackratio / dotsize are geom_dotplot() parameters;
    # geom_point() ignores them with a warning, so they were removed.
    geom_point() +
    geom_label_repel(
      aes(label = Term),
      box.padding = 0.35,
      point.padding = 0.5,
      segment.color = "grey50"
    ) +
    theme_classic()