# This file obtains basic DEG overlap statistics, DEG bar graphs, and the volcano plots

# Read one DEGage result table. The `X` column supplies the row names, after
# which the first column is dropped (presumably that same `X` column, as
# written by write.csv -- verify against the input files).
read_degage_results <- function(csv_path) {
  res <- read.csv(csv_path)
  rownames(res) <- res$X
  res[, -1]
}

# DEGage positive-vs-negative results for the FC, NS and HC tables.
FCres <- read_degage_results("DEGage_C/DEGage_results/FC_dvenus_posvneg.csv")
NSres <- read_degage_results("DEGage_C/DEGage_results/NS_dvenus_posvneg.csv")
HCres <- read_degage_results("DEGage_C/DEGage_results/HC_dvenus_posvneg.csv")

# Replace exact-zero p-values in the FC table with a tiny positive floor so
# that -log(pval), used for the volcano plots later in this script, stays
# finite.
# Vectorised on purpose: which() ignores NA p-values (a per-row
# `if (pval == 0)` stops with an error on NA), and an empty table needs no
# special case (a `1:nrow()` loop would iterate over 1 and 0).
FCres$pval[which(FCres$pval == 0)] <- 1 * 10^-50
# Thresholds -- change these to adjust the cut-offs used below.
lfc <- 1   # rows must have |lfc| strictly greater than this
p <- 0.05  # rows must have FDR less than or equal to this

# Keep only rows that pass every criterion: |lfc| above the threshold,
# permPvals at or below the per-table cut-off, and an FDR that is present,
# finite and at or below `p`. which() discards rows where any test is NA.
# NOTE(review): the permPvals cut-off is 0.1 for FC but 0 for NS and HC --
# confirm this difference is intentional.
FCres <- FCres[which(
  abs(FCres$lfc) > lfc &
    FCres$permPvals <= 0.1 &
    !is.na(FCres$FDR) &
    !is.infinite(FCres$FDR) &
    FCres$FDR <= p
), ]

NSres <- NSres[which(
  abs(NSres$lfc) > lfc &
    NSres$permPvals <= 0 &
    !is.na(NSres$FDR) &
    !is.infinite(NSres$FDR) &
    NSres$FDR <= p
), ]

HCres <- HCres[which(
  abs(HCres$lfc) > lfc &
    HCres$permPvals <= 0 &
    !is.na(HCres$FDR) &
    !is.infinite(HCres$FDR) &
    HCres$FDR <= p
), ]

# Remove every gene whose row name contains "Rik".
# A logical mask from grepl() is used instead of `-grep(...)`: when no row
# name matches, grep() returns integer(0) and `df[-integer(0), ]` selects
# zero rows, silently emptying the whole table. The mask keeps all rows in
# that case.
FCres <- FCres[!grepl("Rik", rownames(FCres)), ]
NSres <- NSres[!grepl("Rik", rownames(NSres)), ]
HCres <- HCres[!grepl("Rik", rownames(HCres)), ]


# Per-table DEG tallies: the total number of rows left after filtering, the
# positive-lfc ("up") and negative-lfc ("down") subsets, and their sizes.

# FC
FC_ndegs <- nrow(FCres)
FC_up <- FCres[FCres$lfc > 0, ]
FC_down <- FCres[FCres$lfc < 0, ]
FC_nup <- nrow(FC_up)
FC_ndown <- nrow(FC_down)

# NS
NS_ndegs <- nrow(NSres)
NS_up <- NSres[NSres$lfc > 0, ]
NS_down <- NSres[NSres$lfc < 0, ]
NS_nup <- nrow(NS_up)
NS_ndown <- nrow(NS_down)

# HC
HC_ndegs <- nrow(HCres)
HC_up <- HCres[HCres$lfc > 0, ]
HC_down <- HCres[HCres$lfc < 0, ]
HC_nup <- nrow(HC_up)
HC_ndown <- nrow(HC_down)

### Comparison with RaoRuiz
# Load the comparison tables; the columns used in this script are
# `log2FoldChange`, `Gene.name` and `P.value`.
rr_FC <- read.csv("analysis_v3_data/FC_orig.csv")
rr_NS <- read.csv("analysis_v3_data/NS_orig.csv")
rr_HC <- read.csv("analysis_v3_data/HC_orig.csv")

# Apply the same |log fold change| threshold as for the DEGage tables.
# which() drops rows whose log2FoldChange is NA; plain logical indexing would
# turn each NA into an all-NA row that is then counted by nrow() below.
rr_FC <- rr_FC[which(abs(rr_FC$log2FoldChange) > lfc), ]
rr_NS <- rr_NS[which(abs(rr_NS$log2FoldChange) > lfc), ]
rr_HC <- rr_HC[which(abs(rr_HC$log2FoldChange) > lfc), ]

# Total number of rows passing the threshold in each table.
rr_nFC <- nrow(rr_FC)
rr_nNS <- nrow(rr_NS)
rr_nHC <- nrow(rr_HC)

# Split by sign of the fold change (no NA values remain after the filter).
rr_FCup <- rr_FC[rr_FC$log2FoldChange > 0, ]
rr_FCdown <- rr_FC[rr_FC$log2FoldChange < 0, ]
rr_NSup <- rr_NS[rr_NS$log2FoldChange > 0, ]
rr_NSdown <- rr_NS[rr_NS$log2FoldChange < 0, ]
rr_HCup <- rr_HC[rr_HC$log2FoldChange > 0, ]
rr_HCdown <- rr_HC[rr_HC$log2FoldChange < 0, ]

# Sizes of the up/down subsets.
rr_FCnup <- nrow(rr_FCup)
rr_FCndown <- nrow(rr_FCdown)
rr_NSnup <- nrow(rr_NSup)
rr_NSndown <- nrow(rr_NSdown)
rr_HCnup <- nrow(rr_HCup)
rr_HCndown <- nrow(rr_HCdown)

# Number of comparison-table genes that also appear among the DEGage row
# names, per table.
FC_nsame <- sum(rr_FC$Gene.name %in% rownames(FCres))
NS_nsame <- sum(rr_NS$Gene.name %in% rownames(NSres))
HC_nsame <- sum(rr_HC$Gene.name %in% rownames(HCres))

# Overlap expressed as a proportion.
# NOTE(review): FC divides by the size of the comparison table, whereas NS
# and HC divide by the DEGage DEG count -- confirm which denominator is
# intended.
FC_propsame <- FC_nsame / nrow(rr_FC)
NS_propsame <- NS_nsame / NS_ndegs
HC_propsame <- HC_nsame / HC_ndegs

# FC rows found in both tables, only in DEGage, and only in the comparison
# table respectively.
FCshared <- FCres[rownames(FCres) %in% rr_FC$Gene.name, ]
FCdiff <- FCres[!(rownames(FCres) %in% rr_FC$Gene.name), ]
rr_FCdff <- rr_FC[!(rr_FC$Gene.name %in% rownames(FCres)), ]

# Sort each DEGage table by p-value. (FCres is re-sorted by gene name a few
# lines below.)
FCres <- FCres[order(FCres$pval), ]
NSres <- NSres[order(NSres$pval), ]
HCres <- HCres[order(HCres$pval), ]

# Fold changes from FC_lfc_calcs_all.csv replace the lfc columns of both FC
# tables; genes are identified by the `X` column of that file.
all.FC.lfcs <- read.csv("FC_lfc_calcs_all.csv")
FCres$genename <- rownames(FCres)

# Sort all three tables by gene name.
rr_FC <- rr_FC[order(rr_FC$Gene.name), ]
FCres <- FCres[order(FCres$genename), ]
all.FC.lfcs <- all.FC.lfcs[order(all.FC.lfcs$X), ]

# Keep only the genes that have an entry in the fold-change file.
rr_FC <- rr_FC[rr_FC$Gene.name %in% all.FC.lfcs$X, ]
FCres <- FCres[FCres$genename %in% all.FC.lfcs$X, ]

# Look the fold changes up by gene name with match(). Copying them over by
# position only works when both tables are identically sorted and free of
# duplicated names; otherwise values are misassigned or the assignment fails
# on a length mismatch.
rr_FC$log2FoldChange <-
  all.FC.lfcs$log2FoldChange[match(rr_FC$Gene.name, all.FC.lfcs$X)]
FCres$lfc <-
  all.FC.lfcs$log2FoldChange[match(FCres$genename, all.FC.lfcs$X)]

# Re-apply the thresholds with the substituted fold changes. which() drops
# rows where a test is NA instead of turning them into all-NA rows.
rr_FC <- rr_FC[which(abs(rr_FC$log2FoldChange) > lfc), ]
FCres <- FCres[which(FCres$pval < p & abs(FCres$lfc) > lfc), ]

# Genes with an absolute fold change above `min_lfc` in each FC table.
min_lfc <- 6
top_FC <- FCres[abs(FCres$lfc) > min_lfc, ]
top_Rao <- rr_FC[abs(rr_FC$log2FoldChange) > min_lfc, ]

# Top DEGage genes: also among the top comparison genes, or not.
non_unique_FC <- top_FC$genename[top_FC$genename %in% top_Rao$Gene.name]
unique_FC <- top_FC$genename[!(top_FC$genename %in% top_Rao$Gene.name)]

# Top comparison genes: also among the top DEGage genes, or not.
non_unique_rr <- top_Rao$Gene.name[top_Rao$Gene.name %in% top_FC$genename]
unique_rr <- top_Rao$Gene.name[!(top_Rao$Gene.name %in% top_FC$genename)]

#DEG bar graph
# ggplot2 and ggpubr are attached here because this section calls ggplot()
# and ggarrange() before the library() calls further down the script.
library(ggplot2)
library(ggpubr)

# Build one stacked bar chart of up/down DEG counts.
#   n_up, n_down: counts in the order FC, NS, HC
#   plot_title:   title shown above the chart
# Returns the ggplot object.
deg_barplot <- function(n_up, n_down, plot_title) {
  regions <- c("FC", "NS", "HC")
  # The long-format table is built directly so that no melt() (whose package
  # is never attached in this script) is required. Column names and the
  # "up"-then-"down" factor level order match what melt() would produce, so
  # the fill colours map the same way.
  counts <- data.frame(
    variable = factor(
      rep(c("up", "down"), each = length(regions)),
      levels = c("up", "down")
    ),
    value = c(n_up, n_down),
    Condition = rep(regions, times = 2)
  )

  ggplot(counts, aes(x = Condition, y = value, fill = variable)) +
    geom_bar(position = "stack", stat = "identity") +
    scale_fill_manual(values = c("darkred", "darkblue")) +
    theme_minimal() +
    labs(title = plot_title) +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.title = element_blank()
    ) +
    ylim(0, 2000)
}

degplot <- deg_barplot(
  c(FC_nup, NS_nup, HC_nup),
  c(FC_ndown, NS_ndown, HC_ndown),
  "DEGage"
)

rr_degplot <- deg_barplot(
  c(rr_FCnup, rr_NSnup, rr_HCnup),
  c(rr_FCndown, rr_NSndown, rr_HCndown),
  "Rao Ruiz"
)

# Both charts side by side with a single shared legend.
ggarrange(degplot, rr_degplot, common.legend = TRUE)
# Volcano plots
library(ggplot2)

# Layers common to every volcano panel that come before the gene labels:
# points, a vertical line at x = 0, axis labels and the two-colour scale.
volcano_points <- function() {
  list(
    geom_point(size = 0.5, show.legend = FALSE),
    geom_vline(xintercept = 0),
    labs(x = "Log2 Fold Change", y = "-logP"),
    scale_color_manual(values = c("darkblue", "darkred"))
  )
}

# Text layer labelling the rows of `label_data` in a fixed colour.
gene_labels <- function(label_data, label_aes, label_colour) {
  geom_text(
    data = label_data, label_aes, col = label_colour,
    check_overlap = FALSE, size = 3, nudge_x = .4
  )
}

# Theme and axis limits common to every panel. The x window runs from
# `x_from` to `x_to`; the y axis is fixed at 0..125.
volcano_frame <- function(x_from, x_to,
                          panel_theme = theme(
                            plot.title = element_text(hjust = 0.5)
                          )) {
  list(
    theme_classic(),
    panel_theme,
    xlim(x_from, x_to),
    ylim(0, 125)
  )
}

# One panel of the DEGage FC results. Genes in `unique_FC` are labelled in
# gold4, genes in `non_unique_FC` in black.
fc_volcano <- function(x_from, x_to) {
  ggplot(data = FCres, aes(x = lfc, y = -log(pval), color = lfc > 0)) +
    volcano_points() +
    gene_labels(
      subset(FCres, genename %in% unique_FC), aes(label = genename), "gold4"
    ) +
    gene_labels(
      subset(FCres, genename %in% non_unique_FC), aes(label = genename), "black"
    ) +
    volcano_frame(x_from, x_to)
}

# One panel of the comparison (rr_FC) results. Genes in `unique_rr` are
# labelled in gold4, genes in `non_unique_rr` in black.
rr_volcano <- function(x_from, x_to, ...) {
  ggplot(
    data = rr_FC,
    aes(x = log2FoldChange, y = -log(P.value), color = log2FoldChange > 0)
  ) +
    volcano_points() +
    gene_labels(
      subset(rr_FC, Gene.name %in% unique_rr), aes(label = Gene.name), "gold4"
    ) +
    gene_labels(
      subset(rr_FC, Gene.name %in% non_unique_rr), aes(label = Gene.name),
      "black"
    ) +
    volcano_frame(x_from, x_to, ...)
}

# Positive and negative tails beyond +/- min_lfc for each data set.
FCpos <- fc_volcano(min_lfc, 11)
FC_neg <- fc_volcano(-9, -min_lfc)
rr_pos <- rr_volcano(min_lfc, 11)
rr_neg <- rr_volcano(
  -9, -min_lfc,
  panel_theme = theme(
    plot.title = element_text(hjust = 0.5),
    panel.background = element_blank()
  )
)

# 2 x 2 layout: DEGage panels on top, comparison panels underneath.
library(patchwork)
library(ggpubr)
(FC_neg + FCpos) / (rr_neg + rr_pos)

