# Analysis V3 compared dVenus+ vs. dVenus- samples; this analysis compares FC to NS and HC, using both pooled and separated samples.

setwd("~/Documents/DEGage_stuff/DEGage_Testing/RaoRuiz")
library(stringi)
library(stringr)
library(DEGage)
library(DESeq2)
# Load the tab-separated count table; sample names become the column names.
df <- read.delim("GSE129024_counts_per_gene_sample.txt", sep = "\t")

# Return the columns of `counts` whose names carry `code` at character
# positions `first`..`last`. drop = FALSE keeps the result a data frame even
# when only a single column matches, so the ncol() and cbind() calls further
# down keep working.
select_by_code <- function(counts, first, last, code) {
  counts[, substr(colnames(counts), first, last) == code, drop = FALSE]
}

# The first two characters of each sample name give the condition.
FC <- select_by_code(df, 1, 2, "FC")
NS <- select_by_code(df, 1, 2, "NS")
HC <- select_by_code(df, 1, 2, "HC")

# The fourth character of the sample name splits each condition into the
# "G" samples (stored as *pos) and the "N" samples (stored as *neg).
# NOTE(review): presumably G = dVenus+ and N = dVenus-; confirm against the
# sample sheet.
FCpos <- select_by_code(FC, 4, 4, "G")
FCneg <- select_by_code(FC, 4, 4, "N")
NSpos <- select_by_code(NS, 4, 4, "G")
NSneg <- select_by_code(NS, 4, 4, "N")
HCpos <- select_by_code(HC, 4, 4, "G")
HCneg <- select_by_code(HC, 4, 4, "N")

####### Differential expression analysis with DEGage

# Run DEGage on one comparison.
#   case, control: count tables with one column per sample.
# Samples from `case` get group label 1 and samples from `control` get group
# label 2, in the same order in which the columns are bound together.
# Returns whatever DEGage() returns for that comparison.
run_degage <- function(case, control) {
  group <- factor(c(rep(1, ncol(case)), rep(2, ncol(control))))
  DEGage(cbind(case, control), group = group, perm.preprocess = FALSE)
}

# Pooled samples
FCvNS <- run_degage(FC, NS)
FCvHC <- run_degage(FC, HC)

# *pos samples only
FCposvNSpos <- run_degage(FCpos, NSpos)
FCposvHCpos <- run_degage(FCpos, HCpos)

# *neg samples only
FCnegvNSneg <- run_degage(FCneg, NSneg)
FCnegvHCneg <- run_degage(FCneg, HCneg)

# Save one CSV per comparison; the file name is the comparison name.
degage_dir <- "DEGage_C/DEGage_results"
write.csv(FCvNS, file.path(degage_dir, "FCvNS.csv"))
write.csv(FCvHC, file.path(degage_dir, "FCvHC.csv"))
write.csv(FCposvNSpos, file.path(degage_dir, "FCposvNSpos.csv"))
write.csv(FCposvHCpos, file.path(degage_dir, "FCposvHCpos.csv"))
write.csv(FCnegvNSneg, file.path(degage_dir, "FCnegvNSneg.csv"))
write.csv(FCnegvHCneg, file.path(degage_dir, "FCnegvHCneg.csv"))

# DESeq2 analysis, run to obtain log2 fold changes for each comparison

# Run DESeq2 on one comparison and return results() as a data frame.
#   case, control: count tables with one column per sample.
# Samples from `case` are labelled "pos" and samples from `control` are
# labelled "neg". These labels only mark the two sides of the comparison; they
# are unrelated to the *pos / *neg sample subsets. "neg" is declared as the
# first (reference) factor level, which is also what alphabetical ordering of
# the two labels gives.
# NOTE(review): with results() defaults the fold change should therefore be
# case over control; confirm against the DESeq2 documentation.
run_deseq_lfc <- function(case, control) {
  counts <- cbind(case, control)
  # colData as a data frame with one row per sample, named after the count
  # columns so samples and annotations line up by name.
  sample_info <- data.frame(
    Group = factor(
      c(rep("pos", ncol(case)), rep("neg", ncol(control))),
      levels = c("neg", "pos")
    ),
    row.names = colnames(counts)
  )
  dds <- DESeqDataSetFromMatrix(
    countData = counts,
    colData = sample_info,
    design = ~Group
  )
  as.data.frame(results(DESeq(dds)))
}

# Pooled samples
FCvNSlfc <- run_deseq_lfc(FC, NS)
FCvHClfc <- run_deseq_lfc(FC, HC)

# *pos samples only
FCposvNSposlfc <- run_deseq_lfc(FCpos, NSpos)
FCposvHCposlfc <- run_deseq_lfc(FCpos, HCpos)

# *neg samples only
FCnegvNSneglfc <- run_deseq_lfc(FCneg, NSneg)
FCnegvHCneglfc <- run_deseq_lfc(FCneg, HCneg)

# Save under the same file names as the DEGage results, in the lfcs/
# subdirectory, so the two result sets can be paired by file name.
lfc_out_dir <- "DEGage_C/DEGage_results/lfcs"
write.csv(FCvNSlfc, file.path(lfc_out_dir, "FCvNS.csv"))
write.csv(FCvHClfc, file.path(lfc_out_dir, "FCvHC.csv"))
write.csv(FCposvNSposlfc, file.path(lfc_out_dir, "FCposvNSpos.csv"))
write.csv(FCposvHCposlfc, file.path(lfc_out_dir, "FCposvHCpos.csv"))
write.csv(FCnegvNSneglfc, file.path(lfc_out_dir, "FCnegvNSneg.csv"))
write.csv(FCnegvHCneglfc, file.path(lfc_out_dir, "FCnegvHCneg.csv"))

#### START HERE ONCE THE RESULT FILES ABOVE HAVE BEEN WRITTEN
# Read the DEGage results and the DESeq2 log2 fold changes back in, one
# comparison per file.

lfcfile <- list.files("DEGage_C/DEGage_results/lfcs/")

# Pull out the DEGs of every comparison and attach the DESeq2 LFC to them.
# A gene is kept when pval is not missing, FDR < 0.05, permPvals == 0 and
# |lfc| > 2; genes whose name contains "Rik" are then dropped.
# dfs[[i]] holds the filtered table for lfcfile[i] (also reachable by name).
dfs <- vector("list", length(lfcfile))
names(dfs) <- lfcfile
for (i in seq_along(lfcfile)) {
  lfc_tab <- read.csv(file.path("DEGage_C/DEGage_results/lfcs", lfcfile[i]))
  res <- read.csv(file.path("DEGage_C/DEGage_results", lfcfile[i]))

  # Fail loudly if a column is missing; `$` on an absent column yields NULL
  # and the filters below would then silently return an empty table.
  stopifnot(
    all(c("X", "pval", "FDR", "permPvals") %in% names(res)),
    all(c("X", "log2FoldChange") %in% names(lfc_tab))
  )

  # which() discards NA comparisons instead of turning them into all-NA rows.
  significant <- !is.na(res$pval) & res$FDR < 0.05 & res$permPvals == 0
  res <- res[which(significant), ]

  # Look the LFC up by gene name, so the two files need neither the same row
  # order nor the same gene set (genes without an LFC get NA and are removed
  # by the fold-change filter below).
  res$lfc <- lfc_tab$log2FoldChange[match(res$X, lfc_tab$X)]
  res <- res[which(abs(res$lfc) > 2), ]

  # Logical mask rather than -grep(): a negative index built from zero
  # matches would select no rows at all.
  res <- res[!grepl("Rik", res$X), ]

  dfs[[i]] <- res
  write.csv(dfs[[i]], file.path("DEGage_C", paste0("Degs_", lfcfile[i])))
}

# Find genes that are differentially expressed in both FC v HC and FC v NS,
# for the pooled samples and for the *pos and *neg subsets.

# Fetch the DEG table that was built from result file `file`.
# dfs[[i]] corresponds to lfcfile[i], and list.files() returns names sorted
# alphabetically (the "neg" files come before the "pos" files), so the tables
# are looked up by file name instead of by a hard-coded position.
deg_table <- function(file) {
  idx <- match(file, lfcfile)
  if (is.na(idx)) {
    stop("No DEG table loaded for ", file, call. = FALSE)
  }
  dfs[[idx]]
}

# Return the rows of `hc` whose gene is also present in `ns`, with the LFC
# from `ns` added as column lfc2 and genes containing "Rik" removed.
shared_degs <- function(hc, ns) {
  shared <- hc[hc$X %in% ns$X, ]
  # match() pairs each gene with its own row in `ns`, whatever the row order.
  shared$lfc2 <- ns$lfc[match(shared$X, ns$X)]
  # Logical mask rather than -grep(), which selects no rows on zero matches.
  shared[!grepl("Rik", shared$X), ]
}

sharedgenes <- shared_degs(deg_table("FCvHC.csv"), deg_table("FCvNS.csv"))
# Membership check against the STRING network genes. Those are only loaded
# further down, so this runs only when they are already in the session.
if (exists("networkgenes")) {
  print(networkgenes %in% sharedgenes$X)
}

sharedgenespos <- shared_degs(
  deg_table("FCposvHCpos.csv"),
  deg_table("FCposvNSpos.csv")
)

sharedgenesneg <- shared_degs(
  deg_table("FCnegvHCneg.csv"),
  deg_table("FCnegvNSneg.csv")
)

# Shared *pos genes that are not among the shared *neg genes (compared by
# gene name, row-wise).
sharedpos_only <- sharedgenespos[!(sharedgenespos$X %in% sharedgenesneg$X), ]

write.csv(sharedgenes, "AnalysisV4/sharedgenes.csv")
write.csv(sharedgenespos, "AnalysisV4/sharedgenespos.csv")
write.csv(sharedgenesneg, "AnalysisV4/sharedgenesneg.csv")
write.csv(sharedpos_only, "AnalysisV4/sharedpos_neg_elim.csv")

# Gene-name-only versions of the same four tables.
write.csv(as.character(sharedgenes$X), "AnalysisV4/sharedgenes_genelist.csv")
write.csv(sharedgenespos$X, "AnalysisV4/sharedgenespos_genelist.csv")
write.csv(sharedgenesneg$X, "AnalysisV4/sharedgenesneg_genelist.csv")
write.csv(sharedpos_only$X, "AnalysisV4/sharedpos_neg_elim_genelist.csv")


# Network analysis was done in STRING; import the resulting gene list.
networkgenes <- read.table("AnalysisV4/FCvAllnetwork.csv")$V1

# Read the DEGage results and LFC tables stored under AnalysisV4/ (same file
# names as in lfcfile) and attach the LFC to each result table by gene name.
dfs <- vector("list", length(lfcfile))
names(dfs) <- lfcfile
for (i in seq_along(lfcfile)) {
  lfc_tab <- read.csv(file.path("AnalysisV4/lfcs", lfcfile[i]))
  res <- read.csv(file.path("AnalysisV4/DEGageres", lfcfile[i]))
  # match() pairs each gene with its own LFC regardless of row order.
  res$lfc <- lfc_tab$log2FoldChange[match(res$X, lfc_tab$X)]
  dfs[[i]] <- res
}

# Every comparison used below must have been loaded.
stopifnot(all(
  c(
    "FCvHC.csv", "FCvNS.csv",
    "FCposvHCpos.csv", "FCposvNSpos.csv",
    "FCnegvHCneg.csv", "FCnegvNSneg.csv"
  ) %in% names(dfs)
))

# Build the FDR/LFC table of the network genes for one pair of comparisons.
#   hc, ns: result tables of the FC v HC and FC v NS comparison.
#   genes:  gene names to keep.
# Columns 1, 10 and 11 of `hc` are taken as gene, FDR and LFC.
# NOTE(review): this assumes FDR is the 10th column of the result files and
# that the appended lfc column is therefore the 11th; confirm.
network_table <- function(hc, ns, genes) {
  tab <- hc[, c(1, 10, 11)]
  tab <- tab[tab$X %in% ns$X, ]
  hit <- match(tab$X, ns$X)
  tab$FDR2 <- ns$FDR[hit]
  tab$lfc2 <- ns$lfc[hit]
  tab <- tab[tab$X %in% genes, ]
  colnames(tab) <- c("Gene", "FCvHC.FDR", "FCvHC.lfc", "FCvNS.FDR", "FCvNS.lfc")
  tab
}

plotdfall <- network_table(
  dfs[["FCvHC.csv"]], dfs[["FCvNS.csv"]], networkgenes
)
plotdfpos <- network_table(
  dfs[["FCposvHCpos.csv"]], dfs[["FCposvNSpos.csv"]], networkgenes
)
plotdfneg <- network_table(
  dfs[["FCnegvHCneg.csv"]], dfs[["FCnegvNSneg.csv"]], networkgenes
)



# Keep the gene name and the two LFC columns (FC v HC, FC v NS) of each table.
lfcplotall <- plotdfall[, c(1, 3, 5)]
lfcplotneg <- plotdfneg[, c(1, 3, 5)]
lfcplotpos <- plotdfpos[, c(1, 3, 5)]

library(reshape2)
library(ggplot2)

# Reshape a Gene / FCvHC.lfc / FCvNS.lfc table to long format with columns
# Gene, Condition and LFC. The id column and the output names are given
# explicitly so the value column never shares a name with the Gene column.
# Condition is relabelled from each row's own source column name, not from an
# assumed row order.
to_long_lfc <- function(wide) {
  long <- melt(
    wide,
    id.vars = "Gene",
    variable.name = "Condition",
    value.name = "LFC"
  )
  long$Condition <- ifelse(grepl("HC", long$Condition), "FC v HC", "FC v NS")
  long
}

lfcplotall <- to_long_lfc(lfcplotall)
lfcplotneg <- to_long_lfc(lfcplotneg)
lfcplotpos <- to_long_lfc(lfcplotpos)


# Combine the pooled, *pos and *neg LFCs into one table with one row per gene
# of the pooled table. The *pos / *neg values are looked up by gene name, so
# the three tables need neither the same row order nor the same gene set; a
# gene that is absent from a subset table gets NA there.
lfc_by_gene <- function(tab, column, genes) {
  tab[[column]][match(genes, tab$Gene)]
}

genes <- plotdfall$Gene
merged <- data.frame(
  Gene = genes,
  FCvHC = plotdfall$FCvHC.lfc,
  FCvNS = plotdfall$FCvNS.lfc,
  FCvHCpos = lfc_by_gene(plotdfpos, "FCvHC.lfc", genes),
  FCvNSpos = lfc_by_gene(plotdfpos, "FCvNS.lfc", genes),
  FCvHCneg = lfc_by_gene(plotdfneg, "FCvHC.lfc", genes),
  FCvNSneg = lfc_by_gene(plotdfneg, "FCvNS.lfc", genes),
  stringsAsFactors = FALSE
)

# Long format: one row per gene and source column.
merged <- melt(
  merged,
  id.vars = "Gene",
  variable.name = "Condition",
  value.name = "LFC"
)

# Derive the facet (Sample) and the comparison (Condition) from the name of
# the column each row came from, so the labels hold for any number of genes.
source_column <- as.character(merged$Condition)
merged$Sample <- ifelse(
  grepl("pos$", source_column),
  "dVenus+ Only",
  ifelse(grepl("neg$", source_column), "dVenus- Only", "Pooled")
)
merged$Condition <- ifelse(grepl("HC", source_column), "FC v HC", "FC v NS")

# Heatmap of LFC: genes on the vertical axis after coord_flip(), the two
# comparisons side by side, one facet per sample set. The colour scale
# diverges around 0 (blue = negative, red = positive).
ggplot(merged, aes(x = Gene, y = Condition, fill = LFC)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", high = "red3", mid = "white", midpoint = 0) +
  guides(fill = guide_colourbar(title = "LFC")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(x = element_blank(), y = element_blank()) +
  coord_flip() +
  facet_grid(~Sample)
