################################################################################################################################################################
##################### Use FeatureCount files to do DE analysis of clustered sRNA levels in inputs, oxidised libraries and columns ################################
################################################################################################################################################################
# Install DESeq2 only when it is missing, so that re-running the script does not
# trigger a network download / reinstall on every execution.
if (!requireNamespace("DESeq2", quietly = TRUE)) {
  BiocManager::install("DESeq2")
}
library('DESeq2')
library('ggplot2')
# `theme` and `colors` are optional user-supplied settings. When the user has
# not defined them, the names resolve to the functions ggplot2::theme() and
# grDevices::colors(), which are never NULL; treat a function as "not supplied"
# so that theme_set() is never handed a function and the default-palette
# package is loaded when no custom colours were given.
if (!is.null(theme) && !is.function(theme)) theme_set(theme)
library('knitr')
if (is.null(colors) || is.function(colors)) {
  library('RColorBrewer')
}
library('pheatmap')
library('DT')
library('sessioninfo')

# Load the count table (one row per feature, one column per sample, plus a
# leading Geneid column).
countda <- read.table("allcounts.methods.txt", header = TRUE, check.names = FALSE)
if (!"Geneid" %in% names(countda)) {
  stop("allcounts.methods.txt has no 'Geneid' column", call. = FALSE)
}

# Use Geneid as row names, then remove that column by name. drop = FALSE keeps
# a data frame even if only one sample column remains.
rownames(countda) <- countda$Geneid
countda <- countda[, names(countda) != "Geneid", drop = FALSE]
head(countda)
tail(countda)

# Load sample info (hand-made tab-delimited file)
colData <- read.table("sampleinfo.tsv", header = TRUE, check.names = FALSE)

# The sample sheet must describe exactly the samples in the count matrix and
# must provide the `condition` column used in the design formula below.
nrow(colData)
ncol(countda)
if (nrow(colData) != ncol(countda)) {
  stop(
    "sampleinfo.tsv has ", nrow(colData), " rows but the count table has ",
    ncol(countda), " sample columns",
    call. = FALSE
  )
}
if (!"condition" %in% names(colData)) {
  stop("sampleinfo.tsv has no 'condition' column", call. = FALSE)
}

# Create data object from count matrix and metadata table
ddsFullCountTable <- DESeqDataSetFromMatrix(
  countData = countda,
  colData = colData,
  design = ~ condition
)
ddsFullCountTable

# DESeq analysis; the default results table uses a 0.05 FDR cutoff
dds <- DESeq(ddsFullCountTable)
res <- results(dds, alpha = 0.05)
res
summary(res)



###############################
###### Columns vs input ######
###############################


# Contrast: condition "col" versus condition "input".
# alpha is set to the FDR cutoff applied below (padj < 0.05) so that DESeq2's
# independent filtering is optimised for the same threshold; the default
# (0.1) would not match the cutoff used to call significance.
rescol <- results(dds, contrast = c("condition", "col", "input"), alpha = 0.05)
resdf <- data.frame(rescol)

# Significant features (padj available and below 0.05), then split by direction
sig_results <- resdf[!is.na(resdf$padj) & resdf$padj < 0.05, ]
head(sig_results, n = 20)
sig_resultsUP <- sig_results[sig_results$log2FoldChange > 0, ]
head(sig_resultsUP, n = 20)
nrow(sig_resultsUP)
sig_resultsDOWN <- sig_results[sig_results$log2FoldChange < 0, ]
head(sig_resultsDOWN, n = 20)
nrow(sig_resultsDOWN)

# MA plot for DE col VS input
clab <- 1.5
cmain <- 2
caxis <- 1.5
# Avoid scientific notation on the axis labels
options(scipen = 10000)
pdf("DEcolumns.pdf")
# Close the PDF device even if plotting fails, so that later plots are not
# silently written into a half-finished file.
invisible(tryCatch(
  plotMA(rescol,
         ylim = c(-12, 6),
         xlab = "Mean of normalized counts",
         ylab = "log2(FC)",
         cex.lab = clab,
         colNonSig = "gray60",
         colSig = "red3",
         colLine = "red",
         cex.main = cmain,
         cex.axis = caxis,
         alpha = 0.05),
  finally = dev.off()
))

# Install ggpubr only when it is missing instead of on every run
if (!requireNamespace("ggpubr", quietly = TRUE)) {
  install.packages("ggpubr")
}
library('ggpubr')
# ggplot-based MA plot of the same contrast (drawn on the current device)
ggmaplot(
  rescol,
  fdr = 0.05,
  fc = 0,
  size = 1,
  alpha = 1,
  palette = c("green3", "red3", "grey40"),
  legend = "top",
  top = 0,
  font.label = c("bold", 5),
  label.rectangle = TRUE,
  font.legend = c("bold", 12),
  ggtheme = theme_classic(),
  font.main = "bold"
)




###############################
###### Oxidation vs input ######
###############################

# Contrast: condition "ox" versus condition "input2".
# NOTE(review): "input2" is assumed to be a level of `condition` in
# sampleinfo.tsv (distinct from "input") - confirm against the sample sheet.
# alpha is set to the FDR cutoff applied below (padj < 0.05) so that DESeq2's
# independent filtering is optimised for the same threshold.
resox <- results(dds, contrast = c("condition", "ox", "input2"), alpha = 0.05)
resdf <- data.frame(resox)

# Significant features (padj available and below 0.05), then split by direction
sig_results <- resdf[!is.na(resdf$padj) & resdf$padj < 0.05, ]
head(sig_results, n = 20)
sig_resultsUP <- sig_results[sig_results$log2FoldChange > 0, ]
head(sig_resultsUP, n = 20)
nrow(sig_resultsUP)
sig_resultsDOWN <- sig_results[sig_results$log2FoldChange < 0, ]
head(sig_resultsDOWN, n = 20)
nrow(sig_resultsDOWN)

# MA plot for DE ox VS input2
clab <- 1.5
cmain <- 2
caxis <- 1.5
# Avoid scientific notation on the axis labels
options(scipen = 10000)
pdf("DEox.pdf")
# Close the PDF device even if plotting fails, so that later plots are not
# silently written into a half-finished file.
invisible(tryCatch(
  plotMA(resox,
         ylim = c(-12, 6),
         xlab = "Mean of normalized counts",
         ylab = "log2(FC)",
         cex.lab = clab,
         cex.main = cmain,
         cex.axis = caxis,
         alpha = 0.05),
  finally = dev.off()
))

# ggplot-based MA plot of the same contrast (drawn on the current device)
ggmaplot(
  resox,
  fdr = 0.05,
  fc = 0,
  size = 1,
  alpha = 1,
  palette = c("green3", "red3", "grey40"),
  legend = "top",
  top = 0,
  font.label = c("bold", 5),
  label.rectangle = TRUE,
  font.legend = c("bold", 12),
  ggtheme = theme_classic(),
  font.main = "bold"
)







####################################
###### Update annotation file ######
####################################

# This is the original sRNA annotation file from ShortStack_1603321407
# (3495 clusters), read from the copy whose .gff3 extension was changed to
# .txt. Only this copy is read: the .gff3 name no longer exists once the file
# has been renamed as described below.
gff <- read.table("ShortStack_D.txt", header = FALSE, check.names = FALSE)

# This is the list of sRNA clusters that we want to remove from the original annotation
rem <- read.table("removefromgff.txt", header = FALSE, check.names = FALSE)

# Optional command-line pre-filtering:
#   1. Change the .gff3 extension to .txt
#   2. grep -v -w -F -f removefromgff.txt ShortStack_D.txt > sRNAannotation.txt
#      (prints only the lines of ShortStack_D that are NOT in removefromgff.txt)
#   3. Rename sRNAannotation.txt to sRNAannotation.gff3 so it can be used as
#      the new sRNA annotation file.

annot <- read.table("sRNAannotation.gff3", header = FALSE, check.names = FALSE)

# Extract the value of the first attribute in column 9: the text after the
# first "=" and before the first ";" (e.g. "ID=Cluster_1;..." -> "Cluster_1").
# Done with base R so the script does not depend on stringr being attached,
# and so V10 is a plain character vector rather than a list column.
first_attr <- sub("^[^=]*=", "", as.character(annot$V9))
annot$V10 <- sub(";.*$", "", first_attr)

names(rem)[1] <- "V10"

# Keep only annotation rows whose identifier is not in the removal list
# (base-R equivalent of an anti-join on V10; no dplyr required).
newannot <- annot[!annot$V10 %in% rem$V10, , drop = FALSE]
rownames(newannot) <- NULL
newannot$V10 <- NULL

out_dir <- "/Users/alexandradallaire/"
# Strip any trailing slash before joining so the path has no "//"
out_base <- sub("/+$", "", out_dir)
write.table(newannot, file.path(out_base, "sRNAannotation_VF.gff"),
            col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)

# BED export: chrom, start, end, name, score, strand.
# GFF coordinates are 1-based and inclusive whereas BED starts are 0-based, so
# the start column is shifted by one; the end coordinate is unchanged.
newannotbed <- subset(newannot, select = c(V1, V4, V5, V9, V6, V7))
newannotbed$V4 <- newannotbed$V4 - 1L
write.table(newannotbed, file.path(out_base, "sRNAannotation_VF.bed"),
            col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)












