library(ggplot2)
library(ggrepel)
library(dplyr)

# Load the reduced proteinGroups table (whitespace-delimited, first line is
# the header) and preview its first rows.
data <- read.table("reduced_proteinGroups.txt", header = TRUE)
head(data)

# Columns kept in RES: the protein label, the `mean2_naimp_meas_*` columns,
# and the `difference_*_0h` / `log10.pvalue_*_0h` columns of each comparison.
res_timepoints <- c("24ht1", "24ht2", "48ht1", "48ht2")
res_columns <- c(
  "my_label",
  paste0("mean2_naimp_meas_", c("0h", res_timepoints)),
  paste0("difference_", res_timepoints, "_0h"),
  paste0("log10.pvalue_", res_timepoints, "_0h")
)

# Select by name instead of data.frame(data$col, ...): that form renames every
# column with a "data." prefix and silently drops a column that does not exist
# (data$missing is NULL), so later code could not refer to the columns by
# their original names.
missing_columns <- setdiff(res_columns, names(data))
if (length(missing_columns) > 0) {
  stop(
    "reduced_proteinGroups.txt is missing expected column(s): ",
    paste(missing_columns, collapse = ", "),
    call. = FALSE
  )
}
RES <- data[, res_columns]


########################################
############### HEATMAP ###############
########################################

# Install pheatmap only when it is not already available, so that re-running
# the script does not reinstall the package (and does not need network access)
# every time.
if (!requireNamespace("pheatmap", quietly = TRUE)) {
  install.packages("pheatmap")
}
library("pheatmap")
# NOTE: plyr is attached after dplyr (loaded at the top of the file), so plyr
# masks the dplyr functions that share a name with it. The join() calls in
# this section use plyr's `type =` argument.
library(plyr)
library(RColorBrewer)
library(ggfortify)

# Read the protein table into RES (keeps the column names from the file header)
RES <- read.table(file = "reduced_proteinGroups.txt", header = TRUE)

# For each comparison against 0h, keep the rows whose log10 p-value column and
# absolute difference column are both above 1.1 (rows where the test is NA are
# dropped).
keep_24ht1 <- with(RES, log10.pvalue_24ht1_0h > 1.1 & abs(difference_24ht1_0h) > 1.1)
keep_24ht2 <- with(RES, log10.pvalue_24ht2_0h > 1.1 & abs(difference_24ht2_0h) > 1.1)
keep_48ht1 <- with(RES, log10.pvalue_48ht1_0h > 1.1 & abs(difference_48ht1_0h) > 1.1)
keep_48ht2 <- with(RES, log10.pvalue_48ht2_0h > 1.1 & abs(difference_48ht2_0h) > 1.1)
sigde1 <- RES[which(keep_24ht1), , drop = FALSE]
sigde2 <- RES[which(keep_24ht2), , drop = FALSE]
sigde3 <- RES[which(keep_48ht1), , drop = FALSE]
sigde4 <- RES[which(keep_48ht2), , drop = FALSE]

# Combine the four selections with full joins: first within each time point,
# then across time points.
hmdf1 <- join(sigde1, sigde2, type = "full")
hmdf2 <- join(sigde3, sigde4, type = "full")
hmdf5 <- join(hmdf1, hmdf2, type = "full")

# Heatmap input: the label plus the five mean2_naimp_meas_* columns
hmdf <- data.frame(
  hmdf5$my_label,
  hmdf5$mean2_naimp_meas_0h,
  hmdf5$mean2_naimp_meas_24ht1,
  hmdf5$mean2_naimp_meas_24ht2,
  hmdf5$mean2_naimp_meas_48ht1,
  hmdf5$mean2_naimp_meas_48ht2
)
# Show the class of every column
sapply(hmdf, class)

# Use the protein label as row name, then remove the label column so that
# only the five value columns remain.
rownames(hmdf) <- hmdf5$my_label
hmdf[[1]] <- NULL

# Display names of the five remaining columns, in column order
names(hmdf) <- c("0h", "24h mock", "24h exudates", "48h mock", "48h exudates")

### Heatmap. Figure S4F
# breaksList only determines how many colours the palette has (41 steps).
# NOTE(review): it is not passed to pheatmap() as `breaks`, so pheatmap picks
# its own breaks from the data range -- confirm that this is intended.
breaksList <- seq(-2, 2.0, by = 0.1)
heatmap_colors <- colorRampPalette(
  rev(brewer.pal(n = 7, name = "RdYlBu"))
)(length(breaksList))

# Draw the heatmap once and keep the returned object: `map$tree_row` holds the
# row clustering that is needed afterwards to extract the protein IDs in the
# order shown in the figure. (Previously the identical heatmap was clustered
# and drawn twice, once only to display it and once to capture the result.)
map <- pheatmap(hmdf,
                color = heatmap_colors,
                cluster_rows = TRUE,
                clustering_method = "complete",
                cluster_cols = FALSE,
                show_rownames = TRUE,
                border_color = NA,
                fontsize = 5,
                scale = "none",
                fontsize_row = 2,
                height = 50,
                cellwidth = 10,
                # Kept from the original call, where it was commented as
                # "Legend size". NOTE(review): confirm pheatmap honours it.
                annotation_width = 50)

# Heatmap rows in the order given by the row clustering of `map`
clusters <- hmdf[map$tree_row$order, ]
# Position of each row in that order; seq_len() is also safe for zero rows
clusters$newnumber <- seq_len(nrow(clusters))
clusters$Geneid <- row.names(clusters)
# Merge the ordered rows with the full joined table (hmdf5). The label column
# is renamed by name rather than by position, so the merge key stays correct
# even if `my_label` is not the first column of the input file.
names(hmdf5)[names(hmdf5) == "my_label"] <- "Geneid"
clusters_merged <- merge(clusters, hmdf5, by = "Geneid")
# Reorder by 'newnumber' to get the ordered list seen in the heatmap.
# This is TABLE S5.
clusters_merged_ordered <- clusters_merged[order(clusters_merged$newnumber), ]
# `out_dir` is not defined in this script; fall back to the working directory
# when the caller has not set it, instead of failing with "object not found".
if (!exists("out_dir")) {
  out_dir <- "."
}
# The file is tab-separated although its name ends in .csv (name kept as is).
write.table(clusters_merged_ordered,
            file.path(out_dir, "clusters_merged_ordered_proteomicsheatmap.csv"),
            col.names = TRUE, quote = FALSE, sep = "\t", row.names = TRUE)


####################################################
############### PCA plot (FIGURE S4G) ###############
####################################################

# Intensity columns used for the PCA: four replicate columns (_1.._4) for each
# of the five conditions, in the order 0h, 24ht1, 24ht2, 48ht1, 48ht2.
pca_conditions <- c("0h", "24ht1", "24ht2", "48ht1", "48ht2")
pca_replicates <- 4L
pca_columns <- paste0(
  "imputed.log2.LFQ.intensity.",
  rep(pca_conditions, each = pca_replicates),
  "_",
  rep(seq_len(pca_replicates), times = length(pca_conditions))
)

pca <- data[, c("my_label", pca_columns)]
rownames(pca) <- pca$my_label
pca <- pca[, -1]
# Transpose: rows become the 20 samples, columns become the proteins
pcaData <- as.data.frame(t(pca))
# Remember the protein columns before the grouping column is appended
protein_columns <- colnames(pcaData)
# Add type of experiment (same order as pca_conditions, four rows each)
pcaData["type"] <- rep(c("Control", "24mock", "24exu", "48mock", "48exu"),
                       each = pca_replicates)

# Run the PCA on every protein column. The previous `pcaData[, 1:20]` selected
# only the first 20 columns, which after the transpose are the first 20
# proteins (not the 20 samples), so all other proteins were left out.
autoplot(prcomp(pcaData[, protein_columns, drop = FALSE]),
         data = pcaData,
         colour = "type",
         label = TRUE,
         label.size = 0) +
  theme_classic()


#######################################################################
############### List of all proteins detected (SUP TABLE S4) ##########
#######################################################################

data2 <- read.table("proteinGroups_filtered.txt", header = TRUE)
# NOTE(review): `data2` is loaded but not used below -- `prot` is built from
# `data` (reduced_proteinGroups.txt). Confirm which table this export is meant
# to be based on.
prot <- data[, c("my_label", "Sequence.coverage....", "Mol..weight..kDa.",
                 "Sequence.length", "mean2_naimp_meas_24ht1",
                 "mean2_naimp_meas_24ht2", "mean2_naimp_meas_48ht1",
                 "mean2_naimp_meas_48ht2", "mean2_naimp_meas_0h")]
# Average of the five mean2_naimp_meas_* columns. All terms are now read from
# `prot` itself (the 0h term used to come from `data`; the values are the same
# because `prot` is a column subset of `data`).
prot$level <- (prot$mean2_naimp_meas_24ht2 + prot$mean2_naimp_meas_24ht1 +
                 prot$mean2_naimp_meas_48ht2 + prot$mean2_naimp_meas_48ht1 +
                 prot$mean2_naimp_meas_0h) / 5
# `out_dir` is not defined in this script; fall back to the working directory
# when the caller has not set it, instead of failing with "object not found".
if (!exists("out_dir")) {
  out_dir <- "."
}
# The file is tab-separated although its name ends in .csv (name kept as is).
write.table(prot, file.path(out_dir, "expressedproteins.csv"),
            col.names = TRUE, quote = FALSE, sep = "\t", row.names = TRUE)

                               
                               