
setwd("C:/Users/CK/Documents/Lab/interactome_capture_on_R/mRIC-master")

## install.packages("ggplot2")
## install.packages("viridis")
## Install the Bioconductor tooling only when it is missing, so that re-running
## the script does not trigger a fresh download/installation every time.
if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
for (bioc.pkg in c("biomaRt", "limma")) {
  if (!requireNamespace(bioc.pkg, quietly = TRUE))
    BiocManager::install(bioc.pkg)
}
rm(bioc.pkg)
library(viridis) ## color scales
library(ggplot2)  ## graphs
library(reshape2) ##  rearrange ("melt") correlation matrix for plotting
library(matrixStats)  ### for colMedians
library(plyr)
library(biomaRt)  ## retrieve annotations
library(propagate)   # for error propagation
library(limma)    ### for moderated t-tests
library(MASS)
library(statmod)


### All sourced functions are included at the end of the script
source("R/addNewData.R")
source("R/addColumnToMaster.R") ## function depends on addNewData.R
source("R/addRawData.R") ## function depends on addNewData.R and addColumnToMaster.R
source("R/addRawDataToNoCL.R") ## function depends on addNewData.R and addColumnToMaster.R
source("R/addRawDataToLys.R") ## function depends on addNewData.R and addColumnToMaster.R
source("R/findModeAndAddValue.R")
source("R/addMeanAndSD.R")
source("R/addPropagatedError.R")


## Build the master table: download all S. pombe genes (ID, name, description)
## from BioMart, then use lookup tables to fill in the mRIC data of the
## individual experiments, one source file at a time.
    mart <- useMart("fungi_mart", host = "fungi.ensembl.org", dataset = "spombe_eg_gene")
    gene.data <- getBM(
      attributes = c('ensembl_gene_id', 'external_gene_name', 'description'),
      mart = mart
    )
    rm(mart)

    ## mRIC13_vs_WCE.csv: non-normalised data for mRIC13 RIC (+ WCEs).
    ## Column "X144_A_3J" starts the extended table; file columns 3 to 13 follow.
    mRIC13.data <- read.csv("data/mRIC13_vs_WCE.csv",
                            header = TRUE, stringsAsFactors = FALSE)
    gene.data.extended <- addColumnToMaster("X144_A_3J", mRIC13.data, gene.data)
    idx <- colnames(mRIC13.data[3:13])
    for (i in idx) {
      gene.data.extended <- addColumnToMaster(i, mRIC13.data, gene.data.extended)
    }
    rm(mRIC13.data)

    ## mRIC15_EL_proteins_nonNorm.csv: non-normalised data for mRIC15 RIC
    ## (file columns 3 to 11 are appended).
    mRIC15.EL.data <- read.csv("data/mRIC15_EL_proteins_nonNorm.csv",
                               header = TRUE, stringsAsFactors = FALSE)
    colnames(mRIC15.EL.data)   # printed for inspection
    idx <- colnames(mRIC15.EL.data[3:11])
    for (i in idx) {
      gene.data.extended <- addColumnToMaster(i, mRIC15.EL.data, gene.data.extended)
    }
    rm(mRIC15.EL.data)

    ## mRIC13_Norm_proteins.csv: normalised data for mRIC13 WCEs (+ RIC)
    ## (file columns 14 to 19 are appended).
    mRIC13.lys.data <- read.csv("data/mRIC13_Norm_proteins.csv",
                                header = TRUE, stringsAsFactors = FALSE)
    colnames(mRIC13.lys.data)   # printed for inspection
    idx <- colnames(mRIC13.lys.data[14:19])
    for (i in idx) {
      gene.data.extended <- addColumnToMaster(i, mRIC13.lys.data, gene.data.extended)
    }
    rm(mRIC13.lys.data)

    ## mRIC15_input-Norm.csv: normalised data for mRIC15 WCEs
    ## (file columns 2 to 4 and 6 to 11 are appended; column 5 is skipped).
    mRIC15.lys.data <- read.csv("data/mRIC15_input-Norm.csv",
                                header = TRUE, stringsAsFactors = FALSE)
    colnames(mRIC15.lys.data)   # printed for inspection
    idx <- colnames(mRIC15.lys.data[c(2:4, 6:11)])
    for (i in idx) {
      gene.data.extended <- addColumnToMaster(i, mRIC15.lys.data, gene.data.extended)
    }
    rm(mRIC15.lys.data)

    ## The table now holds all raw data; translate the column labels.
    ## The substitutions are plain gsub() calls applied in the order listed, so a
    ## pattern is replaced wherever it occurs in a label (not only at its start).
    gene.data.raw <- gene.data.extended
    colnames(gene.data.raw) <- local({
      relabel <- c("X" = "", "144" = "WT", "MTL" = "mtl1",
                   "729" = "mtl1", "67" = "dis3", "411" = "rrp6")
      labels <- colnames(gene.data.raw)
      for (pattern in names(relabel)) {
        labels <- gsub(pattern, relabel[[pattern]], labels)
      }
      labels
    })
    colnames(gene.data.raw)   # printed for inspection


                        ## Before normalisation, purify the data: the value 18 is a background value
                        ## imputed at an earlier step for proteins that were not detected in MS.
                        ## Where every 3J sample of an experiment carries that value (i.e. the
                        ## selected columns sum to <number of columns> * 18, NA ignored), the
                        ## values of that row are replaced with NA for that experiment.
                        ## The which() calls print the affected rows before and after masking;
                        ## the second call of each pair is expected to print nothing.
                        gene.data.raw.red <- gene.data.raw

                        ## mRIC13
                        col.coord <- c(4, 6, 8, 10, 12, 14)
                        no.signal <- rowSums(gene.data.raw.red[, col.coord], na.rm = TRUE) == length(col.coord) * 18
                        which(no.signal)
                        gene.data.raw.red[no.signal, col.coord] <- NA
                        which(rowSums(gene.data.raw.red[, col.coord], na.rm = TRUE) == length(col.coord) * 18)

                        ## mRIC15
                        col.coord <- c(16, 17, 18, 19, 20, 21, 22, 23, 24)
                        no.signal <- rowSums(gene.data.raw.red[, col.coord], na.rm = TRUE) == length(col.coord) * 18
                        which(no.signal)
                        gene.data.raw.red[no.signal, col.coord] <- NA
                        which(rowSums(gene.data.raw.red[, col.coord], na.rm = TRUE) == length(col.coord) * 18)

                        rm(no.signal)
                  
           
      ### normalisation to median = zero
      ## Visualize degree of variation pre-normalization:
      col.coord <- c(4, 6, 8, 10, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24)
      dat <- melt(gene.data.raw.red[, col.coord], na.rm = TRUE)
      ggplot(data = dat, aes(x = variable, y = value)) +
        xlab(label = "") + ylab(label = "MS intensities") +
        geom_boxplot() + theme(axis.text.x = element_text(angle = 60, vjust = 0.5))

      ## Normalise all medians to zero: each selected column is shifted by its own
      ## median (NA ignored). All other columns are carried over unchanged.
      gene.data.raw.normToZero <- gene.data.raw.red
      for (k in col.coord) {
        gene.data.raw.normToZero[[k]] <-
          gene.data.raw.red[[k]] - median(gene.data.raw.red[[k]], na.rm = TRUE)
      }

      ## visualise data post-normalisation
      dat <- melt(gene.data.raw.normToZero[, col.coord], na.rm = TRUE)
      ggplot(data = dat, aes(x = variable, y = value)) +
        xlab(label = "") + ylab(label = "MS intensities") +
        geom_boxplot() + theme(axis.text.x = element_text(angle = 60, vjust = 0.5))

      ## Remove only the scratch objects created in this section (rm() warns about
      ## names that do not exist).
      rm(dat, k)
    


    ### Produce new datasets with derivative data (diffs, p-values)
    gene.data.alldiff.normToZero <- addRawData(gene.data.raw.normToZero)
    gene.data.alldiff.toNoCL <- addRawDataToNoCL(gene.data.raw.red)

    #### These datasets have to be purified.
    ### For visualisation and analysis, all lines that were not detected in any of
    ### the crosslinked proteomes (_3J_) are dropped.

    ## First, drop all zero difference values (a consequence of the joint analysis
    ## of different samples by SM, where baseline values were imputed if no signal
    ## was detected). With data normalisation this has become obsolete, except for
    ## dis3 and rrp6.
    ## gene.data.alldiff.noNorm and gene.data.alldiff.norm are not created by the
    ## code above, so they are only masked when they already exist in the session;
    ## referencing them unconditionally would abort a clean run.

    ## For dis3: rows whose six selected raw columns all carry the imputed value 18
    col.coord <- c(16, 17, 18, 22, 23, 24)
    colnames(gene.data.raw.red[col.coord])
    row.coord <- which(rowSums(gene.data.raw.red[, col.coord]) == 6 * 18, arr.ind = TRUE)
    colnames(gene.data.alldiff.normToZero)
    if (exists("gene.data.alldiff.noNorm")) gene.data.alldiff.noNorm[row.coord, 16] <- NA
    if (exists("gene.data.alldiff.norm")) gene.data.alldiff.norm[row.coord, 16] <- NA
    gene.data.alldiff.normToZero[row.coord, 16] <- NA

    ## For rrp6
    col.coord <- c(16:21)
    colnames(gene.data.raw.red[col.coord])
    row.coord <- which(rowSums(gene.data.raw.red[, col.coord]) == 6 * 18, arr.ind = TRUE)
    colnames(gene.data.alldiff.normToZero)
    if (exists("gene.data.alldiff.noNorm")) gene.data.alldiff.noNorm[row.coord, 10] <- NA
    if (exists("gene.data.alldiff.norm")) gene.data.alldiff.norm[row.coord, 10] <- NA
    gene.data.alldiff.normToZero[row.coord, 10] <- NA
    rm(col.coord, row.coord)

    ### Zero values also need to be dropped for ToNoCL, which was generated from
    ### unnormalised raw data.
    ### wt1 to noCL
    col.coord <- c(4:9)
    colnames(gene.data.raw.red[col.coord])
    row.coord <- which(rowSums(gene.data.raw.red[, col.coord]) == 6 * 18, arr.ind = TRUE)
    colnames(gene.data.alldiff.toNoCL)
    gene.data.alldiff.toNoCL[row.coord, 4] <- NA

    ### mtl1 to noCL
    col.coord <- c(10:15)
    colnames(gene.data.raw.red[col.coord])
    row.coord <- which(rowSums(gene.data.raw.red[, col.coord]) == 6 * 18, arr.ind = TRUE)
    colnames(gene.data.alldiff.toNoCL)
    gene.data.alldiff.toNoCL[row.coord, 6] <- NA

    ### WTboth to noCL: blank column 8 for every gene that is not listed in WTboth.CL.
    ## NOTE(review): WTboth.CL is first assigned further down, in the
    ## WCE-normalisation part of this script, so a top-to-bottom run of a clean
    ## session reaches this point without it. Fail with an explicit message
    ## instead of a bare "object not found".
    if (!exists("WTboth.CL")) {
      stop("WTboth.CL is not defined yet: run the WCE-normalisation section ",
           "(which creates WTboth.CL) before purifying gene.data.alldiff.toNoCL",
           call. = FALSE)
    }
    row.coord <- which(!(gene.data.alldiff.toNoCL$ensembl_gene_id %in% WTboth.CL), arr.ind = TRUE)
    gene.data.alldiff.toNoCL[row.coord, 8] <- NA
                                    
                                ## Then drop the rows in which all crosslinked (3J) difference columns are NA.
                                ## A reduced table is only produced when the first three (non-data) columns
                                ## of the source table are free of NA; otherwise a warning is raised and the
                                ## reduced table is not created.
                                ## gene.data.alldiff.noNorm and gene.data.alldiff.norm are not created by the
                                ## code above, so they are only processed when they exist in the session.

                                ## TRUE when columns 1:3 of `dat` hold no NA; warns (naming `label`) otherwise.
                                nonDataColsComplete <- function(dat, label) {
                                  complete <- !any(is.na(dat[, 1:3]))
                                  if (!complete) {
                                    warning(label, ": non-data rows contain NA values; ",
                                            "no reduced table was created", call. = FALSE)
                                  }
                                  complete
                                }

                                ## Rows of `dat` that have a value in at least one of `signal.cols`.
                                keepRowsWithSignal <- function(dat, signal.cols) {
                                  dat[rowSums(is.na(dat[signal.cols])) != length(signal.cols), ]
                                }

                                colnames(gene.data.alldiff.normToZero)
                                col.coord.3J <- c(4, 10, 16)
                                colnames(gene.data.alldiff.normToZero[col.coord.3J])

                                if (exists("gene.data.alldiff.noNorm") &&
                                    nonDataColsComplete(gene.data.alldiff.noNorm, "gene.data.alldiff.noNorm")) {
                                  alldiff.noNorm.red <- keepRowsWithSignal(gene.data.alldiff.noNorm, col.coord.3J)
                                }

                                if (exists("gene.data.alldiff.norm") &&
                                    nonDataColsComplete(gene.data.alldiff.norm, "gene.data.alldiff.norm")) {
                                  alldiff.norm.red <- keepRowsWithSignal(gene.data.alldiff.norm, col.coord.3J)
                                }

                                if (nonDataColsComplete(gene.data.alldiff.normToZero, "gene.data.alldiff.normToZero")) {
                                  alldiff.normToZero.red <- keepRowsWithSignal(gene.data.alldiff.normToZero, col.coord.3J)
                                }
                                ## write.csv(gene.data.alldiff.red, file = "output/data/gene_data_allDiff_redTo3J.csv", row.names = FALSE)
                                rm(col.coord.3J)

                                ### Same for the to-noCL data (two 3J difference columns)
                                colnames(gene.data.alldiff.toNoCL)
                                col.coord.3J <- c(4, 6)
                                colnames(gene.data.alldiff.toNoCL[col.coord.3J])

                                if (nonDataColsComplete(gene.data.alldiff.toNoCL, "gene.data.alldiff.toNoCL")) {
                                  alldiff.toNoCL.red <- keepRowsWithSignal(gene.data.alldiff.toNoCL, col.coord.3J)
                                }

                                rm(nonDataColsComplete, keepRowsWithSignal)
                              

          ### Spearman correlation heat map (Suppl. Fig 1B)
          col.coord <- c(4, 6, 8, 16, 17, 18, 25:27, 31:33) ## WTs only
          colnames(gene.data.raw[col.coord])   # printed for inspection
          cor.mat <- cor(gene.data.raw.red[, col.coord], use = "complete", method = "spearman")

          ## Shorten the axis labels: strip "Lysate_" first, then rename "medianNorm"
          ## to "WCE", on both the row and the column names.
          dimnames(cor.mat) <- lapply(dimnames(cor.mat), function(labels) {
            gsub("medianNorm", "WCE", gsub("Lysate_", "", labels))
          })

          ## Long format (Var1, Var2, value) for geom_tile().
          melted.cor.mat <- melt(cor.mat)
          head(melted.cor.mat)
          ggplot(data = melted.cor.mat, aes(x = Var1, y = Var2, fill = value)) +
            geom_tile() +
            scale_fill_viridis(option = "inferno") +
            xlab(label = "") +
            ylab(label = "") +
            theme(aspect.ratio = 0.9,
                  axis.text.x = element_text(angle = 60, vjust = 0.5))
          rm(cor.mat, melted.cor.mat)
          

### basic volcano plots for comparison to noCL
volcano.dat <- gene.data.alldiff.toNoCL      # comparison 3J to noCL
row.names(volcano.dat) <- volcano.dat$ensembl_gene_id
## `categories` stays NA for every gene outside the highlighted GO term; all
## tests below therefore use is.na() rather than a comparison with a string.
volcano.dat$categories <- NA_character_

### Harvest GO annotations for gene data sets, e.g. GO:0003723 RNA-binding ("go")
### or with all its descendents ("go_parent_term").
mart <- useMart("fungi_mart", host = "fungi.ensembl.org", dataset = "spombe_eg_gene")
gene.data2 <- getBM(attributes = c('ensembl_gene_id', 'uniprotswissprot', 'description'),
                    filters = "go_parent_term", values = "GO:0022626",
                    mart = mart)
## Flag every gene of the table whose ID is among the returned IDs (first column).
volcano.dat[volcano.dat$ensembl_gene_id %in% gene.data2[, 1], "categories"] <- "GO:0022626"
rm(mart, gene.data2)

### plot volcanoes (with categories)
xlabel <- expression("log"[2]*" (fold change MS intensities (3J/noCL))")
ylabel <- expression("-log"[10]*" (p-value)")
myPalette3 <- c("green", "black", "black", "#F0E442", "#FFCC00", "#33CC00",
                "#56B4E9", "#F0E442", "#FFCC00", "#FF9900")

# volcano.wtboth.toNoCL: all genes in grey, categorised genes coloured and labelled
ggplot(volcano.dat) +
  geom_point(aes(x = diff_WTboth_3J_noCL, y = p_WTboth_3J_noCL_limma), color = "grey") +
  geom_point(data = volcano.dat[!is.na(volcano.dat$categories), ],
             aes(x = diff_WTboth_3J_noCL, y = p_WTboth_3J_noCL_limma, color = categories)) +
  scale_color_manual(values = myPalette3) + theme_bw() + theme(aspect.ratio = 0.9) +
  scale_x_continuous(breaks = c(-10, 0, 10), limits = c(-15, 15)) + xlab(xlabel) + ylab(ylabel) +
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10), limits = c(0, 11)) +
  geom_text(aes(x = diff_WTboth_3J_noCL, y = p_WTboth_3J_noCL_limma,
                label = ifelse(!is.na(categories), as.character(external_gene_name), "")),
            hjust = -0.1, vjust = 0.2, size = 3.5) +
  labs(title = expression(paste(italic("S. pombe "), "wild-type interactome")))
                                      
                  #### Harvest domain ID (Pfam) annotations - from RBD list
                  ### Read in the RBD annotation file and flag genes carrying a classical or a
                  ### non-classical RNA-binding domain.
                  volcano.dat <- gene.data.alldiff.toNoCL
                  row.names(volcano.dat) <- volcano.dat$ensembl_gene_id
                  volcano.dat$categories <- "NA"

                  Pfam.data <- read.csv("data/pfam.domains.RBD.classification.csv", header = TRUE, stringsAsFactors = FALSE)
                  ## %in% never yields NA, so entries without a classification are simply skipped.
                  Pfam.classical <- Pfam.data[Pfam.data[, "RBD.classification"] %in% "classical", "id"]
                  Pfam.nonclassical <- Pfam.data[Pfam.data[, "RBD.classification"] %in% "nonclassical", "id"]

                  mart <- useMart("fungi_mart", host = "fungi.ensembl.org", dataset = "spombe_eg_gene")
                  gene.data2 <- getBM(attributes = c('ensembl_gene_id', 'uniprotswissprot', 'description', 'pfam'),
                                      filters = "pfam", values = Pfam.classical,
                                      mart = mart)
                  gene.data3 <- getBM(attributes = c('ensembl_gene_id', 'uniprotswissprot', 'description', 'pfam'),
                                      filters = "pfam", values = Pfam.nonclassical,
                                      mart = mart)
                  ## A gene is flagged when its ID is among the IDs returned by the query
                  ## (first column); all other genes keep NA in the flag column.
                  volcano.dat$pfam.classical <-
                    ifelse(volcano.dat$ensembl_gene_id %in% gene.data2[, 1], "classical", NA_character_)
                  volcano.dat$pfam.nonclassical <-
                    ifelse(volcano.dat$ensembl_gene_id %in% gene.data3[, 1], "nonclassical", NA_character_)
                  rm(mart, gene.data2, gene.data3)

                  ### plot with pfam classification
                  xlabel <- expression("log"[2]*" (fold change MS intensities (3J/noCL))")
                  ylabel <- expression("-log"[10]*" (p-value)")

                  # volcano.wtboth.toNoCL: layers are drawn in the order none, nonclassical,
                  # classical. The subsets use %in% because the flag columns are NA for
                  # unflagged genes, and `==` would then select all-NA rows.
                  ggplot(volcano.dat) +
                    geom_point(aes(x = diff_WTboth_3J_noCL, y = p_WTboth_3J_noCL_limma, color = "none")) +
                    geom_point(data = volcano.dat[volcano.dat$pfam.nonclassical %in% "nonclassical", ],
                               aes(x = diff_WTboth_3J_noCL, y = p_WTboth_3J_noCL_limma, color = "nonclassical")) +
                    theme_bw() + scale_x_continuous(breaks = c(-10, 0, 10), limits = c(-15, 15)) +
                    scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10), limits = c(0, 11)) +
                    theme(aspect.ratio = 0.9) + xlab(xlabel) + ylab(ylabel) +
                    geom_point(data = volcano.dat[volcano.dat$pfam.classical %in% "classical", ],
                               aes(x = diff_WTboth_3J_noCL, y = p_WTboth_3J_noCL_limma, color = "classical")) +
                    scale_color_manual(name = "RNA-binding domains",
                                       values = c("none" = "grey", "nonclassical" = "blue", "classical" = "black"),
                                       labels = c("classical", "nonclassical", "none")) +
                    labs(title = expression(paste(italic("S. pombe "), "wild-type interactome")))
   

  
###  WCE-normalisation of WT interactomes
gene.data.raw.toLys <- gene.data.raw.red

## visualize data spread before normalization
col.coord <- c(4, 6, 8, 25, 26, 27, 16, 17, 18, 31, 32, 33)
dat <- melt(gene.data.raw.toLys[, col.coord], na.rm = TRUE)
ggplot(data = dat, aes(x = variable, y = value)) +
  xlab(label = "") + ylab(label = "MS intensities") +
  geom_boxplot() + theme(axis.text.x = element_text(angle = 60, vjust = 0.5))
rm(dat)

## Detect leftover genes that were detected neither in 3J nor Lys (mtl1/rrp6/dis3
## only): every row whose selected columns sum to the same value as a reference
## row is treated as "nothing detected".
## NOTE(review): the reference rows 763 and 915 are hard-coded row numbers; they
## presumably point at genes known to be undetected and depend on the row order of
## the table — confirm they are still valid whenever the gene list changes.
mRIC13.cols <- c(4, 6, 8, 25, 26, 27)
mRIC15.cols <- c(16, 17, 18, 31, 32, 33)
none.detect.value <- rowSums(gene.data.raw.toLys[763, mRIC13.cols])
superfluous.row.coord <- which(rowSums(gene.data.raw.toLys[, mRIC13.cols]) == none.detect.value)
none.detect.value <- rowSums(gene.data.raw.toLys[915, mRIC15.cols])
superfluous.row.coord2 <- which(rowSums(gene.data.raw.toLys[, mRIC15.cols]) == none.detect.value)
gene.data.raw.toLys[superfluous.row.coord, mRIC13.cols] <- NA
gene.data.raw.toLys[superfluous.row.coord2, mRIC15.cols] <- NA

## Normalise all medians to zero. The shift is computed on, and applied to, the
## masked table, so the NA entries set above survive the normalisation and do not
## enter the medians (reading the unmasked table here would silently undo the mask).
for (k in col.coord) {
  gene.data.raw.toLys[[k]] <-
    gene.data.raw.toLys[[k]] - median(gene.data.raw.toLys[[k]], na.rm = TRUE)
}
rm(col.coord, k, mRIC13.cols, mRIC15.cols, none.detect.value)
            
                ## Data imputation for WCE where 3J signal exists and for 3J where WCE signal exists.
                ## The replacement value for "not detected" ("18" on raw data at the level of
                ## triplicates) is shifted post-normalisation: find the mode of the bottom-end
                ## values (< -5 for WCE), then use that value to replace NAs in all rows where
                ## either 3J or WCE returned at least one value.
                ## findModeAndAddValue() is a sourced helper; it is called once per column,
                ## in the listed order, with -5 as its third argument.

                ## for mRIC13
                col.coord <- c(4, 6, 8, 25, 26, 27)
                dummy <- gene.data.raw.toLys[, col.coord]
                row.coord <- which(rowSums(is.na(dummy)) < 6)   # rows with at least one non-NA value
                ## find lower mode of bimodal distribution (= SM's imputed value) and replace NAs where opportune
                for (impute.col in col.coord) {
                  gene.data.raw.toLys <- findModeAndAddValue(gene.data.raw.toLys, impute.col, -5)
                }

                ### for mRIC15
                col.coord <- c(16, 17, 18, 31, 32, 33)
                dummy <- gene.data.raw.toLys[, col.coord]
                row.coord <- which(rowSums(is.na(dummy)) < 6)   # rows with at least one non-NA value
                ## find lower mode of bimodal distribution (= SM's imputed value) and replace NAs where opportune
                for (impute.col in col.coord) {
                  gene.data.raw.toLys <- findModeAndAddValue(gene.data.raw.toLys, impute.col, -5)
                }
                rm(impute.col)
          
            
            ### Generate lists of proteins with certain behaviour (detected in RIC, detected in WCE?) for plotting.
            ### A row counts as detected in a triplicate when the sum of its three values
            ### differs from the sum of the three per-column values returned by findMode().

            ## Sum of findMode(., column, -5) over the given columns, added left to right.
            modeSum <- function(cols) {
              Reduce(`+`, lapply(cols, function(k) findMode(gene.data.raw.toLys, k, -5)))
            }

            ### coordinates of proteins that crosslinked at least once in WT1
            stringent.row.coord.WT1 <- which(
              rowSums(gene.data.raw.toLys[, c(4, 6, 8)]) != modeSum(c(4, 6, 8)),
              arr.ind = TRUE
            )

            ### coordinates of proteins that crosslinked at least once in WT2
            stringent.row.coord.WT2 <- which(
              rowSums(gene.data.raw.toLys[, c(16, 17, 18)]) != modeSum(c(16, 17, 18)),
              arr.ind = TRUE
            )

            ### list of subset of proteins detected in either WT1, 2, or both
            stringent.row.coord.WTboth <- sort(unique(c(stringent.row.coord.WT1, stringent.row.coord.WT2)))
            WTboth.CL <- gene.data.raw.toLys[stringent.row.coord.WTboth, 1]

            ### detected in WCE (WT1, WT2, or 1 OR 2)
            row.coord <- which(
              rowSums(gene.data.raw.toLys[, c(25, 26, 27)]) != modeSum(c(25, 26, 27)),
              arr.ind = TRUE
            )
            WT1.Lys <- gene.data.raw.toLys[row.coord, 1]
            row.coord <- which(
              rowSums(gene.data.raw.toLys[, c(31, 32, 33)]) != modeSum(c(31, 32, 33)),
              arr.ind = TRUE
            )
            WT2.Lys <- gene.data.raw.toLys[row.coord, 1]
            WTboth.Lys <- gene.data[gene.data$ensembl_gene_id %in% unique(c(WT1.Lys, WT2.Lys)), 1]

            ### detected in 3J but not lysate
            WTbothCL.noLys <- setdiff(WTboth.CL, WTboth.Lys)
            ### detected both in 3J and in lysate
            WTboth.CLandLys <- intersect(WTboth.CL, WTboth.Lys)

            rm(modeSum)
          
                                ### visualize CL+Lys vs. CL.noLys on Marguerat et al, Cell protein data (Suppl. Fig 1C and D)
                                marguerat <- read.csv("data/marguerat_protData.csv", header = TRUE, stringsAsFactors = FALSE)
                                marguerat$type <- as.factor(marguerat$type)
                                ## Drop the "#N/A" placeholders, then convert to numbers and take log10.
                                marguerat <- marguerat[marguerat$Marguerat != "#N/A", ]
                                marguerat$Marguerat <- as.numeric(marguerat$Marguerat)
                                marguerat$MargueratLog <- log10(marguerat$Marguerat)

                                ## Split into the two groups and relabel `type` accordingly.
                                ## NOTE(review): WTbothCL.noLys / WTboth.CLandLys are taken from the first
                                ## column of the gene tables (apparently ensembl_gene_id) but are matched
                                ## against marguerat$external_gene_name — confirm both hold the same kind
                                ## of identifier.
                                dummy1 <- marguerat[marguerat$external_gene_name %in% WTbothCL.noLys, ]
                                dummy1$type <- "CL.noLys"
                                dummy3 <- marguerat[marguerat$external_gene_name %in% WTboth.CLandLys, ]
                                dummy3$type <- "CL+Lys"
                                marguerat.test <- rbind(dummy3, dummy1)
                                ## Only the two scratch tables created above are removed (rm() warns
                                ## about names that do not exist).
                                rm(dummy1, dummy3)
                                marguerat.test$type <- factor(marguerat.test$type,
                                                              levels = c("CL+Lys", "CL.noLys"), ordered = TRUE)

                                ### plot dotplot
                                ggplot(marguerat.test, aes(x = type, y = MargueratLog)) +
                                  geom_boxplot() +
                                  theme_bw() +
                                  scale_x_discrete(expand = waiver()) +
                                  geom_dotplot(binaxis = "y", stackdir = "center",
                                               binwidth = 0.1,
                                               dotsize = 0.4,
                                               stackratio = 1.5, color = "blue")
                                rm(marguerat, marguerat.test)
          

                ## generate derived data for the WCE-normalised interactome: fold change, p-values (standard t-test)
                          ### column indices in gene.data.raw.toLys for each comparison: RIC (3J) and WCE (Lys) columns
                          toLys.sets <- list(WT1 = list(RIC = c(4,6,8),    WCE = c(25,26,27)),
                                             WT2 = list(RIC = c(16,17,18), WCE = c(31,32,33)))
                          toLys.sets$WTboth <- list(RIC = c(toLys.sets$WT1$RIC, toLys.sets$WT2$RIC),
                                                    WCE = c(toLys.sets$WT1$WCE, toLys.sets$WT2$WCE))

                          gene.data.WT1diff.toLys <- addRawDataToLys(gene.data.raw.toLys, toLys.sets$WT1$RIC, toLys.sets$WT1$WCE, "WT1")
                          gene.data.WT2diff.toLys <- addRawDataToLys(gene.data.raw.toLys, toLys.sets$WT2$RIC, toLys.sets$WT2$WCE, "WT2")
                          gene.data.WTbothDiff.toLys  <- addRawDataToLys(gene.data.raw.toLys, toLys.sets$WTboth$RIC, toLys.sets$WTboth$WCE, "WTboth")
                          ### keep the WT1 table whole and append columns 4 and 5 of the WT2 and WTboth tables
                          gene.data.WTdiff.toLys <- cbind(gene.data.WT1diff.toLys,
                                                          gene.data.WT2diff.toLys[, 4:5],
                                                          gene.data.WTbothDiff.toLys[, 4:5])
                          gene.data.WTdiff.toLys[gene.data.WTdiff.toLys == "NaN"] <- NA


                        ### add p-values from moderated t-test (eBayes function, limma package)
                        ### each comparison fits a two-group design (RIC columns first, WCE columns second) and
                        ### stores -log10(p) of the group contrast as column p_<set>_3J_Lys
                          gene.data.WTdiff.toLys.limma <- gene.data.WTdiff.toLys
                          for (set.name in names(toLys.sets)) {
                            RIC.cols <- toLys.sets[[set.name]]$RIC
                            WCE.cols <- toLys.sets[[set.name]]$WCE
                            eset <- as.matrix(gene.data.raw.toLys[ , c(RIC.cols, WCE.cols)])
                            design <- cbind(WT = rep(c(1, 0), times = c(length(RIC.cols), length(WCE.cols))),
                                            MU = rep(c(0, 1), times = c(length(RIC.cols), length(WCE.cols))))
                            fit <- lmFit(eset, design)
                            cont.matrix <- makeContrasts(MUvsWT=MU-WT, levels=design)
                            fit2 <- contrasts.fit(fit, cont.matrix)
                            fit.test1 <- eBayes(fit2)
                            gene.data.WTdiff.toLys.limma[[paste0("p_", set.name, "_3J_Lys")]] <- -log10(as.numeric(fit.test1$p.value))
                          }

                          rm(fit.test1, fit, fit2, set.name, RIC.cols, WCE.cols, toLys.sets)
                
              
  
## plotting WCE-normalised interactomes (volcanoes)

  volcano.dat <- gene.data.WTdiff.toLys.limma   ### moderated t-test
  row.names(volcano.dat) <- volcano.dat$ensembl_gene_id
  volcano.dat$categories <- "NA" ### character "NA" on purpose: for subsetting on cytosolic ribosome - syntax doesn't work with NA


                    #### harvest domain ID (Pfam) annotations - from RBD list
                    ### read in RBD annotation file and split the Pfam IDs on their RBD classification
                        Pfam.data <- read.csv("data/pfam.domains.RBD.classification.csv", header = TRUE, stringsAsFactors = FALSE)
                        row.coord <- which(Pfam.data[ ,"RBD.classification"] == "classical")
                        Pfam.classical <- Pfam.data[row.coord , "id"]
                        row.coord <- which(Pfam.data[ ,"RBD.classification"] == "nonclassical")
                        Pfam.nonclassical <- Pfam.data[row.coord , "id"]

                    ### query biomart for the genes annotated with each set of Pfam IDs and flag them in volcano.dat;
                    ### genes without a matching annotation get NA in the respective column
                        mart <- useMart("fungi_mart", host="fungi.ensembl.org", dataset="spombe_eg_gene")
                            gene.data2 <- getBM(attributes = c('ensembl_gene_id', 'uniprotswissprot', 'description', 'pfam'),
                                      filters = "pfam", values = Pfam.classical,
                                      mart = mart)
                        volcano.dat$pfam.classical <- ifelse(volcano.dat$ensembl_gene_id %in% gene.data2[ , 1], "classical", NA_character_)
                            gene.data3 <- getBM(attributes = c('ensembl_gene_id', 'uniprotswissprot', 'description', 'pfam'),
                                      filters = "pfam", values =  Pfam.nonclassical, ###[140] is void
                                      mart = mart)
                        volcano.dat$pfam.nonclassical <- ifelse(volcano.dat$ensembl_gene_id %in% gene.data3[ , 1], "nonclassical", NA_character_)

                    ### clean up: drop the query results plus any left-over helper objects of the same names as before,
                    ### but only those that actually exist (rm() warns on missing objects)
                      stale.objects <- intersect(c("gene.overlap", "gene.data.sorted", "gene.list_to.convert", "gene.list.mask2",
                                                   "gene.list.mask", "gene.data3", "gene.data2", "gene.data4"), ls())
                      rm(list = c(stale.objects, "stale.objects"))
                    ### Pfam.nonclassical and mart are deliberately kept: the Pfam table further down
                    ### ("create table with Pfam.nonclassical data") loops over Pfam.nonclassical and queries mart
                      rm(Pfam.classical, Pfam.data)

                                          ### plotting WCE-normalised volcano with RBD classification
                                            ## volcano.WTboth.toLys
                                            ## Symbols: filled = detected in 3J and lysate, open circle (shape 1) = 3J but not lysate,
                                            ##          cross (shape 4) = not in WTboth.CL.
                                            ## Drawing order: all CL+Lys proteins, then nonclassical, then classical on top;
                                            ## classical proteins outside WTboth.CL are labelled by gene name.
                                            ## xlabel / ylabel are read from the workspace and are not set in this section
                                            ## NOTE(review): confirm they hold the 3J/Lys axis labels when this is run
                                            local({
                                              ## %in% never returns NA, so genes whose pfam.* entry is NA are simply excluded;
                                              ## comparing with == would turn them into all-NA rows in every layer
                                              classical    <- volcano.dat$pfam.classical %in% "classical"
                                              nonclassical <- volcano.dat$pfam.nonclassical %in% "nonclassical"
                                              in.CLandLys  <- volcano.dat$ensembl_gene_id %in% WTboth.CLandLys
                                              in.CLnoLys   <- volcano.dat$ensembl_gene_id %in% WTbothCL.noLys
                                              not.in.CL    <- !(volcano.dat$ensembl_gene_id %in% WTboth.CL)

                                              ggplot(volcano.dat) +
                                                geom_point(data = volcano.dat[in.CLandLys, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, color = "none")) +
                                                geom_point(data = volcano.dat[nonclassical & not.in.CL, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, color = "nonclassical"), shape = 4) +
                                                geom_point(data = volcano.dat[nonclassical & in.CLandLys, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, color = "nonclassical")) +
                                                geom_point(data = volcano.dat[nonclassical & in.CLnoLys, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, color = "nonclassical"), shape = 1) +
                                                theme_bw() + theme(aspect.ratio=0.9) +
                                                xlab(xlabel) + ylab(ylabel) +
                                                scale_x_continuous(breaks = c(-10,0,10), limits = c(-18,18)) +
                                                scale_y_continuous(breaks = c(0, 2, 4, 6,8,10,12), limits = c(0,13)) +
                                                geom_point(data = volcano.dat[classical & not.in.CL, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, color = "classical"), shape = 4) +
                                                geom_point(data = volcano.dat[classical & in.CLandLys, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, color = "classical")) +
                                                geom_point(data = volcano.dat[classical & in.CLnoLys, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, color = "classical"), shape = 1) +
                                                ## breaks given explicitly so that each label is tied to its own colour key
                                                scale_color_manual(name = "RNA-binding domains",
                                                                   values = c("none" = "light grey", "nonclassical" = "blue",  "classical" = "black"),
                                                                   breaks = c("classical", "nonclassical", "none"),
                                                                   labels = c("classical", "nonclassical", "none")) +
                                                labs(title = expression(paste(italic("S. pombe "), "wild-type interactome"))) +
                                                geom_text(data = volcano.dat[classical & not.in.CL, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, label = external_gene_name), color = "black")
                                            })
                                            
                        
                                          ### create table with Pfam.nonclassical data (Figure 6B)
                                          ### needs Pfam.nonclassical (vector of Pfam IDs) and mart (biomart connection) in the workspace
                                              ## harvest annotated genes for each Pfam ID: one biomart query per ID,
                                              ## keeping the first returned column (ensembl_gene_id); the list is sized by the
                                              ## number of Pfam IDs, so it always lines up with Pfam.nonclassical in the cbind below
                                              dummy <- lapply(Pfam.nonclassical, function(pfam.id) {
                                                    getBM(attributes = c('ensembl_gene_id', 'uniprotswissprot', 'description', 'pfam'),
                                                          filters = "pfam", values = pfam.id, ###[140] is void
                                                          mart = mart)[ , 1]
                                              })

                                                       ### count annotated genes for each Pfam ID (as returned by the query)
                                                        dummy.length <- as.list(lengths(dummy))

                                                      ### harvest NEVs (= RIC/WCE values)
                                                      ### looked up gene by gene with match(), so the values of each Pfam ID are in the
                                                      ### order of its unique annotated genes; this is the order that
                                                      ### unique(melt(...)) produces for the gene names below, which keeps
                                                      ### dat$gene aligned with dat$value. Genes missing from the table give NA.
                                                        dummy.NEVs <- lapply(dummy, function(genes) {
                                                            gene.rows <- match(unique(genes), gene.data.WTdiff.toLys.limma$ensembl_gene_id)
                                                            gene.data.WTdiff.toLys.limma[gene.rows, "diff_WTboth_3J_Lys"]
                                                        })
                                                        ### Pfam IDs without any annotated gene get a single NA instead of an empty vector
                                                        dummy.NEVs[lengths(dummy.NEVs) == 0] <- NA

                                                        ### combine data to table (list-matrix: one row per Pfam ID)
                                                        RBD.table <- cbind(Pfam.nonclassical, dummy, dummy.length, dummy.NEVs)
                                                        colnames(RBD.table) <- c("Pfam.nonclassical", "annotated_genes", "annotations", "NEVs")
                                                        row.names(RBD.table) <- Pfam.nonclassical
                                                        ### mean NEV per Pfam ID; NaN when no value is available
                                                        mean.NEV <- vapply(RBD.table[ ,4], function(x) mean(x, na.rm = TRUE), numeric(1))
                                                        RBD.table <- cbind(RBD.table, mean.NEV)

                                                        ### sort to mean NEV
                                                        RBD.table.sorted <- RBD.table[order(unlist(RBD.table[ ,5])), ]

                                                         #### prepare RBD data for boxplot: long format, Pfam IDs without a mean NEV dropped
                                                        has.NEV <- !is.nan(unlist(RBD.table.sorted[ ,5])) #& RBD.table.sorted[ ,3] > 3
                                                        dat <- melt(RBD.table.sorted[has.NEV, 4])
                                                        dat2 <- unique(melt(RBD.table.sorted[has.NEV, 2]))
                                                        dat$gene <- dat2$value
                                                        rm(dat2, has.NEV)
                                                        dat$L1 <- factor(dat$L1, levels = unique(dat$L1))  ## sort boxplot on mean
                                                        ### the next line replaces the mean-based order; Pfam IDs outside the three groups become NA and are dropped
                                                        dat$L1 <- factor(dat$L1, levels = c(PF.sub, PF.classicalLike, PF.elective)) ### group boxplot on categories
                                                        dat <- dat[!(is.na(dat[ ,"L1"])), ]
                                                        
                                                               ### non-classical RBD boxplots
                                                               ### one box per Pfam ID with the individual proteins overlaid:
                                                               ### black = not in GO0022626.genelist, orange = in GO0022626.genelist, grey = in `overlap`;
                                                               ### filled = 3J and lysate, cross (shape 4) = not in WTboth.CL, open circle (shape 1) = 3J but not lysate
                                                                  local({
                                                                      in.GO       <- dat$gene %in% GO0022626.genelist
                                                                      in.CLandLys <- dat$gene %in% WTboth.CLandLys
                                                                      in.CLnoLys  <- dat$gene %in% WTbothCL.noLys
                                                                      not.in.CL   <- !(dat$gene %in% WTboth.CL)

                                                                      ggplot(dat, aes(x=L1, y=value)) +
                                                                        geom_boxplot(outlier.shape = NA) + #geom_point() +
                                                                        geom_point(data = dat[!in.GO & in.CLandLys, ], colour = "black") +
                                                                        geom_point(data = dat[!in.GO & not.in.CL, ], colour = "black", shape = 4) +
                                                                        geom_point(data = dat[!in.GO & in.CLnoLys, ], colour = "black", shape = 1) +
                                                                        geom_point(data = dat[dat$gene %in% overlap, ], colour = "grey") +
                                                                        geom_point(data = dat[in.GO & in.CLandLys, ], colour = "orange") +
                                                                        geom_point(data = dat[in.GO & not.in.CL, ], colour = "orange", shape = 4) +
                                                                        geom_point(data = dat[in.GO & in.CLnoLys, ], colour = "orange", shape = 1) +
                                                                        xlab(label="Pfam ID") + #ylab(label="NEV")  + theme(aspect.ratio=0.435) +
                                                                        theme(axis.text.x = element_text(angle=90, vjust=0.5))
                                                                  })
                            
         
                   
                      ### colour volcano according to rank in toNoCL
                                                    ### assign rank to genes based on toNoCL for comparative visualization to normToLys
                                                    ### rank 1 = smallest value of p_WTboth_3J_noCL_limma (arrange() sorts ascending)
                                                    volcano.dat <- gene.data.alldiff.toNoCL      # comparison 3J to noCL
                                                    row.names(volcano.dat) <- volcano.dat$ensembl_gene_id
                                                    volcano.dat$categories <- "NA"
                                                    volcano.dat$order <- seq_len(nrow(volcano.dat))   # original row position, used to undo the sort below
                                                    volcano.dat.sorted <- arrange(volcano.dat, p_WTboth_3J_noCL_limma)
                                                    volcano.dat.sorted$rank <- seq_len(nrow(volcano.dat.sorted))
                                                        ylabel <- expression("-log"[10]*" (p-value)")
                                                        xlabel <- expression("log"[2]*" (fold change MS intensities (3J/noCL))")
                                                       ### plot RIC/noCL volcano with rank colours
                                                            ggplot(volcano.dat.sorted) +
                                                              geom_point(aes(x = diff_WTboth_3J_noCL, y = p_WTboth_3J_noCL_limma, color = rank)) +
                                                              scale_color_viridis(option = "inferno", limits = c(0,1200)) + theme_bw() + theme(aspect.ratio=0.9) +
                                                              xlab(xlabel) + ylab(ylabel) +
                                                              scale_x_continuous(breaks = c(-10,0,10), limits = c(-15,15)) + scale_y_continuous(breaks = c(0, 2, 4, 6,8,10), limits = c(0,11)) +
                                                              labs(title = expression(paste(italic("S. pombe "), "wild-type interactome")))
                                                      ### attach to normToLys data
                                                          volcano.dat.sorted <- arrange(volcano.dat.sorted, order)
                                                          volcano.dat <- gene.data.WTdiff.toLys.limma
                                                          row.names(volcano.dat) <- volcano.dat$ensembl_gene_id
                                                          volcano.dat$categories <- "NA"
                                                          ### transfer the rank by gene ID rather than by row position, so the two tables
                                                          ### do not have to share row order or row count; genes without a rank get NA
                                                          volcano.dat$rank <- volcano.dat.sorted$rank[match(volcano.dat$ensembl_gene_id, volcano.dat.sorted$ensembl_gene_id)]
                                                          xlabel <- expression("log"[2]*" (fold change MS intensities (3J/Lys))")
                                                        ### plot RIC/WCE volcano with rank colours
                                                        ### NOTE(review): `dummy` is expected to hold the gene names to cross out and label
                                                        ### (marguerat outliers, per the comment on the shape-4 layer). It is not assigned in this
                                                        ### section; the most recent assignment above is the per-Pfam gene list, so set it before running.
                                                              ggplot(volcano.dat) +
                                                                geom_point(data = volcano.dat[volcano.dat$ensembl_gene_id %in% WTbothCL.noLys, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, color = rank), shape = 1) +
                                                                geom_point(data = volcano.dat[volcano.dat$ensembl_gene_id %in% WTboth.CLandLys, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, color = rank)) +
                                                                theme_bw() + theme(aspect.ratio=0.9) +
                                                                geom_point(data = volcano.dat[volcano.dat$external_gene_name %in% dummy, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys), color = "black", shape = 4) + ### for crossing out marguerat.outliers
                                                                geom_text(aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, label=ifelse(volcano.dat$external_gene_name %in% dummy, as.character(volcano.dat$external_gene_name),""), color = rank), hjust=-0.2,vjust=0.6, size=3) +
                                                                xlab(xlabel) + ylab(ylabel) +
                                                                scale_x_continuous(breaks = c(-10,0,10), limits = c(-18,18)) + scale_y_continuous(breaks = c(0, 2, 4, 6,8,10,12), limits = c(0,13)) +
                                                                scale_color_viridis(option = "inferno", limits = c(0,1200)) +
                                                                labs(title = expression(paste("all"))) #+
                                                        rm(volcano.dat.sorted)
                

                
            ### plot WCE-normalised volcano with categories, e.g. GO annotations

                volcano.dat <- gene.data.WTdiff.toLys.limma   ### moderated t-test
                row.names(volcano.dat) <- volcano.dat$ensembl_gene_id
                volcano.dat$categories <- NA

                        ### harvest GO annotations for gene data sets, e.g. GO:0003723 RNA-binding ("go") or with all its descendants ("go_parent_term")
                        ### other terms noted for this query: GO:0005681 spliceosomal complex, ATP-dependent RNA helicase activity (GO:0004004), ATP-dependent DNA helicase activity (GO:0004003)
                        gene.data4 <- getBM(attributes = c('ensembl_gene_id', 'external_gene_name', 'description'),
                                            filters = "go_parent_term", values = "GO:0001732",
                                            mart = mart)
                        ### genes of the data set carrying the annotation (first query column = gene ID) get the GO ID as category
                        gene.overlap <- intersect(as.vector(volcano.dat$ensembl_gene_id), gene.data4[ , 1])
                        volcano.dat[volcano.dat$ensembl_gene_id %in% gene.overlap, "categories"] <- "GO:0001732"
                        rm(gene.data4)

                            ylabel <- expression("-log"[10]*" (p-value)")
                                xlabel <- expression("log"[2]*" (fold change MS intensities (3J/Lys))")
                              ### grey background = all proteins detected in 3J and lysate; annotated proteins in colour on top
                              ### (cross = not in WTboth.CL, filled = 3J and lysate, open circle = 3J but not lysate),
                              ### annotated proteins within WTboth.CL are labelled with their gene name
                              local({
                                  has.category <- !is.na(volcano.dat$categories)
                                  in.CLandLys  <- volcano.dat$ensembl_gene_id %in% WTboth.CLandLys
                                  in.CLnoLys   <- volcano.dat$ensembl_gene_id %in% WTbothCL.noLys
                                  in.CL        <- volcano.dat$ensembl_gene_id %in% WTboth.CL

                                  ggplot(volcano.dat) +
                                    geom_point(data = volcano.dat[in.CLandLys, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys), color = "light grey") +
                                    #geom_point(data = volcano.dat[in.CLnoLys, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys), color = "#666666", shape =1) +
                                    xlab(xlabel) + ylab(ylabel) +
                                    scale_x_continuous(breaks = c(-10,0,10), limits = c(-18,18)) +
                                    scale_y_continuous(breaks = c(0, 2, 4, 6,8,10,12), limits = c(0,13)) +
                                    labs(title = expression(paste(italic("S. pombe "), "WT interactome"))) +
                                    scale_color_manual(values=myPalette3) +
                                    geom_point(data = volcano.dat[has.category & !in.CL, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, color = categories), shape = 4) +
                                    geom_point(data = volcano.dat[has.category & in.CLandLys, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, color = categories)) +
                                    geom_point(data = volcano.dat[has.category & in.CLnoLys, ], aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, color = categories), shape = 1) +
                                    geom_text(aes(x = diff_WTboth_3J_Lys, y = p_WTboth_3J_Lys, label=ifelse(!(is.na(volcano.dat$categories)) & volcano.dat$ensembl_gene_id %in% WTboth.CL , as.character(volcano.dat$external_gene_name),"")), hjust=-0.2,vjust=0.6, size=3) +
                                    theme_bw() + theme(aspect.ratio=0.9)
                              })
  
 #### SD plots
    volcano.dat <- gene.data.WTdiff.toLys.limma   ### moderated t-test
    row.names(volcano.dat) <- volcano.dat$ensembl_gene_id
    volcano.dat$categories <- NA #"NA"
                  ### add summary stats, gene.data.stats is generated below
                  ### columns are bound by row position and then renamed by position
                  ### NOTE(review): assumes all four source tables share the same row order - confirm
                  volcano.dat <- cbind(volcano.dat, gene.data.stats[ ,c("mean_WTboth_Lys", "SD_WTboth_Lys", "mean_WTboth_3J", "SD_WTboth_3J")], gene.data.propStats.varNorm[ ,"SD_WTboth_3JtoLys"], gene.data.alldiff.toNoCL[ , "diff_WTboth_3J_noCL"])
                  colnames(volcano.dat) <- c(colnames(gene.data.WTdiff.toLys.limma),"categories",  "mean_WTboth_Lys", "SD_WTboth_Lys", "mean_WTboth_3J", "SD_WTboth_3J", "SD_WTboth_3JtoLys", "diff_WTboth_3J_noCL")

            ### SD to RIC/WCE
            ### y axis shows the negated SD_WTboth_3JtoLys; grey background = proteins detected in 3J and lysate,
            ### categorised proteins are drawn in colour on top and labelled when they are in WTboth.CL
                ylabel <- expression("stdev")
                xlabel <- expression("log"[2]*" (fold change MS intensities (3J/Lys))")
                local({
                    ## a protein has a category when the entry is neither a real NA nor the string "NA";
                    ## %in% never returns NA, so uncategorised proteins are excluded instead of
                    ## becoming all-NA rows / NA labels (as they did with categories != "NA")
                    has.category <- !(volcano.dat$categories %in% c(NA, "NA"))
                    in.CLandLys  <- volcano.dat$ensembl_gene_id %in% WTboth.CLandLys
                    in.CLnoLys   <- volcano.dat$ensembl_gene_id %in% WTbothCL.noLys
                    in.CL        <- volcano.dat$ensembl_gene_id %in% WTboth.CL
                    gene.label   <- ifelse(has.category & in.CL, as.character(volcano.dat$external_gene_name), "")

                    ggplot(volcano.dat) +
                      geom_point(data = volcano.dat[in.CLandLys, ], aes(x = diff_WTboth_3J_Lys, y = -SD_WTboth_3JtoLys), color = "light grey") +
                      xlab(xlabel) + ylab(ylabel) + scale_x_continuous(breaks = c(-8,-4,0,4,8), limits = c(-12,12)) + #scale_y_continuous(breaks = c(0, 2, 4, 6,8,10,12), limits = c(0,13)) +
                      labs(title = expression("SD plot")) +
                      scale_color_manual(values=myPalette3) +
                      geom_point(data = volcano.dat[has.category & !in.CL, ], aes(x = diff_WTboth_3J_Lys, y = -SD_WTboth_3JtoLys, color = categories), shape = 4) +
                      geom_point(data = volcano.dat[has.category & in.CLandLys, ], aes(x = diff_WTboth_3J_Lys, y = -SD_WTboth_3JtoLys, color = categories)) +
                      geom_point(data = volcano.dat[has.category & in.CLnoLys, ], aes(x = diff_WTboth_3J_Lys, y = -SD_WTboth_3JtoLys, color = categories), shape = 1) +
                      geom_text(aes(x = diff_WTboth_3J_Lys, y = -SD_WTboth_3JtoLys, color = categories, label = gene.label), hjust=-0.2,vjust=0.6, size=3) +
                      theme_bw() + theme(aspect.ratio=0.9)
                })
                  

  
    ## Generate WCE-normalised mutant interactomes
            gene.data.raw.toLys.mut <- gene.data.raw.red

                      ## mutant columns, written in the six groups of three used for the normalisation below
                          col.coord <- c(10,12,14, 28,29,30, 19,20,21, 34,35,36, 22,23,24, 37,38,39)

                      ## visualize data spread before normalization
                          ggplot(data = melt(gene.data.raw.toLys.mut[ , col.coord], na.rm = TRUE), aes(x=variable, y=value)) +
                            geom_boxplot() +
                            xlab(label="") + ylab(label="MS intensities") +
                            theme(axis.text.x = element_text(angle=60, vjust=0.5))

                ## normalise all medians to zero: subtract from every column its own median (NAs ignored)
                ### this derives from normalisation to mean median and could be simplified
                          col.coord.mat <- matrix(col.coord, nrow = 6, byrow = TRUE)   # one row per group of three columns
                          rm(col.coord)
                          gene.data.test <- gene.data.raw.red
                          for (j in seq_len(nrow(col.coord.mat))) {
                            col.coord.single <- col.coord.mat[j, ]
                            mat <- as.matrix(gene.data.raw.red[ , col.coord.single])
                            gene.data.test[ , col.coord.single] <- sweep(mat, 2, apply(mat, 2, median, na.rm = TRUE))
                          }
                          gene.data.raw.toLys.mut <- gene.data.test
                
                ## Data imputation for WCE where 3J signal exists and for 3J where WCE signal exists
                   ## For this, find mode for bottom end values (< -5 for Lys), then use value to replace NAs in Lys samples
                   ## in rows where either 3J or Lys returned at least one value, as for the wild-type

                        ## for mtl1
                                col.coord <- c(10,12,14,28,29,30)
                                dummy <- gene.data.raw.toLys.mut[ ,  col.coord]
                                ## rows with at least one measured value among the six mtl1 columns
                                row.coord <- which(rowSums(!is.na(dummy)) > 0)
                                ## find lower mode of bimodal distribution (= Shabaz' imputed value) and replace NAs where opportune;
                                ## findModeAndAddValue() is applied to one column after the other, in the order of col.coord
                                gene.data.raw.toLys.mut <- Reduce(function(tab, col) findModeAndAddValue(tab, col, -5),
                                                                  col.coord, gene.data.raw.toLys.mut)

                                ### coordinates and list of proteins that were detected in mtl1 RIC or WCE:
                                ### rows whose three-column sum differs from the sum of the findMode() values of those columns
                                mode.sum.CL <- findMode(gene.data.raw.toLys.mut, 10, -5) + findMode(gene.data.raw.toLys.mut, 12, -5) + findMode(gene.data.raw.toLys.mut, 14, -5)
                                stringent.row.coord.mtl1 <- which(rowSums(gene.data.raw.toLys.mut[ , c(10,12,14)]) != mode.sum.CL)
                                mtl1.CL <- gene.data.raw.toLys.mut[stringent.row.coord.mtl1, "ensembl_gene_id"]
                                mode.sum.Lys <- findMode(gene.data.raw.toLys.mut, 28, -5) + findMode(gene.data.raw.toLys.mut, 29, -5) + findMode(gene.data.raw.toLys.mut, 30, -5)
                                stringent.row.coord.mtl1.Lys <- which(rowSums(gene.data.raw.toLys.mut[ , 28:30]) != mode.sum.Lys)
                                mtl1.Lys <- gene.data.raw.toLys.mut[stringent.row.coord.mtl1.Lys, "ensembl_gene_id"]
                                rm(mode.sum.CL, mode.sum.Lys)
                
                       ### for rrp6
                                col.coord <- c(19,20,21,34,35,36)
                                dummy <- gene.data.raw.toLys.mut[ ,  col.coord]
                                ## rows with at least one measured value among the six rrp6 columns
                                row.coord <- which(rowSums(!is.na(dummy)) > 0)
                                ## find lower mode of bimodal distribution (= Shabaz' imputed value) and replace NAs where opportune;
                                ## findModeAndAddValue() is applied to one column after the other, in the order of col.coord
                                gene.data.raw.toLys.mut <- Reduce(function(tab, col) findModeAndAddValue(tab, col, -5),
                                                                  col.coord, gene.data.raw.toLys.mut)

                                ### coordinates and list of proteins that were detected in rrp6 RIC or WCE:
                                ### rows whose three-column sum differs from the sum of the findMode() values of those columns
                                mode.sum.CL <- findMode(gene.data.raw.toLys.mut, 19, -5) + findMode(gene.data.raw.toLys.mut, 20, -5) + findMode(gene.data.raw.toLys.mut, 21, -5)
                                stringent.row.coord.rrp6 <- which(rowSums(gene.data.raw.toLys.mut[ , 19:21]) != mode.sum.CL)
                                rrp6.CL <- gene.data.raw.toLys.mut[stringent.row.coord.rrp6, "ensembl_gene_id"]
                                #write.csv(dummy, file = "genelist_allCrosslinked_rrp6.csv", row.names = FALSE)
                                mode.sum.Lys <- findMode(gene.data.raw.toLys.mut, 34, -5) + findMode(gene.data.raw.toLys.mut, 35, -5) + findMode(gene.data.raw.toLys.mut, 36, -5)
                                stringent.row.coord.rrp6.Lys <- which(rowSums(gene.data.raw.toLys.mut[ , 34:36]) != mode.sum.Lys)
                                rrp6.Lys <- gene.data.raw.toLys.mut[stringent.row.coord.rrp6.Lys, "ensembl_gene_id"]
                                rm(mode.sum.CL, mode.sum.Lys)
                                
                      ### for dis3
                                ## dis3: the three "3J" columns first, then the three matching "Lys" columns
                                col.coord <- c(22, 23, 24, 37, 38, 39)
                                dummy <- gene.data.raw.toLys.mut[, col.coord]
                                ## rows with at least one non-NA value among the six columns
                                row.coord <- which(rowSums(is.na(dummy)) < 6)
                                ## find lower mode of bimodal distribution (= Shabaz' imputed value) and replace NAs where opportune,
                                ## one column at a time, in the order given by col.coord
                                ## NOTE(review): row.coord is not used again in this section; presumably findModeAndAddValue() reads it -- confirm
                                for (impute.col in col.coord) {
                                  gene.data.raw.toLys.mut <- findModeAndAddValue(gene.data.raw.toLys.mut, impute.col, -5)
                                }
                                rm(impute.col)

                                ### dis3 "3J" columns: rows whose summed values differ from the summed findMode() values of the
                                ### same columns (presumably rows with at least one real measurement), and their gene ids.
                                ### The three findMode() values are added left to right because the result is compared with `!=`.
                                stringent.row.coord.dis3 <- which(
                                  rowSums(gene.data.raw.toLys.mut[, c(22, 23, 24)]) !=
                                    Reduce(`+`, lapply(c(22, 23, 24), function(k) findMode(gene.data.raw.toLys.mut, k, -5)))
                                )
                                dis3.CL <- gene.data.raw.toLys.mut[stringent.row.coord.dis3, "ensembl_gene_id"]

                                ### same selection for the dis3 "Lys" columns
                                stringent.row.coord.dis3.Lys <- which(
                                  rowSums(gene.data.raw.toLys.mut[, c(37, 38, 39)]) !=
                                    Reduce(`+`, lapply(c(37, 38, 39), function(k) findMode(gene.data.raw.toLys.mut, k, -5)))
                                )
                                dis3.Lys <- gene.data.raw.toLys.mut[stringent.row.coord.dis3.Lys, "ensembl_gene_id"]
                
                            ## generate derived data (fold change, p-values) for each mutant: "3J" columns against "Lys" columns
                              mut.toLys <- list(
                                addRawDataToLys(gene.data.raw.toLys.mut, c(10, 12, 14), c(28, 29, 30), "mtl1"),
                                addRawDataToLys(gene.data.raw.toLys.mut, c(19, 20, 21), c(34, 35, 36), "rrp6"),
                                addRawDataToLys(gene.data.raw.toLys.mut, c(22, 23, 24), c(37, 38, 39), "dis3")
                              )
                              ## bind to one dataset: the WT table followed by columns 4 and 5 of each mutant table
                              ## (mtl1, rrp6, dis3, in that order)
                              gene.data.all.toLys <- do.call(
                                cbind,
                                c(list(gene.data.WTdiff.toLys), lapply(mut.toLys, function(tbl) tbl[, c(4, 5)]))
                              )
                              rm(mut.toLys)
                              
                              
                              ### add p-values from moderated t-test (eBayes function, limma package)
                              gene.data.all.toLys.limma <- gene.data.all.toLys
                              colnames(gene.data.raw.toLys.mut)

                              ## per mutant: the three "3J" columns followed by the three "Lys" columns
                              limma.cols <- list(
                                mtl1 = c(10, 12, 14, 28, 29, 30),
                                rrp6 = c(19, 20, 21, 34, 35, 36),
                                dis3 = c(22, 23, 24, 37, 38, 39)
                              )
                              ## the first three columns of each expression matrix form group "WT", the last three group "MU";
                              ## the contrast tested is MU - WT
                              design <- cbind(WT = rep(c(1, 0), each = 3), MU = rep(c(0, 1), each = 3))
                              cont.matrix <- makeContrasts(MUvsWT = MU - WT, levels = design)

                              for (mutant in names(limma.cols)) {
                                eset <- as.matrix(gene.data.raw.toLys.mut[, limma.cols[[mutant]]])
                                fit <- lmFit(eset, design)
                                fit2 <- contrasts.fit(fit, cont.matrix)
                                fit.test1 <- eBayes(fit2)
                                ## stored as -log10(p), in columns p_mtl1_3J_Lys, p_rrp6_3J_Lys, p_dis3_3J_Lys
                                pval1 <- -log10(as.numeric(fit.test1$p.value))
                                gene.data.all.toLys.limma[[paste0("p_", mutant, "_3J_Lys")]] <- pval1
                              }
                              rm(eset, design, fit, cont.matrix, fit2, fit.test1, pval1)
                              rm(limma.cols, mutant)

                              ### transfer moderated t-test values for WT
                              gene.data.all.toLys.limma[, c("diff_WTboth_3J_Lys", "p_WTboth_3J_Lys")] <-
                                gene.data.WTdiff.toLys.limma[, c("diff_WTboth_3J_Lys", "p_WTboth_3J_Lys")]
                              gene.data.all.toLys.limma$p_WT1_3J_Lys <- gene.data.WTdiff.toLys.limma$p_WT1_3J_Lys
                              gene.data.all.toLys.limma$p_WT2_3J_Lys <- gene.data.WTdiff.toLys.limma$p_WT2_3J_Lys
                                          
  
#### Comparative interactomes                
                       
        #### for Lys-normalised comparative interactomes, generate means and standard devs of the "3J" and "Lys"
        #### replicate columns to feed into the error propagation below
                ## one entry per column group: source table, replicate columns, label handed to addMeanAndSD().
                ## The order fixes the column positions of gene.data.stats that later steps address by number.
                mean.sd.specs <- list(
                  list(gene.data.raw.toLys,     c(4, 6, 8),                "WT1_3J"),
                  list(gene.data.raw.toLys,     c(25, 26, 27),             "WT1_Lys"),
                  list(gene.data.raw.toLys,     c(16, 17, 18),             "WT2_3J"),
                  list(gene.data.raw.toLys,     c(31, 32, 33),             "WT2_Lys"),
                  list(gene.data.raw.toLys.mut, c(10, 12, 14),             "mtl1_3J"),
                  list(gene.data.raw.toLys.mut, c(28, 29, 30),             "mtl1_Lys"),
                  list(gene.data.raw.toLys.mut, c(19, 20, 21),             "rrp6_3J"),
                  list(gene.data.raw.toLys.mut, c(34, 35, 36),             "rrp6_Lys"),
                  list(gene.data.raw.toLys.mut, c(22, 23, 24),             "dis3_3J"),
                  list(gene.data.raw.toLys.mut, c(37, 38, 39),             "dis3_Lys"),
                  list(gene.data.raw.toLys,     c(4, 6, 8, 16, 17, 18),    "WTboth_3J"),
                  list(gene.data.raw.toLys,     c(25, 26, 27, 31, 32, 33), "WTboth_Lys")
                )
                ## start from the gene annotation table and append columns 4 and 5 of every addMeanAndSD() result
                gene.data.extended <- gene.data
                for (spec in mean.sd.specs) {
                  gene.data.extended <- cbind(
                    gene.data.extended,
                    addMeanAndSD(spec[[1]], spec[[2]], spec[[3]])[, c(4, 5)]
                  )
                }
                rm(mean.sd.specs, spec)
                ## turn NaN entries into NA
                gene.data.extended[gene.data.extended == "NaN"] <- NA
                gene.data.stats <- gene.data.extended
                

                #### propagate error for the 3J/Lys ratio of every sample group
                  ## gene.data.stats is consumed in consecutive groups of four columns, starting at column 4:
                  ## the first pair is passed as 2nd argument, the second pair as 3rd argument of addPropagatedError()
                  ## (given how gene.data.stats is assembled these are presumably mean/sd of "3J" and mean/sd of "Lys" -- confirm)
                  ## the first result is kept whole, from every further result only columns 4 and 5 are appended
                  gene.data.extended <- addPropagatedError(gene.data.stats, c(4, 5), c(6, 7), "WT1")
                  prop.labels <- c("WT2", "mtl1", "rrp6", "dis3", "WTboth")
                  for (prop.k in seq_along(prop.labels)) {
                    first.col <- 4 + 4 * prop.k   # 8, 12, 16, 20, 24
                    gene.data.extended <- cbind(
                      gene.data.extended,
                      addPropagatedError(
                        gene.data.stats,
                        c(first.col, first.col + 1),
                        c(first.col + 2, first.col + 3),
                        prop.labels[prop.k]
                      )[, c(4, 5)]
                    )
                  }
                  rm(prop.labels, prop.k, first.col)
                  gene.data.propStats <- gene.data.extended
                rm(gene.data.extended)
                
                
        ### Lys-normalized comparative interactomes
                
                ### visualize the distribution of the mean_WTboth_3JtoLys column
                ## the column is referenced by its bare name: inside aes() ggplot2 looks names up in the
                ## data passed to ggplot(), and it discourages `data$column` there
                ggplot(gene.data.propStats, aes(x = mean_WTboth_3JtoLys)) +
                  geom_density() +
                  xlim(-15, 15)
                
                
                ### first, optimise normalisation shift value to minimize global variance
                ## !!with this dataset, this is equivalent to normalisation to the arithmetic mean!!

                            ## Objective function for optimize(): summed squared difference between the mutant
                            ## column shifted by `x` and the wild-type column.
                            ##
                            ## Reads four variables from the calling workspace, which must be set before each call:
                            ##   gene.data.test  data frame holding both columns
                            ##   row.coord       rows included in the sum
                            ##   col.coord.mut   column of the mutant values
                            ##   col.coord.wt    column of the wild-type values
                            ##
                            ## @param x  shift subtracted from the mutant values
                            ## @return   sum over row.coord of (mutant - x - wt)^2; rows where either value is NA
                            ##           contribute nothing, and an empty row.coord gives 0
                            global.var <- function(x) {
                              ## whole-column arithmetic instead of indexing the data frame once per row:
                              ## optimize() evaluates this function many times
                              shifted.diff <- gene.data.test[row.coord, col.coord.mut] - x -
                                gene.data.test[row.coord, col.coord.wt]
                              sum(shifted.diff^2, na.rm = TRUE)
                            }
                            
                            ### shift each mutant mean column so that the summed squared difference to its
                            ### wild-type column (global.var) is minimal over the rows detected in that mutant

                            ## NOTE(review): gene.data.test was not assigned between the construction of
                            ## gene.data.propStats and this point, so the normalisation ran on whatever an earlier
                            ## section left behind. It is initialised here from gene.data.propStats, because the
                            ## columns addressed below (positions 4-12, names mean_*_3JtoLys) are the ones built
                            ## there -- confirm this is the intended input.
                            gene.data.test <- gene.data.propStats

                            ## rows      : rows entering the objective function
                            ## mut / wt  : column positions read by global.var (per the original notes these are
                            ##             the mean_<name>_3JtoLys columns of the mutant and of its wild type)
                            ## col       : name of the mutant column that is shifted
                            norm.specs <- list(
                              list(rows = stringent.row.coord.mtl1, mut = 8,  wt = 4, col = "mean_mtl1_3JtoLys"),  # mtl1 to WT1
                              list(rows = stringent.row.coord.rrp6, mut = 10, wt = 6, col = "mean_rrp6_3JtoLys"),  # rrp6 to WT2
                              list(rows = stringent.row.coord.dis3, mut = 12, wt = 6, col = "mean_dis3_3JtoLys")   # dis3 to WT2
                            )
                            shift.interval <- c(-2.5, 2.5)

                            for (spec in norm.specs) {
                              ## global.var reads these three variables from the workspace
                              row.coord <- spec$rows
                              col.coord.mut <- spec$mut
                              col.coord.wt <- spec$wt
                              dummy <- optimize(global.var, interval = shift.interval, maximum = FALSE)
                              message(spec$col, ": shift = ", signif(dummy$minimum, 6))
                              ## check whether the minimum is on the interval border; if yes repeat with a larger interval
                              if (min(abs(dummy$minimum - shift.interval)) < 1e-3) {
                                warning("optimum for ", spec$col,
                                        " lies on the border of the search interval; repeat with a larger interval",
                                        call. = FALSE)
                              }
                              gene.data.test[[spec$col]] <- gene.data.test[[spec$col]] - dummy$minimum
                            }
                            rm(norm.specs, shift.interval, spec)

                            gene.data.propStats.varNorm <- gene.data.test
                

          ### generate p-values and differences for comparative interactomes
                ## For each mutant/wild-type pair this appends two columns to a copy of gene.data:
                ##   diff_3J_WT_<mutant> : mean(mutant) - mean(wild type)
                ##   p_3J_WT_<mutant>    : -log10 of the two-sided p-value of a pooled-variance two-sample
                ##                         t-test computed from the means and standard deviations (3 replicates each)
                ##
                ## The original had to be re-run by hand once per comparison: stat.coord and the column names
                ## were each overwritten three times, so a top-to-bottom run produced the dis3 columns only.
                ## The per-row tsum.test(var.equal = TRUE) call is replaced by the same test written out for
                ## whole columns; rows with missing means or sds get NA.
                ## NOTE(review): the limma step further down starts from `gene.data.alldiff.normToLys`, which is
                ## not assigned in this part of the script -- presumably it is meant to be the gene.data.extended
                ## built here; confirm.
                gene.data.extended <- gene.data

                ## column positions in gene.data.test: c(mean wt, mean mut, sd wt, sd mut)
                stat.coord.sets <- list(
                  mtl1 = c(4, 8, 5, 9),    # mtl1-1 to WT1
                  rrp6 = c(6, 10, 7, 11),  # rrp6 to WT2
                  dis3 = c(6, 12, 7, 13)   # dis3 to WT2
                )
                n.rep <- 3  # replicates per group (n.x = n.y = 3 in the original call)

                for (mutant in names(stat.coord.sets)) {
                  stat.coord <- stat.coord.sets[[mutant]]
                  mean1 <- gene.data.test[[stat.coord[1]]]  # wt
                  mean2 <- gene.data.test[[stat.coord[2]]]  # mut
                  sd1 <- gene.data.test[[stat.coord[3]]]    # wt
                  sd2 <- gene.data.test[[stat.coord[4]]]    # mut

                  ## pooled-variance t statistic with 2 * n.rep - 2 degrees of freedom
                  pooled.sd <- sqrt(((n.rep - 1) * sd1^2 + (n.rep - 1) * sd2^2) / (2 * n.rep - 2))
                  t.stat <- (mean1 - mean2) / (pooled.sd * sqrt(2 / n.rep))
                  p.value <- -log10(2 * pt(abs(t.stat), df = 2 * n.rep - 2, lower.tail = FALSE))

                  dat <- data.frame(mean2 - mean1, p.value)
                  colnames(dat) <- paste0(c("diff_3J_WT_", "p_3J_WT_"), mutant)
                  gene.data.extended <- cbind(gene.data.extended, dat)
                }
             rm(mean1, mean2, sd1, sd2, pooled.sd, t.stat, p.value, dat, stat.coord, stat.coord.sets, n.rep, mutant)
                
                  
             ### simulate data with exact statistics from error propagation to feed Lys-normalised comparative
             ###  interactome data into limma to generate p-values via moderated t-test (eBayes function)

                ### data set: mutant 3JtoLys means normalised to the corresponding wt by minimising global variance
                ### over the rows detected in the mutant
                gene.data.test <- gene.data.propStats.varNorm

                ## For every sample group, draw three values per row with mvrnorm(empirical = TRUE), taking the
                ## mean from the group's mean column and the variance as the square of the column right after it.
                ## Rows for which mvrnorm() fails (e.g. missing mean or sd) become a row of NA.
                ## The five identical copy-pasted loops of the original are merged into one; groups are processed
                ## in the original order (wt1, mtl1, wt2, rrp6, dis3).
                sim.mean.cols <- c(wt1 = 4, mtl1 = 8, wt2 = 6, rrp6 = 10, dis3 = 12)
                sim.dat <- list()
                for (sim.name in names(sim.mean.cols)) {
                  col.coord.mean <- sim.mean.cols[[sim.name]]
                  col.coord.sd <- col.coord.mean + 1
                  sim <- data.frame(NA, NA, NA)
                  ## seq_len() instead of 1:length(row.names()): no iteration at all for an empty table
                  for (i in seq_len(nrow(gene.data.test))) {
                    rnorm.values <- tryCatch(
                      mvrnorm(n = 3, gene.data.test[i, col.coord.mean], (gene.data.test[i, col.coord.sd])^2, empirical = TRUE),
                      error = function(err) NA
                    )
                    sim[i, ] <- data.frame(t(rnorm.values))
                  }
                  sim.dat[[sim.name]] <- sim
                }

                dat.wt1 <- sim.dat[["wt1"]]
                dat.mtl1 <- sim.dat[["mtl1"]]
                dat.wt2 <- sim.dat[["wt2"]]
                dat.rrp6 <- sim.dat[["rrp6"]]
                dat.dis3 <- sim.dat[["dis3"]]
                      rm(rnorm.values, col.coord.mean, col.coord.sd)
                      rm(sim.mean.cols, sim.dat, sim.name, sim)
                             
                             
            ### use limma package for moderated t-test from simulated Lys-normalised data
             ### then add p-values to alldiff dataset
                 gene.data.alldiff.normToLys.limma <- gene.data.alldiff.normToLys

                 ## per comparison: simulated wild-type replicates first, simulated mutant replicates second
                 sim.pairs <- list(
                   mtl1 = list(dat.wt1, dat.mtl1),  # mtl1_toLys to WT1_toLys
                   rrp6 = list(dat.wt2, dat.rrp6),  # rrp6_toLys to WT2_toLys
                   dis3 = list(dat.wt2, dat.dis3)   # dis3_toLys to WT2_toLys
                 )
                 ## columns 1-3 of every expression matrix are group "WT", columns 4-6 group "MU"; contrast MU - WT
                 design <- cbind(WT = rep(c(1, 0), each = 3), MU = rep(c(0, 1), each = 3))
                 cont.matrix <- makeContrasts(MUvsWT = MU - WT, levels = design)

                 for (mutant in names(sim.pairs)) {
                   eset <- as.matrix(cbind(sim.pairs[[mutant]][[1]], sim.pairs[[mutant]][[2]]))
                   fit <- lmFit(eset, design)
                   fit2 <- contrasts.fit(fit, cont.matrix)
                   fit.test1 <- eBayes(fit2)
                   ## stored as -log10(p) in p_3J_WT_mtl1, p_3J_WT_rrp6, p_3J_WT_dis3
                   pval1 <- -log10(as.numeric(fit.test1$p.value))
                   gene.data.alldiff.normToLys.limma[[paste0("p_3J_WT_", mutant)]] <- pval1
                 }
                    rm(eset, design, fit, cont.matrix, fit2, fit.test1, pval1)
                    rm(dat.wt1, dat.mtl1, dat.wt2, dat.rrp6, dat.dis3)
                    rm(sim.pairs, mutant)
                       
             
                     
    ### plotting the comparative volcanoes
                volcano.dat <- gene.data.alldiff.normToLys.limma
                row.names(volcano.dat) <- volcano.dat$ensembl_gene_id
                volcano.dat$categories <- NA

                ## Category labels, applied strictly in this order: a gene listed more than once ends up
                ## with the label of its LAST entry (e.g. mtl1: "Ski2-like helicase" -> "MTREC").
                category.sets <- list(
                  ### exosome
                  list("exosome core",       c("ski6", "rrp42", "rrp43", "rrp45", "rrp46", "mtr3")),
                  list("Dis3",               c("dis3")),
                  list("Rrp6",               c("rrp6")),
                  list("exosome cap",        c("csl4", "rrp4", "rrp40", "mpp6", "cti1")),
                  list("CBC-Ars",            c("cbc1", "cbc2", "pir2")),
                  list("Ski2-like helicase", c("mtl1", "mtr4")),

                  list("MTREC",              c("mmi1", "iss10", "pab2", "mtl1", "red1", "rmn1", "red5", "pla1")),
                  list("TRAMP",              c("cid14", "mtr4", "air1")),

                  ## Tamas Fischer notation
                  list("CBC-Ars",            c("cbc1", "cbc2", "pir2")),
                  list("MTREC",              c("mtl1", "red1")),
                  list("Mmi1-Iss10 module",  c("mmi1", "iss10")),
                  list("Pab2 module",        c("pab2", "red5", "rmn1")),

                  ### CPF
                  list("nuclease module",    c("cft2", "mpe1", "ysh1", "SPBC1734.10c")),   # SPBC1734.10c === ipa1
                  list("phosphatase",        c("dis2", "swd22", "ssu72", "pta1", "ppn1")),
                  list("polyA polymerase",   c("iss1", "cft1", "yth1", "pfs2", "pla1")),
                  list("CF1A",               c("rna15", "rna14", "pcf11", "SPAC22H10.05c", "ctf1")),   ### SPAC22H10.05c = clp1
                  list("CF1B",               c("msi2")),
                  list("Seb1",               c("seb1"))
                )
                for (category.set in category.sets) {
                  volcano.dat[volcano.dat$external_gene_name %in% category.set[[2]], "categories"] <- category.set[[1]]
                }
                rm(category.sets, category.set)
                   
        
                ## colours for the category labels
                ## NOTE(review): only three colours are supplied while more than three category labels are assigned
                ## to volcano.dat; confirm scale_color_manual() accepts this for the categories actually plotted
                myPalette3 <- c("#666666", "black", "orange")

                ylabel <- expression("-log"[10]*" (p-value)")

              ###volcano.mtl.toWT  - mtl1-1 comparative volcano
                xlabel <- expression(paste("log"[2]*" (fold change norm. MS int. (", italic("mtl1-1"), "/WT))"))
                ## row sets: rows carrying a category label, and rows in the WT1 set but not in the mtl1 set
                rows.with.category <- which(!is.na(volcano.dat$categories))
                rows.WT1.not.mtl1 <- setdiff(stringent.row.coord.WT1, stringent.row.coord.mtl1)
                aes.mtl1 <- aes(x = diff_3J_WT_mtl1, y = p_3J_WT_mtl1)
                ggplot(volcano.dat) +
                  ## background points: mtl1 rows, then rows in the WT1 set only
                  geom_point(mapping = aes.mtl1, data = volcano.dat[stringent.row.coord.mtl1, ], color = "light grey") +
                  geom_point(mapping = aes.mtl1, data = volcano.dat[rows.WT1.not.mtl1, ], color = "light grey") +
                  ## labelled categories: filled for mtl1 rows, open circles for rows in the WT1 set only
                  geom_point(
                    data = volcano.dat[intersect(stringent.row.coord.mtl1, rows.with.category), ],
                    aes(x = diff_3J_WT_mtl1, y = p_3J_WT_mtl1, color = categories)
                  ) +
                  geom_point(
                    data = volcano.dat[intersect(rows.WT1.not.mtl1, rows.with.category), ],
                    aes(x = diff_3J_WT_mtl1, y = p_3J_WT_mtl1, color = categories),
                    shape = 1
                  ) +
                  ## gene names for all mtl1 rows
                  geom_text(
                    aes(
                      x = diff_3J_WT_mtl1, y = p_3J_WT_mtl1,
                      label = ifelse(external_gene_name %in% volcano.dat[stringent.row.coord.mtl1, "external_gene_name"], as.character(external_gene_name), ""),
                      colour = categories
                    ),
                    hjust = -0.3, vjust = 0.2, size = 3
                  ) +
                  scale_color_manual(values = myPalette3) +
                  ## mtl1 itself: black open circle plus black label
                  geom_point(mapping = aes.mtl1, data = volcano.dat[volcano.dat$external_gene_name == "mtl1", ], color = "black", shape = 1) +
                  geom_text(
                    aes(
                      x = diff_3J_WT_mtl1, y = p_3J_WT_mtl1,
                      label = ifelse(external_gene_name %in% c("mtl1"), as.character(external_gene_name), "")
                    ),
                    colour = "black", hjust = -0.3, vjust = 0.2, size = 3
                  ) +
                  theme_bw() +
                  scale_x_continuous(breaks = c(-10, -5, 0, 5, 10), limits = c(-10, 10)) +
                  scale_y_continuous(breaks = c(0, 1, 2, 3, 4, 5), limits = c(0, 5)) +
                  theme(aspect.ratio = 0.9) + #theme(legend.position="none") +
                  labs(title = expression(paste(italic("mtl1-1"), " comparative interactome"))) +
                  xlab(xlabel) +
                  ylab(ylabel)
                
               
                ## volcano.rrp6.3J - rrp6D comparative volcano
                xlabel <- expression(paste("log"[2]*" (fold change norm. MS int. (", italic("rrp6D"), "/WT))"))
                ylabel <- expression("-log"[10]*" (p-value)")
                ## row sets: rows carrying a category label, and rows in the WT2 set but not in the rrp6 set
                rows.with.category <- which(!is.na(volcano.dat$categories))
                rows.WT2.not.rrp6 <- setdiff(stringent.row.coord.WT2, stringent.row.coord.rrp6)
                aes.rrp6 <- aes(x = diff_3J_WT_rrp6, y = p_3J_WT_rrp6)
                ggplot(volcano.dat) +
                  ## background points: rrp6 rows, then rows in the WT2 set only (shape 10)
                  geom_point(mapping = aes.rrp6, data = volcano.dat[stringent.row.coord.rrp6, ], color = "light grey") +
                  geom_point(mapping = aes.rrp6, data = volcano.dat[rows.WT2.not.rrp6, ], color = "grey", shape = 10) +
                  ## labelled categories: filled for rrp6 rows, open circles for rows in the WT2 set only
                  geom_point(
                    data = volcano.dat[intersect(stringent.row.coord.rrp6, rows.with.category), ],
                    aes(x = diff_3J_WT_rrp6, y = p_3J_WT_rrp6, color = categories)
                  ) +
                  geom_point(
                    data = volcano.dat[intersect(rows.WT2.not.rrp6, rows.with.category), ],
                    aes(x = diff_3J_WT_rrp6, y = p_3J_WT_rrp6, color = categories),
                    shape = 1
                  ) +
                  scale_color_manual(values = myPalette3) +
                  ## gene names for all rows of the rrp6 set and of the WT2 set
                  geom_text(
                    aes(
                      x = diff_3J_WT_rrp6, y = p_3J_WT_rrp6,
                      label = ifelse(external_gene_name %in% volcano.dat[c(stringent.row.coord.rrp6, stringent.row.coord.WT2), "external_gene_name"], as.character(external_gene_name), ""),
                      colour = categories
                    ),
                    hjust = -0.3, vjust = 0.2, size = 3
                  ) +
                  ## rrp6 itself: black open circle plus black label
                  geom_point(mapping = aes.rrp6, data = volcano.dat[volcano.dat$external_gene_name == "rrp6", ], color = "black", shape = 1) +
                  geom_text(
                    aes(
                      x = diff_3J_WT_rrp6, y = p_3J_WT_rrp6,
                      label = ifelse(external_gene_name %in% c("rrp6"), as.character(external_gene_name), "")
                    ),
                    colour = "black", hjust = -0.3, vjust = 0.2, size = 3
                  ) +
                  theme_bw() +
                  scale_x_continuous(breaks = c(-10, -5, 0, 5, 10), limits = c(-10, 10)) +
                  scale_y_continuous(breaks = c(0, 1, 2, 3, 4, 5, 6), limits = c(0, 6)) +
                  theme(aspect.ratio = 0.9) +
                  labs(title = expression(paste(italic("rrp6D"), " comparative interactome"))) +
                  xlab(xlabel) +
                  ylab(ylabel)
                
                        
                #volcano.dis3.3J - dis3-54 comparative volcano
                xlabel <- expression(paste("log"[2]*" (fold change norm. MS int. (", italic("dis3-54"), "/WT))"))
                ylabel <- expression("-log"[10]*" (p-value)")
                ## row sets: rows carrying a category label, and rows in the WT2 set but not in the dis3 set
                rows.with.category <- which(!is.na(volcano.dat$categories))
                rows.WT2.not.dis3 <- setdiff(stringent.row.coord.WT2, stringent.row.coord.dis3)
                aes.dis3 <- aes(x = diff_3J_WT_dis3, y = p_3J_WT_dis3)
                ggplot(volcano.dat) +
                  ## background points: dis3 rows, then rows in the WT2 set only (shape 10)
                  geom_point(mapping = aes.dis3, data = volcano.dat[stringent.row.coord.dis3, ], color = "grey") +
                  geom_point(mapping = aes.dis3, data = volcano.dat[rows.WT2.not.dis3, ], color = "grey", shape = 10) +
                  ## labelled categories: filled for dis3 rows, open circles for rows in the WT2 set only
                  geom_point(
                    data = volcano.dat[intersect(stringent.row.coord.dis3, rows.with.category), ],
                    aes(x = diff_3J_WT_dis3, y = p_3J_WT_dis3, color = categories)
                  ) +
                  geom_point(
                    data = volcano.dat[intersect(rows.WT2.not.dis3, rows.with.category), ],
                    aes(x = diff_3J_WT_dis3, y = p_3J_WT_dis3, color = categories),
                    shape = 1
                  ) +
                  scale_color_manual(values = myPalette3) +
                  ## dis3 itself: black point plus black label
                  geom_point(mapping = aes.dis3, data = volcano.dat[volcano.dat$external_gene_name == "dis3", ], color = "black") +
                  geom_text(
                    aes(
                      x = diff_3J_WT_dis3, y = p_3J_WT_dis3,
                      label = ifelse(external_gene_name %in% c("dis3"), as.character(external_gene_name), "")
                    ),
                    colour = "black", hjust = -0.3, vjust = 0.2, size = 3
                  ) +
                  ## gene names for all dis3 rows
                  geom_text(
                    aes(
                      x = diff_3J_WT_dis3, y = p_3J_WT_dis3,
                      label = ifelse(external_gene_name %in% volcano.dat[stringent.row.coord.dis3, "external_gene_name"], as.character(external_gene_name), ""),
                      colour = categories
                    ),
                    hjust = -0.3, vjust = 0.2, size = 3
                  ) +
                  theme_bw() +
                  scale_x_continuous(breaks = c(-5, 0, 5), limits = c(-7, 7)) +
                  scale_y_continuous(breaks = c(0, 1, 2, 3, 4, 5), limits = c(0, 5)) +
                  theme(aspect.ratio = 0.9) +
                  labs(title = expression(paste(italic("dis3-54"), " comparative interactome" ))) +
                  xlab(xlabel) +
                  ylab(ylabel)
                
                
              


      
#### Functions:         
         

    addNewData <- function(newDataFileName, data, allowedVars){
      ##' Modifies 'data' by applying the replacements listed in a lookup table.
      ##'
      ##' The lookup table (read via readNewData) is expected to have columns
      ##' c(lookupVariable,lookupValue,newVariable,newValue,source)
      ##'
      ##' Rows of the lookup table are applied one after another, in file order:
      ##' within the column 'newVariable', elements of 'data' whose value in
      ##' column 'lookupVariable' equals 'lookupValue' are set to 'newValue'.
      ##' If 'lookupVariable' is NA, *all* elements of 'newVariable' are set to
      ##' 'newValue'. Later rows overwrite the effect of earlier rows.
      ##'
      ##' Note that lookupVariable can be the same as newVariable.
      ##'
      ##' Missing values never match: rows of 'data' whose lookup column is NA
      ##' (and lookup-table rows whose lookupValue is NA) are left untouched.
      ##'
      ##' @param newDataFileName name of lookup table
      ##' @param data existing data.frame
      ##' @param allowedVars vector of permissible variable names for newVariable
      ##' @return modified data.frame ('data' unchanged if the lookup table is
      ##'   missing or empty)
      import <- readNewData(newDataFileName, allowedVars)

      # readNewData returns NULL when there is nothing to apply
      if (is.null(import)) {
        return(data)
      }

      for (i in seq_len(nrow(import))) {  # make replacements
        col.to <- import$newVariable[i]
        col.from <- import$lookupVariable[i]
        if (is.na(col.from)) {  # apply to whole column
          data[col.to] <- import$newValue[i]
        } else {  # apply to subset
          hit <- data[[col.from]] == import$lookupValue[i]
          # '==' yields NA for missing values, and an NA in a logical row index
          # makes `[<-.data.frame` stop; treat NA as "no match" instead
          rows <- !is.na(hit) & hit
          data[rows, col.to] <- import$newValue[i]
        }
      }
      data
    }
         
         ##' Reads and validates the lookup table used by addNewData.
         ##'
         ##' Returns NULL (without warning) when the file does not exist or holds
         ##' no data rows. Otherwise the table must contain the columns
         ##' lookupVariable, lookupValue, newVariable and newValue, and every
         ##' entry of newVariable must be listed in allowedVars; a violation of
         ##' either rule stops with an error. Empty strings in lookupVariable
         ##' are converted to NA.
         ##'
         ##' @param newDataFileName name of lookup table
         ##' @param allowedVars vector of permissible variable names for newVariable
         ##' @return data.frame with columns c(lookupVariable,lookupValue,newVariable,newValue,source),
         ##'   or NULL if there is nothing to read
         readNewData <- function(newDataFileName, allowedVars){

           # nothing on disk -> nothing to apply
           if (!file.exists(newDataFileName)) {
             return(NULL)
           }

           import <- read.csv(newDataFileName, header=TRUE, stringsAsFactors=FALSE,
                              strip.white=TRUE)

           # header-only file -> nothing to apply
           if (nrow(import) == 0) {
             return(NULL)
           }

           # all mandatory columns must be present
           expectedColumns <- c("lookupVariable","lookupValue","newVariable","newValue")
           absentColumns <- expectedColumns[!(expectedColumns %in% names(import))]
           if (length(absentColumns) > 0) {
             stop("Incorrect name in lookup table for ",
                  newDataFileName, "--> ", paste(absentColumns, collapse=", "))
           }

           # an empty lookupVariable means "apply to the whole column"
           import$lookupVariable[import$lookupVariable == ""] <- NA

           # every target variable must be one the caller allows
           notAllowed <- !(import$newVariable %in% allowedVars)
           if (any(notAllowed)) {
             stop("Incorrect name(s) in newVariable column of ",
                  newDataFileName, "--> ", paste(import$newVariable[notAllowed],
                                                 collapse=", "))
           }

           import
         }    

      
         

         
  addColumnToMaster <- function (columnName,sourceData,extendedData) {
    ## Copies one column from sourceData into the master table, matching rows
    ## on ensembl_gene_id. Relies on the function addNewData.
    ### columnName: name of column to be added
    ### sourceData: data set that contains the column (and an ensembl_gene_id column)
    ### extendedData: master data set that the column should be added to
    ### returns: extendedData with the values of columnName filled in by addNewData

    missingCols <- setdiff(c("ensembl_gene_id", columnName), names(sourceData))
    if (length(missingCols) > 0) {
      stop("sourceData lacks column(s): ", paste(missingCols, collapse = ", "))
    }

    ## The lookup table is handed to addNewData through a file. Use a private
    ## temporary file (removed on exit) rather than a fixed "dataNew.csv" in the
    ## working directory, which would overwrite a file of that name and stay behind.
    lookupFile <- tempfile(pattern = "dataNew", fileext = ".csv")
    on.exit(unlink(lookupFile), add = TRUE)

    ## one lookup row per gene: set columnName to the gene's value wherever
    ## ensembl_gene_id matches
    dataNew <- cbind("ensembl_gene_id", sourceData$ensembl_gene_id, columnName, sourceData[[columnName]], "fungal ensembl")
    colnames(dataNew) <- c("lookupVariable","lookupValue","newVariable","newValue","source")
    write.csv(dataNew, file = lookupFile, row.names = FALSE)

    addNewData(lookupFile, extendedData, columnName)
  }
  
  
  

  
  addRawData <- function (rawData) {

    ### this is just to reduce the clutter in the analysis script:
    ### adds derivative mRIC data dependent on rawData input
    ###
    ### rawData: data.frame with one row per row of gene.data; the replicate
    ###          columns are addressed by position (see the column coordinates below)
    ### Reads the globals gene.data and gene.data.RNA (one row per gene each;
    ### columns 4:5, 6:7 and 8:9 of gene.data.RNA are taken as difference / p-value pairs).
    ### Returns gene.data followed by "diff_<label>" / "p_<label>" column pairs for
    ### 3J, Lys and RNA of mtl1, rrp6 and dis3 (in that order). All p_ columns are
    ### -log10(p-value); diff_ columns of the t-tests are mean(WT) - mean(mutant).

    n.rows <- nrow(rawData)

    ## Row-wise two-sample t-test (equal variances, unpaired, two-sided) of the
    ## WT replicate columns against the mutant replicate columns.
    ## Rows for which t.test() fails (e.g. too few or constant values) get p = NA.
    compareGroups <- function (wt.col.coord, mut.col.coord, label) {
      diff <- rep(NA_real_, n.rows)
      p.value <- rep(NA_real_, n.rows)
      for (i in seq_len(n.rows)) {
        wt <- as.numeric(rawData[i, wt.col.coord, drop = FALSE])
        mut <- as.numeric(rawData[i, mut.col.coord, drop = FALSE])
        p <- tryCatch(
          t.test(x = wt, y = mut, alternative = "two.sided",
                 var.equal = TRUE, paired = FALSE)$p.value,
          error = function(err) NA_real_)
        p.value[i] <- -log10(p)
        ## no na.rm here: a missing replicate makes the difference NA
        diff[i] <- mean(wt) - mean(mut)
      }
      res <- data.frame(diff, p.value)
      colnames(res) <- paste0(c("diff_", "p_"), label)
      res
    }

    ## Difference / p-value pair taken as-is from the RNA table
    rnaPair <- function (rna.col.coord, label) {
      res <- gene.data.RNA[ , rna.col.coord]
      colnames(res) <- paste0(c("diff_", "p_"), label)
      res
    }

    gene.data.alldiff <- cbind(
      gene.data,
      ### mRIC13_3J
      compareGroups(c(4,6,8), c(10,12,14), "3J_WT_mtl1"),
      ### mRIC13_lys
      compareGroups(c(25,26,27), c(28,29,30), "Lys_WT_mtl1"),
      ## mRIC13 RNA
      rnaPair(4:5, "RNA_WT_mtl1"),
      ## mRIC15-rrp6 3J
      compareGroups(c(16,17,18), c(19,20,21), "3J_WT_rrp6"),
      ### mRIC15 rrp6_lys
      compareGroups(c(31,32,33), c(34,35,36), "Lys_WT_rrp6"),
      ## mRIC15 rrp6 RNA
      rnaPair(6:7, "RNA_WT_rrp6"),
      ## mRIC15-dis3 3J (same WT columns as rrp6)
      compareGroups(c(16,17,18), c(22,23,24), "3J_WT_dis3"),
      ### mRIC15 dis3_lys (same WT columns as rrp6)
      compareGroups(c(31,32,33), c(37,38,39), "Lys_WT_dis3"),
      ## mRIC15 dis3 RNA
      rnaPair(8:9, "RNA_WT_dis3"))

    ## p-values from RNA-Seq data are not log10 like the others:
    ## transform them by column name so the result does not depend on how many
    ## columns gene.data has
    for (p.col in paste0("p_RNA_WT_", c("mtl1", "rrp6", "dis3"))) {
      gene.data.alldiff[[p.col]] <- -log10(gene.data.alldiff[[p.col]])
    }
    gene.data.alldiff
  }
  
  
  

  
  addRawDataToNoCL <- function (rawData) {
    ### this is just to reduce the clutter in the analysis script:
    ### adds derivative mRIC data dependent on rawData input
    ###
    ### rawData: data.frame with one row per row of gene.data; replicate columns
    ###          are addressed by position (3J WT: 4,6,8 and 16,17,18; noCL: 5,7,9;
    ###          3J mtl1: 10,12,14 with noCL 11,13,15)
    ### Reads the global gene.data; uses lmFit / makeContrasts / contrasts.fit /
    ### eBayes from limma.
    ### Returns gene.data followed by diff_/p_ pairs of the 3J-vs-noCL t-tests and
    ### two columns of limma moderated p-values. All p_ columns are -log10(p-value).

    n.rows <- nrow(rawData)
    ## value that the clean-up below treats as "sample was empty"
    placeholder <- 18
    noCL.col.coord <- c(5,7,9)

    ## Set a group of replicate columns to NA wherever every one of them holds
    ## the placeholder. Tested element-wise: a test on the row sum would also
    ## fire on unrelated values that merely add up to the same total.
    blankEmptySamples <- function (dat, col.coord) {
      row.coord <- which(rowSums(dat[ , col.coord] == placeholder) == length(col.coord))
      dat[row.coord , col.coord] <- NA
      dat
    }

    ## Row-wise two-sample t-test (equal variances, unpaired, two-sided) of 3J
    ## columns against noCL columns; rows where t.test() fails get p = NA.
    ## na.rm controls whether missing replicates are ignored in the difference.
    compareToNoCL <- function (wt.col.coord, bg.col.coord, label, na.rm = FALSE) {
      diff <- rep(NA_real_, n.rows)
      p.value <- rep(NA_real_, n.rows)
      for (i in seq_len(n.rows)) {
        wt <- as.numeric(rawData[i, wt.col.coord, drop = FALSE])
        mut <- as.numeric(rawData[i, bg.col.coord, drop = FALSE])
        p <- tryCatch(
          t.test(x = wt, y = mut, alternative = "two.sided",
                 var.equal = TRUE, paired = FALSE)$p.value,
          error = function(err) NA_real_)
        p.value[i] <- -log10(p)
        diff[i] <- mean(wt, na.rm = na.rm) - mean(mut, na.rm = na.rm)
      }
      res <- data.frame(diff, p.value)
      colnames(res) <- paste0(c("diff_", "p_"), label)
      res
    }

    ## -log10 p-values of the noCL-vs-3J contrast from a moderated t-test
    ## (eBayes function, limma package)
    moderatedP <- function (wt.col.coord, bg.col.coord) {
      eset <- as.matrix(rawData[ , c(wt.col.coord, bg.col.coord)])
      group.sizes <- c(length(wt.col.coord), length(bg.col.coord))
      design <- cbind(WT = rep(c(1, 0), times = group.sizes),
                      MU = rep(c(0, 1), times = group.sizes))
      fit <- lmFit(eset, design)
      cont.matrix <- makeContrasts(MUvsWT=MU-WT, levels=design)
      fit2 <- contrasts.fit(fit, cont.matrix)
      -log10(as.numeric(eBayes(fit2)$p.value))
    }

    #### first: purify rawData set
    ### remove values in mRIC13-3J whereever all three samples were empty
    rawData <- blankEmptySamples(rawData, c(4,6,8))
    ### remove values in mRIC15-3J whereever all three samples were empty
    rawData <- blankEmptySamples(rawData, c(16,17,18))

    ### remove values in mRIC13-noCL whereever all six 3J samples were empty
    all.3J.missing <- which(rowSums(is.na(rawData[ , c(4,6,8,16,17,18)])) == 6)
    rawData[all.3J.missing , noCL.col.coord] <- NA

    ### impute background values in mRIC13-noCL whereever there was signal in mRIC15 3J
    signal.in.mRIC15 <- which(rowSums(is.na(rawData[ , c(16,17,18)])) < 3)
    noCL.missing <- which(rowSums(is.na(rawData[ , noCL.col.coord])) == 3)
    rawData[intersect(signal.in.mRIC15, noCL.missing), noCL.col.coord] <- placeholder

    ### add differences and p-values
    gene.data.extended <- cbind(
      gene.data,
      ### mRIC13_3J_wt
      compareToNoCL(c(4,6,8), noCL.col.coord, "WT_3J_noCL"),
      ## mRIC13_3J_mtl1
      compareToNoCL(c(10,12,14), c(11,13,15), "mtl1_3J_noCL"),
      ### mRIC13+15_3J_wt (missing replicates are ignored in the difference)
      compareToNoCL(c(4,6,8,16,17,18), noCL.col.coord, "WTboth_3J_noCL", na.rm = TRUE))

    ### add p-values from moderated t-test
    ##WT1 to noCL
    gene.data.extended$p_WT1_3J_noCL_limma <- moderatedP(c(4,6,8), noCL.col.coord)
    #gene.data.extended[!(stringent.row.coord.WT1), "p_WT1_3J_noCL_limma"] <- NA

    ##WT1 and WT2 combined to noCL
    gene.data.extended$p_WTboth_3J_noCL_limma <- moderatedP(c(4,6,8,16,17,18), noCL.col.coord)
    #gene.data.extended[!(stringent.row.coord.WTboth), "p_WTboth_3J_noCL_limma"] <- NA

    gene.data.extended
  }
  
  
  findMode <- function (rawData, columnCoord, upperBound) {
    ## Returns the most frequent value among the entries of column columnCoord
    ## of rawData that are smaller than upperBound. Missing values are ignored;
    ## on a tie the value that appears first in the column wins (which.max picks
    ## the first maximum). Yields NA when no entry lies below upperBound.
    below.bound <- rawData[rawData[ ,columnCoord] < upperBound, columnCoord]
    candidates <- unique(below.bound)
    candidates <- candidates[!is.na(candidates)]
    ## occurrences of each candidate, in candidate order
    counts <- tabulate(match(below.bound, candidates))
    candidates[which.max(counts)]
  }

  
  findModeAndAddValue <- function (rawData, columnCoord, upperBound, row.coord = NULL) {
    ### this is just to reduce the clutter in the analysis script:
    ### finds mode (= most common element) at the bottom of a (normalized) raw value distribution
    ### this yields the place holder value that Shabaz used for not-detected, which can then be plugged in
    ### for NA values that are present in 3J/WCE
    ###
    ### rawData:    data.frame holding the values
    ### columnCoord: column (name or position) to work on
    ### upperBound: only values below this bound are considered when looking for the mode
    ### row.coord:  rows in which NA values of the column are replaced by the mode.
    ###             When not supplied, the variable 'row.coord' of the environment the
    ###             function was defined in (normally the global workspace) is used,
    ###             which is what this function always did implicitly.
    ### returns:    rawData with the NA values of the selected rows filled in
    if (is.null(row.coord)) {
      row.coord <- get("row.coord", envir = parent.env(environment()), inherits = TRUE)
    }

    below.bound <- rawData[rawData[ ,columnCoord] < upperBound, columnCoord]
    ux <- unique(below.bound)
    ux <- ux[!(is.na(ux))]
    replacement.value <- ux[which.max(tabulate(match(below.bound, ux)))]

    rawData[row.coord, columnCoord][is.na(rawData[row.coord, columnCoord])] <- replacement.value
    rawData
  }
  
  
  
  addRawDataToLys <- function (rawData, wt.col.coord, Lys.col.coord, sample.id) {
    ### this is just to reduce the clutter in the analysis script:
    ### adds derivative mRIC data dependent on rawData input
    ###
    ### rawData:       data.frame with one row per row of gene.data
    ### wt.col.coord:  columns holding the 3J replicates, e.g. c(4,6,8)
    ### Lys.col.coord: columns holding the Lys replicates, e.g. c(25,26,27)
    ### sample.id:     label used in the names of the two new columns
    ### Reads the global gene.data.
    ### Returns gene.data plus "diff_<sample.id>_3J_Lys" (mean 3J - mean Lys, missing
    ### replicates ignored) and "p_<sample.id>_3J_Lys" (-log10 p-value of a two-sided,
    ### unpaired, equal-variance t-test; NA where t.test() fails).

    n.rows <- nrow(rawData)
    diff <- rep(NA_real_, n.rows)
    p.value <- rep(NA_real_, n.rows)

    for (i in seq_len(n.rows)) {
      wt <- as.numeric(rawData[i, wt.col.coord, drop = FALSE])
      mut <- as.numeric(rawData[i, Lys.col.coord, drop = FALSE])
      p <- tryCatch(
        t.test(x = wt, y = mut, alternative = "two.sided",
               var.equal = TRUE, paired = FALSE)$p.value,
        error = function(err) NA_real_)
      p.value[i] <- -log10(p)
      diff[i] <- mean(wt, na.rm = TRUE) - mean(mut, na.rm = TRUE)
    }

    dat <- data.frame(diff, p.value)
    colnames(dat) <- c(paste0("diff_", sample.id, "_3J_Lys"), paste0("p_", sample.id, "_3J_Lys"))

    cbind(gene.data, dat)
  }
  
  

  
  addMeanAndSD <- function (rawData, col.coord, sample.id) {
    ### this is just to reduce the clutter in the analysis script:
    ### adds derivative mRIC data dependent on rawData input
    ###
    ### rawData:   data.frame with one row per row of gene.data
    ### col.coord: columns holding the replicates of one sample
    ### sample.id: label used in the names of the two new columns
    ### Reads the global gene.data.
    ### Returns gene.data plus "mean_<sample.id>" and "SD_<sample.id>": row-wise mean
    ### and standard deviation over col.coord, with missing values ignored.

    n.rows <- nrow(rawData)
    sample.mean <- rep(NA_real_, n.rows)
    sample.SD <- rep(NA_real_, n.rows)

    for (i in seq_len(n.rows)) {
      replicates <- as.numeric(rawData[i, col.coord, drop = FALSE])
      sample.mean[i] <- mean(replicates, na.rm = TRUE)
      sample.SD[i] <- sd(replicates, na.rm = TRUE)
    }

    dat <- data.frame(sample.mean, sample.SD)
    colnames(dat) <- c(paste0("mean_", sample.id), paste0("SD_", sample.id))

    cbind(gene.data, dat)
  }
  
  

  
  addPropagatedError <- function (rawData, col.coord.3J, col.coord.Lys, sample.id) {
    ### this is just to reduce the clutter in the analysis script:
    ### propagates uncertainty of RIC/WCE from previously derived mean and SD
    ### for 3J and Lys triplicates, propagate error and mean with 2nd order Taylor expansion
    ###
    ### rawData:       data.frame with one row per row of gene.data
    ### col.coord.3J:  the two columns holding mean and SD of the 3J sample, e.g. c(4,5)
    ### col.coord.Lys: the two columns holding mean and SD of the Lys sample, e.g. c(6,7)
    ### sample.id:     label used in the names of the two new columns, e.g. "WT1"
    ### Reads the global gene.data; uses propagate() from the propagate package.
    ### Returns gene.data plus "mean_<sample.id>_3JtoLys" and "SD_<sample.id>_3JtoLys",
    ### the propagated mean and SD of (3J - Lys). Rows with a missing input value, and
    ### rows for which propagate() fails, are NA in both columns.
    gene.data.extended <- gene.data

    mean.col <- paste0("mean_", sample.id, "_3JtoLys")
    SD.col <- paste0("SD_", sample.id, "_3JtoLys")
    gene.data.extended[ , mean.col] <- NA
    gene.data.extended[ , SD.col] <- NA

    ## only rows where all four input statistics are present
    row.coord <- which(rowSums(is.na(rawData[ , c(col.coord.3J, col.coord.Lys)])) == 0)

    for (i in row.coord) {
      ## one column per variable, first row = mean, second row = SD.
      ## Built from plain numeric vectors, so it does not depend on what
      ## as.vector() returns for a data frame in the running R version.
      dummy <- cbind(x = as.numeric(rawData[i, col.coord.3J, drop = FALSE]),
                     y = as.numeric(rawData[i, col.coord.Lys, drop = FALSE]))

      ## The handler's value is returned from tryCatch, so a failed row yields
      ## NA. Assigning inside the handler would only set the handler's own local
      ## variables and the previous row's result would be written again.
      prop.stats <- tryCatch({
        res <- propagate(expr = expression(x - y), data = dummy, second.order = TRUE, do.sim = FALSE, type = "stat")
        ## elements 2 and 4 of res$prop, as before
        ## (presumably the second-order mean and SD - confirm against the propagate docs)
        c(res$prop[2], res$prop[4])
      }, error = function(err) {
        c(NA_real_, NA_real_)
      })

      ## write by column name rather than position, so the result is correct
      ## for any number of columns in gene.data
      gene.data.extended[i, mean.col] <- prop.stats[1]
      gene.data.extended[i, SD.col] <- prop.stats[2]
    }

    gene.data.extended
  }
  
  
  
  