library(ggplot2)
library(gdata)
library(doParallel)
library(foreach)
# Register the parallel backend used by the %dopar% loops below.
registerDoParallel(cores=16)

# Restore the saved results; `allIdrDE` is not defined anywhere else in this
# script, so it is presumably provided by this file -- confirm against the
# script that wrote it.
load('sep_samp_all_IDR_plus_DE_Results.RData')

# create binary matrix from input data
# Cut-offs used when deciding whether a row counts as bound: fold changes
# must exceed FCVal and the IDR value must be below signifVal.
FCVal <- 1.5
signifVal <- 0.1
# RBP names whose rows are collected.
RBPs <- c(
    "Cbp20", "CG6227", "Rm62", "snRNP-U1-70K", "U2af50",
    "Fmr1", "B52", "Rbp1", "SC35", "SF2",
    "Srp54", "tra2", "Syp", "elav", "msi",
    "mub", "ps", "qkr54B", "qkr58E-1", "Upf1"
)
# Distinct values of the second '__'-separated field of the row names.
RNAs <- unique(sapply(strsplit(row.names(allIdrDE), '__'), `[`, 2))
# Build the binding indicator row for one RBP.
#
# A row of `idrDE` is attributed to `RBP` when its row name is
# "<RBP>__<RNA>"; the RNA counts as bound when that row has IDR < `signif`
# and both FC1 and FC2 > `fc`.
#
# Arguments:
#   RBP    - name of the RBP (first field of the row names).
#   idrDE  - matrix/data.frame with row names "<RBP>__<RNA>" and columns
#            'IDR', 'FC1' and 'FC2'. Defaults to the global `allIdrDE`.
#   rnas   - RNA identifiers that define the output positions and names.
#            Defaults to the global `RNAs`.
#   signif - IDR cut-off. Defaults to the global `signifVal`.
#   fc     - fold-change cut-off. Defaults to the global `FCVal`.
#
# Returns:
#   Logical vector of length(rnas), named by `rnas`, TRUE where the RNA
#   passes the cut-offs for this RBP.
createTFRow <- function(RBP, idrDE=allIdrDE, rnas=RNAs,
                        signif=signifVal, fc=FCVal){
    rowIds <- row.names(idrDE)
    # second '__' field of every row name; NA when a row name has no such field
    rowRNA <- vapply(strsplit(rowIds, '__'), function(x) x[2], character(1))
    # Include the delimiter in the prefix so that a short RBP name (e.g. "ps")
    # cannot pick up rows of another factor whose name merely starts with it.
    isBound <- startsWith(rowIds, paste0(RBP, '__')) &
        idrDE[, 'IDR'] < signif &
        pmin(idrDE[, 'FC1'], idrDE[, 'FC2']) > fc
    # which() drops rows whose test is NA (missing IDR / fold change), so they
    # count as not bound instead of turning into NA names.
    boundRNAs <- rowRNA[which(isBound)]
    boundRNAs <- boundRNAs[!is.na(boundRNAs)]
    RBPBound <- rep(FALSE, length(rnas))
    names(RBPBound) <- rnas
    # Positional marking keeps the result exactly length(rnas) long even if a
    # bound RNA is missing from `rnas`.
    RBPBound[rnas %in% boundRNAs] <- TRUE
    RBPBound
}
# Stack the per-RBP indicator rows from createTFRow (computed in parallel)
# into one matrix and label each row with its RBP.
allBound <- foreach(rbpName=RBPs, .combine=rbind) %dopar%
    createTFRow(rbpName)
rownames(allBound) <- RBPs

# Plot data, seeded with the single-RBP case: one row per RBP holding its own
# row total. numRBP is a factor with one level per possible number of RBPs.
gdat <- data.frame(
    numRBP=factor(rep(1, length(RBPs)), levels=seq_along(RBPs)),
    numRNA=rowSums(allBound)
)

# For every combination size i >= 2, draw up to numCombins random sets of i
# RBPs and record, for each set, how many RNAs are bound by at least one RBP
# in the set. The rows (numRBP = i, numRNA = count) are appended to gdat.
cat('Getting random RBP samples...')
numCombins <- 10000
nRBP <- length(RBPs)
# seq_len(nRBP)[-1] is 2..nRBP and is empty when nRBP < 2, whereas 2:nRBP
# would count down to c(2, 1).
for(i in seq_len(nRBP)[-1]){
    # take a random sample of the size-i RBP combinations (all of them when
    # there are no more than numCombins)
    nCombins <- choose(nRBP, i)
    allrRBPs <- combn(RBPs, i, simplify=FALSE)[
        sample.int(nCombins, min(nCombins, numCombins))]
    # count the RNAs bound by at least one RBP of each sampled combination;
    # a column sum > 0 over the logical rows means "any TRUE in that column"
    gdat <- rbind(gdat, foreach(rRBPs=allrRBPs, .combine=rbind) %dopar% {
        c(numRBP=i,
          numRNA=sum(colSums(allBound[rRBPs, , drop=FALSE]) > 0))
    })
    # progress: space-separated so consecutive sizes stay readable
    cat(' ', i, sep='')
}
cat('\n')

# Write a boxplot of the number of RNAs bound against the number of RBPs to
# db_RNAs_boxplot.pdf, with the per-group mean overlaid as a red line.
pdf('db_RNAs_boxplot.pdf')
# print() explicitly: a ggplot object is only drawn when printed, and
# top-level auto-printing does not happen when this script is run via source().
print(
  ggplot(gdat, aes(numRBP, numRNA)) + geom_boxplot() + theme_bw() +
    xlab('Number of RBPs') + ylab('Number of RNAs bound') +
    ggtitle('RBP Experiments vs RNAs Bound') +
    # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0);
    # group=1 joins the means across the factor levels into a single line
    stat_summary(fun=mean, geom="line", col='red', aes(group=1))
)
# assign the return value so the device name is not echoed
bar <- dev.off()
