# Prepare the test set, build the model formula(s) and map plot colours to
# method labels. Relies on objects defined elsewhere in the session:
# test.data, data, INTERCEPT, BLACKLIST_TRAIN_SET, TRAIN_LAB2, OTHER_METHODS,
# OTHER_METHODS_LAB, COLS_FORMS, COLS_OTHER and removeTrainSet().

# Replace the test.data container with its first named element.
test.data <- test.data[[names(test.data)[1]]]
# NOTE(review): presumably drops training items from the test set -- confirm
# against removeTrainSet() in the helpers file.
if (BLACKLIST_TRAIN_SET) test.data <- removeTrainSet(test.data, data)

# One-sided formula over the first four columns of `data`; appending the
# term `0` removes the intercept.
if (INTERCEPT) {
  forms <- list(paste0('~', paste0(names(data)[1:4], collapse = '+')))
} else {
  forms <- list(paste0('~', paste0(c(names(data)[1:4], 0), collapse = '+')))
}
forms <- lapply(forms, formula)
names(forms) <- TRAIN_LAB2

# Colour lookup keyed by method label: the formula-based model(s) first,
# followed by any additional methods.
if (length(OTHER_METHODS) != 0) {
  PLOT_COLS <- as.list(c(COLS_FORMS, COLS_OTHER))
  names(PLOT_COLS) <- c(names(forms), OTHER_METHODS_LAB)
} else {
  PLOT_COLS <- as.list(COLS_FORMS)
  names(PLOT_COLS) <- names(forms)
}
# Run configuration for the logistic-regression evaluation session.
# Remove every object from the workspace, then load the helper functions.
rm(list = ls())
source('logisticReg2_helpers.R')
# Labels of the training and test data sets (alternatives kept as trailing
# comments).
TRAIN <- 'noyes2015_alpha0.30_BL-hughes' #'hughes2015'#
TEST <- 'hughes2015_alpha0.30'
TRAIN_LAB2 <- 'sF2F3low_d_logRegr' # For finding prediction tables
TEST_LAB2 <- 'hughes'             # For finding prediction tables
ALPHA_TRAIN <- rep('0.30', 3)
# Additional methods to compare against and the labels used for them.
OTHER_METHODS <- c('olsf2f3Pred', 'antonSVM_l7') #'hughesPred', 'olsf2f3Pred','nnf2f3Pred')
OTHER_METHODS_LAB <- c('sF2F3low_d_olsRegr', 'svm_l7')#c('RFNH', paste0('f2f3alpha',ALPHA_TRAIN[2],'-OLS'),
# One flag per entry of OTHER_METHODS.
# NOTE(review): presumably marks methods whose predictions are precomputed -- confirm.
PRECOMP_FLAGS <- c(FALSE,TRUE) #c(TRUE,FALSE,TRUE)
# Plot colours: COLS_FORMS for the formula-based model(s), COLS_OTHER for
# the additional methods.
COLS_FORMS <- c('black')#c('black', 'red', 'blue')
COLS_OTHER <- c('purple', 'green')#c('green', 'purple', 'orange')
BLACKLIST_TRAIN_SET <- FALSE      # To remove training cores from test cores
# Echo the chosen configuration.
print(paste0("Train: ", TRAIN))
print(paste0("Test: ", TEST))
print(paste0("OtherMeths: ", OTHER_METHODS_LAB))
# Clear the workspace again and end the R session.
rm(list = ls())
q()
# Package maintenance session: reinstall and update CRAN / Bioconductor
# packages and write out a list of installed packages.

# Reinstall every package found in the first library path.
packs <- as.data.frame(installed.packages(.libPaths()[1]), stringsAsFactors = FALSE)
install.packages(packs$Package)
if (!requireNamespace("BiocManager")) {
  install.packages("BiocManager")
}
BiocManager::install()
if (!requireNamespace("BiocManager")) {
  install.packages("BiocManager")
}
BiocManager::install()
.libPaths()

# Table of installed packages: keep columns 1 and 3:4 of installed.packages(),
# then retain the rows whose Priority is NA and drop the Priority column.
ip <- as.data.frame(installed.packages()[, c(1, 3:4)])
rownames(ip) <- NULL
ip <- ip[is.na(ip$Priority), 1:2, drop = FALSE]
print(ip, row.names = FALSE)
ip
write.table(ip, file = '~/Desktop/ip.txt', row.names = FALSE, sep = '\t', quote = FALSE)

# Update everything, including packages built under an older R version.
update.packages(ask = FALSE, checkBuilt = TRUE)
update.packages(ask = FALSE, checkBuilt = TRUE)
BiocManager::install(ask = FALSE, checkBuilt = TRUE)
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install(version = "3.11")
install.packages("BiocManager")
install.packages("BiocManager")
BiocManager::install(ask = FALSE, checkBuilt = TRUE)
BiocManager::install(version = "3.11", ask = FALSE, checkBuilt = TRUE)

# Package names must be delimited with plain ASCII quotes; typographic
# quotes are not valid string delimiters in R.
BiocManager::install("BSgenome.Hsapiens.UCSC.hg19", checkBuilt = TRUE)
BiocManager::install("BSgenome.Hsapiens.UCSC.hg19")
BiocManager::install(version = "3.11", ask = FALSE, checkBuilt = TRUE)
BiocManager::install(version = "3.11", ask = FALSE, checkBuilt = TRUE)

# Help pages and environment checks.
?install.packages
?update.packages
.libPaths()
getOption("repos")
update.packages(ask = FALSE, checkBuilt = TRUE)
update.packages(ask = FALSE, checkBuilt = TRUE)
?gsePathway
?gseNCG
?gseNCG
library(data.table)
library(heatmap3)
library(RColorBrewer)
library(dynamicTreeCut)
library(ggplot2)
library(ReactomePA)
library(clusterProfiler)
library(ggrepel)
library(moduleColor)
library(EnsDb.Hsapiens.v86)
library(igraph)
suppressMessages(library(DOSE, quietly = TRUE))
suppressMessages(library(org.Hs.eg.db, quietly = TRUE))
suppressMessages(library(enrichplot, quietly = TRUE))
suppressMessages(library(ggnewscale, quietly = TRUE))
# Open the help pages for gseNCG, gseGO and gseDO.
?gseNCG
?gseGO
?gseDO
# Update installed packages; checkBuilt = TRUE also treats packages built
# under an earlier R version as out of date.
update.packages(checkBuilt = TRUE)
update.packages(checkBuilt = TRUE)
# Install BiocManager only when its namespace cannot be loaded.
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
# Reinstall every currently installed package from source via BiocManager.
pkgs <- rownames(installed.packages())
BiocManager::install(pkgs, type = "source", checkBuilt = TRUE)
# Exploratory comparison of the 'hybrid' and 'rf_joint' prediction types on
# rows flagged 'transfer\npossible'. The sourced script is expected to create
# res.noOlap.rfJoint.hd1 and res.fig5 (not visible here).
setwd('~/research/jointInterfaceLearning/rCLAMPS/code/')
source("~/research/jointInterfaceLearning/rCLAMPS/code/hd1-transfer-predictions-plots.R")
res.noOlap.rfJoint.hd1
res.fig5
res.noOlap.rfJoint.hd1

# PCC distribution per prediction type, with a dashed reference line at 0.5.
g <- ggplot(res.noOlap.rfJoint.hd1[transfer == 'transfer\npossible' & predType %in% c('hybrid', 'rf_joint')],
            aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  scale_y_continuous(limits = c(-1, 1)) +
  theme_classic()
g

x <- res.noOlap.rfJoint.hd1[transfer == 'transfer\npossible' & predType %in% c('hybrid', 'rf_joint')]
x

# One row per (prot, pos) with the PCC of each method in its own column.
# The two output columns need distinct names so they can be compared below.
x.sideBySide <- x[, .(rfJoint = pcc[which(predType == 'rf_joint')],
                      hybrid = pcc[which(predType == 'hybrid')]),
                  by = c('prot', 'pos')]
x.sideBySide
x.sideBySide

# Count and fraction of (prot, pos) pairs where hybrid has the higher PCC.
sum(x.sideBySide$hybrid > x.sideBySide$rfJoint)
sum(x.sideBySide$hybrid > x.sideBySide$rfJoint) / nrow(x.sideBySide)
x

# Same layout, now carrying both PCC and RMSE for each method.
x.sideBySide <- x[, .(rfJoint.pcc = pcc[which(predType == 'rf_joint')],
                      hybrid.pcc = pcc[which(predType == 'hybrid')],
                      rfJoint.rmse = rmse[which(predType == 'rf_joint')],
                      hybrid.rmse = rmse[which(predType == 'hybrid')]),
                  by = c('prot', 'pos')]
x.sideBySide
summary(x.sideBySide$rfJoint.rmse)
summary(x.sideBySide$hybrid.rmse)
# NOTE(review): the two RMSE vectors are matched row-by-row on (prot, pos);
# consider whether a paired test (paired = TRUE) is the intended comparison.
wilcox.test(x.sideBySide$rfJoint.rmse, x.sideBySide$hybrid.rmse)
# Supplementary plots comparing 'hybrid' and 'rf_joint' on rows flagged
# 'transfer\npossible'. Writes one PCC and one RMSE PDF into RES_DIR.
tmp <- res.noOlap.rfJoint.hd1[transfer == 'transfer\npossible' & predType %in% c('hybrid', 'rf_joint')]

# PCC per method; y axis fixed to [-1, 1] with a dashed reference at 0.5.
g <- ggplot(tmp, aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  scale_y_continuous(limits = c(-1, 1)) +
  theme_classic()
ggsave(filename = paste0(RES_DIR, 'pcc_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
       plot = g, height = 5, width = 5)

# RMSE per method. The axis is labelled as RMSE and carries neither the
# PCC-specific 0.5 reference line nor the [-1, 1] limits, which would drop
# any RMSE value above 1.
g <- ggplot(tmp, aes(x = predType, y = rmse, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  labs(x = "Method", y = "RMSE between predicted and actual") +
  theme_classic()
ggsave(filename = paste0(RES_DIR, 'rmse_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
       plot = g, height = 5, width = 5)
# Side-by-side comparison table: one row per (prot, pos) holding the PCC and
# RMSE of each method. All derived columns live on this table, because it
# has one row per pair while `tmp` has one row per pair and method.
tmp.sideBySideCmp <- tmp[, .(rfJoint.pcc = pcc[which(predType == 'rf_joint')],
                             hybrid.pcc = pcc[which(predType == 'hybrid')],
                             rfJoint.rmse = rmse[which(predType == 'rf_joint')],
                             hybrid.rmse = rmse[which(predType == 'hybrid')]),
                         by = c('prot', 'pos')]

# Differences are hybrid minus rf_joint.
tmp.sideBySideCmp$pcc.diff <- tmp.sideBySideCmp$hybrid.pcc - tmp.sideBySideCmp$rfJoint.pcc
tmp.sideBySideCmp$rmse.diff <- tmp.sideBySideCmp$hybrid.rmse - tmp.sideBySideCmp$rfJoint.rmse

# A "win" for hybrid is a PCC at least as high, or an RMSE at least as low
# (ties count as wins).
tmp.sideBySideCmp$pcc.win <- tmp.sideBySideCmp$pcc.diff >= 0
tmp.sideBySideCmp$rmse.win <- tmp.sideBySideCmp$rmse.diff <= 0

# Fraction of (prot, pos) pairs won by hybrid under each measure.
sum(tmp.sideBySideCmp$pcc.win) / nrow(tmp.sideBySideCmp)
sum(tmp.sideBySideCmp$rmse.win) / nrow(tmp.sideBySideCmp)
tmp.sideBySideCmp
# Long-format view of the per-(prot, pos) differences: one row per pair and
# measure. measure.vars takes each column name as its own string.
tmp.sideBySideCmp
tmp <- melt(tmp.sideBySideCmp, id.vars = c('prot', 'pos'),
            measure.vars = c('pcc.diff', 'rmse.diff'))
# Rebuild the hybrid vs rf_joint comparison for rows flagged
# 'transfer\npossible', save the PCC and RMSE plots (5 x 5) and recompute
# the per-(prot, pos) summary statistics.
tmp <- res.noOlap.rfJoint.hd1[
  transfer == 'transfer\npossible' & predType %in% c('hybrid', 'rf_joint')
]

# PCC per method, y axis fixed to [-1, 1], dashed reference at 0.5.
g <- ggplot(tmp, aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  scale_y_continuous(limits = c(-1, 1)) +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'pcc_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
  plot = g, height = 5, width = 5
)

# RMSE per method.
g <- ggplot(tmp, aes(x = predType, y = rmse, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  labs(x = "Method", y = "RMSE between predicted and actual") +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'rmse_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
  plot = g, height = 5, width = 5
)

# One row per (prot, pos) with each method's PCC and RMSE side by side.
tmp.sideBySideCmp <- tmp[, .(
  rfJoint.pcc = pcc[which(predType == 'rf_joint')],
  hybrid.pcc = pcc[which(predType == 'hybrid')],
  rfJoint.rmse = rmse[which(predType == 'rf_joint')],
  hybrid.rmse = rmse[which(predType == 'hybrid')]
), by = c('prot', 'pos')]

# Differences are hybrid minus rf_joint; ties count as wins for hybrid.
tmp.sideBySideCmp$pcc.diff <- with(tmp.sideBySideCmp, hybrid.pcc - rfJoint.pcc)
tmp.sideBySideCmp$rmse.diff <- with(tmp.sideBySideCmp, hybrid.rmse - rfJoint.rmse)
tmp.sideBySideCmp$pcc.win <- tmp.sideBySideCmp$pcc.diff >= 0
tmp.sideBySideCmp$rmse.win <- tmp.sideBySideCmp$rmse.diff <= 0

# Fraction of pairs won by hybrid under each measure.
sum(tmp.sideBySideCmp$pcc.win) / nrow(tmp.sideBySideCmp)
sum(tmp.sideBySideCmp$rmse.win) / nrow(tmp.sideBySideCmp)
tmp.sideBySideCmp
sum(tmp.sideBySideCmp$rmse.win) / nrow(tmp.sideBySideCmp)

# Rank-sum test on the RMSE columns of x.sideBySide (built earlier in the
# session).
wilcox.test(x.sideBySide$rfJoint.rmse, x.sideBySide$hybrid.rmse)
tmp.sideBySideCmp
# Stack the PCC and RMSE differences (hybrid minus rf_joint) into one long
# table with columns prot, pos, score and measure, then plot and save the
# distribution of differences per measure.
pccDiff <- tmp.sideBySideCmp[, c('prot', 'pos', 'pcc.diff'), with = FALSE]
names(pccDiff)[3] <- 'score'   # column name is a string, so it must be quoted
pccDiff$measure <- 'PCC'
rmseDiff <- tmp.sideBySideCmp[, c('prot', 'pos', 'rmse.diff'), with = FALSE]
names(rmseDiff)[3] <- 'score'
rmseDiff$measure <- 'RMSE'
tmp.sideBySide.long <- rbind(pccDiff, rmseDiff)
tmp.sideBySide.long

# No 0.5 reference line here: that threshold belongs to raw PCC values, not
# to differences between methods.
g <- ggplot(tmp.sideBySide.long, aes(x = measure, y = score)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  labs(x = "Agreement measure", y = "Agreement score (rCLAMPS minus rf_joint)") +
  theme_classic()
g
ggsave(filename = paste0(RES_DIR, 'agreementDiff_rfJointOnly_hd1_forSupplement.pdf'),
       plot = g, height = 5, width = 5)
# PCC per method on a log10 y axis. scale_y_log10 must be called (with
# parentheses) and cannot take limits that include non-positive values.
# NOTE(review): PCC values <= 0 have no log10; ggplot2 is expected to drop
# them with a warning -- confirm this view is what is wanted.
g <- ggplot(tmp, aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  scale_y_log10() +
  theme_classic()
g

# Distribution summaries of each method's PCC and RMSE.
tmp.sideBySideCmp
summary(tmp.sideBySideCmp$hybrid.pcc)
summary(tmp.sideBySideCmp$rfJoint.pcc)
summary(tmp.sideBySideCmp$hybrid.rmse)
summary(tmp.sideBySideCmp$rfJoint.rmse)
# Supplemental Figure S5
# Relabel prediction types ('model' -> 'de novo', 'model+nn' -> 'hybrid'),
# then plot hybrid vs rf_joint for rows flagged 'transfer\npossible'.
res.noOlap.rfJoint.hd1$predType <- plyr::mapvalues(
  res.noOlap.rfJoint.hd1$predType,
  from = c('model', 'model+nn', 'rf_extant', 'rf_joint'),
  to = c('de novo', 'hybrid', 'rf_extant', 'rf_joint')
)
tmp <- res.noOlap.rfJoint.hd1[
  transfer == 'transfer\npossible' & predType %in% c('hybrid', 'rf_joint')
]

# PCC, y axis fixed to [-1, 1].
g <- ggplot(tmp, aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  scale_y_continuous(limits = c(-1, 1)) +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'pcc_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
  plot = g, height = 4, width = 3
)

# PCC on a log10 y axis.
g <- ggplot(tmp, aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  scale_y_log10() +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'pcc_boxplots_rfJointOnly_hd1_byColType_logScale_forSupplement.pdf'),
  plot = g, height = 4, width = 3
)

# RMSE.
g <- ggplot(tmp, aes(x = predType, y = rmse, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  labs(x = "Method", y = "RMSE between predicted and actual") +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'rmse_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
  plot = g, height = 4, width = 3
)

# PCC again with default y limits; this overwrites the first PCC file.
g <- ggplot(tmp, aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'pcc_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
  plot = g, height = 4, width = 3
)
# Same plots and statistics as above, but over every row of the hybrid and
# rf_joint prediction types (no filter on `transfer`). Output file names are
# unchanged, so earlier PDFs are overwritten.
tmp <- res.noOlap.rfJoint.hd1[predType %in% c('hybrid', 'rf_joint')]

# PCC with default y limits.
g <- ggplot(tmp, aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'pcc_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
  plot = g, height = 4, width = 3
)

# PCC on a log10 y axis.
g <- ggplot(tmp, aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  scale_y_log10() +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'pcc_boxplots_rfJointOnly_hd1_byColType_logScale_forSupplement.pdf'),
  plot = g, height = 4, width = 3
)

# RMSE.
g <- ggplot(tmp, aes(x = predType, y = rmse, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  labs(x = "Method", y = "RMSE between predicted and actual") +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'rmse_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
  plot = g, height = 4, width = 3
)

# One row per (prot, pos) with each method's PCC and RMSE side by side.
tmp.sideBySideCmp <- tmp[, .(
  rfJoint.pcc = pcc[which(predType == 'rf_joint')],
  hybrid.pcc = pcc[which(predType == 'hybrid')],
  rfJoint.rmse = rmse[which(predType == 'rf_joint')],
  hybrid.rmse = rmse[which(predType == 'hybrid')]
), by = c('prot', 'pos')]

# Differences are hybrid minus rf_joint; ties count as wins for hybrid.
tmp.sideBySideCmp$pcc.diff <- with(tmp.sideBySideCmp, hybrid.pcc - rfJoint.pcc)
tmp.sideBySideCmp$rmse.diff <- with(tmp.sideBySideCmp, hybrid.rmse - rfJoint.rmse)
tmp.sideBySideCmp$pcc.win <- tmp.sideBySideCmp$pcc.diff >= 0
tmp.sideBySideCmp$rmse.win <- tmp.sideBySideCmp$rmse.diff <= 0

# Fraction of pairs won by hybrid under each measure.
sum(tmp.sideBySideCmp$pcc.win) / nrow(tmp.sideBySideCmp)
sum(tmp.sideBySideCmp$rmse.win) / nrow(tmp.sideBySideCmp)
# Hybrid vs rf_joint for rows flagged 'transfer\npossible': save the PCC,
# log-scale PCC and RMSE plots (4 x 3), recompute the per-(prot, pos)
# comparison, and save the plot of differences between methods (5 x 5).
tmp <- res.noOlap.rfJoint.hd1[transfer == 'transfer\npossible' & predType %in% c('hybrid', 'rf_joint')]

# PCC with default y limits.
g <- ggplot(tmp, aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  theme_classic()
ggsave(filename = paste0(RES_DIR, 'pcc_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
       plot = g, height = 4, width = 3)

# PCC on a log10 y axis.
g <- ggplot(tmp, aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  scale_y_log10() +
  theme_classic()
ggsave(filename = paste0(RES_DIR, 'pcc_boxplots_rfJointOnly_hd1_byColType_logScale_forSupplement.pdf'),
       plot = g, height = 4, width = 3)

# RMSE.
g <- ggplot(tmp, aes(x = predType, y = rmse, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  labs(x = "Method", y = "RMSE between predicted and actual") +
  theme_classic()
ggsave(filename = paste0(RES_DIR, 'rmse_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
       plot = g, height = 4, width = 3)

# One row per (prot, pos) with each method's PCC and RMSE side by side.
tmp.sideBySideCmp <- tmp[, .(rfJoint.pcc = pcc[which(predType == 'rf_joint')],
                             hybrid.pcc = pcc[which(predType == 'hybrid')],
                             rfJoint.rmse = rmse[which(predType == 'rf_joint')],
                             hybrid.rmse = rmse[which(predType == 'hybrid')]),
                         by = c('prot', 'pos')]

# Differences are hybrid minus rf_joint; ties count as wins for hybrid.
tmp.sideBySideCmp$pcc.diff <- tmp.sideBySideCmp$hybrid.pcc - tmp.sideBySideCmp$rfJoint.pcc
tmp.sideBySideCmp$rmse.diff <- tmp.sideBySideCmp$hybrid.rmse - tmp.sideBySideCmp$rfJoint.rmse
tmp.sideBySideCmp$pcc.win <- tmp.sideBySideCmp$pcc.diff >= 0
tmp.sideBySideCmp$rmse.win <- tmp.sideBySideCmp$rmse.diff <= 0
sum(tmp.sideBySideCmp$pcc.win) / nrow(tmp.sideBySideCmp)
sum(tmp.sideBySideCmp$rmse.win) / nrow(tmp.sideBySideCmp)

# Stack both differences into one long table (prot, pos, score, measure).
pccDiff <- tmp.sideBySideCmp[, c('prot', 'pos', 'pcc.diff'), with = FALSE]
names(pccDiff)[3] <- 'score'
pccDiff$measure <- 'PCC'
rmseDiff <- tmp.sideBySideCmp[, c('prot', 'pos', 'rmse.diff'), with = FALSE]
names(rmseDiff)[3] <- 'score'
rmseDiff$measure <- 'RMSE'
tmp.sideBySide.long <- rbind(pccDiff, rmseDiff)

# Distribution of the differences per measure.
g <- ggplot(tmp.sideBySide.long, aes(x = measure, y = score)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  labs(x = "Agreement measure", y = "Agreement score (rCLAMPS minus rf_joint)") +
  theme_classic()
ggsave(filename = paste0(RES_DIR, 'agreementDiff_rfJointOnly_hd1_forSupplement.pdf'),
       plot = g, height = 5, width = 5)
## Figure 5 - overall comparison of transfer vs OUR model
# Attach to every row of `res` a per-(prot, pos) flag that is TRUE when the
# 'model+nn' prediction for that pair has nnbrs > 0.
res.fig5 <- merge(
  res,
  res[predType == 'model+nn', .(transfer = nnbrs > 0), by = c('prot', 'pos')],
  by = c('prot', 'pos')
)
res.fig5$transfer <- ifelse(res.fig5$transfer, "transfer", "de novo")

# Keep only proteins that have at least one 'transfer' row.
nearMuts <- unique(res.fig5[transfer == 'transfer']$prot)
res.fig5 <- res.fig5[prot %in% nearMuts]

# A prediction "agrees" when its PCC reaches 0.5.
res.fig5$agree <- res.fig5$pcc >= 0.5

# Rename 'model+nn' to 'hybrid' and fix the factor level order.
res.fig5$predType <- plyr::mapvalues(
  res.fig5$predType,
  from = c('model', 'model+nn', 'rf_extant', 'rf_joint'),
  to = c('model', 'hybrid', 'rf_extant', 'rf_joint')
)
res.fig5$predType <- factor(
  res.fig5$predType,
  levels = c('hybrid', 'model', 'rf_extant', 'rf_joint')
)

# Figure 5 boxen plots for hybrid approach only
g <- ggplot(res.fig5[predType == 'hybrid'], aes(x = transfer, y = pcc)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  scale_y_continuous(limits = c(-1, 1)) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Column type", y = "PCC between predicted and actual") +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'pcc_boxen_plots_hybridApproach_byColType.pdf'),
  plot = g, height = 4, width = 3
)
# Final pass over every row of the hybrid and rf_joint prediction types (no
# filter on `transfer`): re-save the three plots (4 x 3), recompute the
# per-(prot, pos) comparison and inspect the data.
tmp <- res.noOlap.rfJoint.hd1[predType %in% c('hybrid', 'rf_joint')]

# PCC with default y limits.
g <- ggplot(tmp, aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'pcc_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
  plot = g, height = 4, width = 3
)

# PCC on a log10 y axis.
g <- ggplot(tmp, aes(x = predType, y = pcc, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  geom_hline(yintercept = 0.5, lty = 'dashed') +
  labs(x = "Method", y = "PCC between predicted and actual") +
  scale_y_log10() +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'pcc_boxplots_rfJointOnly_hd1_byColType_logScale_forSupplement.pdf'),
  plot = g, height = 4, width = 3
)

# RMSE.
g <- ggplot(tmp, aes(x = predType, y = rmse, fill = predType)) +
  geom_lv(color = 'black', fill = "gray20", outlier.size = 1, alpha = 0.3) +
  labs(x = "Method", y = "RMSE between predicted and actual") +
  theme_classic()
ggsave(
  filename = paste0(RES_DIR, 'rmse_boxplots_rfJointOnly_hd1_byColType_forSupplement.pdf'),
  plot = g, height = 4, width = 3
)

# One row per (prot, pos) with each method's PCC and RMSE side by side.
tmp.sideBySideCmp <- tmp[, .(
  rfJoint.pcc = pcc[which(predType == 'rf_joint')],
  hybrid.pcc = pcc[which(predType == 'hybrid')],
  rfJoint.rmse = rmse[which(predType == 'rf_joint')],
  hybrid.rmse = rmse[which(predType == 'hybrid')]
), by = c('prot', 'pos')]

# Differences are hybrid minus rf_joint; ties count as wins for hybrid.
tmp.sideBySideCmp$pcc.diff <- with(tmp.sideBySideCmp, hybrid.pcc - rfJoint.pcc)
tmp.sideBySideCmp$rmse.diff <- with(tmp.sideBySideCmp, hybrid.rmse - rfJoint.rmse)
tmp.sideBySideCmp$pcc.win <- tmp.sideBySideCmp$pcc.diff >= 0
tmp.sideBySideCmp$rmse.win <- tmp.sideBySideCmp$rmse.diff <= 0

# Fraction of pairs won by hybrid under each measure.
sum(tmp.sideBySideCmp$pcc.win) / nrow(tmp.sideBySideCmp)
sum(tmp.sideBySideCmp$rmse.win) / nrow(tmp.sideBySideCmp)

# Inspect the underlying rows and the smallest PCC.
tmp
min(tmp$pcc)
6 * 18
