data.frame(x = 1)
})
# Debug pass: print the "chr:start-end" label of every locus, cluster by
# cluster.
# Fix: the original called do.call(paste0, x, df$end). do.call() requires a
# list as its second argument and treats the third as `quote`, so that call
# failed instead of appending the end coordinate.
clusts <- ddply(loci, "clustNum", function(df) {
  # paste0() is vectorised: one label per row of the cluster.
  print(paste0(df$chr, ':', df$start, '-', df$end))
  # Placeholder result so ddply() has something to combine.
  data.frame(x = 1)
})
# Debug pass: print each cluster's complete "chr:start-end" labels.
clusts <- ddply(loci, "clustNum", function(df) {
  prefix <- paste0(df$chr, ':', df$start, '-')
  print(paste0(prefix, df$end))
  data.frame(x = 1)
})
# Debug pass: print the "chr:start-" prefixes and the end coordinates
# separately.
clusts <- ddply(loci, "clustNum", function(df) {
  prefix <- paste0(df$chr, ':', df$start, '-')
  print(prefix)
  print(df$end)
  data.frame(x = 1)
})
# Read every file in the CHROM_STATES cluster directory into one table of
# loci. Each file contributes its first three columns (chr, start, end) plus
# a cluster number parsed from the file name: the second '_'-separated field,
# up to its first '.'.
# The per-file tables are collected with lapply() and bound once, instead of
# growing `loci` with rbind() on every iteration. An empty directory still
# yields loci == NULL.
clust_dir <- paste0('../../dnase/clusters/', CHROM_STATES, '/')
loci <- do.call(rbind, lapply(dir(clust_dir), function(bedfile) {
  regions <- read.table(paste0(clust_dir, bedfile))[, 1:3]
  names(regions) <- c('chr', 'start', 'end')
  num_field <- strsplit(bedfile, split = '_')[[1]][2]
  clust_num <- as.numeric(strsplit(num_field, split = '\\.')[[1]][1])
  regions$clustNum <- rep(clust_num, nrow(regions))
  regions
}))
# Debug pass: print the full "chr:start-end" label of every locus, by cluster.
clusts <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  print(locus_ids)
  data.frame(x = 1)
})
# Use the labels as row names to pull each cluster's rows out of `state`.
# The subset is not used yet; a placeholder is returned.
clusts <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids,]
  data.frame(x = 1)
})
# Load the saved state calls for CHROM_STATES. The statements below read
# `max_states` and `max_probs`, which are expected to come from this file.
load(paste0('../../dnase/state_calls/state_calls_', CHROM_STATES, '.RData'))
# Keep only the columns whose names also occur in expr.zf.mat, then recode
# every non-zero call to 1.
state <- max_states[, which(colnames(max_states) %in% colnames(expr.zf.mat))]
state <- ifelse(state != 0, 1, state)
# Same column filter for the probabilities; their values are left untouched.
state.prob <- max_probs[, which(colnames(max_probs) %in% colnames(expr.zf.mat))]
# Read in the cluster regions/labels for the CHROM_STATES.
# Each file in the cluster directory contributes its first three columns
# (chr, start, end) plus a cluster number parsed from the file name: the
# second '_'-separated field, up to its first '.'.
# The per-file tables are collected with lapply() and bound once, instead of
# growing `loci` with rbind() on every iteration. An empty directory still
# yields loci == NULL.
clust_dir <- paste0('../../dnase/clusters/', CHROM_STATES, '/')
loci <- do.call(rbind, lapply(dir(clust_dir), function(bedfile) {
  regions <- read.table(paste0(clust_dir, bedfile))[, 1:3]
  names(regions) <- c('chr', 'start', 'end')
  num_field <- strsplit(bedfile, split = '_')[[1]][2]
  clust_num <- as.numeric(strsplit(num_field, split = '\\.')[[1]][1])
  regions$clustNum <- rep(clust_num, nrow(regions))
  regions
}))
# Pull each cluster's rows out of `state`, addressed by "chr:start-end" row
# names. Nothing is summarised yet; a placeholder is returned.
# drop = FALSE keeps the subset two-dimensional even for a one-locus cluster;
# without it the single row collapses to a vector and rowSums() fails.
clusts <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  data.frame(x = 1)
})
# Same lookup, additionally discarding rows whose sum is not positive.
clusts <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  subMat <- subMat[which(rowSums(subMat) > 0), , drop = FALSE]
  data.frame(x = 1)
})
# For each cluster, sum every column of `state` over the cluster's loci.
# With 0/1 calls in `state` this is the NUMBER of marked loci per column --
# a count, not yet a fraction.
# drop = FALSE keeps the subset two-dimensional for one-locus clusters;
# without it rowSums()/colSums() fail on the collapsed vector.
clusts <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  subMat <- subMat[which(rowSums(subMat) > 0), , drop = FALSE]
  data.frame(x = colSums(subMat))
})
clusts
# Same computation with the result column named colSum.
clusts <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  subMat <- subMat[which(rowSums(subMat) > 0), , drop = FALSE]
  data.frame(colSum = colSums(subMat))
})
# Without the row filter.
clusts <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  data.frame(colSum = colSums(subMat))
})
head(clusts)
# Per-cluster column sums of `state`, pivoted to one row per cluster and one
# column per column of `state` (labelled cellType).
# Fixes relative to the earlier attempts:
#   * reshape2 is attached before its functions are used;
#   * reshape2 provides dcast(), not cast();
#   * the pivot column is cellType and the cell values are colSum -- the
#     formula `clustNum ~ colSum` spread the values themselves into columns;
#   * drop = FALSE keeps one-locus clusters two-dimensional for colSums().
library(reshape2)
clusts <- dcast(
  ddply(loci, "clustNum", function(df) {
    locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
    subMat <- state[locus_ids, , drop = FALSE]
    data.frame(cellType = colnames(state),
               colSum = as.numeric(colSums(subMat)))
  }),
  clustNum ~ cellType,
  value.var = "colSum"
)
# Long table: one row per (cluster, column of `state`) holding that column's
# sum over the cluster's loci.
# drop = FALSE keeps one-locus clusters two-dimensional for colSums(); the
# unused local `cellType` assignment is removed.
clusts <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  data.frame(colSum = colSums(subMat), cellType = colnames(state))
})
head(clusts)
# Toy long-format table (100 serial numbers x 4 classes) used to check how
# dcast() pivots before applying the same call to `clusts`.
SN <- rep(1:100, times = 4)
class <- rep(c("A1", "B2", "C3", "D4"), each = 100)  # four levels
myvar <- rnorm(400)
mydf <- data.frame(SN, class, myvar)
head(mydf)
str(mydf)
str(clusts)
# value.var is named explicitly: when it is omitted dcast() guesses a value
# column, which is not necessarily colSum for `clusts`.
head(dcast(mydf, SN ~ class, value.var = "myvar"))
head(dcast(clusts, clustNum ~ cellType, value.var = "colSum"))
# Long table of per-cluster column sums of `state`, then a preview of the
# wide (cluster x cellType) pivot.
# drop = FALSE keeps one-locus clusters two-dimensional for colSums();
# value.var names the cell values instead of relying on dcast()'s guess.
clusts <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  data.frame(cellType = colnames(state),
             colSum = as.numeric(colSums(subMat)))
})
head(dcast(clusts, clustNum ~ cellType, value.var = "colSum"))
source('~/research/epigenome/rnaseq/code/explore.R')
ncol(state)
nrow(state)*nrow(clusts)
?clust
# Build the cluster x cellType matrix of column sums of `state`.
# Fixes: the unparseable `clust[,2:]` attempt is removed; drop = FALSE keeps
# one-locus clusters two-dimensional for colSums(); value.var is explicit;
# seq_len() replaces 1:nrow().
clust <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  data.frame(cellType = colnames(state),
             colSum = as.numeric(colSums(subMat)))
})
clust <- dcast(clust, clustNum ~ cellType, value.var = "colSum")
clust
# Drop the leading clustNum key column and keep only the values.
clust <- as.matrix(clust[, -1, drop = FALSE])
rm(clusts)
rownames(clust) <- seq_len(nrow(clust))
head(clust)
source('~/research/epigenome/rnaseq/code/explore.R')
source('~/research/epigenome/rnaseq/code/explore.R')
# Recompute the per-cluster column sums of `state` and pivot them to one row
# per cluster.
# Fixes: value.var must be the column NAME as a string ("colSum"); the bare
# symbol colSum is looked up as a variable and is not found. The leading
# dcast() on the previous `clust` is dropped because its result was
# overwritten immediately. drop = FALSE keeps one-locus clusters
# two-dimensional for colSums().
clust <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  data.frame(cellType = colnames(state),
             colSum = as.numeric(colSums(subMat)))
})
clust <- dcast(clust, clustNum ~ cellType, value.var = "colSum")
source('~/research/epigenome/rnaseq/code/explore.R')
head(clust)
?dcast
# Recompute the per-cluster column sums of `state` and pivot them to one row
# per cluster.
# Fixes: value.var takes the column NAME as a string; neither the bare symbol
# colSum nor the column's values (clust$colSum) is a valid value. The pivot
# now runs once, after the long table exists. drop = FALSE keeps one-locus
# clusters two-dimensional for colSums().
clust <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  data.frame(cellType = colnames(state),
             colSum = as.numeric(colSums(subMat)))
})
clust <- dcast(clust, clustNum ~ cellType, value.var = "colSum")
# Cluster x cellType matrix of column sums of `state`.
# drop = FALSE keeps one-locus clusters two-dimensional for colSums();
# value.var names the cell values instead of relying on dcast()'s guess.
clust <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  data.frame(cellType = colnames(state),
             colSum = as.numeric(colSums(subMat)))
})
clust <- dcast(clust, clustNum ~ cellType, value.var = "colSum")
# Drop the leading clustNum key column and keep only the values.
clust <- as.matrix(clust[, -1, drop = FALSE])
source('~/research/epigenome/rnaseq/code/explore.R')
head(loci)
source('~/research/epigenome/rnaseq/code/explore.R')
nrow(expr.zf.mat)
nrow(expr.zf.mat)*nrow(clust)
# Heatmap of the matrix currently held in `clust`.
heatmap.2(clust, trace = 'none')
# Rebuild the cluster x cellType matrix of column sums of `state`.
# drop = FALSE keeps one-locus clusters two-dimensional for colSums().
clust <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  data.frame(cellType = colnames(state),
             colSum = as.numeric(colSums(subMat)))
})
clust <- dcast(clust, clustNum ~ cellType, value.var = "colSum")
clust <- as.matrix(clust[, -1, drop = FALSE])
head(clust)
# Scale every row (cluster) to sum to 1. apply() over rows returns its
# results as COLUMNS, so the t() is required to get clusters back on the
# rows; the earlier attempt without t() left the matrix transposed.
clust <- t(apply(clust, 1, function(x) x / sum(x)))
head(clust)
heatmap.2(clust, trace = 'none')
# Cluster x cellType matrix of column MEANS of `state`, each row then scaled
# to sum to 1, shown as a heatmap.
# Fixes: the first attempt filled a column named colMean with colSums();
# only the colMeans() version is kept. drop = FALSE keeps one-locus clusters
# two-dimensional; value.var names the cell values explicitly.
clust <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  data.frame(cellType = colnames(state),
             colMean = as.numeric(colMeans(subMat)))
})
clust <- dcast(clust, clustNum ~ cellType, value.var = "colMean")
clust <- as.matrix(clust[, -1, drop = FALSE])
# apply() over rows returns columns, hence the t().
clust <- t(apply(clust, 1, function(x) x / sum(x)))
heatmap.2(clust, trace = 'none')
# Cluster x cellType matrix of column means of `state`; each row is scaled to
# sum to 1 and then log-transformed before plotting.
# drop = FALSE keeps one-locus clusters two-dimensional for colMeans();
# value.var names the cell values instead of relying on dcast()'s guess.
clust <- ddply(loci, "clustNum", function(df) {
  locus_ids <- paste0(df$chr, ':', df$start, '-', df$end)
  subMat <- state[locus_ids, , drop = FALSE]
  data.frame(cellType = colnames(state),
             colMean = as.numeric(colMeans(subMat)))
})
clust <- dcast(clust, clustNum ~ cellType, value.var = "colMean")
clust <- as.matrix(clust[, -1, drop = FALSE])
# NOTE(review): a zero entry becomes log(0) = -Inf here; confirm the heatmap
# clustering tolerates non-finite values or add a pseudocount.
clust <- t(apply(clust, 1, function(x) log(x / sum(x))))
heatmap.2(clust, trace = 'none')
source('~/research/epigenome/rnaseq/code/explore.R')
# Heatmap of `clust` with default dendrograms, then again passing Colv = NA.
# The argument is spelled `Colv`; R argument names are case-sensitive, so the
# earlier `ColV = NA` never reached that parameter and is removed.
# NOTE(review): Colv = NA is the stats::heatmap convention for leaving the
# columns unordered -- confirm heatmap.2 treats it the same way.
heatmap.2(clust, trace = 'none')
heatmap.2(clust, trace = 'none', Colv = NA)
update.packages(checkBuilt=TRUE, ask=FALSE)
update.packages()
getwd()
# Scratch checks of log() at a few inputs.
log(exp(1))
log(1)
log(0.99)
log(0)      # -Inf
log(0.2)
log(-0.2)   # NaN, with a warning
# `version` is a list describing the running R, not a function; calling
# version() is an error, so it is printed instead.
version
# Reinstall, from source, every package currently installed in the first
# library path.
# `lib` is assigned BEFORE the call. The earlier attempts either assigned it
# inside the argument list (so whether it existed when `pkgs` was evaluated
# depended on the order in which install.packages() forces its arguments),
# used it without defining it, or began with a stray ')' that does not parse.
lib <- .libPaths()[1]
install.packages(
  lib  = lib,
  pkgs = as.data.frame(installed.packages(lib), stringsAsFactors = FALSE)$Package,
  type = 'source'
)
105.81 + 666.07 + 158.17
q()
# Make plots related to specificity change diffusion and
# specificity change for Aim 1 in NIH grant proposal
rm(list = ls())
#source('~/research/rmodules/plottingFuncs.R')
source('logisticReg2_helpers.R')
####################################################
# GLOBAL PARAMETERS
####################################################
# Data set names handed to getPairDataSet().
TRAIN <- 'noyes2015_alpha0.30_BL-hughes'  # alternative: 'hughes2015'
TEST <- 'hughes2015_alpha0.30'

# Labels used for finding prediction tables.
TRAIN_LAB2 <- 'sF2F3low_d_logRegr'
TEST_LAB2 <- 'hughes'

ALPHA_TRAIN <- rep('0.30', 3)

# Comparison methods and their labels.
# Earlier choices: 'hughesPred', 'olsf2f3Pred', 'nnf2f3Pred' with labels
# 'RFNH', paste0('f2f3alpha', ALPHA_TRAIN[2], '-OLS'), 'NN-f2f3low'.
OTHER_METHODS <- c('olsf2f3Pred', 'antonSVM_l7')
OTHER_METHODS_LAB <- c('sF2F3low_d_olsRegr', 'svm_l7')
PRECOMP_FLAGS <- c(FALSE, TRUE)  # earlier: c(TRUE, FALSE, TRUE)

# Plot colours (earlier: 'black', 'red', 'blue' / 'green', 'purple', 'orange').
COLS_FORMS <- 'black'
COLS_OTHER <- c('purple', 'green')

# Set TRUE to remove training cores from test cores.
# If TRAIN ends in _BL-xxx, this is unnecessary.
BLACKLIST_TRAIN_SET <- FALSE

print(paste0("Train: ", TRAIN))
print(paste0("Test: ", TEST))
print(paste0("OtherMeths: ", OTHER_METHODS_LAB))

K <- 10                    # Number of folds for CV
MIN_POS <- 0.75            # PCC cutoff for +'s
MAX_NEG <- 0.0             # PCC cutoff for -'s
MIN_OBS_PER_FEATURE <- 10  # Min # obs. to include feature in model
# Plots go under ../plots/nihGrantFigs/<TRAIN>/, one level deeper
# (otherMethods/) when comparison methods are configured.
PLOT_DIR <- paste0('../plots/nihGrantFigs/', TRAIN, '/')
if (length(OTHER_METHODS) > 0) {
  PLOT_DIR <- paste0(PLOT_DIR, 'otherMethods/')
}
# Output sub-directories: one for CV plots, three specific to TEST.
SUB_DIRS <- c(
  paste0(PLOT_DIR, 'bySubPos.CV/'),
  paste0(PLOT_DIR, TEST, c('/bySubPos/vanilla/',
                           '/bySubPos/ridge_lambda1se/',
                           '/rankPlots/'))
)
DO_CV <- FALSE
DO_RIDGE <- FALSE
STANDARD_X <- TRUE     # Use the glmnet internal standardization of features
INTERCEPT <- FALSE     # Compute an intercept term
# Create whichever output directories do not exist yet.
for (s in SUB_DIRS) {
  if (dir.exists(s)) next
  dir.create(s, recursive = TRUE, showWarnings = FALSE)
}
# Seed the RNG before anything else in this section runs.
set.seed(87235)
# Import the training and test data. The result of getPairDataSet() is
# indexed as a named list: the first element's name is kept as the label and
# the element itself becomes the working data set.
data <- getPairDataSet(TRAIN)
TRAIN_LAB <- names(data)[1]
data <- data[[TRAIN_LAB]]
test.data <- getPairDataSet(TEST)
TEST_LAB <- names(test.data)[1]
test.data <- test.data[[TEST_LAB]]
if (BLACKLIST_TRAIN_SET) {
  test.data <- removeTrainSet(test.data, data)
}
# One-sided model formula over the first four names of `data`; a trailing
# "+0" term is appended when no intercept is wanted.
forms <- list(paste0('~', paste0(c(names(data)[1:4], if (!INTERCEPT) 0),
                                 collapse = '+')))
forms <- lapply(forms, formula)
names(forms) <- TRAIN_LAB2
# Named list of plot colours, keyed by method label: the model formulas
# first, followed by the comparison methods when any are configured.
# Fix: the else branch was never closed, which left the remainder of the
# file inside this block.
if (length(OTHER_METHODS) != 0) {
  PLOT_COLS <- as.list(c(COLS_FORMS, COLS_OTHER))
  names(PLOT_COLS) <- c(names(forms), OTHER_METHODS_LAB)
} else {
  PLOT_COLS <- as.list(COLS_FORMS)
  names(PLOT_COLS) <- names(forms)
}
rm(list = ls())
source('logisticReg2_helpers.R')
# Data set names.
TRAIN <- 'noyes2015_alpha0.30_BL-hughes'  # alternative: 'hughes2015'
TEST <- 'hughes2015_alpha0.30'

# Labels used for finding prediction tables.
TRAIN_LAB2 <- 'sF2F3low_d_logRegr'
TEST_LAB2 <- 'hughes'

ALPHA_TRAIN <- rep('0.30', 3)

# Comparison methods and their labels
# (earlier choices: 'hughesPred', 'olsf2f3Pred', 'nnf2f3Pred').
OTHER_METHODS <- c('olsf2f3Pred', 'antonSVM_l7')
OTHER_METHODS_LAB <- c('sF2F3low_d_olsRegr', 'svm_l7')
PRECOMP_FLAGS <- c(FALSE, TRUE)  # earlier: c(TRUE, FALSE, TRUE)

# Plot colours (earlier: 'black', 'red', 'blue' / 'green', 'purple', 'orange').
COLS_FORMS <- 'black'
COLS_OTHER <- c('purple', 'green')

# Set TRUE to remove training cores from test cores.
BLACKLIST_TRAIN_SET <- FALSE

print(paste0("Train: ", TRAIN))
print(paste0("Test: ", TEST))
print(paste0("OtherMeths: ", OTHER_METHODS_LAB))
rm(list = ls())
q()
# Reinstall every package found in the first library path.
packs <- as.data.frame(installed.packages(.libPaths()[1]),
                       stringsAsFactors = FALSE)
install.packages(packs$Package)
# Install BiocManager when it is missing, then run its default install();
# the sequence was issued twice.
if (!requireNamespace("BiocManager")) {
  install.packages("BiocManager")
}
BiocManager::install()
if (!requireNamespace("BiocManager")) {
  install.packages("BiocManager")
}
BiocManager::install()
.libPaths()
# Table of installed packages that have no Priority value, with their
# versions; shown on the console and saved as a tab-separated file.
ip <- as.data.frame(installed.packages()[, c("Package", "Version", "Priority")])
rownames(ip) <- NULL
no_priority <- is.na(ip$Priority)
ip <- ip[no_priority, c("Package", "Version"), drop = FALSE]
print(ip, row.names = FALSE)
ip
write.table(ip, file = '~/Desktop/ip.txt', sep = '\t',
            row.names = FALSE, quote = FALSE)
# Package maintenance run, kept in the order it was issued.
# Update CRAN packages without prompting; checkBuilt = TRUE is passed so the
# build version of installed packages is taken into account.
update.packages(ask = FALSE, checkBuilt = TRUE)
update.packages(ask = FALSE, checkBuilt = TRUE)
# Same for Bioconductor packages.
BiocManager::install(ask=FALSE, checkBuilt = TRUE)
# Install BiocManager only when it is missing, then request release 3.11.
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(version = "3.11")
# Unconditional reinstall of BiocManager (issued twice).
install.packages("BiocManager")
install.packages("BiocManager")
BiocManager::install(ask=FALSE, checkBuilt = TRUE)
BiocManager::install(version = "3.11", ask=FALSE, checkBuilt = TRUE)
# Install BSgenome.Hsapiens.UCSC.hg19 through BiocManager.
# The package name must be quoted with plain ASCII quotes; the earlier
# attempts used typographic (curly) quotes, which R does not parse as string
# delimiters.
BiocManager::install("BSgenome.Hsapiens.UCSC.hg19")
# Bring Bioconductor to release 3.11 without prompting (issued twice).
BiocManager::install(version = "3.11", ask=FALSE, checkBuilt = TRUE)
BiocManager::install(version = "3.11", ask=FALSE, checkBuilt = TRUE)
# Help pages consulted while working out the update options.
?install.packages
?update.packages
# Show the library search path and the configured repositories.
.libPaths()
getOption("repos")
# Update CRAN packages without prompting (issued twice).
update.packages(ask = FALSE, checkBuilt = TRUE)
update.packages(ask = FALSE, checkBuilt = TRUE)
?
?gsePathway
?gseNCG
?gseNCG
library(data.table)
library(heatmap3)
library(RColorBrewer)
library(dynamicTreeCut)
library(ggplot2)
library(ReactomePA)
library(clusterProfiler)
library(ggrepel)
library(moduleColor)
library(EnsDb.Hsapiens.v86)
library(igraph)
suppressMessages(library(DOSE, quietly = TRUE))
suppressMessages(library(org.Hs.eg.db, quietly = TRUE))
suppressMessages(library(enrichplot, quietly = TRUE))
suppressMessages(library(ggnewscale, quietly = TRUE))
?gseNCG
?gseGO
?gseDO
# Update CRAN packages with checkBuilt = TRUE (issued twice; `ask` is left
# at its default here).
update.packages(checkBuilt = TRUE)
update.packages(checkBuilt = TRUE)
# Install BiocManager only when it is missing.
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
# Hand every installed package name to BiocManager for a source install.
pkgs <- rownames(installed.packages())
BiocManager::install(pkgs, type = "source", checkBuilt = TRUE)
setwd('~/research/jointInterfaceLearning/rCLAMPS/flyFactorSurvey/enuameh/')
source("~/research/jointInterfaceLearning/rCLAMPS/flyFactorSurvey/enuameh/examineEnuameh.R")
specs
# Order based on finger positions and output table to read into gibbs sampler.
# `specs` is indexed with data.table syntax (with = FALSE, bare column names
# inside order()) -- presumably a data.table created by the sourced script.
specs.protInfo <- specs[, c('prot', 'motif', 'zfNum', 'core', 'helix'),
                        with = FALSE]
# paste() is vectorised, so the per-element sapply() wrapper is unnecessary.
specs.protInfo$motifNameStem.ffs <- paste(specs.protInfo$prot, 'SOLEXA',
                                          sep = '_')
# Drop repeated rows, show the table, then sort by protein and finger number.
specs.protInfo <- specs.protInfo[!duplicated(specs.protInfo)]
specs.protInfo
specs.protInfo <- specs.protInfo[order(prot, zfNum)]
source("~/research/jointInterfaceLearning/rCLAMPS/flyFactorSurvey/enuameh/examineEnuameh.R")
source("~/research/jointInterfaceLearning/rCLAMPS/flyFactorSurvey/enuameh/examineEnuameh.R")
source("~/research/jointInterfaceLearning/rCLAMPS/flyFactorSurvey/enuameh/examineEnuameh.R")
