# Work from the in-silico mixture directory. The two paths differ only in the
# volume name ('MyBook_5' vs 'MyBook_5 1'); the second call determines the
# final working directory.
setwd('/Volumes/MyBook_5/RNAseq_datasets_for_Deconvolution/Process_for_deconvolution/InSilicoMixtures')
setwd('/Volumes/MyBook_5 1//RNAseq_datasets_for_Deconvolution/Process_for_deconvolution/InSilicoMixtures')

# Restore the previously saved workspaces. The objects used below (vsd,
# my.count.matrix.2, my.meta.data.2) are presumably provided by these files.
load('../2017-01-18/2017-01-18aggregated_counts_matrix_for_Deconvolution_withPseudoBulkPool_VST_normalized.RData')
load('../2017-01-18/2017-01-18_aggregated_counts_matrix_for_Deconvolution_withPseudoBulk_pooledBulk.RData')
dim(vsd)

# Columns of my.count.matrix.2 holding more than 17000 zero entries are
# treated as too sparse to use.
my.sparse <- which(colSums(my.count.matrix.2 == 0) > 17000) # 9 samples

# Drop the first two columns together with the sparse columns, and label the
# rows with the gene names.
my.count.matrix.process <- data.frame(my.count.matrix.2[, -c(1:2, my.sparse)])
rownames(my.count.matrix.process) <- my.count.matrix.2$GeneName
colnames(my.count.matrix.2)[1:5]

# NOTE(review): my.sparse holds column positions of my.count.matrix.2 (which
# still includes its first two columns), yet it is used here as row positions
# of my.meta.data.2 -- confirm the metadata rows follow that same numbering.
# x[-integer(0), ] selects zero rows, so only subset when something is flagged.
my.meta.data.process <- if (length(my.sparse) > 0) my.meta.data.2[-my.sparse, ] else my.meta.data.2

# Library size = total count per column.
my.lib.size <- colSums(my.count.matrix.process)

# Divide every column by its own library size. A plain `data.frame / vector`
# recycles the vector down the rows, which pairs values with the wrong
# library sizes; sweep() applies the statistic along the column margin.
my.count.matrix.process.NORM1 <- sweep(my.count.matrix.process, 2, my.lib.size, "/")

# The 0.01 offset keeps zero values drawable on the log-scaled axis.
boxplot(my.count.matrix.process.NORM1 + 0.01, outline = FALSE,
        log = 'y', las = 2, cex.axis = 0.5, col = "tomato",
        ylab = "Raw counts Library size normalization")
###### Quantile-normalize the counts before making mixtures
# normalize.quantiles() comes from preprocessCore, so the package has to be
# attached before the first call.
options(stringsAsFactors = FALSE)
library('preprocessCore')

# Row and column names are reassigned from the input matrix after
# normalization.
my.count.matrix.process.NORM2 <- data.frame(normalize.quantiles(as.matrix(my.count.matrix.process), copy = TRUE))
rownames(my.count.matrix.process.NORM2) <- rownames(my.count.matrix.process)
colnames(my.count.matrix.process.NORM2) <- colnames(my.count.matrix.process)

# visualize data spread: once on the active graphics device ...
boxplot(my.count.matrix.process.NORM2 + 0.01, outline = FALSE,
        log = 'y', las = 2, cex.axis = 0.5, col = "tomato",
        ylab = "Raw counts quantile normalization")

# ... and once into a PDF file in the working directory.
pdf("2017-01-20_boxplot_RNASeq_counts_PSEUDONORM_Quantile.pdf", width = 25, height = 6)
boxplot(my.count.matrix.process.NORM2 + 0.01, outline = FALSE,
        log = 'y', las = 2, cex.axis = 0.5, col = "tomato",
        ylab = "Raw counts quantile normalization")
dev.off()
######
# quantile looks good

# Positions of the samples belonging to each cell type. Matching is
# case-sensitive: the fibroblast labels are spelled 'Dermal_Fibroblasts' and
# 'Cardiac_Fibroblasts' (capital F); a lower-case spelling matches nothing.
# NOTE(review): verify against unique(my.meta.data.process$Cell_type).
my.cell.types <- as.character(my.meta.data.process$Cell_type)
my.astro   <- which(my.cell.types %in% 'Astrocytes')
my.neurons <- which(my.cell.types %in% 'Neurons')
my.mph     <- which(my.cell.types %in% 'Macrophages')
my.mgl     <- which(my.cell.types %in% 'Microglia')
my.hepa    <- which(my.cell.types %in% 'Hepatocytes')
my.Fib     <- which(my.cell.types %in% 'Dermal_Fibroblasts')
my.cardC   <- which(my.cell.types %in% 'Cardiomyocytes')
my.cardFib <- which(my.cell.types %in% 'Cardiac_Fibroblasts')

# An empty index set would yield NaN profiles further down; stop here instead.
stopifnot(length(my.astro) > 0, length(my.neurons) > 0, length(my.mph) > 0,
          length(my.mgl) > 0, length(my.hepa) > 0, length(my.Fib) > 0,
          length(my.cardC) > 0, length(my.cardFib) > 0)

# get a representative expression per cell type: the per-row mean over that
# cell type's samples. drop = FALSE keeps a single-sample selection
# two-dimensional so apply() still works.
my.astro.exp    <- apply(my.count.matrix.process.NORM2[, my.astro,   drop = FALSE], 1, mean)
my.neurons.exp  <- apply(my.count.matrix.process.NORM2[, my.neurons, drop = FALSE], 1, mean)
my.mph.exp      <- apply(my.count.matrix.process.NORM2[, my.mph,     drop = FALSE], 1, mean)
my.mgl.exp      <- apply(my.count.matrix.process.NORM2[, my.mgl,     drop = FALSE], 1, mean)
my.hepa.exp     <- apply(my.count.matrix.process.NORM2[, my.hepa,    drop = FALSE], 1, mean)
my.Fib.exp      <- apply(my.count.matrix.process.NORM2[, my.Fib,     drop = FALSE], 1, mean)
my.cardC.exp    <- apply(my.count.matrix.process.NORM2[, my.cardC,   drop = FALSE], 1, mean)
my.cardFib.exp  <- apply(my.count.matrix.process.NORM2[, my.cardFib, drop = FALSE], 1, mean)
################################################
# Row-wise weighted mean of an expression matrix.
#
# exp.matrix: two-dimensional object; each row is averaged across its columns.
# my.weights: numeric weights, one per column of exp.matrix.
#
# Returns an unnamed numeric vector holding one weighted mean per row of
# exp.matrix (length zero when exp.matrix has no rows).
get_wt_mean <- function(exp.matrix,my.weights){
  # seq_len() yields an empty sequence for a zero-row input, whereas 1:0
  # would try to visit rows 1 and 0.
  vapply(seq_len(dim(exp.matrix)[1]),
         function(i) weighted.mean(exp.matrix[i,], my.weights),
         numeric(1))
}
################################################
#### simulate mixtures
# Every mixture is the row-wise weighted mean (get_wt_mean) of the
# per-cell-type profiles. Each set of component profiles is bound into a
# matrix once and reused for all of its weightings.

# Brain
my.parts.astro.neurons     <- cbind(my.astro.exp, my.neurons.exp)
my.parts.astro.neurons.mgl <- cbind(my.astro.exp, my.neurons.exp, my.mgl.exp)
my.astro.neurons.50_50   <- get_wt_mean(my.parts.astro.neurons, c(0.5, 0.5))
my.astro.neurons.30_70   <- get_wt_mean(my.parts.astro.neurons, c(0.3, 0.7))
my.astro.neurons.70_30   <- get_wt_mean(my.parts.astro.neurons, c(0.7, 0.3))
my.astro.neurons.mgl.0.1 <- get_wt_mean(my.parts.astro.neurons.mgl, c(0.499, 0.499, 0.001))
my.astro.neurons.mgl.1   <- get_wt_mean(my.parts.astro.neurons.mgl, c(0.495, 0.495, 0.01))
my.astro.neurons.mgl.5   <- get_wt_mean(my.parts.astro.neurons.mgl, c(0.475, 0.475, 0.05))
my.astro.neurons.mgl.10  <- get_wt_mean(my.parts.astro.neurons.mgl, c(0.45, 0.45, 0.1))
my.astro.neurons.mgl.15  <- get_wt_mean(my.parts.astro.neurons.mgl, c(0.425, 0.425, 0.15))
my.astro.neurons.mgl.20  <- get_wt_mean(my.parts.astro.neurons.mgl, c(0.4, 0.4, 0.2))

# Heart
my.parts.cardC.cardFib <- cbind(my.cardC.exp, my.cardFib.exp)
my.parts.cardC.mph     <- cbind(my.cardC.exp, my.mph.exp)
my.cardio_fibro.90_10 <- get_wt_mean(my.parts.cardC.cardFib, c(0.9, 0.1))
my.cardio_fibro.95_5  <- get_wt_mean(my.parts.cardC.cardFib, c(0.95, 0.05))
my.cardio_fibro.99_1  <- get_wt_mean(my.parts.cardC.cardFib, c(0.99, 0.01))
my.cardio.mph.0.1     <- get_wt_mean(my.parts.cardC.mph, c(0.999, 0.001))
my.cardio.mph.1       <- get_wt_mean(my.parts.cardC.mph, c(0.99, 0.01))
my.cardio.mph.5       <- get_wt_mean(my.parts.cardC.mph, c(0.95, 0.05))
my.cardio.mph.10      <- get_wt_mean(my.parts.cardC.mph, c(0.9, 0.1))
my.cardio.mph.15      <- get_wt_mean(my.parts.cardC.mph, c(0.85, 0.15))
my.cardio.mph.20      <- get_wt_mean(my.parts.cardC.mph, c(0.8, 0.2))

# Liver
my.parts.hepa.Fib <- cbind(my.hepa.exp, my.Fib.exp)
my.parts.hepa.mph <- cbind(my.hepa.exp, my.mph.exp)
my.liver_fibro.90_10 <- get_wt_mean(my.parts.hepa.Fib, c(0.9, 0.1))
my.liver_fibro.95_5  <- get_wt_mean(my.parts.hepa.Fib, c(0.95, 0.05))
my.liver_fibro.99_1  <- get_wt_mean(my.parts.hepa.Fib, c(0.99, 0.01))
my.liver.mph.0.1     <- get_wt_mean(my.parts.hepa.mph, c(0.999, 0.001))
my.liver.mph.1       <- get_wt_mean(my.parts.hepa.mph, c(0.99, 0.01))
my.liver.mph.5       <- get_wt_mean(my.parts.hepa.mph, c(0.95, 0.05))
my.liver.mph.10      <- get_wt_mean(my.parts.hepa.mph, c(0.9, 0.1))
my.liver.mph.15      <- get_wt_mean(my.parts.hepa.mph, c(0.85, 0.15))
my.liver.mph.20      <- get_wt_mean(my.parts.hepa.mph, c(0.8, 0.2))

### get a mixture matrix: one column per simulated mixture, brain first,
### then heart, then liver (9 columns each).
# NOTE(review): the brain weights are given in (astrocytes, neurons) order
# while the column labels start with "Neuron_"; confirm that the 30_70 and
# 70_30 labels describe the intended proportions.
my.count.mixtures <- data.frame(
  "Neuron_Astrocytes.50_50"           = my.astro.neurons.50_50,
  "Neuron_Astrocytes.30_70"           = my.astro.neurons.30_70,
  "Neuron_Astrocytes.70_30"           = my.astro.neurons.70_30,
  "Neuron_Astrocytes.Microglia_0.1"   = my.astro.neurons.mgl.0.1,
  "Neuron_Astrocytes.Microglia_1"     = my.astro.neurons.mgl.1,
  "Neuron_Astrocytes.Microglia_5"     = my.astro.neurons.mgl.5,
  "Neuron_Astrocytes.Microglia_10"    = my.astro.neurons.mgl.10,
  "Neuron_Astrocytes.Microglia_15"    = my.astro.neurons.mgl.15,
  "Neuron_Astrocytes.Microglia_20"    = my.astro.neurons.mgl.20,
  "Cardiomyocytes_CardiacFibro.90_10" = my.cardio_fibro.90_10,
  "Cardiomyocytes_CardiacFibro.95_5"  = my.cardio_fibro.95_5,
  "Cardiomyocytes_CardiacFibro.99_1"  = my.cardio_fibro.99_1,
  "Cardiomyocytes_Macrophages_0.1"    = my.cardio.mph.0.1,
  "Cardiomyocytes_Macrophages_1"      = my.cardio.mph.1,
  "Cardiomyocytes_Macrophages_5"      = my.cardio.mph.5,
  "Cardiomyocytes_Macrophages_10"     = my.cardio.mph.10,
  "Cardiomyocytes_Macrophages_15"     = my.cardio.mph.15,
  "Cardiomyocytes_Macrophages_20"     = my.cardio.mph.20,
  "Hepatocytes_Fibro.90_10"           = my.liver_fibro.90_10,
  "Hepatocytes_Fibro.95_5"            = my.liver_fibro.95_5,
  "Hepatocytes_Fibro.99_1"            = my.liver_fibro.99_1,
  "Hepatocytes_Macrophages_0.1"       = my.liver.mph.0.1,
  "Hepatocytes_Macrophages_1"         = my.liver.mph.1,
  "Hepatocytes_Macrophages_5"         = my.liver.mph.5,
  "Hepatocytes_Macrophages_10"        = my.liver.mph.10,
  "Hepatocytes_Macrophages_15"        = my.liver.mph.15,
  "Hepatocytes_Macrophages_20"        = my.liver.mph.20
)
rownames(my.count.mixtures) <- my.count.matrix.2$GeneName

### see summary to figure out if any multiplication will be necessary before rounding
summary(my.count.mixtures)
# Scale by 10 and round to whole numbers.
my.count.mixtures.clean <- round(10 * my.count.mixtures)
### run VST normalization for CIBERSORT
library('DESeq2')

# One tissue label per mixture column, in column order.
my.big.type <- c(rep("Brain",9), rep("Heart",9), rep("Liver",9))

# design matrix: one row per mixture, labelled with its tissue
dataDesign <- data.frame(row.names = colnames(my.count.mixtures.clean), my.big.type = my.big.type)

# build the DESeq2 data set with the tissue label as the only design term
dds <- DESeqDataSetFromMatrix(countData = my.count.mixtures.clean,
                              colData = dataDesign,
                              design = ~ my.big.type)
dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds)

pdf("2017-01-20_Dsipersion_plot_for_Mixtures.pdf", width = 25, height = 6)
plotDispEsts(dds)
dev.off()

vsd <- data.frame(getVarianceStabilizedData(dds))
vsd$GeneName <- my.count.matrix.2$GeneName
# Move the GeneName column (appended last) to the front, whatever the number
# of mixture columns is.
vsd <- vsd[, c(ncol(vsd), seq_len(ncol(vsd) - 1L))]

# NOTE(review): the .RData name is built without a separator after the date,
# whereas the text file name uses "_" -- confirm that is intended.
save(dds, vsd, file = paste0(Sys.Date(), "DEseq_processed_object_inSilicoMixtures.RData"))
write.table(vsd, file = paste(Sys.Date(), "aggregated_counts_matrix_for_Deconvolution_withPseudoBulkPool_VST_normalized_inSilicoMixtures.txt", sep = "_"),
            quote = FALSE, row.names = FALSE, sep = "\t")
# Diagnostics: print the design table, rebuild the DESeq2 data set from the
# rounded mixture counts, and print the design table again.
dataDesign
dds <- DESeqDataSetFromMatrix(countData = my.count.mixtures.clean,
colData = dataDesign,
design = ~ my.big.type)
dataDesign
# Count the missing values in the rounded mixture matrix.
sum(is.na(my.count.mixtures.clean))
# Open both mixture tables in the data viewer and summarise every column of
# the rounded one.
View(my.count.mixtures)
View(my.count.mixtures.clean)
summary(my.count.mixtures.clean)
# Print the cardiac-fibroblast sample positions and all cell-type labels
# (NOTE(review): presumably to compare the label spellings with the lookup).
my.cardFib
my.meta.data.process$Cell_type
# Positions of the samples of each cell type, looked up by metadata label.
my.cell.type.labels <- as.character(my.meta.data.process$Cell_type)
my.astro   <- which(my.cell.type.labels %in% 'Astrocytes')
my.neurons <- which(my.cell.type.labels %in% 'Neurons')
my.mph     <- which(my.cell.type.labels %in% 'Macrophages')
my.mgl     <- which(my.cell.type.labels %in% 'Microglia')
my.hepa    <- which(my.cell.type.labels %in% 'Hepatocytes')
my.Fib     <- which(my.cell.type.labels %in% 'Dermal_Fibroblasts')
my.cardC   <- which(my.cell.type.labels %in% 'Cardiomyocytes')
my.cardFib <- which(my.cell.type.labels %in% 'Cardiac_Fibroblasts')

# Representative profile per cell type: for every row, the mean over that
# cell type's columns of the quantile-normalized matrix.
my.astro.exp   <- apply(X = my.count.matrix.process.NORM2[, my.astro],   MARGIN = 1, FUN = mean)
my.neurons.exp <- apply(X = my.count.matrix.process.NORM2[, my.neurons], MARGIN = 1, FUN = mean)
my.mph.exp     <- apply(X = my.count.matrix.process.NORM2[, my.mph],     MARGIN = 1, FUN = mean)
my.mgl.exp     <- apply(X = my.count.matrix.process.NORM2[, my.mgl],     MARGIN = 1, FUN = mean)
my.hepa.exp    <- apply(X = my.count.matrix.process.NORM2[, my.hepa],    MARGIN = 1, FUN = mean)
my.Fib.exp     <- apply(X = my.count.matrix.process.NORM2[, my.Fib],     MARGIN = 1, FUN = mean)
my.cardC.exp   <- apply(X = my.count.matrix.process.NORM2[, my.cardC],   MARGIN = 1, FUN = mean)
my.cardFib.exp <- apply(X = my.count.matrix.process.NORM2[, my.cardFib], MARGIN = 1, FUN = mean)
################################################
# Row-wise weighted mean of an expression matrix.
#
# exp.matrix: two-dimensional object; each row is averaged across its columns.
# my.weights: numeric weights, one per column of exp.matrix.
#
# Returns an unnamed numeric vector holding one weighted mean per row of
# exp.matrix (length zero when exp.matrix has no rows).
get_wt_mean <- function(exp.matrix,my.weights){
  # seq_len() yields an empty sequence for a zero-row input, whereas 1:0
  # would try to visit rows 1 and 0.
  vapply(seq_len(dim(exp.matrix)[1]),
         function(i) weighted.mean(exp.matrix[i,], my.weights),
         numeric(1))
}
################################################
#### simulate mixtures
# Every mixture is the row-wise weighted mean (get_wt_mean) of the
# per-cell-type profiles. Each set of component profiles is bound into a
# matrix once and reused for all of its weightings.

# Brain
my.parts.astro.neurons     <- cbind(my.astro.exp, my.neurons.exp)
my.parts.astro.neurons.mgl <- cbind(my.astro.exp, my.neurons.exp, my.mgl.exp)
my.astro.neurons.50_50   <- get_wt_mean(my.parts.astro.neurons, c(0.5, 0.5))
my.astro.neurons.30_70   <- get_wt_mean(my.parts.astro.neurons, c(0.3, 0.7))
my.astro.neurons.70_30   <- get_wt_mean(my.parts.astro.neurons, c(0.7, 0.3))
my.astro.neurons.mgl.0.1 <- get_wt_mean(my.parts.astro.neurons.mgl, c(0.499, 0.499, 0.001))
my.astro.neurons.mgl.1   <- get_wt_mean(my.parts.astro.neurons.mgl, c(0.495, 0.495, 0.01))
my.astro.neurons.mgl.5   <- get_wt_mean(my.parts.astro.neurons.mgl, c(0.475, 0.475, 0.05))
my.astro.neurons.mgl.10  <- get_wt_mean(my.parts.astro.neurons.mgl, c(0.45, 0.45, 0.1))
my.astro.neurons.mgl.15  <- get_wt_mean(my.parts.astro.neurons.mgl, c(0.425, 0.425, 0.15))
my.astro.neurons.mgl.20  <- get_wt_mean(my.parts.astro.neurons.mgl, c(0.4, 0.4, 0.2))

# Heart
my.parts.cardC.cardFib <- cbind(my.cardC.exp, my.cardFib.exp)
my.parts.cardC.mph     <- cbind(my.cardC.exp, my.mph.exp)
my.cardio_fibro.90_10 <- get_wt_mean(my.parts.cardC.cardFib, c(0.9, 0.1))
my.cardio_fibro.95_5  <- get_wt_mean(my.parts.cardC.cardFib, c(0.95, 0.05))
my.cardio_fibro.99_1  <- get_wt_mean(my.parts.cardC.cardFib, c(0.99, 0.01))
my.cardio.mph.0.1     <- get_wt_mean(my.parts.cardC.mph, c(0.999, 0.001))
my.cardio.mph.1       <- get_wt_mean(my.parts.cardC.mph, c(0.99, 0.01))
my.cardio.mph.5       <- get_wt_mean(my.parts.cardC.mph, c(0.95, 0.05))
my.cardio.mph.10      <- get_wt_mean(my.parts.cardC.mph, c(0.9, 0.1))
my.cardio.mph.15      <- get_wt_mean(my.parts.cardC.mph, c(0.85, 0.15))
my.cardio.mph.20      <- get_wt_mean(my.parts.cardC.mph, c(0.8, 0.2))

# Liver
my.parts.hepa.Fib <- cbind(my.hepa.exp, my.Fib.exp)
my.parts.hepa.mph <- cbind(my.hepa.exp, my.mph.exp)
my.liver_fibro.90_10 <- get_wt_mean(my.parts.hepa.Fib, c(0.9, 0.1))
my.liver_fibro.95_5  <- get_wt_mean(my.parts.hepa.Fib, c(0.95, 0.05))
my.liver_fibro.99_1  <- get_wt_mean(my.parts.hepa.Fib, c(0.99, 0.01))
my.liver.mph.0.1     <- get_wt_mean(my.parts.hepa.mph, c(0.999, 0.001))
my.liver.mph.1       <- get_wt_mean(my.parts.hepa.mph, c(0.99, 0.01))
my.liver.mph.5       <- get_wt_mean(my.parts.hepa.mph, c(0.95, 0.05))
my.liver.mph.10      <- get_wt_mean(my.parts.hepa.mph, c(0.9, 0.1))
my.liver.mph.15      <- get_wt_mean(my.parts.hepa.mph, c(0.85, 0.15))
my.liver.mph.20      <- get_wt_mean(my.parts.hepa.mph, c(0.8, 0.2))

### get a mixture matrix: one column per simulated mixture, brain first,
### then heart, then liver (9 columns each).
# NOTE(review): the brain weights are given in (astrocytes, neurons) order
# while the column labels start with "Neuron_"; confirm that the 30_70 and
# 70_30 labels describe the intended proportions.
my.count.mixtures <- data.frame(
  "Neuron_Astrocytes.50_50"           = my.astro.neurons.50_50,
  "Neuron_Astrocytes.30_70"           = my.astro.neurons.30_70,
  "Neuron_Astrocytes.70_30"           = my.astro.neurons.70_30,
  "Neuron_Astrocytes.Microglia_0.1"   = my.astro.neurons.mgl.0.1,
  "Neuron_Astrocytes.Microglia_1"     = my.astro.neurons.mgl.1,
  "Neuron_Astrocytes.Microglia_5"     = my.astro.neurons.mgl.5,
  "Neuron_Astrocytes.Microglia_10"    = my.astro.neurons.mgl.10,
  "Neuron_Astrocytes.Microglia_15"    = my.astro.neurons.mgl.15,
  "Neuron_Astrocytes.Microglia_20"    = my.astro.neurons.mgl.20,
  "Cardiomyocytes_CardiacFibro.90_10" = my.cardio_fibro.90_10,
  "Cardiomyocytes_CardiacFibro.95_5"  = my.cardio_fibro.95_5,
  "Cardiomyocytes_CardiacFibro.99_1"  = my.cardio_fibro.99_1,
  "Cardiomyocytes_Macrophages_0.1"    = my.cardio.mph.0.1,
  "Cardiomyocytes_Macrophages_1"      = my.cardio.mph.1,
  "Cardiomyocytes_Macrophages_5"      = my.cardio.mph.5,
  "Cardiomyocytes_Macrophages_10"     = my.cardio.mph.10,
  "Cardiomyocytes_Macrophages_15"     = my.cardio.mph.15,
  "Cardiomyocytes_Macrophages_20"     = my.cardio.mph.20,
  "Hepatocytes_Fibro.90_10"           = my.liver_fibro.90_10,
  "Hepatocytes_Fibro.95_5"            = my.liver_fibro.95_5,
  "Hepatocytes_Fibro.99_1"            = my.liver_fibro.99_1,
  "Hepatocytes_Macrophages_0.1"       = my.liver.mph.0.1,
  "Hepatocytes_Macrophages_1"         = my.liver.mph.1,
  "Hepatocytes_Macrophages_5"         = my.liver.mph.5,
  "Hepatocytes_Macrophages_10"        = my.liver.mph.10,
  "Hepatocytes_Macrophages_15"        = my.liver.mph.15,
  "Hepatocytes_Macrophages_20"        = my.liver.mph.20
)
rownames(my.count.mixtures) <- my.count.matrix.2$GeneName

### see summary to figure out if any multiplication will be necessary before rounding
summary(my.count.mixtures)
# Scale by 10 and round to whole numbers.
my.count.mixtures.clean <- round(10 * my.count.mixtures)
# One tissue label per mixture column, in column order.
my.big.type <- c(rep("Brain",9), rep("Heart",9), rep("Liver",9))

# design matrix: one row per mixture, labelled with its tissue
dataDesign <- data.frame(row.names = colnames(my.count.mixtures.clean), my.big.type = my.big.type)

# build the DESeq2 data set with the tissue label as the only design term
dds <- DESeqDataSetFromMatrix(countData = my.count.mixtures.clean,
                              colData = dataDesign,
                              design = ~ my.big.type)
dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds)

pdf("2017-01-20_Dsipersion_plot_for_Mixtures.pdf", width = 25, height = 6)
plotDispEsts(dds)
dev.off()

vsd <- data.frame(getVarianceStabilizedData(dds))
vsd$GeneName <- my.count.matrix.2$GeneName
# Move the GeneName column (appended last) to the front, whatever the number
# of mixture columns is.
vsd <- vsd[, c(ncol(vsd), seq_len(ncol(vsd) - 1L))]

# NOTE(review): the .RData name is built without a separator after the date,
# whereas the text file name uses "_" -- confirm that is intended.
save(dds, vsd, file = paste0(Sys.Date(), "DEseq_processed_object_inSilicoMixtures.RData"))
write.table(vsd, file = paste(Sys.Date(), "aggregated_counts_matrix_for_Deconvolution_withPseudoBulkPool_VST_normalized_inSilicoMixtures.txt", sep = "_"),
            quote = FALSE, row.names = FALSE, sep = "\t")
# Load the per-cell sequencing table (Table S1) and the raw count table
# (Table S2). The paths contain spaces; they are written literally because
# "\ " is not a valid escape sequence inside an R string.
my.ben.dir <- "/Volumes/MyBook_5 1/RNAseq_datasets_for_Deconvolution/OTHER/For_Berenice_from_Ben"
my.ben.metadata <- read.csv(file.path(my.ben.dir, "Table S1 - Sequencing Specs.txt"),
                            sep = "\t", header = TRUE)
my.ben.data <- read.csv(file.path(my.ben.dir, "Table S2 - Raw Counts - All High Quality Cells - MiSeq and HiSeq.txt"),
                        sep = "\t", header = TRUE)
head(my.ben.metadata)
head(my.ben.data)
dim(my.ben.metadata)
dim(my.ben.data)

# Which sequencing platforms and cell subgroups occur, and how often.
unique(my.ben.metadata$Sequencing.Platform)
summary(my.ben.metadata$Sequencing.Platform)
summary(factor(my.ben.metadata$Sequencing.Platform))
summary(factor(my.ben.metadata$Cell.Subgroup))

# Logical mask over the metadata rows: cells from the MiSeq 25bp run.
my.miseq25 <- my.ben.metadata$Sequencing.Platform %in% "MiSeq - Paired End - 25bp"
sum(my.miseq25)

# Cell-subgroup counts within that run. Cell.Subgroup is a vector, so it is
# subset with a single index.
my.miseq25.counts <- summary(factor(my.ben.metadata$Cell.Subgroup[my.miseq25]))
my.miseq25.counts

# Cells labelled "Outlier", over all platforms.
my.outliers <- my.ben.metadata$Cell.Subgroup %in% "Outlier" #220
sum(my.outliers)

# Subgroup percentages within the 25bp run, at three rounding precisions.
# The denominator is the total of the counts rather than a typed-in number.
round(100 * my.miseq25.counts / sum(my.miseq25.counts))
round(100 * my.miseq25.counts / sum(my.miseq25.counts), digits = 2)
round(100 * my.miseq25.counts / sum(my.miseq25.counts), digits = 1)

# Scratch arithmetic kept from the original session (NOTE(review): presumably
# sums of the counts and percentages printed above).
18+31+23
72+29+33+39+41
72/214
8.4+14.5+10.7

my.ben.metadata[my.miseq25,]
# Oligo = 18.2 %

# MiSeq 25bp cells that are not labelled "Outlier". The outlier test runs
# over ALL metadata rows and is combined with the platform mask; a test
# computed on the 25bp subset alone is shorter than the table and would be
# recycled against the wrong rows. %in% never yields NA.
my.miseq25.keep <- my.miseq25 & !(my.ben.metadata$Cell.Subgroup %in% "Outlier")
head(my.ben.metadata[my.miseq25.keep, ])

# as.character() so the names select columns by name even if Cell.Name was
# read in as a factor.
# NOTE(review): read.csv() may alter column names that are not syntactically
# valid -- confirm the cell names match colnames(my.ben.data).
my.miseq25.cells <- as.character(my.ben.metadata$Cell.Name[my.miseq25.keep])

# Label the count rows with column X, so that the summed vector carries
# those labels.
rownames(my.ben.data)
rownames(my.ben.data) <- my.ben.data$X

# Pseudo-bulk mixture: per-row total count over the selected cells.
# drop = FALSE keeps a one-cell selection two-dimensional for rowSums().
head(my.ben.data[, my.miseq25.cells, drop = FALSE])
my.miseq25.mixture <- rowSums(my.ben.data[, my.miseq25.cells, drop = FALSE])
head(my.miseq25.mixture)
# Platform masks over the metadata rows. The trailing numbers are the cell
# counts noted in the original session.
# NOTE(review): 220 is noted for the 25bp run while 214 is used as its total
# elsewhere in this file -- confirm which is right.
my.miseq25  <- my.ben.metadata$Sequencing.Platform %in% "MiSeq - Paired End - 25bp" #220
my.miseq75  <- my.ben.metadata$Sequencing.Platform %in% "MiSeq - Paired End - 75bp" #109
my.miseq101 <- my.ben.metadata$Sequencing.Platform %in% "MiSeq - Paired End - 101bp" #62
my.hiseq101 <- my.ben.metadata$Sequencing.Platform %in% "HiSeq - Paired End - 101bp" #122

# Cell-subgroup composition of the MiSeq 75bp run, as counts and percentages.
my.miseq75.counts <- summary(factor(my.ben.metadata$Cell.Subgroup[my.miseq75]))
my.miseq75.counts
20+16+35
round(100 * my.miseq75.counts / sum(my.miseq75.counts), digits = 1)
18.3+14.7+32.1

# Same for the 101bp runs.
summary(factor(my.ben.metadata$Cell.Subgroup[my.miseq101]))
my.hiseq101.counts <- summary(factor(my.ben.metadata$Cell.Subgroup[my.hiseq101]))
my.hiseq101.counts
24+44+36
sum(my.hiseq101.counts)
round(100 * my.hiseq101.counts / sum(my.hiseq101.counts), digits = 1)
19.7+36.1+29.5

# Non-outlier pseudo-bulk mixtures. The outlier test runs over all metadata
# rows so that it lines up with the platform masks.
my.not.outlier <- !(my.ben.metadata$Cell.Subgroup %in% "Outlier")
my.miseq75.mixture <- rowSums(my.ben.data[, as.character(my.ben.metadata$Cell.Name[my.miseq75 & my.not.outlier]), drop = FALSE])
head(my.miseq75.mixture)
my.hiseq101.mixture <- rowSums(my.ben.data[, as.character(my.ben.metadata$Cell.Name[my.hiseq101 & my.not.outlier]), drop = FALSE])
my.ben.mixtures <- cbind(my.miseq25.mixture, my.miseq75.mixture, my.hiseq101.mixture)
head(my.ben.mixtures)
summary(my.ben.mixtures)

# The original session ends this step with the 75bp mixture summed over
# every MiSeq 75bp cell (outliers included); that end state is kept.
# NOTE(review): confirm that including outliers here is intended.
head(my.ben.data[, as.character(my.ben.metadata$Cell.Name[my.miseq75]), drop = FALSE])
my.miseq75.mixture <- rowSums(my.ben.data[, as.character(my.ben.metadata$Cell.Name[my.miseq75]), drop = FALSE])
head(my.miseq75.mixture)
# Inspect the MiSeq 25bp rows and their subgroup labels.
my.ben.metadata[my.miseq25,]
my.miseq25.subgroup <- my.ben.metadata$Cell.Subgroup[my.miseq25]
my.miseq25.subgroup

# Names of the MiSeq 25bp cells that are not labelled "Outlier". The names
# and the outlier test are both restricted to the same platform subset, so
# the two vectors line up element by element. %in% never yields NA, and
# as.character() keeps the selection by name if Cell.Name is a factor.
my.miseq25.cells <- as.character(my.ben.metadata$Cell.Name[my.miseq25][!(my.miseq25.subgroup %in% "Outlier")])
my.miseq25.cells

# Pseudo-bulk mixtures: per-row total count over the selected cells.
# NOTE(review): the 75bp and HiSeq mixtures sum every cell of their
# platform, outliers included, unlike the 25bp mixture -- confirm that this
# difference is intended.
my.miseq25.mixture  <- rowSums(my.ben.data[, my.miseq25.cells, drop = FALSE])
my.miseq75.mixture  <- rowSums(my.ben.data[, as.character(my.ben.metadata$Cell.Name[my.miseq75]), drop = FALSE])
my.hiseq101.mixture <- rowSums(my.ben.data[, as.character(my.ben.metadata$Cell.Name[my.hiseq101]), drop = FALSE])

# One column per platform; the columns are named after the three vectors.
my.ben.mixtures <- cbind(my.miseq25.mixture, my.miseq75.mixture, my.hiseq101.mixture)
head(my.ben.mixtures)
# Sequencer label for each column of my.ben.mixtures, in column order
# (two MiSeq mixtures followed by one HiSeq mixture).
my.big.type <- c(rep("MiSeq",2), rep("HiSeq",1))

# design matrix: one row per mixture, labelled with its sequencer
dataDesign <- data.frame(row.names = colnames(my.ben.mixtures), my.big.type = my.big.type)

# build the DESeq2 data set with the sequencer label as the only design term
dds <- DESeqDataSetFromMatrix(countData = my.ben.mixtures,
                              colData = dataDesign,
                              design = ~ my.big.type)
dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds)

pdf("2017-01-20_Dsipersion_plot_for_BEN_Mixtures.pdf", width = 25, height = 6)
plotDispEsts(dds)
dev.off()

head(my.ben.mixtures)
vsd <- data.frame(getVarianceStabilizedData(dds))
# Gene names come from the mixture matrix itself, not from the count matrix
# of the other data set.
vsd$GeneName <- rownames(my.ben.mixtures)
# Move the GeneName column (appended last) to the front, whatever the number
# of mixture columns is.
vsd <- vsd[, c(ncol(vsd), seq_len(ncol(vsd) - 1L))]

# NOTE(review): this .RData name is the same one the in-silico mixture step
# saves to, so a same-day run replaces that file -- confirm, or give the BEN
# objects their own file name.
save(dds, vsd, file = paste0(Sys.Date(), "DEseq_processed_object_inSilicoMixtures.RData"))
write.table(vsd, file = paste(Sys.Date(), "aggregated_counts_matrix_for_Deconvolution_BEN_inSilicoMixtures.txt", sep = "_"),
            quote = FALSE, row.names = FALSE, sep = "\t")
