my.gonad <- read.table("./Input/Gene_TE_Counts.cntTable", header = T)
# reorganize by sex and age
my.gonad <- my.gonad[,c("GeneName",
"YM1",
"YM2",
"YM3",
"YM5",
"MM1",
"MM2",
"MM3",
"MM5",
"OM1",
"OM3",
"OM4",
"OM5",
"YF1",
"YF2",
"YF3",
"YF4",
"YF5",
"MF1",
"MF2",
"MF3",
"MF4",
"MF5",
"OF1",
"OF3",
"OF4",
"OF5"
)]
# generate age variables for females
my.Age             <- c(rep("Young",5),rep("Middle",5),rep("Old",4))
# retain only female data
my.gonad <- my.gonad[,c(1,14:27)]
# Generate a merged matrix with counts and transcript length
my.gonad.lgth           <- merge(killi.tx.lg.flt[,c("gene_id","tx_len")], my.gonad, by.x = "gene_id", by.y = "GeneName")
rownames(my.gonad.lgth) <- my.gonad.lgth$gene_id
# Generate tpm values
my.ov.tpm <- tpm3(my.gonad.lgth[,-c(1:2)], my.gonad.lgth$tx_len)
write.table(my.ov.tpm, file = paste0(Sys.Date(),"_Killifish_ovary_TPM.txt"), sep = "\t", quote = F)
######## A. Read in the Seurat and metadata  ########
# Create Seurat object using expression data and published cell annotation
my.meta.data.all              <- read.csv("./Input/Ovary_scRNAseq/zx124_40com_ssportal_meta.txt"    , "\t", header = T)
my.meta.data.all              <- my.meta.data[-1,] # remove line with redundant header info
rownames(my.meta.data.all)    <- my.meta.data.all$NAME
colnames(my.meta.data.all)[5] <- "Comp_annot"
# Load Seurat object
load('./Input/Ovary_scRNAseq/zx124_40com_final_orig.robj')
zx124_40com_final_orig
# Add metadata
my.zebra.ov  <- AddMetaData(zx124_40com_final_orig, my.meta.data.all, col.name = NULL) # from Figure 1 annotation
# Examine distribution
table(my.zebra.ov@meta.data$Comp_annot)
grep("Stromal_",my.zebra.ov@meta.data$Comp_annot)
# Examine distribution
table(my.zebra.ov@meta.data$Comp_annot)
# Create higher level (less subclassses) annotation
my.zebra.ov@meta.data$Clean <- NA
my.zebra.ov@meta.data$Clean[my.zebra.ov@meta.data$Comp_annot %in% c("GSC+GC_Pro") ] <- "GSPCs"
my.zebra.ov@meta.data$Clean[my.zebra.ov@meta.data$Comp_annot %in% c("Early_Meio","Late_Meio","Meio") ] <- "Meiotic"
my.zebra.ov@meta.data$Clean[my.zebra.ov@meta.data$Comp_annot %in% c("Early_OO_1","Early_OO_2","Early_OO_3") ] <- "Oocytes"
my.zebra.ov@meta.data$Clean[my.zebra.ov@meta.data$Comp_annot %in% c("Follicle_1","Follicle_2","Follicle_lhx9") ] <- "Follicle"
my.zebra.ov@meta.data$Clean[my.zebra.ov@meta.data$Comp_annot %in% c("Macrophage") ]  <- "Macrophage"
my.zebra.ov@meta.data$Clean[my.zebra.ov@meta.data$Comp_annot %in% c("Neutrophils") ] <- "Neutrophils"
my.zebra.ov@meta.data$Clean[my.zebra.ov@meta.data$Comp_annot %in% c("NK-like") ]     <- "NK-like"
my.zebra.ov@meta.data$Clean[my.zebra.ov@meta.data$Comp_annot %in% c("Theca") ]       <- "Theca"
my.zebra.ov@meta.data$Clean[my.zebra.ov@meta.data$Comp_annot %in% c("Vasculature") ] <- "Vasculature"
my.zebra.ov@meta.data$Clean[grep("Stromal_",my.zebra.ov@meta.data$Comp_annot) ]      <- "Stromal"
my.zebra.ov@meta.data$Clean[grep("Unknown_",my.zebra.ov@meta.data$Comp_annot) ]      <- "Unknown"
table(my.zebra.ov@meta.data$Comp_annot,my.zebra.ov@meta.data$Clean)
table(my.zebra.ov@meta.data$Clean)
?subset
### subset to remove "unknown"
Idents(object = my.zebra.ov) <- "Clean"
my.zebra.ov.cl <- subset(my.zebra.ov, idents = "Unknown", invert = TRUE)
my.zebra.ov@meta.data$Rep <- NA
my.cell.types <- unique(my.zebra.ov@meta.data$Clean)
my.cell.types
i = 1
my.cells <- my.zebra.ov@meta.data$Clean %in% my.cell.types[i]
my.cells
my.cells <- which(my.zebra.ov@meta.data$Clean %in% my.cell.types[i])
my.cells
j:(j+99)
j = 1
j:(j+99)
# Create "rep" slot to do pseudobulk replicates
my.zebra.ov@meta.data$Rep <- NA
my.cell.types <- unique(my.zebra.ov@meta.data$Clean)
for (i in 1:length(my.cell.types)) {
# get cells of that identity
my.cells <- which(my.zebra.ov@meta.data$Clean %in% my.cell.types[i])
j = 1; rep = 1;
while(j < length(my.cells)) {
my.zebra.ov@meta.data$Rep[my.cells[j:min(length(my.cells),j+99)]] <- rep
j <- j+100
rep <- rep + 1
}
}
View(my.zebra.ov@meta.data)
# Create "rep" slot to do pseudobulk replicates
my.zebra.ov@meta.data$Rep <- NA
my.cell.types <- unique(my.zebra.ov@meta.data$Clean)
set.seed(1234567890)
for (i in 1:length(my.cell.types)) {
# get cells of that identity and randomize order
my.cells <- sample(which(my.zebra.ov@meta.data$Clean %in% my.cell.types[i]))
j = 1; rep = 1;
while(j < length(my.cells)) {
my.zebra.ov@meta.data$Rep[my.cells[j:min(length(my.cells),j+99)]] <- rep
j <- j+100
rep <- rep + 1
}
}
View(my.zebra.ov@meta.data)
paste0(my.zebra.ov@meta.data$Clean,my.zebra.ov@meta.data$Rep)
# Make groups for bulk
my.zebra.ov@meta.data$Rep_cells <- paste0(my.zebra.ov@meta.data$Clean,"_",my.zebra.ov@meta.data$Rep)
my.zebra.ov@meta.data$Rep_cells
### subset to remove "unknown"
Idents(object = my.zebra.ov) <- "Clean"
my.zebra.ov.cl <- subset(my.zebra.ov, idents = "Unknown", invert = TRUE)
# Create "rep" slot to do pseudobulk replicates
my.zebra.ov.cl@meta.data$Rep <- NA
my.cell.types <- unique(my.zebra.ov.cl@meta.data$Clean)
set.seed(1234567890)
for (i in 1:length(my.cell.types)) {
# get cells of that identity and randomize order
my.cells <- sample(which(my.zebra.ov.cl@meta.data$Clean %in% my.cell.types[i]))
j = 1; rep = 1;
while(j < length(my.cells)) {
my.zebra.ov.cl@meta.data$Rep[my.cells[j:min(length(my.cells),j+99)]] <- rep
j <- j+100
rep <- rep + 1
}
}
# Make groups for bulk
my.zebra.ov.cl@meta.data$Rep_cells <- paste0(my.zebra.ov.cl@meta.data$Clean,"_",my.zebra.ov.cl@meta.data$Rep)
# Make global reference pseudobulks
zebra.ov.comp.pb   <- data.frame(AggregateExpression(my.zebra.ov.cl   , group.by = "Rep_cells", slot = "counts", assay = "RNA")$RNA)
# Generate tpm values for pure pseudobulk
zebra.ov.comp.pb.tpm <- tpmUMI(zebra.ov.comp.pb)
View(zebra.ov.comp.pb.tpm)
######## B. Make ovary pseudobulks to test deconvolution accuracy  ########
# Get "True" Percentages
global.props <- table(my.zebra.ov.cl@meta.data$Clean)/length(my.zebra.ov.cl@meta.data$Clean[!is.na(my.zebra.ov.cl@meta.data$Clean)])
#### simulate mixtures
set.seed(123456789)
# Create full pseudobulk for each cell type
zebra.ov.pure.pb  <- data.frame(AggregateExpression(my.zebra.ov.cl, group.by = "Clean", slot = "counts", assay = "RNA")$RNA)
global.props
# Make up random "shuffles" of ground truth percentages (first 1 is ground truth)
my.pb.freqs <- data.frame("PB_1" = as.numeric(global.props),
"PB_2" = sample(as.numeric(global.props)),
"PB_3" = sample(as.numeric(global.props)),
"PB_4" = sample(as.numeric(global.props)),
"PB_5" = sample(as.numeric(global.props)))
rownames(my.pb.freqs) <- names(global.props)
# Make ovarian bulks using "real" proportions for a weighted mean
my.ov.sim.1 <- get_wt_mean(zebra.ov.pure.pb, my.pb.freqs$PB_1)
my.ov.sim.2 <- get_wt_mean(zebra.ov.pure.pb, my.pb.freqs$PB_2)
my.ov.sim.3 <- get_wt_mean(zebra.ov.pure.pb, my.pb.freqs$PB_3)
my.ov.sim.4 <- get_wt_mean(zebra.ov.pure.pb, my.pb.freqs$PB_4)
my.ov.sim.5 <- get_wt_mean(zebra.ov.pure.pb, my.pb.freqs$PB_5)
# Make dataframe
Zebra.ov.PBmix <- round(data.frame('PB_1'= my.ov.sim.1,
'PB_2'= my.ov.sim.2,
'PB_3'= my.ov.sim.3,
'PB_4'= my.ov.sim.4,
'PB_5'= my.ov.sim.5))
rownames(Zebra.ov.PBmix) <- rownames(zebra.ov.small.pb)
# Generate tpm values for fake bulks
Zebra.ov.PBmix.tpm <- tpmUMI(Zebra.ov.PBmix)
# Make dataframe
Zebra.ov.PBmix <- round(data.frame('PB_1'= my.ov.sim.1,
'PB_2'= my.ov.sim.2,
'PB_3'= my.ov.sim.3,
'PB_4'= my.ov.sim.4,
'PB_5'= my.ov.sim.5))
rownames(Zebra.ov.PBmix) <- rownames(zebra.ov.pure.pb)
# Generate tpm values for fake bulks
Zebra.ov.PBmix.tpm <- tpmUMI(Zebra.ov.PBmix)
########################################################################
# 3. Killify zebrafish ovary pseudobulk
# load orthology table
load('./Input/2022-11-02_homology_table_killifish_zebrafish.RData')
z2k.cl.ann
# "killify" zebrafish pure cells and mixes
k.zebra.pure.ov.tmp <- merge(unique(z2k.cl.ann[,2:3]), zebra.ov.comp.pb.tpm, by.x = "Danrer_GeneName", by.y = "row.names")
k.zebra.ov.mix.tmp  <- merge(unique(z2k.cl.ann[,2:3]), Zebra.ov.PBmix.tpm  , by.x = "Danrer_GeneName", by.y = "row.names")
# For some genes, there are 2 zebrafish genes but only one killi homolog
# aggregate the tpms
k.zebra.pure.ov <- aggregate(k.zebra.pure.ov.tmp[,-c(1:2)], by = list(k.zebra.pure.ov.tmp$gene_id), sum)
k.zebra.ov.mix  <- aggregate(k.zebra.ov.mix.tmp [,-c(1:2)], by = list(k.zebra.ov.mix.tmp$gene_id ), sum)
rownames(k.zebra.pure.ov) <- k.zebra.pure.ov$Group.1
rownames(k.zebra.ov.mix ) <- k.zebra.ov.mix$Group.1
k.zebra.pure.ov <-  k.zebra.pure.ov[,-1]
k.zebra.ov.mix  <-  k.zebra.ov.mix [,-1]
save(my.ov.tpm, k.zebra.pure.ov, k.zebra.ov.mix, file = paste0(Sys.Date(),"_TPM_normalized_matrices.RData") )
save(my.pb.freqs, file = paste0(Sys.Date(),"_Proportions_for_PBmix.RData") )
# Save text files
write.table(k.zebra.ov.mix, file = paste0(Sys.Date(),"_Zebrafish_PBmixes_ovary_TPM.txt"), sep = "\t", quote = F)
write.table(my.pb.freqs, file = paste0(Sys.Date(),"_Zebrafish_PBmixes_ovary_Proportions.txt"), sep = "\t", quote = F)
#
write.table(my.ov.tpm[rownames(k.zebra.pure.ov),], file = paste0(Sys.Date(),"_Killifish_ovary_TPM_zebgenesonly.txt"), sep = "\t", quote = F)
my.pheno.class <- matrix(2,ncol(k.zebra.pure.ov),ncol(k.zebra.pure.ov))
rownames(my.pheno.class) <- colnames(k.zebra.pure.ov)
View(my.pheno.class)
i = 1
grep(my.cell.types[i],rownames(my.pheno.class))
rownames(my.pheno.class)
my.cell.types[i]
unique(lapply(strsplit(rownames(my.pheno.class),"_"),'[',1) )
my.cell.types <- unlist(unique(lapply(strsplit(rownames(my.pheno.class),"_"),'[',1) ))
grep(my.cell.types[i],rownames(my.pheno.class))
my.pheno.class <- matrix(2,ncol(k.zebra.pure.ov),ncol(k.zebra.pure.ov))
rownames(my.pheno.class) <- colnames(k.zebra.pure.ov)
my.cell.types <- unlist(unique(lapply(strsplit(rownames(my.pheno.class),"_"),'[',1) ))
for(i in 1:length(my.cell.types)) {
my.pheno.class[grep(my.cell.types[i],rownames(my.pheno.class)),grep(my.cell.types[i],rownames(my.pheno.class))] <- 1
}
write.table(my.pheno.class, file = paste0(Sys.Date(),"_PhenoClass_v2.txt"), sep = "\t", quote = F, col.names = F)
# Save text files
write.table(k.zebra.pure.ov, file = paste0(Sys.Date(),"_Zebrafish_pure_ovary_TPM.txt"), sep = "\t", quote = F)
head(k.zebra.pure.ov)
# need to have same cell types have the same name in file for CIBERSORT
strsplit(colnames(k.zebra.pure.ov),"_")
# need to have same cell types have the same name in file for CIBERSORT
lapply(strsplit(colnames(k.zebra.pure.ov),"_"),"[",1)
# need to have same cell types have the same name in file for CIBERSORT
unlist(lapply(strsplit(colnames(k.zebra.pure.ov),"_"),"[",1))
# need to have same cell types have the same name in file for CIBERSORT
colnames <- paste(unlist(lapply(strsplit(colnames(k.zebra.pure.ov),"_"),"[",1)), sep = "\t", collapse = T)
paste(unlist(lapply(strsplit(colnames(k.zebra.pure.ov),"_"),"[",1)), collapse = "\t")
?paste
write.table(colnames, file = paste0(Sys.Date(),"_Zebrafish_pure_ovary_TPMv2.txt"), sep = "\t", quote = F)
colnames
my.colnames <- paste(unlist(lapply(strsplit(colnames(k.zebra.pure.ov),"_"),"[",1)), collapse = "\t")
write.table(my.colnames, file = paste0(Sys.Date(),"_Zebrafish_pure_ovary_TPMv2.txt"), sep = "\t", quote = F)
write.table(my.colnames, file = paste0(Sys.Date(),"_Zebrafish_pure_ovary_TPMv2.txt"), sep = "\t", quote = F, row.names = F, col.names = F)
# need to have same cell types have the same name in file for CIBERSORT
my.colnames <- paste("GeneSymbol",unlist(lapply(strsplit(colnames(k.zebra.pure.ov),"_"),"[",1)), collapse = "\t")
write.table(my.colnames, file = paste0(Sys.Date(),"_Zebrafish_pure_ovary_TPMv2.txt"), sep = "\t", quote = F, row.names = F, col.names = F)
my.colnames <- paste0("GeneSymbol",paste(unlist(lapply(strsplit(colnames(k.zebra.pure.ov),"_"),"[",1)), collapse = "\t"), collapse = "\t")
write.table(my.colnames, file = paste0(Sys.Date(),"_Zebrafish_pure_ovary_TPMv2.txt"), sep = "\t", quote = F, row.names = F, col.names = F)
# need to have same cell types have the same name in file for CIBERSORT
my.colnames <- paste("GeneSymbol", paste(unlist(lapply(strsplit(colnames(k.zebra.pure.ov),"_"),"[",1)), collapse = "\t"), collapse = "\t")
write.table(my.colnames, file = paste0(Sys.Date(),"_Zebrafish_pure_ovary_TPMv2.txt"), sep = "\t", quote = F, row.names = F, col.names = F)
# need to have same cell types have the same name in file for CIBERSORT
my.colnames <- paste(c("GeneSymbol", paste(unlist(lapply(strsplit(colnames(k.zebra.pure.ov),"_"),"[",1)), collapse = "\t")), collapse = "\t")
write.table(my.colnames, file = paste0(Sys.Date(),"_Zebrafish_pure_ovary_TPMv2.txt"), sep = "\t", quote = F, row.names = F, col.names = F)
# need to have same cell types have the same name in file for CIBERSORT
my.colnames <- paste(c("GeneSymbol", paste(unlist(lapply(strsplit(colnames(k.zebra.pure.ov),"_"),"[",1)), collapse = "\t")), collapse = "\t")
write.table(my.colnames    , file = paste0(Sys.Date(),"_Zebrafish_pure_ovary_TPMv2.txt"), sep = "\t", quote = F, row.names = F, col.names = F)
write.table(k.zebra.pure.ov, file = paste0(Sys.Date(),"_Zebrafish_pure_ovary_TPMv2.txt"), sep = "\t", quote = F, col.names = F, append = T)
# Make global reference pseudobulks
zebra.ov.comp.pb.v2   <- data.frame(AggregateExpression(my.zebra.ov.cl   , group.by = "Clean", slot = "counts", assay = "RNA")$RNA)
# Generate tpm values for pure pseudobulk
zebra.ov.comp.pb.v2.tpm <- tpmUMI(zebra.ov.comp.pb.v2)
# "killify" zebrafish pure cells and mixes
k.zebra.pure.ov.v2.tmp <- merge(unique(z2k.cl.ann[,2:3]), zebra.ov.comp.pb.v2.tpm, by.x = "Danrer_GeneName", by.y = "row.names")
k.zebra.ov.mix.tmp  <- merge(unique(z2k.cl.ann[,2:3]), Zebra.ov.PBmix.tpm  , by.x = "Danrer_GeneName", by.y = "row.names")
# For some genes, there are 2 zebrafish genes but only one killi homolog
# aggregate the tpms
k.zebra.pure.ov.v2 <- aggregate(k.zebra.pure.ov.v2.tmp[,-c(1:2)], by = list(k.zebra.pure.ov.v2.tmp$gene_id), sum)
k.zebra.ov.mix     <- aggregate(k.zebra.ov.mix.tmp [,-c(1:2)]   , by = list(k.zebra.ov.mix.tmp$gene_id ), sum)
# save R objects
save(my.ov.tpm, k.zebra.pure.ov, k.zebra.ov.mix, file = paste0(Sys.Date(),"_TPM_normalized_matrices_forGranulator.RData") )
save(my.pb.freqs, file = paste0(Sys.Date(),"_Proportions_for_PBmix.RData") )
library('granulator')
k.zebra.pure.ov.v2 <- aggregate(k.zebra.pure.ov.v2.tmp[,-c(1:2)], by = list(k.zebra.pure.ov.v2.tmp$gene_id), sum)
k.zebra.ov.mix     <- aggregate(k.zebra.ov.mix.tmp [,-c(1:2)]   , by = list(k.zebra.ov.mix.tmp$gene_id ), sum)
rownames(k.zebra.pure.ov.v2) <- k.zebra.pure.ov$Group.1
rownames(k.zebra.ov.mix ) <- k.zebra.ov.mix$Group.1
k.zebra.pure.ov.v2 <-  k.zebra.pure.ov.v2[,-1]
k.zebra.ov.mix     <-  k.zebra.ov.mix [,-1]
# save R objects
save(my.ov.tpm, k.zebra.pure.ov.v2, k.zebra.ov.mix, file = paste0(Sys.Date(),"_TPM_normalized_matrices_forGranulator.RData") )
save(my.pb.freqs, file = paste0(Sys.Date(),"_Proportions_for_PBmix.RData") )
# plot signature matrix similarity using spearman correlation
my.cors <- cor(k.zebra.pure.ov.v2v, method ="spearman")
# plot signature matrix similarity using spearman correlation
my.cors <- cor(k.zebra.pure.ov.v2, method ="spearman")
pheatmap(my.cors)
pdf(paste0(Sys.Date(),"_Heatmap_cell_type_ovary_correlation_spearman_collapse.pdf"))
pheatmap(my.cors)
dev.off()
dev.off()
pdf(paste0(Sys.Date(),"_Heatmap_cell_type_ovary_correlation_spearman_collapse.pdf"))
pheatmap(my.cors)
dev.off()
pheatmap(k.zebra.pure.ov.v2["G4P62_016517",], cluster_rows = F, cellheight = 20)
head(k.zebra.pure.ov.v2)
# "killify" zebrafish pure cells and mixes
k.zebra.pure.ov.v2.tmp <- merge(unique(z2k.cl.ann[,2:3]), zebra.ov.comp.pb.v2.tpm, by.x = "Danrer_GeneName", by.y = "row.names")
k.zebra.ov.mix.tmp  <- merge(unique(z2k.cl.ann[,2:3]), Zebra.ov.PBmix.tpm  , by.x = "Danrer_GeneName", by.y = "row.names")
# For some genes, there are 2 zebrafish genes but only one killi homolog
# aggregate the tpms
k.zebra.pure.ov.v2 <- aggregate(k.zebra.pure.ov.v2.tmp[,-c(1:2)], by = list(k.zebra.pure.ov.v2.tmp$gene_id), sum)
k.zebra.ov.mix     <- aggregate(k.zebra.ov.mix.tmp [,-c(1:2)]   , by = list(k.zebra.ov.mix.tmp$gene_id ), sum)
rownames(k.zebra.pure.ov.v2) <- k.zebra.pure.ov$Group.1
rownames(k.zebra.ov.mix ) <- k.zebra.ov.mix$Group.1
k.zebra.pure.ov.v2 <-  k.zebra.pure.ov.v2[,-1]
k.zebra.ov.mix     <-  k.zebra.ov.mix [,-1]
# save R objects
save(my.ov.tpm, k.zebra.pure.ov.v2, k.zebra.ov.mix, file = paste0(Sys.Date(),"_TPM_normalized_matrices_forGranulator.RData") )
save(my.pb.freqs, file = paste0(Sys.Date(),"_Proportions_for_PBmix.RData") )
# plot signature matrix similarity using spearman correlation
my.cors <- cor(k.zebra.pure.ov.v2, method ="spearman")
pheatmap(my.cors)
pdf(paste0(Sys.Date(),"_Heatmap_cell_type_ovary_correlation_spearman_collapse.pdf"))
pheatmap(my.cors)
dev.off()
dev.off()
pdf(paste0(Sys.Date(),"_Heatmap_cell_type_ovary_correlation_spearman_collapse.pdf"))
pheatmap(my.cors)
dev.off()
pheatmap(k.zebra.pure.ov.v2["G4P62_016517",], cluster_rows = F, cellheight = 20)
k.zebra.pure.ov.v2
k.zebra.pure.ov.v2 <- aggregate(k.zebra.pure.ov.v2.tmp[,-c(1:2)], by = list(k.zebra.pure.ov.v2.tmp$gene_id), sum)
k.zebra.ov.mix     <- aggregate(k.zebra.ov.mix.tmp [,-c(1:2)]   , by = list(k.zebra.ov.mix.tmp$gene_id ), sum)
rownames(k.zebra.pure.ov.v2) <- k.zebra.pure.ov.v2$Group.1
rownames(k.zebra.ov.mix ) <- k.zebra.ov.mix$Group.1
k.zebra.pure.ov.v2 <-  k.zebra.pure.ov.v2[,-1]
k.zebra.ov.mix     <-  k.zebra.ov.mix [,-1]
# save R objects
save(my.ov.tpm, k.zebra.pure.ov.v2, k.zebra.ov.mix, file = paste0(Sys.Date(),"_TPM_normalized_matrices_forGranulator.RData") )
save(my.pb.freqs, file = paste0(Sys.Date(),"_Proportions_for_PBmix.RData") )
# plot signature matrix similarity using spearman correlation
my.cors <- cor(k.zebra.pure.ov.v2, method ="spearman")
pdf(paste0(Sys.Date(),"_Heatmap_cell_type_ovary_correlation_spearman_collapse.pdf"))
pheatmap(my.cors)
dev.off()
pheatmap(k.zebra.pure.ov.v2["G4P62_016517",], cluster_rows = F, cellheight = 20)
# piwil1 - KAF7199819.1/G4P62_016517
pdf(paste0(Sys.Date(),"_Heatmap_piwil1_tpm_Ovary_PB.pdf"))
pheatmap(k.zebra.pure.ov.v2["G4P62_016517",], cluster_rows = F, cellheight = 20)
dev.off()
dev.off()
pdf(paste0(Sys.Date(),"_Heatmap_piwil1_tpm_Ovary_PB.pdf"))
pheatmap(k.zebra.pure.ov.v2["G4P62_016517",], cluster_rows = F, cellheight = 20)
dev.off()
# Deconvolution of pseudo bulk RNA-seq data from zebrafish ovary to determine performance
PBmix.decon <- deconvolute(m = as.matrix(k.zebra.ov.mix), sigMatrix = as.matrix(k.zebra.pure.ov.v2))
PBmix.decon
# We can plot the estimated cell type proportions with the function plot_proportions().
# Notice that while the sum of cell types proportions cannot exceed 100%, for some methods part of the bulk RNA-seq signal remains unassigned.
# plot cell type proportions for svr model on ABIS_S0 reference profile
plot_proportions(deconvoluted = PBmix.decon, method = 'svr')
PBmix.bench <- benchmark(deconvoluted = PBmix.decon, ground_truth = as.matrix(t(my.pb.freqs)) )
# print metrics
PBmix.bench$rank
write.table(PBmix.bench$rank, file = paste0(Sys.Date(),"_granulator_deconvolution_algorithms_performance_on_PBmix.txt"), sep = "\t", quote = F, row.names = F)
plot_benchmark(benchmarked = PBmix.bench, metric = 'pcc')
# plot pearson correlation between predictions and true proportions
pdf(paste0(Sys.Date(),"_Deconvolution_algorithms_performance_on_PBmix_PCC_metric.pdf"), height = 5, width = 6)
plot_benchmark(benchmarked = PBmix.bench, metric = 'pcc')
dev.off()
plot_regress(benchmarked = PBmix.bench, method = 'svr')
plot_regress(benchmarked = PBmix.bench, method = 'nnls')
# Extract regression/correlation plots for top 2 methods
pdf(paste0(Sys.Date(),"_realvspred_cell_type_ovary_correlation_SVR.pdf"), height = 5, width = 8)
plot_regress(benchmarked = PBmix.bench, method = 'svr')
dev.off()
pdf(paste0(Sys.Date(),"_realvspred_cell_type_ovary_correlation_NNLS.pdf"), height = 5, width = 8)
plot_regress(benchmarked = PBmix.bench, method = 'nnls')
dev.off()
# Deconvolution of pseudo bulk RNA-seq data using top 2 algorithms from benchmark
killi.decon <- deconvolute(m = as.matrix(my.ov.tpm), sigMatrix = as.matrix(k.zebra.pure.ov.v2), c("nnls","svr"))
killi.decon$proportions
killi.decon$proportions$nnls_sig1
nnls.res <- killi.decon$proportions$nnls_sig1
nnls.res
nnls.res$Age <- factor(c(rep("Middle",5),rep("Old",4),rep("Young",5)), levels = c("Young","Middle","Old") )
boxplot(GSPCs ~ Age, data = nnls.res)
svr.res <- killi.decon$proportions$svr_sig1
svr.res$Age <- factor(c(rep("Middle",5),rep("Old",4),rep("Young",5)), levels = c("Young","Middle","Old") )
boxplot(GSPCs ~ Age, data = svr.res)
boxplot(Meiotic ~ Age, data = svr.res)
boxplot(Meiotic ~ Age, data = nnls.res)
beeswarm::beeswarm(Meiotic ~ Age, data = nnls.res)
beeswarm::beeswarm(GSPCs ~ Age, data = nnls.res)
beeswarm::beeswarm(Meiotic ~ Age, data = nnls.res, pch = 16, ylim = c(0,50))
beeswarm::beeswarm(GSPCs ~ Age  , data = nnls.res, pch = 16, ylim = c(0,50))
beeswarm::beeswarm(Oocytes ~ Age  , data = nnls.res, pch = 16, ylim = c(0,50))
beeswarm::beeswarm(Meiotic + GSPCs ~ Age, data = nnls.res, pch = 16, ylim = c(0,50))
beeswarm::beeswarm(Meiotic + GSPCs ~ Age, data = svr.res, pch = 16, ylim = c(0,50))
pheatmap(k.zebra.pure.ov.v2["G4P62_016517",], cluster_rows = F, cellheight = 20)
beeswarm::beeswarm(Oocytes ~ Age  , data = nnls.res, pch = 16, ylim = c(0,50))
beeswarm::beeswarm(Oocytes ~ Age  , data = svr.res , pch = 16, ylim = c(0,15))
beeswarm::beeswarm(Oocytes ~ Age  , data = nnls.res, pch = 16, ylim = c(0,25))
beeswarm::beeswarm(Oocytes ~ Age  , data = svr.res , pch = 16, ylim = c(0,25))
beeswarm::beeswarm(Oocytes ~ Age  , data = nnls.res, pch = 16, ylim = c(0,25))
beeswarm::beeswarm(Oocytes ~ Age  , data = svr.res , pch = 16, ylim = c(0,25))
library('beeswarm')
?beeswarm
# GSPCs and Meiotic OOcytes have the highest piwil1 expression
# Is the decreased expression due to decreased expression?
beeswarm(Meiotic + GSPCs ~ Age, data = nnls.res, pch = 16, ylim = c(0,50),pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
# GSPCs and Meiotic OOcytes have the highest piwil1 expression
# Is the decreased expression due to decreased expression?
beeswarm(Meiotic + GSPCs ~ Age, data = nnls.res, pch = 16, ylim = c(0,60), pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
beeswarm(Meiotic + GSPCs ~ Age, data = svr.res , pch = 16, ylim = c(0,60), pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
# GSPCs and Meiotic OOcytes have the highest piwil1 expression
# Is the decreased expression due to decreased expression?
beeswarm(Meiotic + GSPCs ~ Age, data = nnls.res, pch = 16, ylim = c(0,60), main = "NNLS", pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
beeswarm(Meiotic + GSPCs ~ Age, data = svr.res , pch = 16, ylim = c(0,60), main = "SVR", pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
# GSPCs and Meiotic OOcytes have the highest piwil1 expression
# Is the decreased expression due to decreased expression?
beeswarm(Meiotic + GSPCs ~ Age, data = nnls.res, pch = 16, ylim = c(0,60), main = "NNLS deconvolution", pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
beeswarm(Meiotic + GSPCs ~ Age, data = svr.res , pch = 16, ylim = c(0,60), main = "SVR deconvolution", pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
nnls.res$PiwiHi <-  nnls.res$GSPCs + nnls.res$Meiotic
svr.res$PiwiHi  <-  svr.res$GSPCs  + svr.res$Meiotic
wilcox.test(nnls.res$PiwiHi[nnls.res$Age == "Young"], nnls.res$PiwiHi[nnls.res$Age == "Middle"])
wilcox.test(nnls.res$PiwiHi[svr.res$Age == "Young"], svr.res$PiwiHi[nnls.res$Age == "Middle"])
wilcox.test(svr.res$PiwiHi[svr.res$Age == "Young"] , svr.res$PiwiHi[svr.res$Age == "Middle"])
wilcox.test(svr.res$PiwiHi[svr.res$Age == "Middle"]  , svr.res$PiwiHi[svr.res$Age == "Old"])   # p-value = 0.07491
wilcox.test(svr.res$PiwiHi[svr.res$Age == "Middle"]  , svr.res$PiwiHi[svr.res$Age == "Old"])   # p-value = 0.07491
wilcox.test(nnls.res$PiwiHi[nnls.res$Age == "Young"] , nnls.res$PiwiHi[nnls.res$Age == "Middle"]) # p-value = 0.08969
wilcox.test(nnls.res$PiwiHi[nnls.res$Age == "Middle"], nnls.res$PiwiHi[nnls.res$Age == "Old"]   ) # p-value = 0.08969
wilcox.test(svr.res$PiwiHi[svr.res$Age == "Young"]  , svr.res$PiwiHi[svr.res$Age == "Middle"])   # p-value = 0.02684
wilcox.test(svr.res$PiwiHi[svr.res$Age == "Middle"] , svr.res$PiwiHi[svr.res$Age == "Old"]   )   # p-value = 0.07491
beeswarm(Meiotic + GSPCs ~ Age, data = nnls.res, pch = 16, ylim = c(0,60), main = "NNLS deconvolution", pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
text(1.5, 55, "ns")
text(2.5, 55, "*")
beeswarm(Meiotic + GSPCs ~ Age, data = svr.res , pch = 16, ylim = c(0,60), main = "SVR deconvolution", pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
text(1.5, 55, "ns")
text(2.5, 55, "*")
pdf(paste0(Sys.Date(),"_killi_ovary_piwiHi_cells_NNLS.pdf"), height = 4, width = 6)
beeswarm(Meiotic + GSPCs ~ Age, data = nnls.res, pch = 16, ylim = c(0,60), main = "NNLS deconvolution", pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
text(1.5, 55, "ns")
text(2.5, 55, "*")
dev.off()
pdf(paste0(Sys.Date(),"_killi_ovary_piwiHi_cells_SVR.pdf"), height = 4, width = 6)
beeswarm(Meiotic + GSPCs ~ Age, data = svr.res , pch = 16, ylim = c(0,60), main = "SVR deconvolution", pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
text(1.5, 55, "ns")
text(2.5, 55, "*")
dev.off()
pdf(paste0(Sys.Date(),"_killi_ovary_piwiHi_cells_NNLS.pdf"), height = 4, width = 5)
beeswarm(Meiotic + GSPCs ~ Age, data = nnls.res, pch = 16, ylim = c(0,60), main = "NNLS deconvolution", pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
text(1.5, 55, "ns")
text(2.5, 55, "*")
dev.off()
pdf(paste0(Sys.Date(),"_killi_ovary_piwiHi_cells_SVR.pdf"), height = 4, width = 5)
beeswarm(Meiotic + GSPCs ~ Age, data = svr.res , pch = 16, ylim = c(0,60), main = "SVR deconvolution", pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
text(1.5, 55, "ns")
text(2.5, 55, "*")
dev.off()
pdf(paste0(Sys.Date(),"_killi_ovary_piwiHi_cells_NNLS.pdf"), height = 4, width = 5)
beeswarm(Meiotic + GSPCs ~ Age, data = nnls.res, pch = 16, ylim = c(0,60), main = "NNLS deconvolution", pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
text(1.5, 55, "ns")
text(2.5, 55, "*")
dev.off()
pdf(paste0(Sys.Date(),"_killi_ovary_piwiHi_cells_SVR.pdf"), height = 4, width = 5)
beeswarm(Meiotic + GSPCs ~ Age, data = svr.res , pch = 16, ylim = c(0,60), main = "SVR deconvolution", pwcol = c(rep("deeppink3",5),rep("deeppink4",4),rep("deeppink",5)) )
text(1.5, 55, "ns")
text(2.5, 55, "*")
dev.off()
#######################
sink(file = paste0(Sys.Date(),"_Granulators_prep_session_Info.txt", sep =""))
sessionInfo()
sink()
dotchart(PBmix.bench$rank$mean_pcc)
dotchart(PBmix.bench$rank$mean_pcc, labels = PBmix.bench$method, xlim = c(0,1))
dotchart(PBmix.bench$rank$mean_pcc, labels = PBmix.bench$method, xlim = c(0.5,1))
dotchart(PBmix.bench$rank$mean_pcc, labels = PBmix.bench$method, xlim = c(0.5,1), pch = 16)
dotchart(PBmix.bench$rank$mean_pcc, labels = PBmix.bench$rank$method, xlim = c(0.5,1), pch = 16)
pdf(paste0(Sys.Date(),"_dothcart_granulator_algorithms_performance_on_PBmix_benchmarking.pdf"), height = 4, width = 6)
dotchart(PBmix.bench$rank$mean_pcc, labels = PBmix.bench$rank$method, xlim = c(0.5,1), pch = 16)
dev.off()
pdf(paste0(Sys.Date(),"_dothcart_granulator_algorithms_performance_on_PBmix_benchmarking.pdf"), height = 4, width = 4)
dotchart(PBmix.bench$rank$mean_pcc, labels = PBmix.bench$rank$method, xlim = c(0.5,1), pch = 16)
dev.off()
dotchart(PBmix.bench$rank$mean_pcc, labels = PBmix.bench$rank$method, xlim = c(0.5,1), pch = 16, xlab = "Mean PCC")
pdf(paste0(Sys.Date(),"_dothcart_granulator_algorithms_performance_on_PBmix_benchmarking.pdf"), height = 4, width = 4)
dotchart(PBmix.bench$rank$mean_pcc, labels = PBmix.bench$rank$method, xlim = c(0.5,1), pch = 16, xlab = "Mean PCC")
dev.off()
#######################
sink(file = paste0(Sys.Date(),"_Granulator_prep_session_Info.txt", sep =""))
sessionInfo()
sink()
my.pb.freqs
my.pb.preds <- read.table('CIBERSORT/CIBERSORTx_Job18_Adjusted.txt', sep = "\t", header = T)
my.pb.preds
my.pb.preds <- t(read.table('CIBERSORT/CIBERSORTx_Job18_Adjusted.txt', sep = "\t", header = T))
my.pb.preds
t(my.pb.freqs)
my.pb.preds <- read.table('CIBERSORT/CIBERSORTx_Job18_Adjusted.txt', sep = "\t", header = T)
my.pb.preds
my.pb.freqs.t <- t(my.pb.freqs)
my.pb.preds <- read.table('CIBERSORT/CIBERSORTx_Job18_Adjusted.txt', sep = "\t", header = T)
my.cors <- vector(mode = "numeric", length = 5)
names(my.cors) <- my.pb.preds$Mixture
my.cors
my.pb.freqs.t[1,-1]
my.pb.freqs.t
my.pb.preds[1,-1]
View(my.pb.preds)
my.pb.preds[1,-c(1,12:14)]
my.pb.freqs.t[1,]
rbind(my.pb.freqs.t[1,],my.pb.preds[1,-c(1,12:14)])
cor(my.pb.freqs.t[1,],my.pb.preds[1,-c(1,12:14)])
cor(as.vector(my.pb.freqs.t[1,]), as.vector(my.pb.preds[1,-c(1,12:14)]))
cor(as.numeric(my.pb.freqs.t[1,]), as.numeric(my.pb.preds[1,-c(1,12:14)]))
my.pb.freqs.t <- t(my.pb.freqs)
my.pb.preds   <- read.table('CIBERSORT/CIBERSORTx_Job18_Adjusted.txt', sep = "\t", header = T)
my.cors <- vector(mode = "numeric", length = 5)
names(my.cors) <- my.pb.preds$Mixture
for (i in 1:5) {
my.cors[i] <- cor(as.numeric(my.pb.freqs.t[i,]), as.numeric(my.pb.preds[i,-c(1,12:14)]))
}
my.cors
