Here, fastMNN is run on the Seurat object built from the Study 1 (Count)
data. The parameters and commands are taken from the fastMNN
documentation.
Load libraries
library(batchelor)
library(SeuratWrappers)
library(Seurat)
library(dplyr)
library(ggplot2)
source("~/Supplemental_code/Data_integration.R")
Load datasets for Study 1
# Restore the expression data and metadata objects for the seven Study 1
# datasets. Files are read in the order listed; load() places the saved
# objects in the current environment (presumably one object per file, named
# after the file -- the code further down relies on those names).
study1_rdata_files <- c(
  "~/Supplemental_code/unCTC_datasets/Zheng_et_al._Data.RData",
  "~/Supplemental_code/unCTC_datasets/Zheng_et_al._metaData.RData",
  "~/Supplemental_code/unCTC_datasets/Velten_et_al._Data.RData",
  "~/Supplemental_code/unCTC_datasets/Velten_et_al._metaData.RData",
  "~/Supplemental_code/unCTC_datasets/Sarioglu_et_al._Data.RData",
  "~/Supplemental_code/unCTC_datasets/Sarioglu_et_al._metaData.RData",
  "~/Supplemental_code/unCTC_datasets/Jordan_et_al._Data.RData",
  "~/Supplemental_code/unCTC_datasets/Jordan_et_al._metaData.RData",
  "~/Supplemental_code/unCTC_datasets/Aceto_et_al._metaData.RData",
  "~/Supplemental_code/unCTC_datasets/Aceto_et_al._Data.RData",
  "~/Supplemental_code/unCTC_datasets/Yu_et_al._metaData.RData",
  "~/Supplemental_code/unCTC_datasets/Yu_et_al._Data.RData",
  "~/Supplemental_code/unCTC_datasets/Ting_et_al._metaData.RData",
  "~/Supplemental_code/unCTC_datasets/Ting_et_al._Data.RData"
)
# A for loop (not lapply) so that load() still targets this environment
for (rdata_file in study1_rdata_files) {
  load(rdata_file)
}
# Drop the loop helpers so only the loaded objects remain
rm(study1_rdata_files, rdata_file)
Data integration based on common genes
# Combine the seven expression matrices with the project helper
# Data_integration() (sourced from Data_integration.R); per the section
# heading it merges on common genes -- confirm against the helper itself.
Data2 <- Data_integration(
  data_list = list(
    Velten_et_al._Data,
    Ting_et_al._Data,
    Yu_et_al._Data,
    Sarioglu_et_al._Data,
    Jordan_et_al._Data,
    Aceto_et_al._Data,
    Zheng_et_al._Data
  )
)

# Stack the per-dataset metadata in the same dataset order as the matrices
data2_metadata <- rbind(
  Velten_et_al._metaData,
  Ting_et_al._metaData,
  Yu_et_al._metaData,
  Sarioglu_et_al._metaData,
  Jordan_et_al._metaData,
  Aceto_et_al._metaData,
  Zheng_et_al._metaData
)

# Build the Study 1 Seurat object from the merged counts and metadata
Seurat_obj_study1 <- CreateSeuratObject(
  counts = Data2,
  project = "Study_1_data",
  meta.data = data2_metadata
)
# Log-normalize the merged Study 1 object (scale factor 10000). This runs
# once on the combined object, before it is split by DataID for fastMNN.
Seurat_obj_study1 <- NormalizeData(
  Seurat_obj_study1,
  normalization.method = "LogNormalize",
  scale.factor = 10000
)
# Feature selection: flag 2000 highly variable features with the vst method
Seurat_obj_study1 <- FindVariableFeatures(
  Seurat_obj_study1,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale the data quietly
Seurat_obj_study1 <- ScaleData(Seurat_obj_study1, verbose = FALSE)
Seurat’s fastMNN wrapper
# Split the object by dataset of origin (DataID), correct with fastMNN, then
# compute the UMAP, the neighbour graph and the clusters from the first 30
# dimensions of the "mnn" reduction.
Seurat_obj_study1 <-
  RunFastMNN(object.list = SplitObject(Seurat_obj_study1, split.by = "DataID")) %>%
  RunUMAP(reduction = "mnn", dims = 1:30) %>%
  FindNeighbors(reduction = "mnn", dims = 1:30) %>%
  FindClusters()
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1184
## Number of edges: 50200
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6826
## Number of communities: 6
## Elapsed time: 0 seconds
Colorkey for Visualization
# Manual colour palette (31 named R colours) passed to scale_color_manual()
# in the embedding plots below.
ColorKeyDataID <- c(
  "coral4", "darkcyan", "steelblue", "orangered",
  "darkolivegreen4", "lightsteelblue3", "darkorchid4",
  "darkslategray", "salmon3", "paleturquoise1",
  "mediumaquamarine", "greenyellow", "black",
  "deepskyblue3", "mediumblue", "peru", "gold",
  "gray50", "hotpink", "khaki3", "yellow4", "lavender",
  "cornsilk4", "orchid4", "yellow3", "darkgreen",
  "skyblue1", "khaki4", "tan4", "firebrick1", "pink"
)
mnn reduction
# MNN embedding, points coloured by dataset of origin (DataID)
DimPlot(
  object = Seurat_obj_study1,
  reduction = "mnn",
  group.by = "DataID",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

# MNN embedding with DimPlot's default grouping (no group.by supplied)
DimPlot(
  object = Seurat_obj_study1,
  reduction = "mnn",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

UMAP reduction
# UMAP embedding, points coloured by dataset of origin (DataID)
DimPlot(
  object = Seurat_obj_study1,
  reduction = "umap",
  group.by = "DataID",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

# UMAP embedding with DimPlot's default grouping (no group.by supplied)
DimPlot(
  object = Seurat_obj_study1,
  reduction = "umap",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

barplot
# Per-cell table: UMAP coordinates plus the metadata columns used for the
# bar plot and for the clustering metrics computed afterwards.
df_study1 <- as.data.frame(Embeddings(Seurat_obj_study1, reduction = "umap"))
df_study1$Cell_type <- Seurat_obj_study1@meta.data$Cell_type
df_study1$DataID <- Seurat_obj_study1@meta.data$DataID
df_study1$Clusters <- Seurat_obj_study1@meta.data$seurat_clusters

# Stacked bar chart: number of cells per cluster, filled by cell type.
# Only `fill` is mapped, so no colour scale or colour guide is attached: a
# scale_color_manual() without `values` errors on older ggplot2 releases and
# is a no-op on newer ones, and a colour guide has nothing to act on here.
# NOTE(review): four fill colours are supplied, so this assumes Cell_type has
# at most four distinct values -- confirm against the metadata.
ggplot(df_study1, aes(x = Clusters, fill = Cell_type)) +
  theme_classic() +
  geom_bar(stat = "count") +
  scale_fill_manual(
    values = c("dodgerblue4", "firebrick3", "darkgreen", "dark turquoise")
  ) +
  theme(
    legend.text = element_text(size = 14),
    plot.title = element_text(size = 16),
    legend.title = element_text(size = 20),
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold")
  )

ARI, NMI and Cluster purity
# Adjusted Rand index between the Seurat clusters and the cell-type labels
aricode::ARI(c1 = df_study1$Clusters, c2 = df_study1$Cell_type)
## [1] 0.05272533
# Normalized mutual information for the same pair of labelings
aricode::NMI(c1 = df_study1$Clusters, c2 = df_study1$Cell_type)
## [1] 0.03954897
#' Cluster purity of a clustering against reference classes
#'
#' Cross-tabulates `classes` by `clusters`, takes the size of the largest
#' class within each cluster, and divides the sum of those sizes by the
#' number of tabulated observations.
#'
#' @param clusters Vector of cluster assignments, one per observation.
#' @param classes Vector of reference class labels, same length as `clusters`.
#' @return A single number in [0, 1]; `NA_real_` when no observation has both
#'   labels present (including empty input).
ClusterPurity <- function(clusters, classes) {
  stopifnot(length(clusters) == length(classes))
  # table() drops pairs with a missing label, so the denominator is taken
  # from the table as well; length(clusters) would still count those pairs
  # and bias the purity downwards.
  contingency <- table(classes, clusters)
  n_counted <- sum(contingency)
  if (n_counted == 0) {
    return(NA_real_)
  }
  sum(apply(contingency, 2, max)) / n_counted
}
# Purity of the Seurat clusters with respect to the cell-type labels
ClusterPurity(clusters = df_study1$Clusters, classes = df_study1$Cell_type)
## [1] 0.8758446