Here, fastMNN is executed on the Seurat object for Study 2 (TPM) data. The parameters and commands are derived from the fastMNN documentation.

Load libraries

library(batchelor)
library(SeuratWrappers)
library(Seurat)
library(dplyr)
library(ggplot2)
source("~/Supplemental_code/Data_integration.R")

Load datasets

# Load the TPM expression data and the matching metadata for every dataset.
# Each load() restores the objects stored in the .RData file into the current
# environment. The integration step further down refers to objects whose names
# mirror these file names (e.g. Poonia_et_al._TPMData) -- presumably one object
# per file; verify if a file is regenerated.
load("~/Supplemental_code/unCTC_datasets/Poonia_et_al._TPMData.RData")
load("~/Supplemental_code/unCTC_datasets/Poonia_et_al._metaData.RData")
load("~/Supplemental_code/unCTC_datasets/Ding_et_al._WBC1_metaData.RData")
load("~/Supplemental_code/unCTC_datasets/Ding_et_al._WBC1_TPMData.RData")
load("~/Supplemental_code/unCTC_datasets/Ding_et_al._WBC2_TPMData.RData")
load("~/Supplemental_code/unCTC_datasets/Ding_et_al._WBC2_metaData.RData")
load("~/Supplemental_code/unCTC_datasets/Ebright_et_al._TPMData.RData")
load("~/Supplemental_code/unCTC_datasets/Ebright_et_al._metaData.RData")

Integrate the datasets on the basis of their common genes using the Data_integration() function

# Combine the four TPM matrices with the project helper Data_integration()
# (presumably defined in the sourced Data_integration.R).
Stydy2Data <- Data_integration(
  data_list = list(
    Poonia_et_al._TPMData,
    Ding_et_al._WBC1_TPMData,
    Ebright_et_al._TPMData,
    Ding_et_al._WBC2_TPMData
  )
)

# Stack the metadata tables in the same dataset order as the list above.
Stydy2Datametadata <- rbind(
  Poonia_et_al._metaData,
  Ding_et_al._WBC1_metaData,
  Ebright_et_al._metaData,
  Ding_et_al._WBC2_metaData
)

The log-transformed data are taken as input to create the Seurat object

# Build the Seurat object from log2(TPM + 1) values and attach the combined
# metadata table.
Seurat_obj_study2 <- CreateSeuratObject(
  counts = log2(Stydy2Data + 1),
  project = "Study2_Data",
  meta.data = Stydy2Datametadata
)

The normalization step is skipped because the input is already log-transformed, length-normalized data (TPM)

#Seurat_obj_study2 <- NormalizeData(Seurat_obj_study2, normalization.method = "LogNormalize", scale.factor = 10000)

# Identification of highly variable features (feature selection), followed by
# scaling and PCA.
# The former `pc.genes = Seurat_obj_study2@var.genes` argument was removed: it
# is Seurat v2 syntax (current Seurat objects have no `var.genes` slot and
# RunPCA() has no `pc.genes` parameter). RunPCA() uses the variable features
# selected above when `features` is left at its default.
Seurat_obj_study2 <- Seurat_obj_study2 %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(verbose = FALSE) %>%
  RunPCA(npcs = 20, verbose = FALSE)

Seurat FastMNN wrapper

# Split the object by the "Class" metadata column, correct the resulting
# batches with fastMNN, then embed and cluster on the first 30 dimensions of
# the "mnn" reduction.
Seurat_obj_study2 <- Seurat_obj_study2 %>%
  SplitObject(split.by = "Class") %>%
  RunFastMNN(object.list = .) %>%
  RunUMAP(reduction = "mnn", dims = 1:30) %>%
  FindNeighbors(reduction = "mnn", dims = 1:30) %>%
  FindClusters()
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1648
## Number of edges: 53336
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8846
## Number of communities: 16
## Elapsed time: 0 seconds

Colorkey for Visualization

# Palette of 30 named R colours, passed as manual colour values to the
# DimPlot() calls below.
ColorKeyDataID <- c(
  "peru", "steelblue", "darkolivegreen4", "palevioletred4", "darkcyan",
  "darkorchid4", "darkslategray", "firebrick1", "salmon3", "paleturquoise1",
  "mediumaquamarine", "greenyellow", "black", "deepskyblue3", "mediumblue",
  "darkred", "gold", "gray50", "hotpink", "khaki3",
  "yellow4", "lavender", "cornsilk4", "orchid4", "yellow3",
  "darkgreen", "skyblue1", "khaki4", "tan4", "pink"
)

mnn reduction

# MNN embedding, points coloured by the "Class" metadata column.
DimPlot(
  Seurat_obj_study2,
  reduction = "mnn",
  group.by = "Class",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

# MNN embedding with DimPlot()'s default grouping (no group.by supplied).
DimPlot(
  Seurat_obj_study2,
  reduction = "mnn",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

UMAP reduction

# UMAP embedding, points coloured by the "Class" metadata column.
DimPlot(
  Seurat_obj_study2,
  reduction = "umap",
  group.by = "Class",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

# UMAP embedding with DimPlot()'s default grouping (no group.by supplied).
DimPlot(
  Seurat_obj_study2,
  reduction = "umap",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

barplot

# Per-cell table: UMAP coordinates plus the cell type, class and cluster
# columns copied from the object's metadata.
df_study2 <- as.data.frame(Embeddings(Seurat_obj_study2, reduction = "umap"))
df_study2[c("Cell_type", "Class", "Clusters")] <-
  Seurat_obj_study2@meta.data[c("Cell_type", "Class", "seurat_clusters")]

# Stacked bar chart: number of cells in each cluster, split by cell type.
# Only the `fill` aesthetic is mapped, so the former empty scale_color_manual()
# and the colour-legend guides() override had no effect and were dropped.
ggplot(df_study2, aes(x = Clusters, fill = Cell_type)) +
  theme_classic() +
  geom_bar(stat = "count") +
  scale_fill_manual(
    values = c("dodgerblue4", "firebrick3", "darkgreen", "dark turquoise")
  ) +
  theme(
    legend.text = element_text(size = 14),
    plot.title = element_text(size = 16),
    legend.title = element_text(size = 20),
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold")
  )

ARI, NMI and Cluster purity

# Adjusted Rand index between the cluster assignments and the cell types.
with(df_study2, aricode::ARI(Clusters, Cell_type))
## [1] 0.09756743
# Normalized mutual information for the same two labelings.
with(df_study2, aricode::NMI(Clusters, Cell_type))
## [1] 0.1676598
#' Cluster purity of a clustering against reference classes
#'
#' Cross-tabulates classes (rows) against clusters (columns), takes the
#' largest class count within each cluster, and divides the sum of those
#' counts by the number of cluster labels.
#'
#' @param clusters Vector of cluster assignments, one per observation.
#' @param classes Vector of reference class labels, same length as `clusters`.
#' @return A single numeric value: the fraction of observations that belong
#'   to the majority class of their cluster.
ClusterPurity <- function(clusters, classes) {
  contingency <- table(classes, clusters)
  majority_counts <- apply(contingency, MARGIN = 2, FUN = max)
  sum(majority_counts) / length(clusters)
}
# Purity of the clusters with respect to the annotated cell types.
ClusterPurity(
  clusters = df_study2$Clusters,
  classes = df_study2$Cell_type
)
## [1] 0.8895631