Here, Harmony is run on the Seurat object built from the Study 2 (count)
data. The parameters and commands follow the Harmony
documentation.
source("~/Supplemental_code/Data_integration.R")
library(Seurat)
library(dplyr)
library(harmony)
library(ggplot2)
Load datasets
# Load the four Study 2 count-data .RData files into the current environment.
# NOTE(review): each file presumably provides a <name>_CountData object and a
# matching <name>_CountmetaData object, as used further down - confirm.
count_data_files <- c(
  "~/Supplemental_code/unCTC_datasets/Poonia_et_al._CountData.RData",
  "~/Supplemental_code/unCTC_datasets/Ding_et_al._WBC1_CountData.RData",
  "~/Supplemental_code/unCTC_datasets/Ding_et_al._WBC2_CountData.RData",
  "~/Supplemental_code/unCTC_datasets/Ebright_et_al._CountData.RData"
)
for (rdata_file in count_data_files) {
  load(rdata_file)
}
Data integration based on common genes
# Combine the four count matrices with Data_integration()
# (presumably defined in the sourced Data_integration.R - confirm).
Stydy2Data <- Data_integration(
  data_list = list(
    Poonia_et_al._CountData,
    Ebright_et_al._CountData,
    Ding_et_al._WBC1_CountData,
    Ding_et_al._WBC2_CountData
  )
)

# Stack the per-dataset metadata in the same dataset order as the counts above.
Stydy2Datametadata <- rbind(
  Poonia_et_al._CountmetaData,
  Ebright_et_al._CountmetaData,
  Ding_et_al._WBC1_CountmetaData,
  Ding_et_al._WBC2_CountmetaData
)
# Build the Seurat object from the integrated counts and metadata, then run the
# standard pre-processing chain: normalisation, selection of 2000 variable
# features ("vst"), scaling, and a 20-component PCA.
#
# RunPCA() is deliberately called without `pc.genes = pbmc@var.genes`: `pbmc`
# is never defined in this script and `pc.genes` is an obsolete Seurat v2
# argument, so the old call only worked because the argument was never
# evaluated. With no `features` supplied, RunPCA() uses the variable features
# selected by FindVariableFeatures() above.
harmony_obj_study2 <- CreateSeuratObject(
  counts = Stydy2Data,
  project = "Harmony_study2",
  min.cells = 5,
  meta.data = Stydy2Datametadata
) %>%
  Seurat::NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(verbose = FALSE) %>%
  RunPCA(npcs = 20, verbose = FALSE)
Run the RunHarmony Seurat wrapper
# Run Harmony on the Seurat object, grouping cells by the "Data_id" metadata
# column.
harmony_obj_study2 <- harmony::RunHarmony(
  harmony_obj_study2,
  group.by.vars = "Data_id"
)

# Extract the "harmony" reduction embeddings and print the top-left 5 x 5
# corner as a quick sanity check.
harmony_embeddings <- Embeddings(harmony_obj_study2, reduction = "harmony")
harmony_embeddings[1:5, 1:5]
## harmony_1 harmony_2 harmony_3 harmony_4 harmony_5
## 1850055018_CS13_S2 18.29675 11.71153 1.0098857 2.911585 0.4413637
## 1850055018_CS32_S12 18.94523 12.48664 0.9780357 2.186623 0.4185233
## 1850055018_CS24_S7 19.57142 13.25548 1.6422007 3.069661 0.1125002
## 1850055018_CS18_S5 20.26635 14.11057 1.6570702 3.608592 0.4950125
## 1850055018_CS25_S8 19.50240 13.85487 1.8494443 3.048712 0.2196713
# Downstream analysis on the "harmony" reduction (all 20 dimensions):
# UMAP, neighbour graph, then clustering at resolution 0.5.
harmony_obj_study2 <- harmony_obj_study2 %>%
  RunUMAP(reduction = "harmony", dims = 1:20) %>%
  FindNeighbors(reduction = "harmony", dims = 1:20) %>%
  FindClusters(resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1480
## Number of edges: 54152
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8502
## Number of communities: 9
## Elapsed time: 0 seconds
Visualization color key
# Manual colour palette (30 colours) passed to scale_color_manual() in the
# embedding plots below.
ColorKeyDataID <- c(
  "peru", "steelblue", "darkolivegreen4", "palevioletred4", "darkcyan",
  "darkorchid4", "darkslategray", "firebrick1", "salmon3", "paleturquoise1",
  "mediumaquamarine", "greenyellow", "black", "deepskyblue3", "mediumblue",
  "darkred", "gold", "gray50", "hotpink", "khaki3",
  "yellow4", "lavender", "cornsilk4", "orchid4", "yellow3",
  "darkgreen", "skyblue1", "khaki4", "tan4", "pink"
)
PCA plot
# PCA embedding, points coloured by the "Data_id" metadata column.
DimPlot(
  object = harmony_obj_study2,
  reduction = "pca",
  group.by = "Data_id",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

# Same embedding with DimPlot's default grouping (no group.by given).
DimPlot(
  object = harmony_obj_study2,
  reduction = "pca",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

Harmony plot
# Harmony embedding, points coloured by the "Data_id" metadata column.
DimPlot(
  object = harmony_obj_study2,
  reduction = "harmony",
  group.by = "Data_id",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

# Same embedding with DimPlot's default grouping (no group.by given).
DimPlot(
  object = harmony_obj_study2,
  reduction = "harmony",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

UMAP plot
# UMAP embedding, points coloured by the "Data_id" metadata column.
DimPlot(
  harmony_obj_study2,
  reduction = "umap",
  group.by = "Data_id",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

# Same embedding with DimPlot's default grouping (no group.by given).
DimPlot(
  harmony_obj_study2,
  reduction = "umap",
  pt.size = 1
) +
  scale_color_manual(values = ColorKeyDataID)

Bar plot
# Cell-level metadata of the clustered object (includes seurat_clusters and
# Class, which are used below and in the clustering metrics).
harmony_df_study2 <- harmony_obj_study2@meta.data

# Stacked bar chart: number of cells per Seurat cluster, filled by Class.
# Only the fill aesthetic is mapped, so the former empty scale_color_manual()
# and guides(colour = ...) layers had nothing to act on and were removed.
ggplot(harmony_df_study2, aes(x = seurat_clusters, fill = Class)) +
  theme_classic() +
  geom_bar(stat = "count") +
  scale_fill_manual(
    values = c("dodgerblue4", "firebrick3", "darkgreen", "dark turquoise")
  ) +
  theme(
    legend.text = element_text(size = 14),
    plot.title = element_text(size = 16),
    legend.title = element_text(size = 20),
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold")
  )

ARI, NMI, and cluster purity
# Adjusted Rand index between the Seurat clusters and the Class labels.
aricode::ARI(
  harmony_df_study2[["seurat_clusters"]],
  harmony_df_study2[["Class"]]
)
## [1] 0.08404987
# Normalised mutual information for the same two labelings.
aricode::NMI(
  harmony_df_study2[["seurat_clusters"]],
  harmony_df_study2[["Class"]]
)
## [1] 0.1380744
#' Cluster purity of a clustering against reference class labels.
#'
#' Cross-tabulates classes against clusters, takes the largest class count
#' inside each cluster, and divides the sum of those counts by the number of
#' observations.
#'
#' @param clusters Vector of cluster assignments, one per observation.
#' @param classes Vector of reference class labels, same length as `clusters`.
#' @return A single number: the summed per-cluster majority counts divided by
#'   `length(clusters)`.
ClusterPurity <- function(clusters, classes) {
  # Rows are classes, columns are clusters.
  contingency <- table(classes, clusters)
  # Size of the dominant class within each cluster (one value per column).
  majority_sizes <- apply(contingency, MARGIN = 2, FUN = max)
  sum(majority_sizes) / length(clusters)
}
# Purity of the Seurat clusters with respect to the Class labels.
ClusterPurity(
  clusters = harmony_df_study2$seurat_clusters,
  classes = harmony_df_study2$Class
)
## [1] 0.7844595