Load libraries and data

library(Seurat)
library(ggplot2)
library(mclust)
library(plyr)
library(dplyr)    

Next we load two count matrices: one for the RNA measurements, and one for the antibody-derived tags (ADT). Both are restricted to the cells listed in the selected-cells file, which removes doublets.

# Table of the cells kept after doublet removal; its row names are used below
# to pick the matching columns of both count matrices.
selected_cells <- read.csv("data/pbmc_nodoublet_minPts50_selectedcells.csv",
                           row.names = 1)

# RNA UMI matrix: read the table, convert it to a sparse matrix, then keep
# only the selected cells (this removes the doublets).
pbmc.rna <- read.csv("data/pbmc_rna.csv", row.names = 1)
pbmc.rna <- as.sparse(pbmc.rna)
pbmc.rna <- pbmc.rna[, rownames(selected_cells)]

# ADT UMI matrix: only the first 49 rows of the table are kept before the
# sparse conversion, and the columns are restricted to the same cells.
adt <- read.csv("data/pbmc_adt.csv", row.names = 1)
pbmc.adt <- as.sparse(adt[seq_len(49), ])
pbmc.adt <- pbmc.adt[, rownames(selected_cells)]

Create Seurat object and preprocess the data

Preprocessing mRNA

# Build the Seurat object from the RNA count matrix.
pbmc <- CreateSeuratObject(counts = pbmc.rna)
# Normalize with the NormalizeData() defaults (no method is overridden here).
pbmc <- NormalizeData(pbmc)
# Select variable features with the FindVariableFeatures() defaults; no
# explicit number of features is requested in this call.
pbmc <- FindVariableFeatures(pbmc)
# Scale with the ScaleData() defaults (no variables are regressed out).
pbmc <- ScaleData(pbmc)
# Run PCA quietly; the first 20 PCs are passed to t-SNE below (dims = 1:20).
pbmc <- RunPCA(pbmc, verbose = FALSE)
# Optional diagnostic for choosing the number of PCs:
#ElbowPlot(pbmc, ndims = 50)
# Run t-SNE on mRNA using PCs 1-20.
# NOTE(review): the t-SNE backend is requested through `method`; confirm
# against the RunTSNE() signature that this argument is honoured rather than
# absorbed by `...` (the documented argument name appears to be `tsne.method`).
pbmc <- RunTSNE(pbmc, dims = 1:20, method = "FIt-SNE")
#DimPlot(pbmc, label = TRUE) + NoLegend()

Preprocessing ADT

# Attach the ADT counts to the existing object as an additional assay named
# "ADT".
pbmc[["ADT"]] <- CreateAssayObject(counts = pbmc.adt)

# Repeat the preprocessing (normalization and scaling) steps that were run on
# RNA, selecting the ADT assay through the `assay` argument. The ADT counts
# are normalized with the "CLR" method instead of the default.
pbmc <- NormalizeData(pbmc, assay = "ADT", normalization.method = "CLR")
pbmc <- ScaleData(pbmc, assay = "ADT")

# Make ADT the default assay so that rownames(pbmc) below returns the ADT
# features; all of them are used as input to the PCA.
DefaultAssay(pbmc) <- "ADT"
# Seurat keys must be alphanumeric characters followed by one trailing "_".
# A key with an inner underscore such as "pca_adt_" triggers a warning and is
# rewritten by Seurat, so a valid key is supplied directly.
pbmc <- RunPCA(pbmc, features = rownames(pbmc), reduction.name = "pca_adt",
               reduction.key = "pcaadt_", verbose = FALSE)

# Cell-to-cell distances computed on the normalized ADT values. The assay is
# named explicitly so this step does not depend on the current default assay.
adt.data <- GetAssayData(pbmc, assay = "ADT", slot = "data")
adt.dist <- dist(t(adt.data))

# Rerun t-SNE on the distance matrix defined only on ADT (protein) levels and
# store the result as the "tsne_adt" reduction.
pbmc[["tsne_adt"]] <- RunTSNE(adt.dist, assay = "ADT", reduction.key = "adtTSNE_")

#DimPlot(pbmc, reduction = "tsne_adt") + NoLegend()

Read and visualize clusters from CiteFuse

# Read the CiteFuse cluster assignments from a header-less CSV; the first
# column (V1) holds the cluster label of each cell.
sce_labels <- read.csv("output/pbmc_citefuse_clustering.csv", header = FALSE)
sce_labels <- as.factor(sce_labels$V1)
# Labels are attached to cells purely by position, so the file must contain
# exactly one label per column of the ADT matrix. Fail early on a mismatch
# instead of silently mislabelling cells.
stopifnot(length(sce_labels) == ncol(pbmc.adt))
names(sce_labels) <- colnames(pbmc.adt)

# Store the labels as cell metadata and as the active identities.
pbmc[["sceClusterID"]] <- sce_labels
DefaultAssay(pbmc) <- "RNA"
Idents(pbmc) <- sce_labels

# Plot the clusters on the RNA t-SNE: no legend, centred title, and cluster
# labels drawn on the plot.
tsne_rnaClusters <- DimPlot(pbmc, reduction = "tsne", group.by = "sceClusterID") +
  NoLegend() +
  ggtitle("Citefuse's clusters on RNA") +
  theme(plot.title = element_text(hjust = 0.5))
tsne_rnaClusters <- LabelClusters(plot = tsne_rnaClusters, id = "sceClusterID", size = 4)

# Same plot on the ADT t-SNE.
tsne_adtClusters <- DimPlot(pbmc, reduction = "tsne_adt", group.by = "sceClusterID",
                            pt.size = 0.5) +
  NoLegend() +
  ggtitle("Citefuse's clusters on ADT") +
  theme(plot.title = element_text(hjust = 0.5))
tsne_adtClusters <- LabelClusters(plot = tsne_adtClusters, id = "sceClusterID", size = 4)

# Show both embeddings side by side.
CombinePlots(plots = list(tsne_rnaClusters, tsne_adtClusters), ncol = 2)

Running DE test to assign cell type identity to clusters

DE test on RNA

Note that each gene name starts with the prefix “hg19-” (for example, “hg19-NKG7”).

# We need to switch to RNA
# Marker genes are searched in the RNA assay, so make it the default first.
DefaultAssay(pbmc) <- "RNA"
# Positive markers only, with min.diff.pct = 0.3 and at most 100 cells per
# identity used in each test.
Citefuse.rna.markers <- FindAllMarkers(
  pbmc,
  only.pos = TRUE,
  min.diff.pct = 0.3,
  max.cells.per.ident = 100
)
# Top 10 marker genes of each cluster, ranked by avg_logFC (the result stays
# grouped by cluster).
rna.top10 <- top_n(group_by(Citefuse.rna.markers, cluster), n = 10, wt = avg_logFC)
# head(rna.top10, n = 15)

You can plot a heatmap of the top marker genes.

# Plot heatmap on top marker genes (RNA)
# DoHeatmap(pbmc, features = unique(rna.top10$gene), assay = "RNA", angle = 0) 

DE test on ADT

# Find protein markers for all clusters, and draw a heatmap
# Switch to the ADT assay and search for positive protein markers of every
# cluster.
DefaultAssay(pbmc) <- "ADT"
adt.markers <- FindAllMarkers(object = pbmc, assay = "ADT", only.pos = TRUE)
# Top 10 ADT markers of each cluster, ranked by avg_logFC.
adt.top10 <- adt.markers %>%
  group_by(cluster) %>%
  top_n(n = 10, wt = avg_logFC)
#head(adt.top10, n = 15)

A heatmap of the top 10 ADT markers can also be plotted, but it is not very informative.

# Plot heatmap on top marker ADTs
#DoHeatmap(pbmc, features = unique(adt.top10$gene), assay = "ADT", angle = 0) 

For PBMCs, canonical markers are available to match the unbiased clusters to known cell types:

| Cell Type | Marker genes |
|---|---|
| Naive CD4+ T | IL7R, CCR7, SELL |
| Memory CD4+ | IL7R, S100A4, CD27 |
| CD14+ Mono | CD14, LYZ |
| B | MS4A1 |
| CD8+ T | CD8A |
| FCGR3A+ Mono | FCGR3A, MS4A7 |
| NK | GNLY, NKG7 |
| DC | FCER1A, CST3 |

Looking at the top marker genes and ADTs, we assign the following cluster IDs.

# Cell-type label for each cluster, listed in the order of the cluster levels
# they replace; several clusters share the same label.
new.cluster.ids <- c(
  "B",                #  1
  "CD8+CD27- T",      #  2
  "Naive CD4+ T",     #  3
  "FCGR3A+ Mono",     #  4
  "CD14+ Mono",       #  5
  "NK",               #  6
  "CD4+CD27+ T",      #  7
  "CD8+CD27- T",      #  8
  "CD4+CD27- T",      #  9
  "CD8+CD27+ T",      # 10
  "CD8+CD27+ T",      # 11
  "CD8+CD27+ T",      # 12
  "CD14+ Mono",       # 13
  "Effector CD8+ T",  # 14
  "CD14+ Mono",       # 15
  "CD8+CD27- T"       # 16
)

Visualize the annotated clusters

# Replace every cluster level with its cell-type label (levels and labels are
# paired by position), then store the relabelled factor as cell metadata and
# as the active identities.
sce_labels <- mapvalues(sce_labels, levels(sce_labels), new.cluster.ids)
pbmc[["sceClusterID"]] <- sce_labels
Idents(pbmc) <- sce_labels

# Annotated clusters on the RNA t-SNE: no legend, centred title, labels drawn
# on the plot.
tsne_rnaClusters <- LabelClusters(
  plot = DimPlot(pbmc, reduction = "tsne", group.by = "sceClusterID") +
    NoLegend() +
    ggtitle("Citefuse's clusters on RNA") +
    theme(plot.title = element_text(hjust = 0.5)),
  id = "sceClusterID",
  size = 4
)

# Annotated clusters on the ADT t-SNE, drawn with smaller points.
tsne_adtClusters <- LabelClusters(
  plot = DimPlot(pbmc, reduction = "tsne_adt", group.by = "sceClusterID", pt.size = 0.5) +
    NoLegend() +
    ggtitle("Citefuse's clusters on ADT") +
    theme(plot.title = element_text(hjust = 0.5)),
  id = "sceClusterID",
  size = 4
)

# Show both embeddings side by side.
CombinePlots(plots = list(tsne_rnaClusters, tsne_adtClusters), ncol = 2)

Checking

Similar cell types to CiteFuse

Confirm CD27+ vs CD27- subtypes (based on CD27)

# Ridge plot of CD27 taken from the ADT assay.
RidgePlot(object = pbmc, features = "CD27", assay = "ADT", ncol = 2)

Confirm the DR+ vs. DR- subtypes based on the genes “NKG7” and “GZMA” (see CiteFuse)

# Gene symbols to inspect; the "hg19-" prefix is prepended to each symbol to
# form the feature names requested from the RNA assay.
all_markers <- c("NKG7", "GZMA")
RidgePlot(pbmc, assay = "RNA", features = paste0("hg19-", all_markers), ncol = 2)