library(Linnorm)
library(SingleCellExperiment)
library(GSVA)
library(ggplot2)
library(factoextra)
library(NbClust)
library(reticulate)
library(PCAtools)
library(umap)
library(dplyr)
library(infercnv)
library(readtext)
library(D3GB)
library(GenomicRanges)
library(IRanges)
library(S4Vectors)
source("~/Supplemental_code/unCTC/CreateSingleCellObject.R")
source("~/Supplemental_code/unCTC/DDLK_Clust.R")
source("~/Supplemental_code/unCTC/Gene_Violin_plots.R")
source("~/Supplemental_code/unCTC/PathwayEnrichmentScore.R")
source("~/Supplemental_code/unCTC/Stouffer_score.R")
source("~/Supplemental_code/unCTC/unCTC_pathway_plots.R")
source("~/Supplemental_code/unCTC/unCTC_libraries.R")
source("~/Supplemental_code/unCTC/GroupsDiffGenes.R")
source("~/Supplemental_code/unCTC/GroupsDiffPathways.R")
source("~/Supplemental_code/unCTC/CNV_alterations.R")
# Load the count matrices used in this analysis. Each .RData file is expected
# to provide the correspondingly named *_CountData object used below.
load("~/Supplemental_code/unCTC_datasets/Poonia_et_al._CountData.RData")
load("~/Supplemental_code/unCTC_datasets/Ding_et_al._WBC1_CountData.RData")
load("~/Supplemental_code/unCTC_datasets/Ding_et_al._WBC2_CountData.RData")
load("~/Supplemental_code/unCTC_datasets/Ebright_et_al._CountData.RData")

# Build one metadata table per dataset: one row per column of the count
# matrix (row names = column names), with the dataset label in `Data_source`
# and the cell class ("CTC" or "WBC") in `Class`.

# Poonia et al. (CTC)
Poonia_et_al._CountmetaData <- data.frame(
  Data_source = rep("Poonia_et_al._CountData", ncol(Poonia_et_al._CountData))
)
rownames(Poonia_et_al._CountmetaData) <- colnames(Poonia_et_al._CountData)
Poonia_et_al._CountmetaData$Class <- "CTC"

# Ebright et al. (CTC)
Ebright_et_al._CountmetaData <- data.frame(
  Data_source = rep("Ebright_et_al._CountData", ncol(Ebright_et_al._CountData))
)
rownames(Ebright_et_al._CountmetaData) <- colnames(Ebright_et_al._CountData)
Ebright_et_al._CountmetaData$Class <- "CTC"

# Ding et al., WBC set 1 (WBC)
Ding_et_al._WBC1_CountmetaData <- data.frame(
  Data_source = rep(
    "Ding_et_al._WBC1_CountData",
    ncol(Ding_et_al._WBC1_CountData)
  )
)
rownames(Ding_et_al._WBC1_CountmetaData) <- colnames(Ding_et_al._WBC1_CountData)
Ding_et_al._WBC1_CountmetaData$Class <- "WBC"

# Ding et al., WBC set 2 (WBC)
Ding_et_al._WBC2_CountmetaData <- data.frame(
  Data_source = rep(
    "Ding_et_al._WBC2_CountData",
    ncol(Ding_et_al._WBC2_CountData)
  )
)
rownames(Ding_et_al._WBC2_CountmetaData) <- colnames(Ding_et_al._WBC2_CountData)
Ding_et_al._WBC2_CountmetaData$Class <- "WBC"
# This package includes one gene set collection, which is taken from the
# Molecular Signatures Database (MSigDB).
# Load the gene-set object; this .RData file is expected to provide
# `c2.all.v7.2.symbols`, which is used below.
load("~/Supplemental_code/unCTC_datasets/c2.all.v7.2.symbols.RData")

# Expression data list
dataList <- list(
  Poonia_et_al._CountData,
  Ebright_et_al._CountData,
  Ding_et_al._WBC1_CountData,
  Ding_et_al._WBC2_CountData
)

# Data IDs, in the same order as dataList
dataId <- as.list(c(
  "Poonia_et_al._CountData",
  "Ebright_et_al._CountData",
  "Ding_et_al._WBC1_CountData",
  "Ding_et_al._WBC2_CountData"
))

# Metadata tables, in the same order as dataList
MetaData <- list(
  Poonia_et_al._CountmetaData,
  Ebright_et_al._CountmetaData,
  Ding_et_al._WBC1_CountmetaData,
  Ding_et_al._WBC2_CountmetaData
)

# Gene sets given with this package
genesets <- c2.all.v7.2.symbols
# PathwayEnrichmentScore uses the following steps:
# * Integrate the data passed in the list based on common genes.
#
# PathwayEnrichmentScore requires the following inputs:
# * data_list: List of expression data matrices.
# * data_id: List of the expression data matrices' names, in the same order.
# * Genesets: List of pathways.
# * min.size: Minimum number of genes in a pathway/gene set. Default is 10.
# * max.size: Maximum number of genes in a pathway/gene set. Default is 500.
# * min_Sample: Filter out genes which are not expressed in at least
#   min_Sample cells. Default is 5.
# * min_Gene: Filter out cells which do not express at least min_Gene genes.
#   Default is 1500.
# * Parallel_threads: Number of threads used to execute the process in
#   parallel.
# Compute the pathway enrichment scores for the listed datasets.
#
# The call uses the PathwayEnrichmentScore() definition brought in by the
# source() lines at the top of this script, matching how DDLK_Clust() and
# unCTC_pathway_plots() are called further down. The previous `unCTC::`
# qualifier required an installed unCTC package that this script never loads.
# NOTE(review): assumes PathwayEnrichmentScore.R defines a function of that
# name -- confirm.
#
# capture.output() collects whatever the call prints to stdout and
# invisible() discards the captured text; the assignment to Pathway_score
# still takes effect in the calling environment.
invisible(capture.output({
  Pathway_score <- PathwayEnrichmentScore(
    data_list = dataList,
    data_id = dataId,
    Genesets = genesets,
    min.size = 10,
    max.size = 500,
    min_Sample = 5,
    min_Gene = 1500,
    Parallel_threads = 8L
  )
}))
# For the pathway enrichment score matrix computed above, we calculate the
# number of clusters using the elbow method.
# Retrieve information about the version of Python being used by reticulate:
# reticulate::py_config()
# If that version differs from the one at the given path, restart the session
# and set the path again.

# Cluster the pathway scores into n = 4 groups; results are written under the
# current working directory (out.dir).
DDLK_Clusters <- DDLK_Clust(
  PathwayScore = Pathway_score$Pathway_score,
  PathwayMetaData = Pathway_score$Pathway_metadata,
  n = 4,
  MetaData = MetaData,
  out.dir = getwd()
)
# unCTC_pathway_plots plots the principal components of the pathway
# enrichment scores.
# The inputs required by unCTC_pathway_plots are shown in the call below:
# Build the plot collection from the clustered pathway scores.
plots <- unCTC_pathway_plots(
  Pathway_score = DDLK_Clusters$Pathway_score,
  Pathway_metadata = DDLK_Clusters$PathwayDDLK_clust,
  colorby = "Data_id",
  Color_cluster = "Clusters",
  pairsplotLegend = "none"
)

# Display the PCA views (by class, then by cluster) ...
plots$group_by_Class_PCA
plots$group_by_Cluster_PCA

# ... followed by the UMAP views in the same order.
plots$group_by_Class_umap
plots$group_by_Cluster_umap
library(ggplot2)

# Bar chart of the number of rows per cluster, with bars filled by `Class`.
#
# The argument-less scale_color_manual() layer that used to sit in this chain
# was removed: no colour aesthetic is mapped here, so it never affected the
# plot, and because it supplied no `values` it may raise an error on ggplot2
# versions that require that argument.
ggplot(DDLK_Clusters$PathwayDDLK_clust, aes(x = Clusters, fill = Class)) +
  theme_classic() +
  geom_bar(stat = "count") +
  scale_fill_manual(
    values = c("dodgerblue4", "firebrick3", "darkgreen", "dark turquoise")
  )
# Agreement between cluster assignments (`Clusters`) and cell classes
# (`Class`). Lines starting with `##` are the outputs recorded when this
# script was originally run.

# Adjusted Rand index
print("ARI:")
## [1] "ARI:"
with(DDLK_Clusters$PathwayDDLK_clust, aricode::ARI(Clusters, Class))
## [1] 0.5495047

# Normalised mutual information
print("NMI:")
## [1] "NMI:"
with(DDLK_Clusters$PathwayDDLK_clust, aricode::NMI(Clusters, Class))
## [1] 0.4795896

print("ClusterPurity:")
## [1] "ClusterPurity:"
#' Cluster purity of a clustering against reference classes
#'
#' Cross-tabulates `classes` against `clusters`, takes the count of the most
#' frequent class within each cluster, and divides the sum of those counts by
#' the number of cross-tabulated observations.
#'
#' @param clusters Vector of cluster assignments, one entry per observation.
#' @param classes Vector of reference class labels, the same length as
#'   `clusters`.
#' @return A single numeric value: the fraction of tabulated observations
#'   that belong to the majority class of their cluster.
ClusterPurity <- function(clusters, classes) {
  contingency <- table(classes, clusters)
  # Divide by the tabulated total rather than length(clusters): table() drops
  # pairs containing NA by default, so using the raw length would count
  # observations in the denominator that can never appear in the numerator.
  # Without NAs the two denominators are equal.
  sum(apply(contingency, 2, max)) / sum(contingency)
}
# Purity of the clusters with respect to the cell classes; the `##` line is
# the output recorded when this script was originally run.
with(DDLK_Clusters$PathwayDDLK_clust, ClusterPurity(Clusters, Class))
## [1] 0.9875098