# Restore the objects stored in the example .RData file into the workspace,
# then show the dimensions of data_matrix_normalized (expected to come from
# that file).
load(file = "/home/sgao30/BITFAM/BITFAM/data/example.RData")
dim(data_matrix_normalized)
library(ggplot2)
library(data.table)
library(Seurat)
### Read the raw expression table ###
# Ask fread() for a plain data.frame directly so row names can be attached.
dat <- fread(
  "/Shang_PHD/khetani/scRNASeq_BITFAM/DATA1/GSE124395_Normalhumanlivercellatlasdata.txt",
  data.table = FALSE
)
# Column V1 supplies the row names and is then removed from the data columns.
# NOTE(review): presumably V1 is the unnamed leading identifier column that
# fread() labels automatically -- confirm against the file header.
rownames(dat) <- dat$V1
dat <- dat[, -1]
# Read the per-cell cluster assignments (cell ids are the row names).
clusters <- read.table(
  "/Shang_PHD/khetani/scRNASeq_BITFAM/DATA1/GSE124395_clusterpartition.txt",
  stringsAsFactors = FALSE,
  header = TRUE
)

# Cluster ids that make up each cell type of interest. Defined once so that
# the cell filter and the cell-type labels below cannot drift apart.
cell_type_clusters <- list(
  "Hepatocytes"  = c(11, 14, 17, 30),
  "Kupffer cell" = c(2, 6, 31, 25, 23),
  "EPCAM+"       = c(4, 7, 24, 39),
  "LSEC"         = c(9, 13, 20)
)
selected_cluster_ids <- unlist(cell_type_clusters, use.names = FALSE)
# One label per entry of selected_cluster_ids, in the same order.
selected_cluster_labels <- rep(
  names(cell_type_clusters),
  lengths(cell_type_clusters)
)

# Named vector: cluster id per cell, restricted to the selected clusters.
clusters_sub <- clusters$sct.cpart
names(clusters_sub) <- rownames(clusters)
clusters_sub <- clusters_sub[clusters_sub %in% selected_cluster_ids]

# Annotation table: one row per retained cell, with its cluster id and the
# cell-type label looked up from that id.
df <- data.frame(clusters_sub, row.names = names(clusters_sub))
df$cell_types <- selected_cluster_labels[
  match(df$clusters_sub, selected_cluster_ids)
]
### Build the Seurat object ###
# Only the count columns belonging to the selected cells are passed in.
seuset_data <- CreateSeuratObject(counts = dat[, names(clusters_sub)])
# Normalise with the default settings, then request 5000 variable features.
# NOTE(review): the original author flagged nfeatures = 5000 with "??" --
# confirm this is the intended number.
seuset_data <- FindVariableFeatures(
  NormalizeData(seuset_data),
  nfeatures = 5000
)
### Generate the Y matrix ###
# Dense matrix of the assay data (GetAssayData() called with its defaults)
# restricted to the variable features: rows are features, columns are cells.
# This is the orientation the script previously reached by transposing the
# matrix and then transposing it back; the two dense t() copies cancelled
# each other out and are omitted.
data_matrix_normalized <- as.matrix(
  GetAssayData(object = seuset_data)[VariableFeatures(seuset_data), ]
)
# Quick look at the first cell ids and then the first feature names.
head(colnames(data_matrix_normalized))
head(rownames(data_matrix_normalized))
# NOTE: this overwrites the example.RData file under the BITFAM data folder.
save(
  data_matrix_normalized,
  file = "/home/sgao30/BITFAM/BITFAM/data/example.RData"
)
# Preview the cell annotation table, then write it as tab-separated text.
# Row names are written too (write.table default); strings are not quoted.
head(df)
write.table(
  df,
  "/home/sgao30/backup/BITFAM/inst/extdata/liver_cell_type.txt",
  sep = "\t",
  quote = FALSE
)
