library(Seurat)
#regions = "acc"
#regions = "cn"
#regions = "cer"

for (regions in c("acc","cn","cer","brainmap")){
#for (regions in c("acc")){


# Directory of the filtered matrices for the current region; it holds one
# sub-directory per reference genome.
path <- paste0("/uge_mnt/home/ilyak/SCrnaseq/brainmap/all_", regions, "/outs/filtered_gene_bc_matrices_mex")

# Read the 10X matrix stored in one genome sub-directory of `path`.
read.genome <- function(genome.dir) {
  Read10X(data.dir = paste0(path, genome.dir))
}

cd1 <- read.genome("/hg38/")
cd2 <- read.genome("/panTro5/")
cd3 <- read.genome("/panPan2/")
cd4 <- read.genome("/rheMac8/")

# Index the matrices by the one-letter species codes used in the rest of the
# script (H = hg38, C = panTro5, B = panPan2, M = rheMac8).
cd <- list(H = cd1, C = cd2, B = cd3, M = cd4)

# Ortholog table (obtained from Pasha), held as a character matrix.
# The code below uses column 1 as the shared gene identifier and columns 5-8
# as the matching feature names in the H, C, B and M matrices respectively.
orth <- as.matrix(read.delim("/uge_mnt/home/ilyak/SCrnaseq/brainmap/orth.sc.data.txt", header = TRUE))

# Explicit print(): inside the enclosing `for` loop values are not
# auto-printed, so bare head()/dim() calls would show nothing.
print(head(orth))
print(dim(orth))

# Per-species lookup vectors: values are the shared identifiers, names are
# that species' feature names, so ids[[sp]][feature] translates a row name.
ids <- list(
  H = setNames(orth[, 1], orth[, 5]),
  C = setNames(orth[, 1], orth[, 6]),
  B = setNames(orth[, 1], orth[, 7]),
  M = setNames(orth[, 1], orth[, 8])
)
print(head(ids[["C"]]))

# For every species: drop features that have no entry in the ortholog lookup
# and relabel the remaining rows with the shared identifiers. Dimensions are
# printed before and after so the loss of features is visible in the log.
for (i in names(ids)) {
  species.counts <- cd[[i]]
  print(dim(species.counts))

  has.ortholog <- rownames(species.counts) %in% names(ids[[i]])
  species.counts <- species.counts[has.ortholog, ]
  rownames(species.counts) <- ids[[i]][rownames(species.counts)]

  print(dim(species.counts))
  cd[[i]] <- species.counts
}

# Shared identifiers that label exactly one row in a species' matrix,
# collected per species (identifiers hit by several features are ambiguous
# and therefore skipped).
unique.ids <- lapply(names(ids), function(sp) {
  id.counts <- table(rownames(cd[[sp]]))
  names(id.counts)[id.counts == 1]
})

# Keep the identifiers that are unique in every species. The required count
# is derived from `ids` instead of a hard-coded 4.
genes <- unlist(unique.ids, use.names = FALSE)
tab <- table(genes)
genes <- names(tab)[tab == length(ids)]

# Explicit print(): nothing is auto-printed inside the enclosing `for` loop.
print(length(genes))

# Bring every species' matrix onto the same gene set (and the same row
# order), logging the dimensions before and after the subsetting.
for (i in names(ids)) {
  print(dim(cd[[i]]))
  shared.only <- cd[[i]][genes, ]
  cd[[i]] <- shared.only
  print(dim(cd[[i]]))
}

# Prefix every cell barcode with its species code ("H_", "C_", "B_", "M_");
# the prefixed names are what is matched against the region cell list below.
for (sp in c("H", "C", "B", "M")) {
  colnames(cd[[sp]]) <- paste0(sp, "_", colnames(cd[[sp]]))
}


#cd <- cbind(cd[["H"]],cd[["C"]],cd[["B"]],cd[["M"]])
#dim(cd)

#species <- unlist(lapply(strsplit(colnames(cd),"_"),function (x) x[1]))
#table(species)

#str(cd)

# Cell list for the current region; its first column is compared against the
# species-prefixed cell names.
region.table <- read.table(
  paste0("/uge_mnt/home/khrameeva/single-cell/bams/all_", regions, ".cells"),
  header = FALSE, sep = "\t"
)

# Select the count matrix to analyse: a single species, or all four bound
# together column-wise ("ALL"), then keep only the cells listed for the region.
# Previously swept over c("H","C","B","M","ALL"); only the value of the last
# iteration survives the loop.
for (species.selected in c("ALL")) {
  if (species.selected %in% c("H", "C", "B", "M", "ALL")) {
    print(species.selected)
    if (species.selected == "ALL") {
      cd.species <- cbind(cd[["H"]], cd[["C"]], cd[["B"]], cd[["M"]])
    } else {
      cd.species <- cd[[species.selected]]
    }
    in.region <- colnames(cd.species) %in% region.table[, 1]
    cd.species <- cd.species[, in.region]
  }
}


# Build the Seurat object. `min.cells` is set to 0.1% of the number of cells
# (rounded). The argument is spelled `counts` in full rather than relying on
# partial matching of `count`.
seurat.brain <- CreateSeuratObject(counts = cd.species, min.cells = round(ncol(cd.species) * 0.001), min.features = 0, project = "10X_brain")
#saveRDS(seurat.brain,paste0("../seurat.initial.",regions,".rds"))


# Index the ortholog table by shared identifier and restrict it to the genes
# that are present in the RNA assay.
rownames(orth) <- orth[, 1]
orth1 <- orth[rownames(seurat.brain@assays$RNA), ]

# Mitochondrial genes: shared identifiers whose column-5 name starts with
# "hg38____MT-" (grep keeps the row names, which are the shared identifiers).
mito.genes <- names(grep(pattern = "^hg38____MT-", x = orth1[, 5], value = TRUE))

# Per-cell mitochondrial share, stored as a fraction (0-1), not a percentage.
percent.mito <- Matrix::colSums(seurat.brain@assays$RNA[mito.genes, ]) / Matrix::colSums(seurat.brain@assays$RNA)
seurat.brain <- AddMetaData(object = seurat.brain, metadata = percent.mito, col.name = "percent.mito")
# Optional QC filter, currently disabled:
#seurat.brain <- subset(x = seurat.brain, subset = nFeature_RNA > 50 & nFeature_RNA < 5000 & percent.mito < 0.05)

# One Seurat object per value of `orig.ident`.
brain.list <- SplitObject(object = seurat.brain, split.by = "orig.ident")

# Normalise each subset and select its 2000 most variable features ("vst").
# seq_along() instead of 1:length(): the latter yields c(1, 0) on an empty list.
for (i in seq_along(brain.list)) {
  brain.list[[i]] <- NormalizeData(object = brain.list[[i]], verbose = FALSE)
  brain.list[[i]] <- FindVariableFeatures(
    object = brain.list[[i]],
    selection.method = "vst", nfeatures = 2000, verbose = FALSE
  )
}

# The four species subsets, in the order they are handed to the integration.
reference.list <- brain.list[c("H", "M", "C", "B")]

# Number of dimensions used for anchor finding and integration.
# Previously swept over c(20,25,30,35,40,45,50).
int.dims <- 30

brain.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:int.dims)
brain.integrated <- IntegrateData(anchorset = brain.anchors, dims = 1:int.dims)

# From here on `seurat.brain` refers to the integrated object.
seurat.brain <- brain.integrated


library(ggplot2)
library(cowplot)

# Make "integrated" the default assay; its variable features are set
# automatically during IntegrateData. Scale it before dimensional reduction.
DefaultAssay(seurat.brain) <- "integrated"
seurat.brain <- ScaleData(seurat.brain, verbose = FALSE)

# To restart from a saved aligned object instead of recomputing the above:
#seurat.brain = readRDS(paste0("../species.regions/seurat.after.align.",regions,".rds"))

# Number of principal components computed, stored and scored.
pcs.stores <- 60

seurat.brain <- RunPCA(seurat.brain, npcs = pcs.stores, verbose = FALSE)
seurat.brain <- ProjectDim(seurat.brain)

# JackStraw resampling (100 replicates) over all stored PCs, then scoring.
# The plot object is kept because the per-PC p-values are read from it later.
seurat.brain <- JackStraw(seurat.brain, dims = pcs.stores, num.replicate = 100)
seurat.brain <- ScoreJackStraw(seurat.brain, dims = 1:pcs.stores)
jack.object <- JackStrawPlot(seurat.brain, dims = 1:pcs.stores)

# JackStraw p-value of each PC, parsed from the "<label>: <p-value>" levels of
# the JackStraw plot data. They do not depend on the threshold, so they are
# extracted once instead of on every iteration.
pc.labels <- levels(unique(jack.object$data$PC.Score))
p.vals <- as.numeric(unlist(strsplit(pc.labels, ": "))[seq(2, 2 * pcs.stores, 2)])

# Sweep the significance cut-off 10^-3 ... 10^-10. For each cut-off, embed and
# cluster on the leading PCs that pass it and write a two-panel tSNE figure.
# After the loop, `seurat.brain` holds the results of the last cut-off that
# produced an embedding.
for (j in 3:10) {
  threshold <- j

  # Use every PC that precedes the first one failing the cut-off; if none
  # fails, use all stored PCs.
  not.significant <- which(p.vals > 10^(-1 * threshold))
  if (length(not.significant) == 0) {
    dims.selected <- pcs.stores
  } else {
    dims.selected <- not.significant[1] - 1
  }

  # If already the first PC fails, 1:dims.selected would silently become
  # c(1, 0); skip this cut-off instead of embedding on a bogus dimension set.
  if (dims.selected < 1) {
    warning("No PC passes p <= 1e-", threshold, " for region ", regions,
            "; skipping this threshold", call. = FALSE)
    next
  }

  seurat.brain <- RunTSNE(object = seurat.brain, reduction = "pca", dims = seq_len(dims.selected), check_duplicates = FALSE)
  p1 <- DimPlot(object = seurat.brain, reduction = "tsne", group.by = "orig.ident")

  seurat.brain <- FindNeighbors(object = seurat.brain, dims = seq_len(dims.selected))
  seurat.brain <- FindClusters(object = seurat.brain, resolution = 0.6)
  p2 <- DimPlot(object = seurat.brain, reduction = "tsne")

  # Left panel: cells coloured by orig.ident; right panel: by cluster.
  #pdf(paste0("new.genes.",regions,".",int.dims,".",threshold,".pdf"),width=8,height=5)
  png(paste0("new.genes.", regions, ".", int.dims, ".", threshold, ".png"), res = 600, pointsize = 5.75, width = 6400, height = 4800)
  t.plt <- plot_grid(p1, p2)
  print(t.plt)
  dev.off()
}

# Persist the integrated, clustered object of this region.
saveRDS(seurat.brain,paste0("seurat.with.jackstraw.",regions,".",int.dims,".rds"))
# End of the per-region loop. A second, unmatched `}` used to follow here
# (presumably left over from the commented-out int.dims sweep) and made the
# script unparsable; it has been removed.
}


# Export the cluster assignment (resolution 0.6) of every cell as a
# tab-separated table with one row per cell.
abc.frame <- data.frame(
  Names = names(seurat.brain$orig.ident),
  Clusters = seurat.brain$integrated_snn_res.0.6
)
write.table(
  abc.frame,
  paste0("../new.clusters.", regions, ".txt"),
  quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE
)

   # Score every cell and every cluster for a set of cell-type marker genes.
   # Results: one meta.data column per cell type (per-cell mean over its
   # markers) and `avg`, a gene x cluster matrix of mean marker expression.

   # Cell-type labels in the order in which they should be processed.
   ct <- c("In","Ex","spiny.neurons","purkinje","astrocyte","oligodendrocyte.progenitor","microglia","vascular.endothelial.cells","oligodendrocyte","cajal.retzius","granule","immune")

   # Marker table; column 3 is matched against the gene names of the
   # integrated assay and column 4 against the labels in `ct`.
   markers <- as.matrix(read.table("../markers_3sets.txt",header=T,sep="\t"))
   # Keep only markers that are present in the integrated assay.
   markers <- markers[markers[,3] %in% rownames(seurat.brain@assays$integrated),]
   cell.types <- unique(markers[,4])
   # Cell types that still have markers, in the order given by `ct`.
   cell.types <- ct[ct %in% cell.types]
   print(cell.types)
   # Accumulates the per-cluster averages of all cell types, row-bound.
   avg <- c()
   for (i in cell.types){
     # Marker genes of cell type `i`.
     gene.set <- markers[markers[,4]==i,3]
     print(i)
     print(gene.set)
     # Expression of the marker genes in every cell. Each gene is requested
     # twice; this leaves the means unchanged.
     # NOTE(review): presumably done so that a single-gene set still yields a
     # matrix rather than a vector - confirm.
     mtx <- seurat.brain@assays$integrated[c(gene.set,gene.set),]
     # Store the per-cell mean over the marker genes as a meta.data column
     # named after the cell type.
     seurat.brain@meta.data[[i]] <- Matrix::colMeans(mtx)
     # Mean expression of each marker gene over the cells of each cluster.
     # The extraction below repeats the one above with the same arguments.
     mtx <- seurat.brain@assays$integrated[c(gene.set,gene.set),]
     avg.ct <- c()
     # One column per cluster, in order of first appearance in active.ident.
     for (cl in unique(seurat.brain@active.ident)){
       avg.ct <- cbind(avg.ct, Matrix::rowMeans(mtx[,seurat.brain@active.ident==cl]))
     }
     dimnames(avg.ct)[[1]] <- c(gene.set,gene.set)
     dimnames(avg.ct)[[2]] <- unique(seurat.brain@active.ident)
     avg <- rbind(avg, avg.ct)
   }
   # Collapse to one row per gene name (the first occurrence is kept), which
   # removes the duplicated rows introduced above.
   avg <- avg[unique(rownames(avg)),]


# Positive marker genes of every cluster (Wilcoxon test); min.pct = 0 and
# return.thresh = 1 are passed so that no pre-filtering or p-value cut-off is
# requested. Written as a tab-separated table.
seurat.brain.markers <- FindAllMarkers(object = seurat.brain, only.pos = TRUE, test.use = "wilcox", min.pct = 0, return.thresh = 1)
write.table(seurat.brain.markers, paste0("../new.markers.", regions, ".txt"), quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)


# Feature plots of the entries in `cell.types` (the per-cell marker-set scores
# stored in meta.data under these names).
pdf(paste0("../new.markers.", regions, ".pdf"), width = 12, height = 12)
plot <- FeaturePlot(object = seurat.brain, features = cell.types)
# Explicit print(): a bare `plot` is only rendered when top-level values are
# auto-printed; under source() it would leave the PDF empty.
print(plot)
dev.off()


### Testing another set of genes for integration

library(ggplot2)
library(cowplot)

# Number of cells in which each gene has a zero value. Computed as
# "cells minus non-zero cells" so that a sparse matrix is never expanded to a
# dense one (apply() would convert the whole matrix first).
count.zero.cells <- function(mat) {
  ncol(mat) - Matrix::rowSums(mat != 0)
}

drops.idx <- count.zero.cells(seurat.brain@assays$RNA@counts)
dropsH <- count.zero.cells(brain.list$H@assays$RNA@data)
dropsM <- count.zero.cells(brain.list$M@assays$RNA@data)
dropsC <- count.zero.cells(brain.list$C@assays$RNA@data)
dropsB <- count.zero.cells(brain.list$B@assays$RNA@data)

# Parameter sweep, currently pinned to a single combination. Full grid:
#for(gnum in seq(2000,6000,250)){
#  for (sel.dim1 in seq(20,50,5)){
#    for (sel.dim2 in seq(20,50,5)){
for (gnum in 3000) {
  for (sel.dim1 in 30) {
    for (sel.dim2 in 30) {

      # Candidate genes: entries of `model` with a value below `gnum`.
      # NOTE(review): `model` is not defined anywhere in this script; it must
      # exist in the session (a named numeric vector) - confirm its origin.
      test.genes <- names(model)[which(model < gnum)]
      top.genes <- test.genes

      # Number of cells per species.
      d1 <- dim(brain.list$H)[2]
      d2 <- dim(brain.list$M)[2]
      d3 <- dim(brain.list$C)[2]
      d4 <- dim(brain.list$B)[2]

      # Per species: candidates that are non-zero in at least one cell
      # (fewer zero cells than cells). The B comparison now uses d4, the B
      # cell count; it previously used d3, the C cell count.
      sel.genes1 <- top.genes[top.genes %in% names(dropsH)[which(dropsH < d1)]]
      sel.genes2 <- top.genes[top.genes %in% names(dropsM)[which(dropsM < d2)]]
      sel.genes3 <- top.genes[top.genes %in% names(dropsC)[which(dropsC < d3)]]
      sel.genes4 <- top.genes[top.genes %in% names(dropsB)[which(dropsB < d4)]]

      # Anchor features: candidates that qualify in all four species.
      anchor.set <- intersect(intersect(sel.genes1, sel.genes2), intersect(sel.genes3, sel.genes4))

      # NOTE(review): earlier in the script `seurat.brain` was replaced by the
      # integrated object with default assay "integrated" - confirm that the
      # split objects are normalised and anchored on the intended assay.
      brain.list <- SplitObject(object = seurat.brain, split.by = "orig.ident")
      for (i in seq_along(brain.list)) {
        brain.list[[i]] <- NormalizeData(object = brain.list[[i]], verbose = FALSE)
        # Variable-feature selection is intentionally skipped: the anchor
        # features are supplied explicitly below.
        #  brain.list[[i]] <- FindVariableFeatures(object = brain.list[[i]],
        #      selection.method = "vst", nfeatures = integ.feature, verbose = FALSE)
      }

      reference.list <- brain.list[c("H", "M", "C", "B")]

      brain.anchors <- FindIntegrationAnchors(object.list = reference.list, anchor.features = anchor.set, dims = 1:sel.dim1)
      brain.integrated <- IntegrateData(anchorset = brain.anchors, dims = 1:sel.dim1)

      # Switch to the integrated assay, then scale, reduce and embed.
      DefaultAssay(object = brain.integrated) <- "integrated"
      brain.integrated <- Seurat::ScaleData(object = brain.integrated, verbose = FALSE)

      brain.integrated <- Seurat::RunPCA(object = brain.integrated, npcs = sel.dim2, verbose = FALSE)
      brain.integrated <- Seurat::RunTSNE(object = brain.integrated, reduction = "pca",
                                          dims = 1:sel.dim2, check_duplicates = FALSE)

      # One tSNE figure per parameter combination, coloured by orig.ident.
      pdf(paste0("~/Downloads/brain.align.", gnum, ".", sel.dim1, ".", sel.dim2, ".pdf"), width = 6, height = 5)
      p1 <- DimPlot(object = brain.integrated, reduction = "tsne", group.by = "orig.ident")
      print(p1)
      dev.off()

    }
  }
}