setwd("~/brainmap/seurat")
library(Seurat)
library(squash)
library(dplyr)
library(clusterProfiler)
library(gplots)
library(viridis)
library(RColorBrewer)
library(ggplot2)
library(pheatmap)

# Brain-region codes processed by this script and the species codes that
# prefix the cell names.
regions <- c("acc", "cn", "cer")
species <- c("H", "C", "B", "M")

# Read one saved object per region; the file name carries the upper-cased
# region code. The result is a list named by region.
rlist <- sapply(
  regions,
  function(region) {
    readRDS(paste0("Map.Separately/remapping_", toupper(region), "_none.rds"))
  },
  simplify = FALSE,
  USE.NAMES = TRUE
)

# Build the neighbour graph of every region on dimensions 1 to 30.
rlist <- lapply(rlist, FindNeighbors, dims = 1:30)

# # Choose the resolution by visual inspection of the clusters: sweep a range of resolutions per region and plot each clustering
# for (i in 2:15){
#   res <- list(cn=0.05*i,cer=0.01*i,acc=0.06*i)
#   print(res)
# 
#   for (r in regions){
#     rlist[[r]] <- FindClusters(object = rlist[[r]], reduction.type = "pca", resolution = res[[r]], print.output = 0, save.SNN = TRUE, force.recalc = TRUE)
#   
#     #dims.selected = 30
#     #rlist[[r]] <- RunTSNE(object = rlist[[r]], reduction = "pca", dims = 1:dims.selected, check_duplicates = FALSE)
#   
#     png(paste0("tsne.mapsep.res",res[[r]],".",r,".png"),res=600,pointsize=5.75,width=3400,height=3000)
#     plot1 <- DimPlot(object = rlist[[r]], reduction="tsne", pt.size=0.3, pch.use = 16)
#     print(plot1)
#     dev.off()
#   }
# }

# Chosen clustering resolution for each region.
res <- list(cn = 0.45, cer = 0.09, acc = 0.54)
for (r in regions) {
  # reduction.type, print.output, save.SNN and force.recalc are Seurat v2
  # arguments that the v3 FindClusters() does not use, so they are no longer
  # passed. verbose = FALSE keeps the run quiet, which is what
  # print.output = 0 asked for.
  rlist[[r]] <- FindClusters(
    object = rlist[[r]],
    resolution = res[[r]],
    verbose = FALSE
  )
}

# Cluster identity of every cell, per region. Saved on its own so that it can
# be reloaded without the full objects.
cells <- lapply(rlist, Idents)
save(cells, file = "cells_manyClusters.forIlia.RData")

# Markers of every cluster versus all remaining cells, computed per region on
# the RNA assay with the Wilcoxon test; only positive markers are reported.
# NOTE(review): min.pct = 0 and return.thresh = 1 look intended to switch off
# the detection-rate and p-value filters - confirm against the Seurat docs.
pbmc.markers <- lapply(rlist[regions], function(region.obj) {
  FindAllMarkers(
    object = region.obj,
    assay = "RNA",
    only.pos = TRUE,
    test.use = "wilcox",
    min.pct = 0,
    return.thresh = 1
  )
})

# Persist the clustered objects and the marker tables.
save(rlist, file = "integrated.many_clusters.mapsep.RData")
save(pbmc.markers, file = "pbmc.markers.many_clusters.mapsep.RData")

# For every region keep, per cluster, the 10 markers with the highest average
# log fold-change. top_n() keeps ties, so a cluster may contribute more than
# 10 rows (and fewer when it has fewer markers).
top10 <- list()
for (r in regions) {
  top10[[r]] <- pbmc.markers[[r]] %>%
    group_by(cluster) %>%
    top_n(10, avg_logFC) %>%
    ungroup()
}

# Copy of top10 whose gene column holds gene symbols instead of Ensembl IDs.
# All IDs of a region are translated in a single bitr() call. An ID without a
# symbol, or a failed lookup, keeps its Ensembl ID so that every row remains
# labelled.
top10.symb <- top10
for (r in regions) {
  ens.ids <- as.character(top10[[r]]$gene)
  symb <- ens.ids
  if (length(ens.ids) > 0) {
    id.map <- tryCatch(
      bitr(
        unique(ens.ids),
        fromType = "ENSEMBL",
        toType = "SYMBOL",
        OrgDb = "org.Hs.eg.db"
      ),
      error = function(e) {
        warning(
          "Symbol lookup failed for region ", r, ": ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
    if (!is.null(id.map)) {
      # match() returns the first mapping of each ID, as the per-gene
      # lookup with $SYMBOL[1] did.
      hit <- match(ens.ids, id.map$ENSEMBL)
      found <- !is.na(hit)
      symb[found] <- id.map$SYMBOL[hit[found]]
    }
  }
  top10.symb[[r]]$gene <- symb
}

# One heatmap per region: average expression of the top markers (rows,
# labelled with gene symbols) in every cluster (columns).
for (r in regions) {
  gene.set <- top10[[r]]$gene
  # drop = FALSE keeps a matrix even when only one gene is selected.
  mtx <- rlist[[r]]@assays$RNA@data[gene.set, , drop = FALSE]
  idents <- rlist[[r]]@active.ident

  # Clusters that contain cells, in factor-level order.
  clusters <- levels(droplevels(idents))

  # Mean expression of each marker within each cluster. drop = FALSE keeps
  # single-cell clusters as one-column matrices so rowMeans() still works.
  avg <- do.call(cbind, lapply(clusters, function(cl) {
    Matrix::rowMeans(mtx[, idents == cl, drop = FALSE])
  }))
  rownames(avg) <- top10.symb[[r]]$gene
  colnames(avg) <- clusters
  #    avg <- log10(avg+0.001)
  # Cap the values at 2.5 so a few very high averages do not use up the
  # whole colour range.
  avg[avg > 2.5] <- 2.5
  print(avg)

  # Put a gap after each cluster's run of markers. The run lengths are taken
  # from the marker table because top_n() does not guarantee exactly 10 rows
  # per cluster. No gap is placed after the last row, and NULL is passed when
  # there is nothing to separate.
  run.ends <- cumsum(rle(as.character(top10[[r]]$cluster))$lengths)
  gaps <- head(run.ends, -1)
  if (length(gaps) == 0) {
    gaps <- NULL
  }

  pdf(
    paste0("heatmap.many_clusters.mapsep.", r, ".pdf"),
    pointsize = 5.75,
    width = 8,
    height = length(clusters) * 2
  )
  pheatmap(avg,
           color = colorRampPalette(c("lightgray", "blue"))(48),
           border_color = NA,
           fontsize = 5.75 * 3,
           na_col = "lightgray",
           gaps_row = gaps,
           cluster_rows = FALSE,
           cluster_cols = FALSE,
           show_rownames = TRUE)
  dev.off()
}

# Reload the saved cluster assignments and restate the region/species codes.
load("cells_manyClusters.forIlia.RData")
regions <- c("acc", "cn", "cer")
species <- c("H", "C", "B", "M")

# One panel per region: a horizontal stacked bar per cluster giving the
# percentage of its cells that come from each species.
pdf("Ncells_per_many_clusters.pdf", width = 7, height = 3.5)
par(mfrow = c(1, 3), las = 1)
for (r in regions) {
  idents <- cells[[r]]
  # The species code is the part of the cell name before the first "_".
  name.parts <- strsplit(names(idents), "_")
  cell.species <- vapply(name.parts, function(p) p[[1]], character(1))
  # Clusters x species counts, with the columns in the fixed species order.
  counts <- table(idents, cell.species)[, species]
  # Row-wise percentages; apply() returns them transposed (species x clusters),
  # which is the layout barplot() stacks per cluster.
  pct <- apply(counts, 1, function(n) n / sum(n) * 100)
  barplot(
    pct,
    horiz = TRUE,
    col = c("red", "blue", "#800080", "forestgreen"),
    main = r
  )
}
dev.off()

# Plot colour of each species, keyed by species code.
col.species <- c(H = "red", C = "blue", B = "#800080", M = "forestgreen")

# `batches` is not created or loaded anywhere in this script. Stop before the
# PDF device is opened, so a missing lookup does not leave an open device and
# an empty file behind.
# NOTE(review): presumably batches[[region]] is a vector named by the token
# that follows the first "-" in the cell name - verify against its producer.
if (!exists("batches")) {
  stop(
    "'batches' is not defined: load the per-region batch lookup ",
    "before running the batch composition plot",
    call. = FALSE
  )
}

# One panel per region and species: for each cluster, the mean (bar), the
# standard deviation (vertical line) and the per-batch values (points) of the
# percentage of a batch's cells that fall in that cluster.
pdf("Ncells_batches_per_many_clusters.pdf", width = 9, height = 7)
par(mfrow = c(3, 4), las = 2)
for (r in regions) {
  cell.names <- names(cells[[r]])
  # Species code: part of the cell name before the first "_".
  sp.r <- vapply(strsplit(cell.names, "_"), function(x) x[[1]], character(1))
  # Batch key: second "-"-separated token of the cell name.
  batch.key <- vapply(strsplit(cell.names, "-"), function(x) x[[2]], character(1))
  batch.r <- batches[[r]][batch.key]
  ct.r <- cells[[r]]
  for (s in species) {
    in.species <- sp.r == s
    # Clusters x batches counts, converted to percentages within each batch.
    tab <- table(ct.r[in.species], batch.r[in.species])
    tab <- apply(tab, 2, function(x) x / sum(x) * 100)
    # Spread and centre across batches, per cluster.
    sds <- apply(tab, 1, sd)
    means <- apply(tab, 1, mean)
    bars <- barplot(
      means,
      col = col.species[s],
      main = s,
      ylab = "Percentage of cells",
      ylim = c(0, 100)
    )
    # length = 0 draws plain line segments without arrow heads.
    arrows(bars, means - sds, bars, means + sds, length = 0)
    stripchart(
      as.data.frame(t(tab)),
      add = TRUE,
      at = bars,
      vertical = TRUE,
      pch = 21,
      bg = rgb(0.5, 0.5, 0.5, 0.5),
      cex = 1
    )
  }
}
dev.off()

# Cluster flagged as the "mito" cluster in each region.
mito <- list(acc = 6, cer = 2, cn = 5)

# Per region, the names of all cells that are not in the mito cluster.
no.mito <- sapply(
  regions,
  function(region) {
    idents <- cells[[region]]
    keep <- idents != mito[[region]]
    names(idents)[keep]
  },
  simplify = FALSE,
  USE.NAMES = TRUE
)

# Cell counts per region before and after removing the mito cluster.
lapply(cells, length)
lapply(no.mito, length)

save(no.mito, file = "cells.nomito.RData")

# Remove the mito-cluster cells from both saved sets of objects.
# Names are the input files, values the matching output files.
nomito.files <- c(
  "integrated.many_clusters.mapsep.RData" = "integrated.many_clusters.nomito.RData",
  "integrated.mapsep.RData" = "integrated.nomito.RData"
)
for (in.file in names(nomito.files)) {
  loaded <- load(in.file)
  # Without this check, a file that lacks `rlist` would leave the objects
  # from the previous iteration in place, and they would be re-saved under
  # the wrong name.
  if (!"rlist" %in% loaded) {
    stop("'", in.file, "' does not contain 'rlist'", call. = FALSE)
  }
  for (r in regions) {
    # subset() replaces SubsetData(), which is deprecated in Seurat 3 and
    # removed in Seurat 4.
    rlist[[r]] <- subset(rlist[[r]], cells = no.mito[[r]])
  }
  save(rlist, file = nomito.files[[in.file]])
}
