setwd("~/brainmap/seurat")
library(Seurat)
library(squash)
library(dplyr)
library(clusterProfiler)
library(gplots)
library(viridis)
library(RColorBrewer)
library(ggplot2)

# Region tags (used as list keys and in file names) and the one-letter
# species codes used by the plots further down.
species <- c("H", "C", "B", "M")
regions <- c("acc", "cn", "cer")

# Read one serialized object per region; the file name carries the
# upper-cased region tag.
rlist <- list()
for (r in regions) {
  rds.file <- paste0("Map.Separately/remapping_", toupper(r), "_none.rds")
  rlist[[r]] <- readRDS(rds.file)
}

# common <- list()
# for (r in regions){
#   common[[r]] <- as.matrix(read.delim(paste0(r,".common_cells.srong.txt"),header=F))
# }
# 
# for (r in regions){
#   rlist[[r]] <- Seurat::SubsetData(rlist[[r]],cells=common[[r]][,6])
# }

# Build the nearest-neighbour graph of every region from dimensions 1-30.
for (r in regions) {
  rlist[[r]] <- FindNeighbors(object = rlist[[r]], dims = 1:30)
}

# Clustering resolution, chosen separately for each region.
res <- list(cn = 0.05, cer = 0.01, acc = 0.06)
res

# Cluster every region at its own resolution.
# The arguments reduction.type, print.output, save.SNN and force.recalc that
# used to be passed here belong to the Seurat v2 FindClusters() interface.
# They are not formal arguments of the v3 FindClusters() that pairs with
# FindNeighbors(), so they are dropped. The old print.output = 0 is expressed
# through its v3 counterpart, verbose = FALSE.
for (r in regions) {
  rlist[[r]] <- FindClusters(
    object = rlist[[r]],
    resolution = res[[r]],
    verbose = FALSE
  )
}

# Snapshot of the clustered objects. Writing the snapshot is disabled, so the
# load() below restores whatever objects the .RData file holds into the
# global environment (presumably `rlist`, replacing the one computed above --
# confirm against the file).
# save(rlist, file = "integrated.mapsep.RData")
load(file = "integrated.mapsep.RData")

# Export the active identity vector of every region for downstream use.
cells <- lapply(rlist, slot, name = "active.ident")
save(list = "cells", file = "cells.forIlia.RData")

# t-SNE coloured by cluster, one PNG per region. Each region has its own
# palette; the list order fixes the order in which the regions are drawn.
tsne.cols <- list(
  acc = c("#CE6928","#B02325","#208C43", "#9F5DA5","#F16EAA","#4A87C6"),
  cn  = c("#CE6928","#9F5DA5","#208C43", "#B02325","#F16EAA","#4A87C6"),
  cer = c("#CE6928","#B02325","#208C43", "#9F5DA5")
)
for (r in names(tsne.cols)) {
  png(paste0("tsne.mapsep.", r, ".png"),
      res = 600, pointsize = 5.75, width = 3400, height = 3000)
  plot1 <- DimPlot(object = rlist[[r]], reduction = "tsne",
                   cols = tsne.cols[[r]], pt.size = 0.3, pch.use = 16)
  print(plot1)
  dev.off()
}

# t-SNE coloured by the `orig.ident` metadata column, one PNG per region.
species.cols <- c("#800080","blue","red","forestgreen")
for (r in regions) {
  png(paste0("tsne.mapsep.species.", r, ".png"),
      res = 600, pointsize = 5.75, width = 3400, height = 3000)
  plot1 <- DimPlot(object = rlist[[r]], pt.size = 0.05, cols = species.cols,
                   reduction = "tsne", group.by = "orig.ident")
  print(plot1)
  dev.off()
}

##### all regions together #####
# Object holding the cells of all regions together.
pbmc <- readRDS("Map.Separately/filter.remapping_BRAINMAP_none_2000_30.lower.bound.none.rds")

# t-SNE of all cells coloured by the `orig.ident` metadata column.
# The plot is print()ed explicitly, as in the per-region blocks: a bare
# ggplot expression is only drawn through top-level auto-printing, which does
# not happen when this file is run via source(), leaving an empty PNG.
# NOTE(review): `legend = 'none'` is kept as in the original call; confirm
# that the installed DimPlot() actually honours it.
png("tsne.ALL.species.cols.mapsep.png", res = 600, pointsize = 5.75,
    width = 4800, height = 4800)
print(DimPlot(object = pbmc, pt.size = 0.05,
              cols = c("#800080","blue","red","forestgreen"),
              reduction = "tsne", group.by = "orig.ident", legend = 'none'))
dev.off()

# Sample number of every cell: the token that follows the first "-" in the
# cell name.
cell.names <- names(pbmc@active.ident)
numbers.v2 <- as.numeric(vapply(strsplit(cell.names, "-", fixed = TRUE),
                                function(x) x[2], character(1)))

# Map sample numbers to regions: 1-6 -> Cer, 7-12 -> Cn, 13-17 -> Acc.
# The vector is built at full length, so a cell with an unexpected sample
# number gets NA instead of being skipped. Skipping a cell (as the previous
# append loop did) made the vector shorter than the number of cells and
# shifted every later label onto the wrong cell.
selected.regions <- rep(NA_character_, length(numbers.v2))
selected.regions[numbers.v2 %in% 1:6] <- "Cer"
selected.regions[numbers.v2 %in% 7:12] <- "Cn"
selected.regions[numbers.v2 %in% 13:17] <- "Acc"
if (anyNA(selected.regions)) {
  warning(sum(is.na(selected.regions)),
          " cell(s) have a sample number outside 1-17; region set to NA",
          call. = FALSE)
}

pbmc <- AddMetaData(object = pbmc, metadata = selected.regions, col.name = "regions")

# t-SNE of all cells coloured by region. print() is explicit so the figure is
# also drawn when the file is run via source().
# NOTE(review): `legend = 'none'` is kept as in the original call; confirm
# that the installed DimPlot() actually honours it.
png("tsne.ALL.region.cols.mapsep.png", res = 600, pointsize = 5.75,
    width = 4800, height = 4800)
print(DimPlot(object = pbmc, pt.size = 0.05,
              cols = c("#FF9801","#26A59A","#1665C0"),
              reduction = "tsne", group.by = "regions", legend = 'none'))
dev.off()
 
# rlist[["brainmap"]] <- pbmc
##########

# Marker-set scoring per region.
# For every region this block (1) stores a per-cell score for each cell type
# as a metadata column, (2) builds a gene x cluster matrix of mean marker
# expression, and (3) labels every cluster with the cell type of its
# highest-scoring marker gene. Results are collected in `avgs`, `cell.types`
# and `ct.ids`; `avgs` and `ct.ids` are saved to disk at the end.

# Cell-type labels in the order in which they are processed and displayed.
ct <- c("In","Ex","Sn","Pur","OPC","Ast","OD","CR","MG","VEC","Gr")
#ct <- unique(markers[,"Type"])

avgs <- list()
cell.types <- list()
ct.ids <- list()
for (r in regions){
  # Marker table, re-read for each region because it is filtered per region.
  # Column 3 is matched against the gene IDs of the RNA assay and column 4 is
  # used as the cell-type label (presumably column 2 holds the gene symbol,
  # see its use below -- TODO confirm against markers_3sets.txt).
  markers <- as.matrix(read.table("markers_3sets.txt",header=T,sep="\t"))
  markers <- markers[markers[,3] %in% rownames(rlist[[r]]@assays$RNA@data),]
  # Cell types that still have at least one marker, kept in the order of `ct`.
  cell.types[[r]] <- unique(markers[,4])
  cell.types[[r]] <- ct[ct %in% cell.types[[r]]]
  avg <- c()
  for (i in cell.types[[r]]){
    gene.set <- markers[markers[,4]==i,3]
    print(i)
    # Expression of the marker genes of this cell type, genes x cells.
    # Every gene is selected twice; presumably this keeps the result a matrix
    # when the set holds a single gene. Means are unaffected and the duplicate
    # rows are removed again after the loop.
    mtx <- rlist[[r]]@assays$RNA@data[c(gene.set,gene.set),]
    # Store the per-cell score log10(mean expression + 0.001) as a metadata
    # column named after the cell type (plotted later via FeaturePlot).
    rlist[[r]][[i]] <- log10(Matrix::colMeans(mtx)+0.001)
    print(min(rlist[[r]][[i]]))
    print(max(rlist[[r]][[i]]))
    # Mean expression of every marker gene over the cells of each cluster;
    # columns follow the order of first appearance of the cluster identities.
    avg.ct <- c()
    for (cl in unique(rlist[[r]]@active.ident)){
      avg.ct <- cbind(avg.ct, Matrix::rowMeans(mtx[,rlist[[r]]@active.ident==cl]))
    }
    dimnames(avg.ct)[[1]] <- c(gene.set,gene.set)
    dimnames(avg.ct)[[2]] <- unique(rlist[[r]]@active.ident)
    avg <- rbind(avg, avg.ct)
  }
  # Keep the first row of every gene ID (drops the duplicated rows), then move
  # to log10 scale with the same 0.001 offset as the per-cell score.
  avg <- avg[unique(rownames(avg)),]
  avg <- log10(avg+0.001)
  
  # Replace the row names by gene symbols. If bitr() raises an error the
  # original ID is kept, because gS is only overwritten on success.
  symb <- c()
  for(i in 1:nrow(avg)){
    gE <- rownames(avg)[i]
    gS <- gE
    try(gS <- bitr(gE, fromType="ENSEMBL", toType="SYMBOL", OrgDb="org.Hs.eg.db")$SYMBOL[1])
    symb <- c(symb,gS)
  }
  dimnames(avg)[[1]] <- symb
  print(avg)
  
  # For every cluster (column) take the row index of its highest value, look
  # up column 4 of the marker rows whose column 2 equals that row name, and
  # use "<cell type> <cluster>" as the new column label.
  # NOTE(review): sapply() returns a list if a name matches several marker
  # rows; paste() would then collapse each element into one string.
  ord <- apply(avg,2,which.max)
  ids <- sapply(rownames(avg)[ord],function (x) markers[markers[,2]==x&markers[,4] %in% cell.types[[r]],4])
  ids <- paste(ids,names(ord))
  names(ids) <- names(ord)
  ct.ids[[r]] <- ids
  dimnames(avg)[[2]] <- ids
  # Order the clusters by the row position of their top marker gene.
  avg <- avg[,order(ord)]
  
  # Cap the values: anything above 1 on the linear scale is set to log10(1).
  avg[10^avg>1] <- log10(1)
  avgs[[r]] <- avg
}
save(ct.ids,file="ct.ids.mapsep.RData")
save(avgs,file="avgs.mapsep.RData")

# Marker table restricted to genes present in the RNA assay of at least one
# region. drop = FALSE keeps `markers` a matrix even if a single row remains.
markers <- as.matrix(read.table("markers_3sets.txt", header = TRUE, sep = "\t"))
all.genes <- unlist(lapply(rlist, function (x) rownames(x@assays$RNA@data)))
markers <- markers[markers[, 3] %in% all.genes, , drop = FALSE]

# Symbols of all retained marker genes, grouped by cell type in `ct` order.
# The ID -> symbol conversion uses the same fallback as the per-region
# averaging loop: when bitr() raises an error the original ID is kept. Without
# the fallback an ID that bitr() rejects aborted the script here, and the
# column names below could not match the row names stored in `avgs`.
expressed.markers <- c()
for (i in ct) {
  gene.set <- markers[markers[, 4] == i, 3]
  for (j in gene.set) {
    gS <- j
    try(gS <- bitr(j, fromType = "ENSEMBL", toType = "SYMBOL", OrgDb = "org.Hs.eg.db")$SYMBOL[1])
    expressed.markers <- c(expressed.markers, gS)
  }
}
expressed.markers

# One row per "<region> <cluster label>", one column per marker symbol.
# Cells stay NA where a gene was not scored in a region.
combined <- matrix(NA, sum(unlist(lapply(avgs, ncol))), length(expressed.markers))
dimnames(combined)[[1]] <- unlist(sapply(names(avgs), function (x) paste(x, colnames(avgs[[x]]))))
dimnames(combined)[[2]] <- expressed.markers
combined
for (r in regions) {
  region.rows <- paste(r, colnames(avgs[[r]]))
  region.cols <- rownames(avgs[[r]])
  print(combined[region.rows, region.cols])
  print(avgs[[r]])
  # avgs[[r]] is genes x clusters; transpose to clusters x genes.
  combined[region.rows, region.cols] <- t(avgs[[r]])
}
combined

library(pheatmap)

# Heatmap of the combined cluster x marker matrix, back on the linear scale.
# Row gaps separate the regions. They are derived from the number of clusters
# stored per region in `avgs` (cumulative count, without the final total)
# instead of the hard-coded c(6, 12), so the figure stays correct when a
# clustering resolution changes.
# Column gaps separate the marker sets of the cell types in `ct` order.
region.gaps <- unname(head(cumsum(vapply(avgs, ncol, integer(1))), -1))

pdf("avgHeatmap.mapsep.pdf", width = 8, height = 6)
#heatmap.2(10^combined,Rowv=F,Colv=F,adjCol=c(NA,0.5),dendrogram="none",density.info="none",scale="none",trace="none",symbreaks=F,symkey=F,keysize=2,key.title="",colsep=cumsum(table(markers[,4])[ct]),col=colorRampPalette(c("lightgray","blue"))(24))
pheatmap(10^combined,
         color = colorRampPalette(c("lightgray","blue"))(48),
         #breaks=10^seq(log10(min(overlap,na.rm=T)),log10(max(overlap,na.rm=T)),length.out=91),
         border_color = NA,
         fontsize = 5.75*3,
         fontsize_row = 7.92*3,
         na_col = "lightgray",
         gaps_row = region.gaps,
         gaps_col = cumsum(table(markers[,4])[ct]),
         cluster_rows = FALSE,
         cluster_cols = FALSE,
         show_rownames = TRUE)
dev.off()

# One legend-free feature plot per region and cell-type score, written to
# mapsep/<region>.<cell type>.nolegend.png.
bg <- rgb(0.9, 0.9, 0.9)  # background grey, also used for NA values
for (r in regions) {
  for (i in cell.types[[r]]) {
    p <- FeaturePlot(object = rlist[[r]], features = i)

    # Sort the plot data by its third column so that rows with the highest
    # values come last.
    draw.order <- order(p$data[[3]])
    p$data <- p$data[draw.order, ]
    max.exp <- max(p$data[[3]])

    # Reuse the colour mapping of the point layer for its fill as well.
    p$layers[[1]]$mapping$fill <- p$layers[[1]]$mapping$colour

    # Identical grey-to-blue gradients for colour and fill, from -3 up to the
    # maximum of the plotted column.
    colour.scale <- scale_color_gradientn(
      colours = c(bg, "blue"), guide = FALSE,
      limits = c(-3, max.exp), na.value = bg
    )
    fill.scale <- scale_fill_gradientn(
      colours = c(bg, "blue"), name = expression(atop(Expression, (log))),
      limits = c(-3, max.exp), na.value = bg
    )

    png(paste0("mapsep/", r, ".", i, ".nolegend.png"),
        res = 600, pointsize = 5.75, width = 1500, height = 1700)
    p <- p + colour.scale + fill.scale + theme_void() +
      theme(legend.position = "none")
    p$layers[[1]]$aes_params$size <- 0.001
    print(p)
    dev.off()
  }
}

# for (r in regions){
#   i <- "Ast"
#   p <- FeaturePlot(object = rlist[[r]], features = i)
#   p$data <- p$data[order(p$data[[3]]),]
#   
#   max.exp <- max(p$data[[3]])
#   p$layers[[1]]$mapping$fill <- p$layers[[1]]$mapping$colour
#   bg <- rgb(0.9,0.9,0.9)
#   png(paste0("markers_new/",r,".empty.nolegend.png"),res=600,pointsize=5.75,width=1500,height=1700)
#   p <- p + scale_color_gradientn(colours = c(bg, bg), guide = F, limits = c(-3, max.exp), na.value = bg) + scale_fill_gradientn(colours = c(bg, bg), name = expression(atop(Expression, (log))), limits = c(-3, max.exp), na.value = bg) + theme_void() + theme(legend.position="none")
#   p$layers[[1]]$aes_params$size = 0.001
#   print(p)
#   dev.off()
# }
# 
# png(paste0("markers_new/legend.png"),res=600,pointsize=5.75,width=1500,height=1700)
# p <- p + scale_color_gradientn(colours = c(bg, "blue"), guide = F, limits = c(-3, max.exp), na.value = bg) + scale_fill_gradientn(colours = c(bg, "blue"), name = expression(atop(Expression, (log))), limits = c(-3, max.exp), na.value = bg) + theme_void()
# p$layers[[1]]$aes_params$size = 0.001
# print(p)
# dev.off()

pbmc.markers <- list()
for (r in regions) {
  # Find markers for every cluster compared to all remaining cells and report
  # only the positive ones. min.pct = 0 and return.thresh = 1 disable the
  # detection-rate and p-value filters.
  pbmc.markers[[r]] <- FindAllMarkers(object = rlist[[r]], assay = "RNA",
                                      only.pos = TRUE, test.use = "wilcox",
                                      min.pct = 0, return.thresh = 1)
}

save(pbmc.markers, file = "pbmc.markers.mapsep.RData")


# Per cluster, keep the 10 markers with the highest avg_logFC.
# NOTE(review): top_n() keeps all tied rows, so a cluster can contribute more
# than 10 rows, and fewer if it has fewer than 10 markers.
top10 <- list()
for (r in regions) {
  top10[[r]] <- pbmc.markers[[r]] %>% group_by(cluster) %>% top_n(10, avg_logFC)
}

# Copy of top10 in which the gene IDs are replaced by gene symbols. If bitr()
# raises an error for an ID, the original ID is kept.
top10.symb <- top10
for (r in regions) {
  n.genes <- nrow(top10[[r]])
  symb <- character(n.genes)
  # seq_len() keeps the loop empty for a region without markers. 1:nrow()
  # would iterate over c(1, 0) and produce a vector of the wrong length.
  for (i in seq_len(n.genes)) {
    gE <- top10[[r]]$gene[i]
    gS <- gE
    try(gS <- bitr(gE, fromType = "ENSEMBL", toType = "SYMBOL", OrgDb = "org.Hs.eg.db")$SYMBOL[1])
    symb[i] <- gS
  }
  top10.symb[[r]]$gene <- symb
}

# Per-region heatmap of the top marker genes: mean expression of every
# selected gene (rows, labelled by symbol) in every cluster (columns).
for (r in regions) {
  gene.set <- top10[[r]]$gene
  # drop = FALSE keeps matrix shape for a single gene or a single-cell cluster.
  mtx <- rlist[[r]]@assays$RNA@data[gene.set, , drop = FALSE]
  avg <- c()
  # Clusters ordered by the level order of the identity factor.
  clusters <- unique(rlist[[r]]@active.ident)
  clusters <- clusters[order(as.numeric(clusters))]
  for (cl in clusters) {
    in.cluster <- rlist[[r]]@active.ident == cl
    avg <- cbind(avg, Matrix::rowMeans(mtx[, in.cluster, drop = FALSE]))
  }
  dimnames(avg)[[1]] <- top10.symb[[r]]$gene
  dimnames(avg)[[2]] <- as.character(clusters)
#    avg <- log10(avg+0.001)
  # Cap the colour scale at 2.5.
  avg[avg > 2.5] <- 2.5
  print(avg)

  # Row gaps after the marker block of every cluster. The block sizes are
  # counted from top10 instead of assuming exactly 10 genes per cluster,
  # because top_n() returns more rows on ties and fewer for clusters with
  # fewer than 10 markers. With 10 genes per cluster this equals the previous
  # (1:ncol(avg))*10.
  cluster.gaps <- unique(as.vector(cumsum(table(top10[[r]]$cluster))))

  pdf(paste0("heatmap.mapsep.", r, ".pdf"), pointsize = 5.75, width = 8,
      height = length(clusters)*2)
  pheatmap(avg,
           color = colorRampPalette(c("lightgray","blue"))(48),
           #breaks=10^seq(log10(min(overlap,na.rm=T)),log10(max(overlap,na.rm=T)),length.out=91),
           border_color = NA,
           fontsize = 5.75*3,
           na_col = "lightgray",
           gaps_row = cluster.gaps,
           cluster_rows = FALSE,
           cluster_cols = FALSE,
           show_rownames = TRUE)
  dev.off()
}

# Restore the per-region identity vectors saved earlier and reset the region
# and species tags.
load("cells.forIlia.RData")
species <- c("H","C","B","M")
regions <- c("acc","cn","cer")

# Batch letter of every sample number, per region. The sample number is the
# element name and the batch letter is the value.
batches <- list(
  acc = setNames(c("a","b","b","c","c"), as.character(1:5)),
  cn  = setNames(c("a","a","b","b","c","c"), as.character(1:6)),
  cer = setNames(c("a","a","b","b","c","c"), as.character(1:6))
)

# Species composition of every cluster: one horizontal stacked bar chart per
# region, with each cluster's bar summing to 100 %.
pdf("Ncells_per_clusters.pdf", width = 7, height = 3.5)
par(mfrow = c(1, 3), las = 1)
for (r in regions) {
  ct.r <- cells[[r]]
  # The species code is the part of the cell name before the first "_".
  name.parts <- strsplit(names(ct.r), "_")
  sp.r <- vapply(name.parts, function(x) x[[1]], character(1))
  # Cluster x species counts, with columns in the fixed `species` order.
  tab <- table(ct.r, sp.r)[, species]
  # Percentages within every cluster; apply() returns species x cluster.
  tab <- apply(tab, 1, function(x) x / sum(x) * 100)
  barplot(tab, horiz = TRUE, col = c("red","blue","#800080","forestgreen"),
          main = r)
}
dev.off()

# Bar colour of every species code.
col.species <- c("red","blue","#800080","forestgreen")
names(col.species) <- c("H","C","B","M")

# Display names of the clusters of every region, indexed by the integer code
# of the identity factor.
ct.names <- list()
ct.names[["acc"]] <- c("AC Ex","AC In","AC Ast","AC OD","AC OPC","AC MG")
ct.names[["cn"]] <- c("CN Neu","CN OD","CN Ast","CN In","CN OPC","CN MG")
ct.names[["cer"]] <- c("CB Neu","CB In","CB Ast","CB OD")

# Cell-type composition per batch: one panel per region and species showing
# the mean percentage over batches (bar), +/- one standard deviation (line)
# and the individual batches (points). Also prints the mean glia/neuron ratio
# over batches.
pdf("Ncells_batches.pdf", width = 7, height = 7)
par(mfrow = c(3, 4), las = 2)
for (r in regions) {
  # Species code = text before the first "_" of the cell name. The sample
  # number = second "-"-separated token, translated into its batch letter.
  sp.r <- unlist(lapply(strsplit(names(cells[[r]]), "_"), function (x) x[[1]]))
  batch.r <- unlist(lapply(strsplit(names(cells[[r]]), "-"), function (x) x[[2]]))
  batch.r <- batches[[r]][batch.r]
  # Integer codes of the identity factor -> display names.
  ct.r <- cells[[r]]
  ct.r <- as.numeric(ct.r)
  ct.r <- ct.names[[r]][ct.r]
  for (s in species) {
    print(s)
    # Cell type x batch counts of this species, as percentages per batch.
    tab <- table(ct.r[sp.r == s], batch.r[sp.r == s])
    tab <- apply(tab, 2, function (x) x / sum(x) * 100)
    sds <- apply(tab, 1, sd)
    means <- apply(tab, 1, mean)
    bars <- barplot(means, col = col.species[s], main = s,
                    ylab = "Percentage of cells", ylim = c(0, 100))
    arrows(bars, means - sds, bars, means + sds, length = 0)
    stripchart(as.data.frame(t(tab)), add = TRUE, at = bars, vertical = TRUE,
               pch = 21, bg = rgb(0.5, 0.5, 0.5, 0.5), cex = 1)

    # Rows whose name contains "Neu", "Ex" or "In" count as neurons, all
    # other rows as glia.
    neurons <- rownames(tab)[c( grep(rownames(tab), pattern = "Neu"), grep(rownames(tab), pattern = "Ex"), grep(rownames(tab), pattern = "In") )]
    glia <- rownames(tab)[!(rownames(tab) %in% neurons)]
    # drop = FALSE keeps a matrix when only one row is selected. Without it
    # colSums() fails for a species that has a single neuronal or glial type.
    tab.neurons <- colSums(tab[neurons, , drop = FALSE])
    tab.glia <- colSums(tab[glia, , drop = FALSE])
    print(mean(tab.glia / tab.neurons))
  }
}
dev.off()

# Cluster palettes per region.
colors <- list(
  acc = c("#CE6928","#B02325","#208C43", "#9F5DA5","#F16EAA","#4A87C6"),
  cn  = c("#CE6928","#9F5DA5","#208C43", "#B02325","#F16EAA","#4A87C6"),
  cer = c("#CE6928","#B02325","#208C43", "#9F5DA5")
)

# Cluster composition of every species: one vertical stacked bar chart per
# region, with each species' bar summing to 100 %.
pdf("Ncells_per_species.pdf", width = 7, height = 3.5)
par(mfrow = c(1, 3), las = 1)
for (r in regions) {
  ct.r <- cells[[r]]
  # The species code is the part of the cell name before the first "_".
  name.parts <- strsplit(names(ct.r), "_")
  sp.r <- vapply(name.parts, function(x) x[[1]], character(1))
  # Cluster x species counts, with columns in the fixed `species` order.
  tab <- table(ct.r, sp.r)[, species]
  print(tab)
  # Percentages within every species (column).
  tab <- apply(tab, 2, function(x) x / sum(x) * 100)
  barplot(tab, horiz = FALSE, main = r, col = colors[[r]])
}
dev.off()

# End the R session here. When the script is run from top to bottom, nothing
# below this call is executed; the remaining section is kept as scratch code.
q()



# ---- Not reached: scoring of the nuc.seq marker sets ----
# Presumably restores an `rlist` with an "integrated" assay -- confirm against
# the contents of integrated.RData.
load("integrated.RData")

# Marker genes with their cluster ID, and a cluster table whose "Name" column
# is used to label the scores.
markers <- as.matrix(read.table("nuc.seq.genes.txt",header=T,sep="\t"))
clusters <- as.matrix(read.table("nuc.seq.clusters.txt",header=T,sep="\t",row.names=1))
clusters
head(markers)
cell.types <- unique(markers[,"Cluster.ID"])
cell.types
for (r in regions){
  avg <- c()
  for (i in cell.types){
    gene.set <- markers[markers[,"Cluster.ID"]==i,"Gene.ID"]
    print(i)
    print(length(gene.set))
    # First pass: translate the gene names to ENSEMBL IDs, treating them both
    # as aliases and as symbols.
    ens <- rbind(as.matrix(bitr(gene.set, fromType="ALIAS", toType="ENSEMBL", OrgDb="org.Hs.eg.db")),
                 as.matrix(bitr(gene.set, fromType="SYMBOL", toType="ENSEMBL", OrgDb="org.Hs.eg.db")))
    # Second pass for names the first pass did not translate: split off a
    # single trailing digit and try again.
    gene.set <- gene.set[!(gene.set %in% ens[,1])]
    print(gene.set)
    gene.set <- unlist(strsplit(gene.set,"\\d$",perl=TRUE))
    print(gene.set)
    ens2 <- rbind(as.matrix(bitr(gene.set, fromType="ALIAS", toType="ENSEMBL", OrgDb="org.Hs.eg.db")),
                 as.matrix(bitr(gene.set, fromType="SYMBOL", toType="ENSEMBL", OrgDb="org.Hs.eg.db")))
    # Union of the IDs from both passes, limited to genes of the integrated
    # assay.
    gene.set <- unique(c(ens[,2],ens2[,2]))
    print(length(gene.set))
    gene.set <- gene.set[gene.set %in% rownames(rlist[[r]]@assays$integrated)]
    # Expression of the genes of interest, genes x cells. Every gene is
    # selected twice; presumably this keeps the result a matrix for one-gene
    # sets. The duplicate rows are removed again after the loop.
    mtx <- rlist[[r]]@assays$integrated[c(gene.set,gene.set),]
    # Store the per-cell mean as a metadata column named after the cluster's
    # "Name" entry.
    rlist[[r]][[clusters[as.numeric(i),"Name"]]] <- Matrix::colMeans(mtx)
    # Get mean expression of genes of interest over cells in each cluster
    avg.ct <- c()
    for (cl in unique(rlist[[r]]@active.ident)){
      avg.ct <- cbind(avg.ct, Matrix::rowMeans(mtx[,rlist[[r]]@active.ident==cl]))
    }
    dimnames(avg.ct)[[1]] <- c(gene.set,gene.set)
    dimnames(avg.ct)[[2]] <- unique(rlist[[r]]@active.ident)
    avg <- rbind(avg, avg.ct)
  }
  # Keep the first row of every gene ID (drops the duplicated rows).
  avg <- avg[unique(rownames(avg)),]
  #avg <- log10(avg+0.001)
  # Gene x cluster heatmap without clustering or scaling.
  pdf(paste0(r,".nuc.seq.avgHeatmap.pdf"),pointsize=5.75,width=2,height=3)
  heatmap.2(avg,Rowv=F,Colv=F,dendrogram="none",density.info="none",scale="none",trace="none",symbreaks=F,symkey=F,keysize=2,key.title="",col=colorRampPalette(c("darkblue","lightgray","darkred"))(24))
  dev.off()
}
for (r in regions){
  # Plot the stored per-cell scores of all marker sets in a grid with five
  # columns, using Seurat::FeaturePlot()
  png(paste0(r,".nuc.seq.nolegend.png"),res=600,pointsize=5.75,width=11000,height=4500)
  plot <- FeaturePlot(object = rlist[[r]], features = clusters[as.numeric(cell.types),"Name"], ncol=5, pt.size=0.001, cols=c(rgb(1,1,1,0),rgb(0,0,1,1)))
  print(plot)
  dev.off()
}
