setwd("~/brainmap/seurat")
library(Seurat)
library(squash)
library(dplyr)
library(clusterProfiler)
library(DOSE)
library(pheatmap)
library(viridis)

# Load the integrated per-region objects (`rlist` is read from this file) and
# keep them under another name, because `rlist` is rebuilt per species below.
load("integrated.nomito.RData")
rlist.all <- rlist

regions <- c("acc", "cn", "cer")
species <- c("H", "C", "B", "M")

# `pvals` (indexed by region below) is presumably provided by this file.
load("anova.v2.Rdata")

# rlist[[species]][[region]]: cells of one species within one region.
rlist <- setNames(lapply(species, function(s) list()), species)
for (r in regions) {
  cell.ids <- names(rlist.all[[r]]@active.ident)
  keep.genes <- pvals[[r]] < 0.05
  for (s in species) {
    # Cells are assigned to a species by the "<species>_" tag in their name.
    sel <- grep(paste0(s, "_"), cell.ids, value = TRUE)
    print(head(sel))
    print(length(sel))
    obj <- SubsetData(rlist.all[[r]], cells = sel)
    # remove genes that do not survive ANOVA
    obj@assays$RNA@data <- obj@assays$RNA@data[keep.genes, ]
    rlist[[s]][[r]] <- obj
  }
}
rm(rlist.all)

load("cells.nomito.RData")

# Row means of the RNA assay `data` slot over `cell.ids` for species `s` in
# region `r`, log10-transformed with a 0.001 pseudocount and then
# restricted/reordered to `genes`.
pseudobulk_log10 <- function(s, r, cell.ids, genes) {
  avg <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data[, cell.ids])
  log10(avg + 0.001)[genes]
}

# calculate human/macaque ratios for each gene
# ratio[[region]] holds "B" (real bulk), one entry per cell type, and "S"
# (synthetic bulk); each is H minus the mean of C and B.
ratio <- list()
for (r in regions) {
  # real bulk
  ratio[[r]] <- list()
  expr <- as.matrix(read.delim(paste0(r, ".counts.txt"), header = TRUE, row.names = 1))
  in.H <- rownames(expr) %in% rownames(rlist[["H"]][[r]]@assays$RNA@data)
  in.C <- rownames(expr) %in% rownames(rlist[["C"]][[r]]@assays$RNA@data)
  expr <- expr[in.H & in.C, ]
  ratio[[r]][["B"]] <- expr[, "H"] - ((expr[, "C"] + expr[, "B"]) / 2)
}
for (r in regions) {
  # per cell type
  genes <- names(ratio[[r]][["B"]])
  for (i in names(cells[["H"]][[r]])) {
    print(i)
    bulk <- sapply(species, simplify = FALSE, function(s) {
      pseudobulk_log10(s, r, cells[[s]][[r]][[i]], genes)
    })
    ratio[[r]][[i]] <- bulk[["H"]] - ((bulk[["C"]] + bulk[["B"]]) / 2)
  }
}
for (r in regions) {
  # synthetic bulk: all cells of a species pooled across cell types
  genes <- names(ratio[[r]][["B"]])
  bulk <- sapply(species, simplify = FALSE, function(s) {
    pseudobulk_log10(s, r, unlist(cells[[s]][[r]]), genes)
  })
  ratio[[r]][["S"]] <- bulk[["H"]] - ((bulk[["C"]] + bulk[["B"]]) / 2)
}

# Report the number of genes behind every ratio vector.
for (r in regions) {
  print(lapply(ratio[[r]], length))
}

# Heatmaps of the pairwise overlap between gene sets whose ratio exceeds the
# threshold in bulk RNA-seq, in each cell type, and in the synthetic bulk.
# Entry [i, j] is the fraction of set i that is also in set j; the diagonal
# stays NA.

# Cell-type labels per region. They are needed for the heatmap labels here,
# but the script only assigns `ct.names` further down, so they are defined
# before first use (same values as the later assignment).
ct.names <- list(
  acc = c("AC Ex", "AC In", "AC Ast", "AC OD", "AC OPC", "AC MG"),
  cn = c("CN Neu", "CN OD", "CN Ast", "CN In", "CN OPC", "CN MG"),
  cer = c("CB Neu", "CB In", "CB Ast", "CB OD")
)

ratio.cut <- log10(10)

pdf("overlap_human-spec_genes.nomito.pdf")
for (r in regions) {
  # One logical mask per ratio vector, computed once instead of per pair.
  above <- lapply(ratio[[r]], function(x) x > ratio.cut)
  n.above <- unlist(lapply(above, sum))
  # Labels are positional: bulk first, then the cell types, then synthetic bulk.
  lab <- paste(c("RNAseq", ct.names[[r]], "Synth"), n.above, sep = ", n=")
  overlap <- matrix(NA, length(above), length(above), dimnames = list(lab, lab))
  for (i in seq_along(above)) {
    for (j in seq_along(above)) {
      if (i != j) {
        overlap[i, j] <- sum(above[[i]] & above[[j]]) / n.above[[i]]
      }
    }
    print(i)
    print(n.above[[i]])
  }
  pheatmap(overlap,
           cluster_rows = FALSE,
           cluster_cols = FALSE,
           show_rownames = TRUE,
           main = r)
}
dev.off()


# Reload the integrated objects: from here on `rlist` is indexed by region
# only (rlist[[r]]), replacing the per-species list built earlier.
load("integrated.nomito.RData")

# Differential expression per region and cluster: H cells versus the pooled
# C and B cells, Wilcoxon test on the RNA assay `data` matrix.
de <- list()
for (r in regions) {
  de[[r]] <- list()
  ident <- rlist[[r]]@active.ident
  expr.data <- rlist[[r]]@assays$RNA@data
  for (i in unique(ident)) {
    ident.i <- names(ident)[ident == i]
    H <- grep("H_", ident.i, value = TRUE)
    C <- c(grep("C_", ident.i, value = TRUE), grep("B_", ident.i, value = TRUE))
    de[[r]][[i]] <- FindMarkers(
      object = expr.data,
      cells.1 = H,
      cells.2 = C,
      test.use = "wilcox",
      min.pct = 0.1,
      logfc.threshold = log(2)
    )
  }
}
# Disabled: the same comparison on all cells of a region (synthetic bulk).
# for (r in regions){
#   ident <- names(rlist[[r]]@active.ident)
#   H <- ident[grep(paste0("H","_"),ident)]
#   C <- c(ident[grep(paste0("C","_"),ident)], ident[grep(paste0("B","_"),ident)])
#   de[[r]][["S"]] <- FindMarkers(object = rlist[[r]]@assays$RNA@data, cells.1=H, cells.2=C, test.use = "wilcox", min.pct = 0.1, logfc.threshold=log(2))
# }
save(de, file = "de.nomito.RData")

# Group size for the balanced subsampling: the smallest cell count over all
# region/cell-type groups, considering H on its own and C + B combined.
per.species <- lapply(cells[c("H", "C", "B")], function(by.region) {
  unlist(lapply(by.region, lengths))
})
balance <- min(per.species[["H"]], per.species[["C"]] + per.species[["B"]])
balance

# Repeated differential expression on balanced random subsamples.
# For every region and cluster, `nsub` times: draw `balance` H cells and
# `balance` cells from the pooled C and B cells (without replacement) and run
# the same Wilcoxon test as for `de`. Results are stored as
# de.sub[[region]][[cluster]][[run]] and saved to disk.
# NOTE(review): no random seed is set in this section, so the subsamples
# differ between runs unless a seed is set elsewhere - confirm.
nsub <- 100
de.sub <- list()
for (r in regions){
  de.sub[[r]] <- list()
  ident <- rlist[[r]]@active.ident
  for (i in unique(ident)){
    print(paste(r,i))
    # Cell names of this cluster, split by the species tag in the name.
    ident.i <- names(ident)[ident==i]
    H <- ident.i[grep(paste0("H","_"),ident.i)]
    C <- c(ident.i[grep(paste0("C","_"),ident.i)], ident.i[grep(paste0("B","_"),ident.i)])
    de.sub[[r]][[i]] <- list()
    for (b in 1:nsub){
      print(b)
      # sample() fails if a group holds fewer than `balance` cells.
      H.b <- sample(H,balance,replace=F)
      C.b <- sample(C,balance,replace=F)
      de.sub[[r]][[i]][[b]] <- FindMarkers(object = rlist[[r]]@assays$RNA@data, cells.1=H.b, cells.2=C.b, test.use = "wilcox", min.pct = 0.1, logfc.threshold=log(2))
    }
  }
}
save(de.sub,file="de.sub.nomito.RData")

# Bulk RNA-seq results: `HC.pval` and `HC.logfc` are read from this file.
load("HC.RData")
# Benjamini-Hochberg adjustment within each entry of HC.pval.
HC.pval <- lapply(HC.pval, p.adjust, method = "BH")
lapply(HC.pval, function(x) sum(x < 0.05))

# Entries of HC.pval / HC.logfc that correspond to the regions used here.
hc.labels <- c(
  acc = "11 Cingulate Anterior (BA24)",
  cn = "32 Caudate",
  cer = "30 Cerebellar Grey Matter"
)
pval <- list()
for (key in names(hc.labels)) {
  pval[[key]] <- HC.pval[[hc.labels[[key]], exact = FALSE]]
}
rnaseq <- list()
for (key in names(hc.labels)) {
  rnaseq[[key]] <- HC.logfc[[hc.labels[[key]], exact = FALSE]]
}

# Display names of the cell types per region. Elsewhere in the script they
# are looked up as ct.names[[region]][as.numeric(cluster) + 1].
ct.names <- list(
  acc = c("AC Ex", "AC In", "AC Ast", "AC OD", "AC OPC", "AC MG"),
  cn = c("CN Neu", "CN OD", "CN Ast", "CN In", "CN OPC", "CN MG"),
  cer = c("CB Neu", "CB In", "CB Ast", "CB OD")
)

# Scatter plots of the bulk RNA-seq ratio against each cell type's ratio.
# Red: genes significant in bulk RNA-seq; blue circles: genes significant in
# the cell type. Dashed lines mark a two-fold difference on both axes.
load("de.nomito.RData")
pdf("deconvolution_for_paper.nomito.pdf", width = 7.5, height = 17)
par(mfcol = c(6, 3))
fc.cut <- log10(2)
for (r in regions) {
  bulk.sig <- rnaseq[[r]][pval[[r]] < 0.05]
  rnaseq.r <- names(bulk.sig[abs(bulk.sig) > fc.cut])
  bulk.ratio <- ratio[[r]][["B"]]
  clusters <- names(de[[r]])
  # Visit the clusters in numeric order.
  for (i in clusters[order(as.numeric(clusters))]) {
    markers <- de[[r]][[i]]
    print(paste(r, i))
    de.i <- rownames(markers)[markers[, "p_val_adj"] < 0.05]

    ovl <- de.i[de.i %in% rnaseq.r]
    ct.ratio <- ratio[[r]][[i]]
    corr <- round(cor(bulk.ratio, ct.ratio, method = "s"), digits = 2)
    plot(
      bulk.ratio, ct.ratio,
      xlab = "RNA-seq",
      ylab = ct.names[[r]][as.numeric(i) + 1],
      pch = ".",
      main = c(paste("Spearman's rho =", corr), paste("Overlap:", length(ovl), "genes"))
    )
    points(bulk.ratio[rnaseq.r], ct.ratio[rnaseq.r], pch = ".", col = "red")
    points(bulk.ratio[de.i], ct.ratio[de.i], col = "blue", cex = 0.5, lwd = 0.5)
    #ovl.symbol <- bitr(ovl, fromType="ENSEMBL", toType="SYMBOL", OrgDb="org.Hs.eg.db")$SYMBOL
    #try(text(ratio[[r]][["B"]][ovl], ratio[[r]][[i]][ovl], labels=ovl.symbol, cex=0.4, col="blue", pos=3, offset=0.1))
    abline(h = c(fc.cut, -1 * fc.cut), lty = 2, lwd = 0.5)
    abline(v = c(fc.cut, -1 * fc.cut), lty = 2, lwd = 0.5)
  }
}
dev.off()

# For every region:
#  * pie[[r]]: two-column character matrix (gene, cell type) listing the
#    bulk RNA-seq differences that are also significant in a cell type;
#  * a heatmap of overlaps, in percent of the bulk RNA-seq differences.
#    Row/column "RNAseq" holds each cell type's overlap with bulk; entry
#    [j, i] is the share of bulk differences found in both cell types i and j.
#    The diagonal stays NA.
load("de.nomito.RData")
pie <- list()
pdf("deconvolution_pie.nomito.pdf", width = 5, height = 5)
for (r in regions) {
  pie[[r]] <- c()
  lab <- c("RNAseq", ct.names[[r]])
  overlap <- matrix(NA, length(de[[r]]) + 1, length(de[[r]]) + 1,
                    dimnames = list(lab, lab))
  rnaseq.r <- rnaseq[[r]][pval[[r]] < 0.05]
  rnaseq.r <- names(rnaseq.r[abs(rnaseq.r) > log10(2)])
  # Significant genes per cluster, computed once instead of once per pair.
  sig <- lapply(de[[r]], function(d) rownames(d)[d[, "p_val_adj"] < 0.05])
  for (i in names(de[[r]])) {
    ovl.i <- sig[[i]][sig[[i]] %in% rnaseq.r]
    name.i <- ct.names[[r]][as.numeric(i) + 1]
    if (length(ovl.i) > 0) {
      pie[[r]] <- rbind(pie[[r]], cbind(ovl.i, name.i))
    }
    pct.i <- length(ovl.i) / length(rnaseq.r) * 100
    # Fill only this cell type's cell of the "RNAseq" column. Assigning to the
    # whole column would overwrite the values of all other cell types.
    overlap[name.i, "RNAseq"] <- pct.i
    overlap["RNAseq", name.i] <- pct.i
    for (j in setdiff(names(de[[r]]), i)) {
      name.j <- ct.names[[r]][as.numeric(j) + 1]
      overlap[name.j, name.i] <- sum(sig[[j]] %in% ovl.i) / length(rnaseq.r) * 100
    }
  }
  pheatmap(overlap,
           cluster_rows = FALSE,
           cluster_cols = FALSE,
           show_rownames = TRUE,
           display_numbers = TRUE,
           number_format = "%.2f",
           main = r)
}
dev.off()

# Gene x cell-type incidence tables of the bulk differences confirmed in
# single-nucleus data (built from `pie`).
#  * tabs.ens[[r]]: rows named by the gene IDs stored in `pie`;
#  * tabs[[r]]: same table with rows renamed to gene symbols where a mapping
#    exists.
tabs <- list()
tabs.ens <- list()
for (r in regions) {
  tab <- table(pie[[r]][, 1], pie[[r]][, 2])
  tabs.ens[[r]] <- tab

  # Map all IDs in one call. A failed lookup must not end up as an error
  # message or NA in the row names, so unmapped genes keep their original ID.
  ids <- rownames(tab)
  id.map <- tryCatch(
    bitr(ids, fromType = "ENSEMBL", toType = "SYMBOL", OrgDb = "org.Hs.eg.db"),
    error = function(e) {
      warning("ENSEMBL to SYMBOL mapping failed for ", r, ": ",
              conditionMessage(e), call. = FALSE)
      NULL
    }
  )
  if (is.null(id.map)) {
    symbols <- ids
  } else {
    # match() picks the first symbol when an ID maps to several.
    symbols <- id.map$SYMBOL[match(ids, id.map$ENSEMBL)]
    symbols[is.na(symbols)] <- ids[is.na(symbols)]
  }
  dimnames(tab)[[1]] <- symbols
  tabs[[r]] <- tab
}

# Scatter plots of the bulk ratio against each cell type's ratio.
# Light gray: all genes; dark gray: confirmed bulk differences found in more
# than one cell type; red: differences found only in the plotted cell type.
pdf("pie_scatter.nomito.pdf", width = 5, height = 5)
for (r in regions) {
  tab <- tabs.ens[[r]]
  n.ct <- rowSums(tab)
  bulk.ratio <- ratio[[r]][["B"]]
  for (i in names(de[[r]])) {
    print(i)
    ct.ratio <- ratio[[r]][[i]]
    corr <- round(cor(bulk.ratio, ct.ratio, method = "s"), digits = 2)
    ct <- ct.names[[r]][as.numeric(i) + 1]
    plot(bulk.ratio, ct.ratio, xlab = "RNA-seq", ylab = ct, pch = 18,
         col = "lightgray", main = corr)
    sel <- rownames(tab)[n.ct > 1]
    points(bulk.ratio[sel], ct.ratio[sel], pch = 18, col = "dimgray")
    # A cell type without any confirmed difference has no column in the
    # table; indexing it would stop with "subscript out of bounds".
    if (ct %in% colnames(tab)) {
      sel <- rownames(tab)[n.ct == 1 & tab[, ct] == 1]
    } else {
      sel <- character(0)
    }
    points(bulk.ratio[sel], ct.ratio[sel], pch = 18, col = "red")
  }
}
dev.off()

# Pie chart per region: share (in percent of the significant bulk RNA-seq
# genes) of differences confirmed in exactly one cell type, in several cell
# types ("universal"), and not confirmed at all ("RNA-seq").
# bars[[r]] keeps the plotted percentages.
pdf("pie.nomito.pdf", width = 9, height = 3)
par(mfrow = c(1, 3))
bars <- list()
for (r in regions) {
  # NOTE(review): n counts all genes with adjusted p < 0.05, whereas the
  # overlaps in `pie` were computed on the subset that also passes the
  # two-fold cut-off - confirm this denominator is intended.
  n <- length(rnaseq[[r]][pval[[r]] < 0.05])
  tab <- tabs[[r]]
  n.ct <- rowSums(tab)
  # drop = FALSE keeps a table even when a single gene is cell-type specific.
  specific <- colSums(tab[n.ct == 1, , drop = FALSE]) / n * 100
  # Name the universal share before dropping zero entries; otherwise a zero
  # universal share would leave the last cell type labelled "universal".
  ctypes <- c(specific, universal = sum(n.ct > 1) / n * 100)
  ctypes <- ctypes[ctypes > 0]
  ctypes <- c("RNA-seq" = 100 - sum(ctypes), ctypes)
  pie(ctypes, main = c(r, n))
  bars[[r]] <- ctypes
}
dev.off()

# Two bar plots per region: the number of differences seen in both snRNA-seq
# and bulk RNA-seq, and the number seen in snRNA-seq only.
pdf("new_differences.nomito.pdf", width = 6, height = 5)
par(mfrow = c(1, 2), las = 1)
newdiff <- list()
for (r in regions) {
  for (i in names(de[[r]])) {
    markers <- de[[r]][[i]]
    print(paste(r, i))
    de.i <- rownames(markers)[markers[, "p_val_adj"] < 0.05]
    # Pool the significant genes of all clusters of the region.
    newdiff[[r]] <- c(newdiff[[r]], de.i)
  }
}
n <- vapply(tabs, nrow, integer(1))
print(n)
b <- barplot(n, ylab = "Number of differences (snRNA-seq and RNA-seq)")
text(b, n, n, pos = 1)

# Count each gene once per region, then subtract the confirmed ones.
newdiff <- lapply(newdiff, unique)
n <- lengths(newdiff) - vapply(tabs, nrow, integer(1))
b <- barplot(n, ylab = "Number of differences (snRNA-seq not RNA-seq)")
text(b, n, n, pos = 1)
dev.off()

# Differences found by snRNA-seq but not by bulk RNA-seq, based on the
# subsampled tests: a gene counts for a cluster when it is significant in at
# least 90% of the `nsub` runs and is not among the confirmed bulk
# differences. Genes are then classified as specific to one cell type or
# "universal" (several cell types) and plotted as percentages per region.
load("de.sub.nomito.RData")
nsub <- 100
pdf("new_differences_sub.nomito.pdf", width = 3, height = 5)
newdiff <- list()
newdiff.ct <- list()
for (r in regions) {
  newdiff[[r]] <- list()
  newdiff.ct[[r]] <- list()
  # Pool the significant genes of all runs per cluster.
  for (i in names(de.sub[[r]])) {
    for (b in seq_len(nsub)) {
      de.i <- de.sub[[r]][[i]][[b]]
      de.i <- rownames(de.i)[de.i[, "p_val_adj"] < 0.05]
      newdiff[[r]][[i]] <- c(newdiff[[r]][[i]], de.i)
    }
  }
  # Number of runs in which each gene was significant.
  newdiff[[r]] <- lapply(newdiff[[r]], table)
  newdiff[[r]] <- lapply(newdiff[[r]], function(x) x[!(names(x) %in% rownames(tabs.ens[[r]]))])
  newdiff[[r]] <- lapply(newdiff[[r]], function(x) x[x >= nsub * 0.9])
  # Invert to gene -> clusters in which it passed.
  for (i in names(newdiff[[r]])) {
    for (gene in names(newdiff[[r]][[i]])) {
      newdiff.ct[[r]][[gene]] <- c(newdiff.ct[[r]][[gene]], i)
    }
  }
}
# simplify = FALSE: always a list per region, even when the regions happen to
# have the same number of categories.
newdiff.ct <- sapply(names(newdiff.ct), simplify = FALSE, function(x) {
  table(unlist(lapply(newdiff.ct[[x]], function(y) {
    if (length(y) > 1) {
      "universal"
    } else {
      ct.names[[x]][as.numeric(y) + 1]
    }
  })))
})
# Reorder the categories. The names must match the labels in `ct.names`;
# labels that do not occur there select nothing and yield NA.
newdiff.ct[["acc"]] <- newdiff.ct[["acc"]][c("universal", "AC In", "AC Ex", "AC Ast", "AC OD", "AC OPC", "AC MG")]
newdiff.ct[["cn"]] <- newdiff.ct[["cn"]][c("universal", "CN In", "CN Neu", "CN Ast", "CN OD", "CN OPC", "CN MG")]
# Two trailing zeros pad this region to the same length as the others.
newdiff.ct[["cer"]] <- c(newdiff.ct[["cer"]][c("universal", "CB In", "CB Neu", "CB Ast", "CB OD")], 0, 0)
newdiff.ct
newdiff.ct <- sapply(regions, simplify = TRUE, function(x) newdiff.ct[[x]] / sum(newdiff.ct[[x]], na.rm = TRUE) * 100)
write.table(newdiff.ct, "new_differences_sub.nomito.txt")
# Categories absent from a region are NA; plot them as zero.
newdiff.ct[!is.finite(newdiff.ct)] <- 0
barplot(newdiff.ct, ylab = "Percentage of differences (snRNA-seq not RNA-seq)", col = c("dimgray", "#B02325", "#CE6928", "#208C43", "#9F5DA5", "#F16EAA", "#4A87C6"))
dev.off()



##### functional enrichment analysis of new differences #####
# Classify every gene that is significant in snRNA-seq but not among the
# confirmed bulk differences: newdiff.ct[[region]] becomes a character vector
# named by gene, holding the cell-type label or "universal" when the gene is
# significant in more than one cluster.
load("de.nomito.RData")
newdiff <- list()
newdiff.ct <- list()
for (r in regions) {
  newdiff.ct[[r]] <- list()
  # Significant genes per cluster, minus the confirmed bulk differences.
  newdiff[[r]] <- lapply(de[[r]], function(d) {
    de.i <- rownames(d)[d[, "p_val_adj"] < 0.05]
    de.i[!(de.i %in% rownames(tabs.ens[[r]]))]
  })
  # Invert to gene -> clusters in which it is significant.
  for (i in names(newdiff[[r]])) {
    for (gene in newdiff[[r]][[i]]) {
      newdiff.ct[[r]][[gene]] <- c(newdiff.ct[[r]][[gene]], i)
    }
  }
}
# simplify = FALSE keeps one named vector per region. Without it the result
# collapses to a matrix whenever all regions hold the same number of genes,
# and the later newdiff.ct[[r]] lookups fail.
newdiff.ct <- sapply(names(newdiff.ct), simplify = FALSE, function(x) {
  unlist(lapply(newdiff.ct[[x]], function(y) {
    if (length(y) > 1) {
      "universal"
    } else {
      ct.names[[x]][as.numeric(y) + 1]
    }
  }))
})

# Translate gene IDs to Entrez IDs via clusterProfiler::bitr.
to_entrez <- function(ids) {
  bitr(ids, fromType = "ENSEMBL", toType = "ENTREZID", OrgDb = "org.Hs.eg.db")$ENTREZID
}

# Background per element of `rlist`: all genes of its RNA assay `data` matrix.
bg <- lapply(rlist, function(obj) to_entrez(rownames(obj@assays$RNA@data)))

# GO (biological process) enrichment of the new differences, one gene
# cluster per cell-type class, tested against the region's background.
go <- list()
for (r in regions) {
  by.class <- tapply(names(newdiff.ct[[r]]), newdiff.ct[[r]], function(x) x, simplify = FALSE)
  genelist <- lapply(by.class, to_entrez)
  print(is.array(genelist))
  go[[r]] <- compareCluster(
    genelist,
    fun = "enrichGO",
    ont = "BP",
    OrgDb = "org.Hs.eg.db",
    universe = bg[[r]],
    pvalueCutoff = 0.05,
    pAdjustMethod = "BH"
  )
}

# Short region labels used as plot titles.
reg.names <- list(acc = "AC", cn = "CN", cer = "CB")

# One GO dot plot per region, stored as plots[[region]][["GO"]].
plots <- lapply(setNames(regions, regions), function(r) {
  list(GO = dotplot(go[[r]], showCategory = 20, title = reg.names[[r]]))
})

library(cowplot)
# Stack the three dot plots; the first panel gets twice the height.
pdf("new_differences_enrich_0.05.nomito.pdf", width = 11, height = 15)
plot_grid(
  plots[["acc"]][["GO"]],
  plots[["cn"]][["GO"]],
  plots[["cer"]][["GO"]],
  nrow = 3,
  rel_heights = c(2, 1, 1)
)
dev.off()

##########

# Bar-plot version of the pie charts stored in `bars`.
# Left: per region, the unconfirmed ("RNA-seq") share versus the rest.
# Right: the confirmed share split into "universal" and the cell types.
pdf("pie_bars.nomito.pdf", width = 9, height = 4)
par(mfrow = c(1, 2), las = 1)
summary <- vapply(bars, function(x) x["RNA-seq"], numeric(1))
names(summary) <- c("AC", "CN", "CB")
summary <- rbind(summary, 100 - summary)
summary
barplot(summary, col = c("lightgray", "#800080"), ylab = "Percentage of changes within RNA-seq data")

summary <- lapply(bars, function(x) x[names(x) != "RNA-seq"])
summary
# Common category order across regions.
ct.order <- list(
  acc = c("universal", "AC In", "AC Ex", "AC Ast", "AC OD", "AC OPC", "AC MG"),
  cn = c("universal", "CN In", "CN Neu", "CN Ast", "CN OD", "CN OPC", "CN MG"),
  cer = c("universal", "CB In", "CB Neu", "CB Ast", "CB OD")
)
for (key in names(ct.order)) {
  summary[[key]] <- summary[[key]][ct.order[[key]]]
}
# Pad with two zeros so this region has as many entries as the others.
summary[["cer"]] <- c(summary[["cer"]], 0, 0)
summary
summary <- as.matrix(as.data.frame(summary))
colnames(summary) <- c("AC", "CN", "CB")
barplot(
  summary,
  ylim = c(0, 10),
  ylab = "Percentage of changes within RNA-seq data",
  col = c("dimgray", "#B02325", "#CE6928", "#208C43", "#9F5DA5", "#F16EAA", "#4A87C6")
)
dev.off()

library(VennDiagram)
# One Venn diagram per region: significant bulk RNA-seq genes versus
# snRNA-seq differences (new + confirmed), intersecting in the confirmed ones.
pdf("pie_venn.nomito.pdf", width = 4, height = 4)
par(mfrow = c(1, 3), las = 1)
for (r in regions) {
  bulk <- length(rnaseq[[r]][pval[[r]] < 0.05])
  cross <- nrow(tabs[[r]])
  new <- length(newdiff.ct[[r]])
  # Each diagram is drawn on its own page.
  grid.newpage()
  draw.pairwise.venn(area1 = bulk, area2 = new + cross, cross.area = cross)
}
dev.off()

# Box plots of the absolute bulk RNA-seq difference (H versus the mean of C
# and B) for confirmed differences, split into genes found in exactly one
# cell type (expr.ct) and in several cell types (expr.un), followed by a
# one-sided t-test per region (universal greater than cell-type specific).
pdf("pie_hists.nomito.pdf", width = 5, height = 5)
expr.un <- list()
expr.ct <- list()
for (r in regions) {
  print(r)
  tab <- table(pie[[r]][, 1], pie[[r]][, 2])
  expr <- as.matrix(read.delim(paste0(r, ".counts.txt"), header = TRUE, row.names = 1))
  # drop = FALSE and explicit names keep the gene IDs even if one gene remains.
  expr <- expr[rownames(expr) %in% rownames(tab), , drop = FALSE]
  expr <- setNames(abs(expr[, "H"] - ((expr[, "C"] + expr[, "B"]) / 2)), rownames(expr))
  n.ct <- rowSums(tab)
  # Index the row names directly: subsetting the table first would drop to a
  # plain vector (and lose the row names) when exactly one gene matches.
  ctypes <- rownames(tab)[n.ct == 1]
  print(length(ctypes))
  universal <- rownames(tab)[n.ct > 1]
  print(length(universal))
  expr.ct[[r]] <- expr[ctypes]
  expr.un[[r]] <- expr[universal]
}
boxplot(expr.ct, outline = FALSE, boxwex = 0.4, yaxt = "n", col = "lightgray", horizontal = TRUE, notch = TRUE, ylim = c(0.3, 1.7), xlim = c(0.5, 4), xlab = "log10 Homo/Pan expression difference (RNA-seq)")
#stripchart(expr.ct,pch=16,add=T)
boxplot(expr.un, outline = FALSE, boxwex = 0.4, yaxt = "n", at = 1:3 + 0.4, add = TRUE, col = "dimgray", horizontal = TRUE, notch = TRUE)
#stripchart(expr.un,at=1:3+0.4,pch=16,add=T)
axis(2, at = 1:3 + 0.2, labels = c("AC", "CN", "CB"), las = 1)
legend("top", c("cell-type sp.", "universal diff."), fill = c("lightgray", "dimgray"), bty = "n", ncol = 2)
sapply(names(expr.un), function(x) t.test(expr.un[[x]], expr.ct[[x]], alternative = "greater")$p.value)
dev.off()

