setwd("~/brainmap/seurat")
library(Seurat)
library(squash)
library(dplyr)
library(clusterProfiler)
library(pheatmap)
library(viridis)

#load("markers.rlist.ALL.RData")
load("integrated.nomito.RData")
rlist.all <- rlist

regions <- c("acc","cn","cer")
species <- c("H","C","B","M")

rlist <- list()
for (s in species){
  rlist[[s]] <- list()
}
for (r in regions){
  for (s in species){
    sel = names(rlist.all[[r]]@active.ident)[rlist.all[[r]]$orig.ident==s]
    print(length(sel))
    rlist[[s]][[r]] <- SubsetData(rlist.all[[r]], cells = sel)
  }
}
rm(rlist.all)

pbmc.markers <- list()
for (s in species){
  print(s)
  pbmc.markers[[s]] <- list()
  for (r in regions){
    print(r)
    pbmc.markers[[s]][[r]] <- FindAllMarkers(object = rlist[[s]][[r]], assay="RNA", only.pos = TRUE, test.use = "wilcox", min.pct = 0, return.thresh = 1)
  }
}

save(pbmc.markers,file="pbmc.markers.nomito.byspec.RData")

load("ct.ids.nomito.RData")

common.markers <- list()
for (r in regions){
  common.markers[[r]] <- list()
  for (ct in names(ct.ids[[r]])){
    print(c(r,ct))
    common.markers[[r]][[ct]] <- matrix(NA, 4, 4)
    dimnames(common.markers[[r]][[ct]])[[1]] <- species
    dimnames(common.markers[[r]][[ct]])[[2]] <- species
    for (s1 in species){
      for (s2 in species){
        markers.s1 <- pbmc.markers[[s1]][[r]]$gene[pbmc.markers[[s1]][[r]]$cluster==as.numeric(ct) & pbmc.markers[[s1]][[r]]$p_val_adj<0.00001]
        markers.s2 <- pbmc.markers[[s2]][[r]]$gene[pbmc.markers[[s2]][[r]]$cluster==as.numeric(ct) & pbmc.markers[[s2]][[r]]$p_val_adj<0.00001]
        common.markers[[r]][[ct]][s1,s2] <- sum(markers.s1 %in% markers.s2)/length(unique(c(markers.s1,markers.s2)))
      }
    }
    print(common.markers[[r]][[ct]])
  }
}

cells <- list()
for (s in species){
  cells[[s]] <- list()
}
for (r in regions){
  for (s in species){
    print(s)
    cells[[s]][[r]] <- list()
    tab <- table(rlist[[s]][[r]]@active.ident)
    print(tab)
    for (i in names(tab)){
      cells.i <- names(rlist[[s]][[r]]@active.ident)[rlist[[s]][[r]]@active.ident==as.numeric(i)]
      cells[[s]][[r]][[as.character(i)]] <- cells.i
    }
  }
}

load("ct.ids.nomito.RData")

ct.names <- list()
for (r in regions){
  ct.names[[r]] <- ct.ids[[r]][order(as.numeric(names(ct.ids[[r]])))]
}
ct.names[["acc"]] <- paste("AC", ct.names[["acc"]])
ct.names[["cn"]] <- paste("CN", ct.names[["cn"]])
ct.names[["cer"]] <- paste("CB", ct.names[["cer"]])

save(cells,ct.names,file="cells.mapsep.RData")

ncells <- lapply(cells, function (x) lapply(x, function (y) lapply(y,length)))
ncells

write.table(
  as.data.frame(lapply(ncells, function (x) unlist(as.data.frame(x)))), file = "ncells.txt"
)

ncells <- min(unlist( ncells ))
ncells

ncells <- 50

# number of genes expressed in multiple cell types
for (r in regions){
  out <- c()
  for (i in names(cells[["H"]][[r]])){
    cells.i <- cells[["H"]][[r]][[i]]
    out <- cbind(out, Matrix::rowMeans(rlist[["H"]][[r]]@assays$RNA@data[,cells.i]))
  }
  dim(out)
  quantile(out,0:10/10)
  n <- apply(out,1, function (x) sum(x>0.005))
  hist(n)
  print(sum(n>1)/nrow(out))
}


##### Human-specificity in each cell cluster #####

n <- 100
balance <- ncells
HS <- list()
PS <- list()
genes <- list()
for (r in regions){
  print(r)
  HS[[r]] <- list()
  PS[[r]] <- list()
  genes[[r]] <- list()
  all.med <- list()
  for (s in species){
    all.med[[s]] <- median(log10(Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data)+0.001),na.rm=T)
    print(all.med[[s]])
  }
  # bootstrapping cells
  for (i in names(cells[["H"]][[r]])){
    print(i)
    HS[[r]][[i]] <- list(C=list(),B=list())
    PS[[r]][[i]] <- list(C=list(),B=list())
    genes[[r]][[i]] <- list(C=list(),B=list())
    for (th in 2){
      key <- as.character(th)
      HS[[r]][[i]][["C"]][[key]] <- c()
      HS[[r]][[i]][["B"]][[key]] <- c()
      PS[[r]][[i]][["C"]][[key]] <- c()
      PS[[r]][[i]][["B"]][[key]] <- c()
      genes[[r]][[i]][["C"]][[key]] <- c()
      genes[[r]][[i]][["B"]][[key]] <- c()
    }
    for (b in 1:n){
      bulk <- list()
      for (s in species){
        cells.i <- cells[[s]][[r]][[i]]
        boot <- sample(cells.i,balance,replace=T)
        x <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data[,c(boot,boot)])
        #x <- x/(10^all.med[[s]])
        x <- log10(x+0.001)
        bulk[[s]] <- x
      }
      bulk.HCM <- log10( abs(bulk[["H"]]-bulk[["M"]]) + 0.001) - log10( abs(bulk[["C"]]-bulk[["M"]]) + 0.001)
      bulk.HBM <- log10( abs(bulk[["H"]]-bulk[["M"]]) + 0.001) - log10( abs(bulk[["B"]]-bulk[["M"]]) + 0.001)
      for (th in 2){
        key <- as.character(th)
        HS[[r]][[i]][["C"]][[key]] <- c(HS[[r]][[i]][["C"]][[key]], sum(bulk.HCM>log10(th)))
        PS[[r]][[i]][["C"]][[key]] <- c(PS[[r]][[i]][["C"]][[key]], sum(bulk.HCM<log10(th)*(-1)))
        HS[[r]][[i]][["B"]][[key]] <- c(HS[[r]][[i]][["B"]][[key]], sum(bulk.HBM>log10(th)))
        PS[[r]][[i]][["B"]][[key]] <- c(PS[[r]][[i]][["B"]][[key]], sum(bulk.HBM<log10(th)*(-1)))
        genes[[r]][[i]][["C"]][[key]] <- rbind(genes[[r]][[i]][["C"]][[key]], bulk.HCM)
        genes[[r]][[i]][["B"]][[key]] <- rbind(genes[[r]][[i]][["B"]][[key]], bulk.HBM)
      }
    }
  }
}
save(HS,PS,genes,file="hspec.mapsep.Rdata")

load("hspec.mapsep.Rdata")
# for(r in regions){
#   for (i in 1:length(ct.names[[r]])){
#     avg <- (colMeans(genes[[r]][[i]][["C"]][["2"]])+colMeans(genes[[r]][[i]][["B"]][["2"]]))/2
#     write.table(avg,file=paste0("gene_listsHS/",ct.names[[r]][[i]],".txt"),col.names=F,quote=F,sep="\t")
#   }
# }

pdf(paste0("hspec.mapsep",ncells,".boxplot.integrated.pdf"),width=10,height=5.6)
par(mfrow=c(1,2))
for (r in regions){
  for (th in 2){
    th <- as.character(th)
    B <- lapply(HS[[r]],function (x) x[["B"]][[th]])
    C <- lapply(HS[[r]],function (x) x[["C"]][[th]])
    boxplot(C,names=ct.names[[r]],las=2,border="blue",notch=T,outpch=".",ylim=c(0,3500),main=paste0(r,", th=",th),xlab="human-specific",ylab="# specific genes",boxwex=0.4)
    boxplot(B,names=ct.names[[r]],las=2,border="#800080",notch=T,outpch=".",add=T,at=1:length(names(B))+0.4,boxwex=0.4)
    B <- lapply(PS[[r]],function (x) x[["B"]][[th]])
    C <- lapply(PS[[r]],function (x) x[["C"]][[th]])
    boxplot(C,names=ct.names[[r]],las=2,border="blue",notch=T,outpch=".",ylim=c(0,3500),main=paste0(r,", th=",th),xlab="pan-specific",ylab="# specific genes",boxwex=0.4)
    boxplot(B,names=ct.names[[r]],las=2,border="#800080",notch=T,outpch=".",add=T,at=1:length(names(B))+0.4,boxwex=0.4)
  }
}
dev.off()
pdf(paste0("hspec.mapsep",ncells,".relative.integrated.pdf"),width=10,height=5.6)
par(mfrow=c(1,2))
for (r in regions){
  for (th in 2){
    th <- as.character(th)
    B <- lapply(HS[[r]],function (x) x[["B"]][[th]])
    C <- lapply(HS[[r]],function (x) x[["C"]][[th]])
    B <- lapply(B,function (x) log2(x/median(B$bP)))
    C <- lapply(C,function (x) log2(x/median(C$bP)))
    boxplot(C,names=ct.names[[r]],las=2,border="blue",notch=T,outpch=".",ylim=c(-1.5,1.5),main=paste0(r,", th=",th),xlab="Human-specific",ylab="Evolutionary rate",boxwex=0.4)
    boxplot(B,names=ct.names[[r]],las=2,border="#800080",notch=T,outpch=".",add=T,at=1:length(names(B))+0.4,boxwex=0.4)
    abline(h=0,lty=2)
    B <- lapply(PS[[r]],function (x) x[["B"]][[th]])
    C <- lapply(PS[[r]],function (x) x[["C"]][[th]])
    B <- lapply(B,function (x) log2(x/median(B$bP)))
    C <- lapply(C,function (x) log2(x/median(C$bP)))
    boxplot(C,names=ct.names[[r]],las=2,border="blue",notch=T,outpch=".",ylim=c(-1.5,1.5),main=paste0(r,", th=",th),xlab="Pan-specific",ylab="Evolutionary rate",boxwex=0.4)
    boxplot(B,names=ct.names[[r]],las=2,border="#800080",notch=T,outpch=".",add=T,at=1:length(names(B))+0.4,boxwex=0.4)
    abline(h=0,lty=2)
  }
}
dev.off()
pdf(paste0("hspec.mapsep",ncells,".ratio.integrated.pdf"),width=10,height=4.5)
par(mfcol=c(1,3))
for (r in regions){
  for (th in 2){
    th <- as.character(th)
    Bh <- lapply(HS[[r]],function (x) x[["B"]][[th]])
    Ch <- lapply(HS[[r]],function (x) x[["C"]][[th]])
    Bp <- lapply(PS[[r]],function (x) x[["B"]][[th]])
    Cp <- lapply(PS[[r]],function (x) x[["C"]][[th]])
    Cratio <- sapply(names(Ch), simplify=F, function (x) log2(Ch[[x]]+1) - log2(Cp[[x]]+1))
    Bratio <- sapply(names(Bh), simplify=F, function (x) log2(Bh[[x]]+1) - log2(Bp[[x]]+1))
    boxplot(Cratio,names=ct.names[[r]],las=2,border="blue",notch=T,outpch=".",ylim=c(-0.5,1),main=paste0(r,", th=",th),ylab="log2( #hum-spec / #pan-spec )",boxwex=0.4)
    boxplot(Bratio,names=ct.names[[r]],las=2,border="#800080",notch=T,outpch=".",add=T,at=1:length(names(Bp))+0.4,boxwex=0.4)
    abline(h=0,lty=2)
  }
}
dev.off()
pdf(paste0("hspec.mapsep",ncells,".barplot.integrated.pdf"),width=12,height=4)
par(mfrow=c(1,3))
for (r in regions){
  for (th in 2){
    th <- as.character(th)
    B <- lapply(HS[[r]],function (x) x[["B"]][[th]])
    C <- lapply(HS[[r]],function (x) x[["C"]][[th]])
    means <- rbind(unlist(lapply(C,mean)), unlist(lapply(B,mean)))
    sds <- rbind(unlist(lapply(C,sd)), unlist(lapply(B,sd)))
    bars <- barplot(means,col=c("blue","#800080"),beside=T,main=paste(r,th),ylab="# specific genes",ylim=c(-1.2*max(means+sds),1.2*max(means+sds)))
    arrows(bars,means+sds,bars,means-sds,length=0)
    B <- lapply(PS[[r]],function (x) x[["B"]][[th]])
    C <- lapply(PS[[r]],function (x) x[["C"]][[th]])
    means <- rbind(unlist(lapply(C,mean)), unlist(lapply(B,mean)))
    sds <- rbind(unlist(lapply(C,sd)), unlist(lapply(B,sd)))
    bars <- barplot(-1*means,col=c("blue","#800080"),beside=T,add=T)
    arrows(bars,-1*(means+sds),bars,-1*(means-sds),length=0)
  }
}
dev.off()
avg.rate <- c()
pdf(paste0("hspec.mapsep",ncells,".relativeBarplot.integrated.pdf"),width=9,height=4)
par(mfrow=c(1,3),las=2)
for (r in regions){
  for (th in 2){
    th <- as.character(th)
    Bh <- lapply(HS[[r]],function (x) x[["B"]][[th]])
    Ch <- lapply(HS[[r]],function (x) x[["C"]][[th]])
    Bp <- lapply(PS[[r]],function (x) x[["B"]][[th]])
    Cp <- lapply(PS[[r]],function (x) x[["C"]][[th]])
    all <- sapply(names(Bh),simplify=F,function (x) c(Bh[[x]],Ch[[x]],Bp[[x]],Cp[[x]]))
    all[["P"]] <- NULL
    all[["bP"]] <- NULL
    norm <- mean(unlist(all))
    print(norm)
    all <- lapply(all,function (x) log2(x/norm))
    means <- unlist(lapply(all,mean))
    names(means) <- unlist(lapply(strsplit(ct.names[[r]], " "), function (x) x[[2]]))
    avg.rate <- rbind(avg.rate, cbind(ct.names[[r]], means))
    sds <- unlist(lapply(all,sd))
    bars <- barplot(means,border="#4000C0",col="#4000C048",main=r,ylab="Evolutionary rate",ylim=c(-0.6,0.6))
    arrows(bars,means+sds,bars,means-sds,length=0,col="#4000C0")
  }
}
avg.rate
for (r in regions){
  for (th in 2){
    th <- as.character(th)
    B <- lapply(HS[[r]],function (x) x[["B"]][[th]])
    C <- lapply(HS[[r]],function (x) x[["C"]][[th]])
    Bh <- lapply(B,function (x) log2(x/median(B$bP)))
    Ch <- lapply(C,function (x) log2(x/median(C$bP)))
    B <- lapply(PS[[r]],function (x) x[["B"]][[th]])
    C <- lapply(PS[[r]],function (x) x[["C"]][[th]])
    Bp <- lapply(B,function (x) log2(x/median(B$bP)))
    Cp <- lapply(C,function (x) log2(x/median(C$bP)))
    all <- sapply(names(Bh),simplify=F,function (x) c(Bh[[x]],Ch[[x]],Bp[[x]],Cp[[x]]))
    all[["P"]] <- NULL
    all[["bP"]] <- NULL
    bxp <- boxplot(all,border="#4000C0",col="#4000C048",notch=T,outpch=".",ylim=c(-1.5,0.5),main=r,ylab="Evolutionary rate",xaxt="n")
    axis(1,at=1:length(ct.names[[r]]),labels=unlist(lapply(strsplit(ct.names[[r]], " "), function (x) x[[2]])))
    abline(h=0,lty=2)
  }
}
dev.off()

save(avg.rate,file="avg.evol.rate.mapsep.RData")
##########


##### Human-specificity in each cell cluster for human-specific genes from 33 regions #####

ncells <- 50
load("cells.mapsep.RData")
load("hspec.mapsep.Rdata")
load("human-spec_ProtCoding_2.0.pdf.RData")
hs.HB <- lapply(hs.HB,function (x) names(x)[x>log2(2)] )
hs.HC <- lapply(hs.HC,function (x) names(x)[x>log2(2)] )

o <- as.matrix(read.delim("order_EBnames.txt",header=F,row.names=1))[,1]
o

balance <- ncells
th <- 2

HS <- list()
for (br in o){
  HS[[br]] <- list()
}
for (r in regions){
  print(r)
  for (br in o){
    HS[[br]][[r]] <- list()
  }
  all.med <- list()
  for (s in species){
    all.med[[s]] <- median(log10(Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data)+0.001),na.rm=T)
    print(all.med[[s]])
  }
  # bootstrapping cells
  for (i in names(cells[["H"]][[r]])){
    print(i)
    for (br in o){
      HS[[br]][[r]][[i]] <- list(C=list(),B=list())
      HS[[br]][[r]][[i]][["C"]] <- c()
      HS[[br]][[r]][[i]][["B"]] <- c()
    }
    for (b in 1:1000){
      print(b)
      bulk <- list()
      for (s in species){
        cells.i <- cells[[s]][[r]][[i]]
        boot <- sample(cells.i,balance,replace=T)
        x <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data[,boot])
        x <- log10(x+0.001)
        bulk[[s]] <- x
      }
      bulk.HCM <- log10( abs(bulk[["H"]]-bulk[["M"]]) + 0.001) - log10( abs(bulk[["C"]]-bulk[["M"]]) + 0.001)
      bulk.HBM <- log10( abs(bulk[["H"]]-bulk[["M"]]) + 0.001) - log10( abs(bulk[["B"]]-bulk[["M"]]) + 0.001)
      
      for (br in o){
        HS[[br]][[r]][[i]][["C"]] <- c(HS[[br]][[r]][[i]][["C"]], sum(bulk.HCM>log10(th) & names(bulk.HCM) %in% sample(hs.HC[[br]],400)))
        HS[[br]][[r]][[i]][["B"]] <- c(HS[[br]][[r]][[i]][["B"]], sum(bulk.HBM>log10(th) & names(bulk.HBM) %in% sample(hs.HB[[br]],400)))
      }
    }
  }
}

means <- c()
medians <- c()
ratios.allreg <- list()
for (br in o){
  ratios <- list()
  ratios.allreg[[br]] <- list()
  for (r in regions){
    Bh <- lapply(HS[[br]][[r]],function (x) x[["B"]])
    Ch <- lapply(HS[[br]][[r]],function (x) x[["C"]])
    ratios[[r]] <- sapply(names(Ch), simplify=F, function (x) c(Ch[[x]], Bh[[x]]))
  }
  ratios.allreg[[br]][["Neu"]] <- c(ratios[["acc"]][["0"]], ratios[["acc"]][["1"]], ratios[["cn"]][["0"]], ratios[["cn"]][["3"]], ratios[["cer"]][["0"]], ratios[["cer"]][["1"]])
  ratios.allreg[[br]][["Ast"]] <- c(ratios[["acc"]][["2"]], ratios[["cn"]][["2"]], ratios[["cer"]][["2"]])
  ratios.allreg[[br]][["OD"]] <- c(ratios[["acc"]][["3"]], ratios[["cn"]][["1"]], ratios[["cer"]][["3"]])
  ratios.allreg[[br]][["OPC"]] <- c(ratios[["acc"]][["4"]], ratios[["cn"]][["4"]])
  ratios.allreg[[br]][["MG"]] <- c(ratios[["acc"]][["5"]], ratios[["cn"]][["5"]])
  
  means <- rbind(means, unlist(lapply(ratios.allreg[[br]], mean)))
  medians <- rbind(medians, unlist(lapply(ratios.allreg[[br]], median)))
}
dimnames(means)[[1]] <- o
dimnames(means)[[2]] <- names(ratios.allreg[[br]])
dimnames(medians)[[1]] <- o
dimnames(medians)[[2]] <- names(ratios.allreg[[br]])

ratios.allreg <- lapply(ratios.allreg, function (x) x[["Neu"]])
#ratios.allreg <- sapply(rev(o), simplify=F, function (x) ratios.allreg[[x]]-mean(means[,"Neu"]))

hcol <- colorRampPalette(c("#1565C0","white","#C62828"))(94)
hcol <- hcol[c(1:45,50:94)]
#br <- seq(-0.1,0.1,length.out=91)
#br[1] <- -0.2
#br[91] <- 0.2

#means <- apply(means,2, function (x) x-mean(x))

pdf(paste0("hspec.HS.33regions.integrated.mergedCT.mapsep.pdf"),width=5,height=6)
par(mar=c(2,0,0,13.3)+0.5, las=1)
plot(means[rev(o),"Neu"],1:33,pch=16,yaxt="n")
points(medians[rev(o),"Neu"],1:33,pch=16,col="blue")
v <- mean(means[c("30 Cerebellar Grey Matter","32 Caudate","11 Cingulate Anterior (BA24)"),"Neu"])
v
abline(v=v,lty=2)
v <- mean(medians[c("30 Cerebellar Grey Matter","32 Caudate","11 Cingulate Anterior (BA24)"),"Neu"])
v
abline(v=v,lty=2,col="blue")
axis(4,at=1:33,labels = rev(o))
legend("bottomleft",c("mean","median"),col=c("black","blue"),pch=16,bty="n")
par(mar=c(2,0,0,13.3)+0.5, las=1)
boxplot(ratios.allreg,outline=F,range=0.5,at=33:1,horizontal=T,yaxt="n")
abline(v=v,lty=2,col="blue")
points(means[rev(o),"Neu"],1:33,pch=16,col="blue")
axis(4,at=1:33,labels = rev(o))
br <- seq(78,92,length.out=91)
pheatmap(means,
         color=hcol,
         #         breaks=br,
         na_col="lightgray",
         cluster_rows=F,
         cluster_cols=F,
         show_rownames=T)
pheatmap(means[,c("Neu","Neu")],
         color = colorRampPalette(c("#1565C0","white","#C62828"))(90),
         breaks=br,
         na_col="lightgray",
         cluster_rows=F,
         cluster_cols=F,
         show_rownames=T)
dev.off()

pdf(paste0("hspec.HS.33regions.integrated.mapsep.pdf"),width=9,height=5)
par(mfcol=c(1,3))
medians <- c()
for (br in o){
  medians.br <- c()
  for (r in regions){
    Bh <- lapply(HS[[br]][[r]],function (x) x[["B"]])
    Ch <- lapply(HS[[br]][[r]],function (x) x[["C"]])
    ratio <- sapply(names(Ch), simplify=F, function (x) c(Ch[[x]],Bh[[x]]))
    boxplot(ratio,xaxt="n",border="blue",notch=T,outpch=".",main=c(br,r),ylab="#hum-spec genes")
    medians.br <- c(medians.br,unlist(lapply(ratio,mean)))
    abline(h=0,lty=2)
    axis(1,at=1:length(names(Bh)),labels=unlist(lapply(strsplit(ct.names[[r]]," "), function (x) x[[2]])))
  }
  medians <- rbind(medians,medians.br)
}
dimnames(medians)[[1]] <- o
dimnames(medians)[[2]] <- c(ct.names[["acc"]],ct.names[["cn"]],ct.names[["cer"]])
medians
pheatmap(medians,
         na_col="lightgray",
         gaps_col=c(6,12),
         cluster_rows=F,
         cluster_cols=F,
         show_rownames=T)
dev.off()

pdf(paste0("hspec.HS.33regions.integrated.corr.mapsep.pdf"),width=5,height=5)
corr <- cor(medians)
names <- c("AC In","AC Ex","CN Neu","CN In","CB Neu","CB In",
           "AC Ast","CN Ast","CB Ast",
           "AC OD","CN OD","CB OD",
           "AC MG","CN MG",
           "AC OPC","CN OPC")
corr <- corr[names,names]
br <- seq(min(corr),max(corr[corr<1]),length.out=91)
br[1] <- 0
br[91] <- 1
pheatmap(corr,
         color=rev(inferno(90)),
         breaks=br,
         na_col="lightgray",
         gaps_col=c(6,9,12,14),
         gaps_row=c(6,9,12,14),
         cluster_rows=F,
         cluster_cols=F,
         show_rownames=T)
dev.off()

##########

##### Overlap with human-specific genes from 33 regions #####

load("human-spec_ProtCoding_2.0.pdf.RData")
hs.HB <- lapply(hs.HB,function (x) names(x)[x>log2(2)] )
hs.HC <- lapply(hs.HC,function (x) names(x)[x>log2(2)] )

o <- as.matrix(read.delim("order_EBnames.txt",header=F,row.names=1))[,1]
o

load("genes.mapsep.RData")

#n <- min(c(unlist(lapply(hs.HB, length)), unlist(lapply(hs.HC, length))))
n <- 100
jaccC <- c()
jaccB <- c()
for (br in o){
  jaccC.br <- c()
  jaccB.br <- c()
  for (r in regions){
    print(r)
    bootC <- bootstraps[["C"]][[r]]
    bootB <- bootstraps[["B"]][[r]]
    for (i in colnames(bootC)){
      print(i)
      nucseq <- rownames(bootC)[order(bootC[,i],decreasing=T)]
      nucseq <- nucseq[1:n]
      rnaseq <- hs.HC[[br]]
      jacc <- c()
      for(b in 1:100){
        rnaseq.b <- sample(rnaseq,n)
        jacc <- c(jacc, sum(nucseq %in% rnaseq.b)/length(unique(c(nucseq,rnaseq.b))) )
      }
      jaccC.br <- c(jaccC.br, mean(jacc))
      
      nucseq <- rownames(bootB)[order(bootB[,i],decreasing=T)]
      nucseq <- nucseq[1:n]
      rnaseq <- hs.HB[[br]]
      jacc <- c()
      for(b in 1:100){
        rnaseq.b <- sample(rnaseq,n)
        jacc <- c(jacc, sum(nucseq %in% rnaseq.b)/length(unique(c(nucseq,rnaseq.b))) )
      }
      jaccB.br <- c(jaccB.br, mean(jacc))
    }
  }
  jaccC <- rbind(jaccC,jaccC.br)
  jaccB <- rbind(jaccB,jaccB.br)
}
dimnames(jaccC)[[1]] <- o
dimnames(jaccC)[[2]] <- c(ct.names[["acc"]],ct.names[["cn"]],ct.names[["cer"]])
dimnames(jaccB)[[1]] <- o
dimnames(jaccB)[[2]] <- c(ct.names[["acc"]],ct.names[["cn"]],ct.names[["cer"]])

jacc.final <- (jaccC+jaccB)/2


pdf(paste0("hspec.overlap.33regions.mapsep.pdf"),width=9,height=5)
pheatmap(jacc.final,
         na_col="lightgray",
         gaps_col=c(6,12),
         cluster_rows=F,
         cluster_cols=F,
         show_rownames=T)
dev.off()

pdf(paste0("hspec.overlap.33regions.corr.mapsep.pdf"),width=5,height=5)
corr <- cor(jacc.final)
names <- c("AC In","AC Ex","CN Neu","CN In","CB Neu","CB In",
           "AC Ast","CN Ast","CB Ast",
           "AC OD","CN OD","CB OD",
           "AC MG","CN MG",
           "AC OPC","CN OPC")
corr <- corr[names,names]
pheatmap(corr,
         na_col="lightgray",
         gaps_col=c(6,9,12,14),
         gaps_row=c(6,9,12,14),
         cluster_rows=F,
         cluster_cols=F,
         show_rownames=T)
dev.off()


neu.markers <- unique(c(
  rownames(bootstraps[["C"]][["acc"]])[bootC[,"0"]>th],
  rownames(bootstraps[["C"]][["acc"]])[bootC[,"1"]>th],
  rownames(bootstraps[["C"]][["cn"]])[bootC[,"0"]>th],
  rownames(bootstraps[["C"]][["cn"]])[bootC[,"3"]>th],
  rownames(bootstraps[["C"]][["cer"]])[bootC[,"0"]>th],
  rownames(bootstraps[["C"]][["cer"]])[bootC[,"1"]>th],
  rownames(bootstraps[["B"]][["acc"]])[bootC[,"0"]>th],
  rownames(bootstraps[["B"]][["acc"]])[bootC[,"1"]>th],
  rownames(bootstraps[["B"]][["cn"]])[bootC[,"0"]>th],
  rownames(bootstraps[["B"]][["cn"]])[bootC[,"3"]>th],
  rownames(bootstraps[["B"]][["cer"]])[bootC[,"0"]>th],
  rownames(bootstraps[["B"]][["cer"]])[bootC[,"1"]>th]
))
ast.markers <- unique(c(
  rownames(bootstraps[["C"]][["acc"]])[bootC[,"2"]>th],
  rownames(bootstraps[["C"]][["cn"]])[bootC[,"2"]>th],
  rownames(bootstraps[["C"]][["cer"]])[bootC[,"2"]>th],
  rownames(bootstraps[["B"]][["acc"]])[bootC[,"2"]>th],
  rownames(bootstraps[["B"]][["cn"]])[bootC[,"2"]>th],
  rownames(bootstraps[["B"]][["cer"]])[bootC[,"2"]>th]
))
oli.markers <- unique(c(
  rownames(bootstraps[["C"]][["acc"]])[bootC[,"3"]>th],
  rownames(bootstraps[["C"]][["cn"]])[bootC[,"1"]>th],
  rownames(bootstraps[["C"]][["cer"]])[bootC[,"3"]>th],
  rownames(bootstraps[["B"]][["acc"]])[bootC[,"3"]>th],
  rownames(bootstraps[["B"]][["cn"]])[bootC[,"1"]>th],
  rownames(bootstraps[["B"]][["cer"]])[bootC[,"3"]>th]
))

##########


th <- 2
balance <- ncells
ratios <- list()
for (r in regions){
  print(r)
  ratios[[r]] <- list()
  all.med <- list()
  for (s in species){
    all.med[[s]] <- median(log10(Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data)+0.001),na.rm=T)
    print(all.med[[s]])
  }
  # bootstrapping cells
  for (i in names(cells[["H"]][[r]])){
    print(i)
    ratios[[r]][[i]] <- list(C=c(),B=c())
    for (b in 1:100){
      bulk <- list()
      for (s in species){
        cells.i <- cells[[s]][[r]][[i]]
        boot <- sample(cells.i,balance,replace=T)
        x <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data[,c(boot,boot)])
        x <- log10(x+0.001)
        bulk[[s]] <- x
      }
      bulk.HC <- bulk[["H"]]-bulk[["C"]]
      bulk.HB <- bulk[["H"]]-bulk[["B"]]
      bulk.HCM <- log10( abs(bulk[["H"]]-bulk[["M"]]) + 0.001) - log10( abs(bulk[["C"]]-bulk[["M"]]) + 0.001)
      bulk.HBM <- log10( abs(bulk[["H"]]-bulk[["M"]]) + 0.001) - log10( abs(bulk[["B"]]-bulk[["M"]]) + 0.001)
      hist(bulk.HCM)
      #bulk.HCM[abs(bulk.HC)<=log10(th)] <- NA
      #bulk.HBM[abs(bulk.HB)<=log10(th)] <- NA
      ratios[[r]][[i]][["C"]] <- cbind(ratios[[r]][[i]][["C"]], bulk.HCM)
      ratios[[r]][[i]][["B"]] <- cbind(ratios[[r]][[i]][["B"]], bulk.HBM)
    }
  }
}
pdf(paste0("hspec.integrated.mapsep.pdf"),width=10,height=4)
par(mfrow=c(1,3))
for (r in regions){
  print(r)
  B.rat <- as.data.frame(lapply(ratios[[r]],function (x) rowMeans(x[["B"]],na.rm=T)), check.names=F)
  C.rat <- as.data.frame(lapply(ratios[[r]],function (x) rowMeans(x[["C"]],na.rm=T)), check.names=F)
  B <- apply(B.rat,1,function (x) var(x,na.rm=T))
  C <- apply(C.rat,1,function (x) var(x,na.rm=T))
  print(median(B,na.rm=T))
  print(median(C,na.rm=T))
  #hist(B,breaks=0:20/10,col="#80008080",main=r,xlab="SD")
  #hist(C,breaks=0:20/10,col=rgb(0,0,1,0.5),add=T)
  bulk <- as.matrix(read.delim(paste0(r,"H.txt"),header=T,row.names=1))
  hspec.genes.B <- rownames(bulk)[bulk[,"B"]>log10(2)]
  hspec.genes.C <- rownames(bulk)[bulk[,"C"]>log10(2)]
  hspec.genes.B <- hspec.genes.B[hspec.genes.B %in% names(B)]
  hspec.genes.C <- hspec.genes.C[hspec.genes.C %in% names(C)]
  B.bulk <- B[hspec.genes.B]
  C.bulk <- C[hspec.genes.C]
  print(median(B.bulk,na.rm=T))
  print(median(C.bulk,na.rm=T))
  bxp <- list(B.rnaseq=B.bulk,C.rnaseq=C.bulk)
  hspec.genes.B <- rownames(B.rat)[apply(B.rat,1,function (x) sum(x>log10(2))>0)]
  hspec.genes.C <- rownames(C.rat)[apply(C.rat,1,function (x) sum(x>log10(2))>0)]
  B.bulk <- B[hspec.genes.B]
  C.bulk <- C[hspec.genes.C]
  bxp[["B.nucseq"]] <- B.bulk
  bxp[["C.nucseq"]] <- C.bulk
  print(median(B.bulk,na.rm=T))
  print(median(C.bulk,na.rm=T))
  boxplot(bxp,outline=F,col=c("#80008080",rgb(0,0,1,0.5),"#80008080",rgb(0,0,1,0.5)),ylab="SD",main=r)
}
dev.off()

##########


load("avg.evol.rate.mapsep.RData")
load("hspec.mapsep.Rdata")
load("cells.mapsep.RData")

genes.orig <- genes
for (r in regions){
  #genes[[r]] <- lapply(genes[[r]], function (i) lapply(i, function (x) table(x$"2")))
  genes[[r]] <- lapply(genes[[r]], function (i) lapply(i, function (x) apply(x$"2",2, 
                  function (y) log10(sum(y>log10(2))+1) - log10(sum(y<log10(2)*(-1))+1) )))
}
bootstraps <- list(C=list(),B=list())
for (r in regions){
  for (sp in names(bootstraps)){
    bootstraps[[sp]][[r]] <- matrix(0,nrow(rlist[["H"]][[r]]@assays$RNA@data),length(genes[[r]]))
    dimnames(bootstraps[[sp]][[r]])[[1]] <- rownames(rlist[["H"]][[r]]@assays$RNA@data)
    dimnames(bootstraps[[sp]][[r]])[[2]] <- names(genes[[r]])
  }
  for (i in names(genes[[r]])){
    bootstraps[["C"]][[r]][names(genes[[r]][[i]]$C),i] <- genes[[r]][[i]]$C
    bootstraps[["B"]][[r]][names(genes[[r]][[i]]$B),i] <- genes[[r]][[i]]$B
  }
}
for (r in regions){
  x1 <- bootstraps[["C"]][[r]][,"0"]
  x2 <- bootstraps[["C"]][[r]][,"2"]
  nonna <- is.finite(x1)&is.finite(x2)
  x1 <- x1[nonna]
  x2 <- x2[nonna]
  print(median(x1))
  print(median(x2))
  df <- data.frame(x1,x2)
  
  corr <- cor(x1,x2,use="pairwise.complete.obs")
  print(corr)
  
  ## Use densCols() output to get density at each point
  x <- densCols(x1,x2, colramp=colorRampPalette(c("black", "white")))
  df$dens <- col2rgb(x)[1,] + 1L
  
  ## Map densities to colors
  cols <-  colorRampPalette(c("#000099", "#00FEFF", "#45FE4F", 
                              "#FCFF00", "#FF9400", "#FF3100"))(256)
  df$col <- cols[df$dens]
  
  ## Plot it, reordering rows so that densest points are plotted on top
  plot(x2~x1, data=df[order(df$dens),], pch=20, col=col, main=c(r,corr))
}
genes <- genes.orig
save(genes,bootstraps,file="genes.mapsep.RData")

# list of genes for Olya
#ast.markers <- rownames(pbmc.markers$acc)[pbmc.markers$acc$cluster==2&pbmc.markers$acc$avg_logFC>log(2)&pbmc.markers$acc$p_val_adj<0.01]
#ast.hspec <- rownames(bootstraps$C$acc)[bootstraps$C$acc[,"2"]>1&bootstraps$B$acc[,"2"]>1]
#bitr(ast.hspec[ast.hspec %in% ast.markers], fromType="ENSEMBL", toType="SYMBOL", OrgDb="org.Hs.eg.db")

load("genes.mapsep.RData")
for (r in regions){
  expr <- as.matrix(read.delim(paste0(r,".counts.txt"),header=T,row.names=1))
  expr <- expr[rownames(expr) %in% rownames(bootstraps[["C"]][[r]]),]
  expr <- abs(expr[,"H"]-expr[,"M"]) / abs(expr[,"C"]-expr[,"M"])
  expr <- log10(expr)
  
  bulk <- apply(bootstraps[["C"]][[r]][names(expr),],1, function (x) max(x[is.finite(x)]))

  corr <- cor(bulk[is.finite(bulk)],expr[is.finite(bulk)],use="pairwise.complete.obs",method="s")
  print(corr)
  plot(bulk,expr,main=c(r,corr),pch=21,bg=rgb(0.5,0.5,0.5,0.5))
}

ncells <- 50
balance <- ncells

genes <- list()
for (r in regions){
  print(r)
  genes[[r]] <- list()
  all.med <- list()
  for (s in species){
    all.med[[s]] <- median(log10(Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data)+0.001),na.rm=T)
    print(all.med[[s]])
  }
  # bootstrapping cells
  for (i in names(cells[["H"]][[r]])){
    print(i)
    for (b in 1:1){
      bulk <- list()
      for (s in species){
        cells.i <- cells[[s]][[r]][[i]]
        x <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data[,sample(cells.i,balance,replace=T)])
        x <- log10(x+0.001)
        bulk[[s]] <- x
      }
      bulk.HC <- bulk[["H"]]-bulk[["C"]]
      bulk.HB <- bulk[["H"]]-bulk[["B"]]
      bulk.HCM <- log10( abs(bulk[["H"]]-bulk[["M"]]) + 0.001) - log10( abs(bulk[["C"]]-bulk[["M"]]) + 0.001)
      bulk.HBM <- log10( abs(bulk[["H"]]-bulk[["M"]]) + 0.001) - log10( abs(bulk[["B"]]-bulk[["M"]]) + 0.001)
      nonna <- !(is.na(bulk.HC) | is.na(bulk.HCM))
      bulk.HC <- bulk.HC[nonna]
      bulk.HCM <- bulk.HCM[nonna]
      nonna <- !(is.na(bulk.HB) | is.na(bulk.HBM))
      bulk.HB <- bulk.HB[nonna]
      bulk.HBM <- bulk.HBM[nonna]
      hist(bulk.HCM)
      genes[[r]][[i]][["H"]] <- bulk[["H"]]
      genes[[r]][[i]][["HCM"]] <- bulk.HCM
      genes[[r]][[i]][["HBM"]] <- bulk.HBM
      genes[[r]][[i]][["HCBM"]] <- (bulk.HBM+bulk.HCM)/2
    }
  }
}

hcol <- rev(inferno(90))
ct.names <- unlist(ct.names)

heatmaps <- list()
for (type in c("H","HCBM","HCM","HBM")){
  names <- names(unlist(lapply(genes, function (x) names(x))))
  overlap <- matrix(NA,length(names),length(names))
  dimnames(overlap)[[1]] <- names
  dimnames(overlap)[[2]] <- names
  for (r1 in regions){
    for (cl1 in names(genes[[r1]])){
      for (r2 in regions){
        for(cl2 in names(genes[[r2]])){
          if(r1!=r2 | cl1!=cl2){
            t1 <- genes[[r1]][[cl1]][[type]]
            t2 <- genes[[r2]][[cl2]][[type]]
            
            t1 <- t1[names(t1) %in% names(t2)]
            t2 <- t2[names(t1)]
            
            x1 <- as.vector(t1)
            x2 <- as.vector(t2)
            df <- data.frame(x1,x2)
            
            corr <- cor(x1[is.finite(x1)],x2[is.finite(x1)],use="pairwise.complete.obs")
            overlap[paste0(r1,as.numeric(cl1)+1),paste0(r2,as.numeric(cl2)+1)] <- corr
          }
        }
      }
    }
  }
  dimnames(overlap)[[1]] <- ct.names
  dimnames(overlap)[[2]] <- ct.names
  ord <- c("AC In 1","CN Pur 3","CB In 1","AC Ex 0","CN Pur 0","CB Gr 0",
           "AC Ast 2","CN Ast 2","CB Ast 2","AC OD 3","CN OD 1","CB OD 3",
           "AC OPC 4","CN OPC 4","AC MG 5","CN MG 5")
  overlap <- overlap[ord,rev(ord)]
  
  heatmaps[[type]] <- pheatmap(overlap,
           color=hcol,
           breaks=log10(seq(10^min(overlap,na.rm=T),10^max(overlap,na.rm=T),length.out=91)),
           #breaks=10^seq(log10(min(overlap,na.rm=T)),log10(max(overlap,na.rm=T)),length.out=91),
           #breaks=seq(min(overlap,na.rm=T),max(overlap,na.rm=T),length.out=91),
           border_color=NA,
           na_col=hcol[length(hcol)],
           gaps_row=c(6,9,12,14),
           gaps_col=c(2,4,7,10),
           cluster_rows=F,
           cluster_cols=F,
           show_rownames=T,
           silent=T,
           main=type)
}


pdf("overlap.mapsep.pdf",width=17,height=4,pointsize=24)
gridExtra::grid.arrange(grobs=lapply(heatmaps, function (x) x[[4]]),ncol=4)
dev.off()


#ab <- as.matrix(read.delim("antibodies.txt",header=F))
ab <- as.matrix(c("MSI2","NFAT5"))
keytypes(org.Hs.eg.db)
ab.s<- bitr(ab, fromType="SYMBOL", toType="ENSEMBL", OrgDb="org.Hs.eg.db")$ENSEMBL
ab.a <- bitr(ab, fromType="ALIAS", toType="ENSEMBL", OrgDb="org.Hs.eg.db")$ENSEMBL
ab <- unique(c(ab.s,ab.a))
ab

  for (r in regions){
    bulk <- list()
    for (s in species){
      bulk[[s]] <- apply(rlist[[s]][[r]]@assays$RNA@data, 1, function (x) mean(x))
      bulk[[s]] <- log10(bulk[[s]])
      bulk[[s]] <- bulk[[s]] - median(bulk[[s]])
    }
    ab.r <- ab
    for (s in species){
      ab.r <- ab.r[ab.r %in% names(bulk[[s]])]
    }
    for (s in species){
      bulk[[s]] <- bulk[[s]][ab.r]
    }
    bulk.log10fc <- log10( abs(2*bulk[["H"]]-2*bulk[["M"]])/abs(bulk[["C"]]+bulk[["B"]]-2*bulk[["M"]]) )

    pdf(paste0(r,".ab.pdf"),width=12,height=4)
    par(mfrow=c(1,7),mar=c(2,2,2,1))
    for (i in ab.r){
      name <- i
      try(name <- bitr(i, fromType="ENSEMBL", toType="SYMBOL", OrgDb="org.Hs.eg.db")$SYMBOL[1])
      print(c(i,name))
      expr <- list()
      expr[["H"]] <- sapply(names(cells[["H"]][[r]]), function (x) rlist[["H"]][[r]]@assays$RNA@data[i,cells[["H"]][[r]][[x]]], simplify=F)
      expr[["CB"]] <- sapply(names(cells[["H"]][[r]]), function (x) c(rlist[["C"]][[r]]@assays$RNA@data[i,cells[["C"]][[r]][[x]]], rlist[["B"]][[r]]@assays$RNA@data[i,cells[["B"]][[r]][[x]]]), simplify=F)
      expr[["M"]] <- sapply(names(cells[["H"]][[r]]), function (x) rlist[["M"]][[r]]@assays$RNA@data[i,cells[["M"]][[r]][[x]]], simplify=F)
      #for (s in names(expr)){
      #  expr[[s]] <- lapply(expr[[s]], function (x) x[x>0.5])
      #}
      zeros <- list()
      for (s in names(expr)){
        zeros[[s]] <- unlist(lapply(expr[[s]],length))
      }
      sel <- names(zeros[["H"]])[zeros[["H"]]>1]
      sel <- sel[sel %in% names(zeros[["CB"]])[zeros[["CB"]]>1]]
      sel <- sel[sel %in% names(zeros[["M"]])[zeros[["M"]]>1]]
      print(sel)

      if(length(sel)>1){
        for (s in names(expr)){
          expr[[s]] <- sapply(sel, function (x) expr[[s]][[x]], simplify=F)
          try(boxplot(expr[[s]],outline=F,main=paste(name,s),ylim=c(0.5,10)))
          try(stripchart(expr[[s]],vertical=T,method="jitter",add=TRUE,pch=20,col=rgb(0,0,1,0.5)))
        }
        expr[["H-CB"]] <- sapply(names(expr[["H"]]), function (x) mean(expr[["H"]][[x]]) - mean(expr[["CB"]][[x]]))
        try(plot(names(expr[["H-CB"]]),expr[["H-CB"]],main="H-CB",ylim=c(-3.5,3.5)))
        expr[["H-M"]] <- sapply(names(expr[["H"]]), function (x) mean(expr[["H"]][[x]]) - mean(expr[["M"]][[x]]))
        try(plot(names(expr[["H-M"]]),expr[["H-M"]],main="H-M",ylim=c(-3.5,3.5)))
        expr[["CB-M"]] <- sapply(names(expr[["H"]]), function (x) mean(expr[["CB"]][[x]]) - mean(expr[["M"]][[x]]))
        try(plot(names(expr[["CB-M"]]),expr[["CB-M"]],main="CB-M",ylim=c(-3.5,3.5)))
        expr[["log10(H-M)/(CB-M)"]] <- sapply(names(expr[["H"]]), function (x) log10( abs(expr[["H-M"]][[x]])/abs(expr[["CB-M"]][[x]]) ))

        ratios <- c()
        for (k in 1:100){
          rand <- list()
          for (s in c("H","CB","M")){
            rand[[s]] <- lapply(expr[[s]], function (x) sample(x, 1000, replace=T))
            pool <- unlist(rand[[s]])
            rand[[s]] <- lapply(expr[[s]], function (x) sample(pool, length(x), replace=T))
            rand[[s]] <- unlist(lapply(rand[[s]],mean))
          }
          ratios <- rbind(ratios, log10( abs(rand[["H"]]-rand[["M"]])/abs(rand[["CB"]]-rand[["M"]]) ))
        }
        ratios <- as.list(as.data.frame(ratios))
        boxplot(ratios, border="lightgray", main="log10(H-M)/(CB-M)", ylim=c(-3.5,3.5))
        x <- names(expr[["log10(H-M)/(CB-M)"]])
        y <- expr[["log10(H-M)/(CB-M)"]]
        try(points(as.factor(x[!is.na(y)]), y[!is.na(y)], col="red", pch=16))
        pval <- c()
        for (n in names(ratios)){
          pval <- c(pval, round(sum(ratios[[n]]>=y[x==n])/length(ratios[[n]]),digits=3))
        }
        text(as.factor(names(ratios)),3.5,labels=pval,cex=0.7)
      }
    }
    dev.off()
  }


