setwd("~/brainmap/seurat")
library(Seurat)
library(squash)
library(dplyr)
library(clusterProfiler)
library(pheatmap)
library(viridis)

# Load the integrated Seurat objects. The code below expects this file to
# provide `rlist`, a list indexed by region name.
load("integrated.nomito.RData")

# Load the ANOVA results. The code below expects this file to provide `pvals`,
# indexed by region name, with one p-value per row of the RNA data matrix
# (presumably in the same gene order -- TODO confirm against the script that
# wrote anova.v2.Rdata).
load("anova.v2.Rdata")

regions <- c("acc","cn","cer")
# Species codes as stored in `orig.ident` (presumably human, chimpanzee,
# bonobo and macaque -- TODO confirm).
species <- c("H","C","B","M")

# Keep the per-region objects under another name so that `rlist` can be
# rebuilt as a nested list: rlist[[species]][[region]].
rlist.all <- rlist

rlist <- list()
for (s in species){
  rlist[[s]] <- list()
}
for (r in regions){
  # Rows (genes) that survive ANOVA. which() turns the logical test into row
  # numbers and silently drops NA/NaN p-values; a logical index containing NA
  # is rejected when subsetting a sparse matrix.
  keep.genes <- which(pvals[[r]] < 0.05)
  for (s in species){
    # Cells of this region whose orig.ident equals the current species code.
    sel <- names(rlist.all[[r]]@active.ident)[rlist.all[[r]]$orig.ident==s]
    print(length(sel))
    # NOTE(review): SubsetData() is deprecated in recent Seurat releases in
    # favour of subset(); kept here because the installed version is unknown.
    rlist[[s]][[r]] <- SubsetData(rlist.all[[r]], cells = sel)
    # Remove genes that do not survive ANOVA. Only the `data` slot is
    # filtered; drop = FALSE keeps a matrix even if a single gene remains.
    rlist[[s]][[r]]@assays$RNA@data <- rlist[[s]][[r]]@assays$RNA@data[keep.genes, , drop = FALSE]
  }
}
rm(rlist.all)

# Cell identifiers per cluster. The loop below reads them as
# cells[[species]][[region]][[cluster]].
load("cells.nomito.RData")

ncells <- 1  # cells drawn per species for each averaged profile
n <- 1000    # bootstrap iterations per cluster

##### Human-specificity in each cell cluster #####

# For every region and every cluster, repeat n times: draw `balance` cells per
# species, average their expression into one profile per species, and take
# each gene's absolute difference from the M profile for H, C and B.
#   HS:    fraction of genes where the H difference is more than twice the
#          C ("C"), B ("B") or both ("CB") differences.
#   PS:    fraction of genes where the C / B / both differences are more than
#          twice the H difference.
#   genes: bootstrap x gene logical matrix of the "CB" HS calls.
# Each fraction is taken over the genes with a non-zero difference in at least
# one side of the comparison.
# NOTE(review): no RNG seed is set in the visible code, so the draws differ
# between runs.

balance <- ncells
HS <- list()
PS <- list()
genes <- list()
for (r in regions){
  print(r)
  HS[[r]] <- list()
  PS[[r]] <- list()
  genes[[r]] <- list()
  # bootstrapping cells
  for (i in names(cells[["H"]][[r]])){
    print(i)
    # Preallocate the per-iteration results instead of growing them with
    # c()/rbind(), which copies the whole object on every iteration.
    hs <- list(C = numeric(n), B = numeric(n), CB = numeric(n))
    ps <- list(C = numeric(n), B = numeric(n), CB = numeric(n))
    cb.calls <- vector("list", n)
    for (b in seq_len(n)){
      bulk <- list()
      for (s in species){
        cells.i <- cells[[s]][[r]][[i]]
        # Index through sample.int() rather than sample(cells.i, ...): it uses
        # the same random draws, but does not reinterpret a single numeric
        # element x as the range 1:x.
        # If using balance>25, set replace=TRUE.
        boot <- cells.i[sample.int(length(cells.i), balance, replace = FALSE)]
        # drop = FALSE keeps a one-column matrix when balance == 1, so the
        # columns no longer need to be duplicated before averaging.
        bulk[[s]] <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data[, boot, drop = FALSE])
      }
      bulk.HM <- abs(bulk[["H"]]-bulk[["M"]])
      bulk.CM <- abs(bulk[["C"]]-bulk[["M"]])
      bulk.BM <- abs(bulk[["B"]]-bulk[["M"]])

      # Genes with a non-zero difference in the H-vs-C or H-vs-B comparison.
      nonzero.C <- bulk.HM > 0 | bulk.CM > 0
      nonzero.B <- bulk.HM > 0 | bulk.BM > 0
      nonzero.CB <- nonzero.C & nonzero.B

      # Per-gene calls: H difference more than twice the other, and vice versa.
      hs.C <- bulk.HM > 2*bulk.CM
      hs.B <- bulk.HM > 2*bulk.BM
      ps.C <- bulk.HM*2 < bulk.CM
      ps.B <- bulk.HM*2 < bulk.BM

      hs[["C"]][b] <- sum(hs.C) / sum(nonzero.C)
      ps[["C"]][b] <- sum(ps.C) / sum(nonzero.C)
      hs[["B"]][b] <- sum(hs.B) / sum(nonzero.B)
      ps[["B"]][b] <- sum(ps.B) / sum(nonzero.B)
      hs[["CB"]][b] <- sum(hs.B & hs.C) / sum(nonzero.CB)
      ps[["CB"]][b] <- sum(ps.B & ps.C) / sum(nonzero.CB)
      cb.calls[[b]] <- hs.B & hs.C & nonzero.C & nonzero.B
    }
    HS[[r]][[i]] <- hs
    PS[[r]][[i]] <- ps
    # One row per bootstrap iteration; column names come from the gene names
    # carried by the averaged profiles.
    genes[[r]][[i]] <- list(CB = do.call(rbind, cb.calls))
  }
}

# Bar plots of the mean "CB" fraction per cluster (HS and PS values pooled),
# one panel per region with +/- one standard deviation, followed by three
# summary panels. Everything is written to a single PDF.
pdf(paste0("nomito.relativeBarplot.integrated.", balance, ".pdf"), width = 9, height = 3.5)
par(mfrow = c(1, 3), las = 2)

# Pull one named statistic ("C", "B" or "CB") out of every cluster entry.
take.stat <- function(per.cluster, key) {
  lapply(per.cluster, function(entry) entry[[key]])
}

full.rate <- c()
avg.rate <- c()
for (r in regions) {
  Bh <- take.stat(HS[[r]], "B")
  Ch <- take.stat(HS[[r]], "C")
  Bp <- take.stat(PS[[r]], "B")
  Cp <- take.stat(PS[[r]], "C")
  CBh <- take.stat(HS[[r]], "CB")
  CBp <- take.stat(PS[[r]], "CB")

  # Per cluster: the HS "CB" values followed by the PS "CB" values.
  cluster.ids <- names(Bh)
  pooled <- lapply(cluster.ids, function(k) c(CBh[[k]], CBp[[k]]))
  names(pooled) <- cluster.ids

  # The mean per cluster goes into full.rate under the cluster name first ...
  means <- unlist(lapply(pooled, mean))
  full.rate <- c(full.rate, means)

  # ... and is then relabelled with the second space-separated word of the
  # matching ct.names entry for the bar labels and the avg.rate table.
  names(means) <- vapply(
    strsplit(ct.names[[r]], " "),
    function(parts) parts[[2]],
    character(1)
  )
  avg.rate <- rbind(avg.rate, cbind(ct.names[[r]], means))

  sds <- unlist(lapply(pooled, sd))
  bar.x <- barplot(
    means,
    border = "#4000C0", col = "#4000C048",
    main = r, ylab = "Evolutionary rate", ylim = c(0, 0.7)
  )
  # Zero-length arrow heads draw plain error bars of +/- one SD.
  arrows(bar.x, means + sds, bar.x, means - sds, length = 0, col = "#4000C0")
}

barplot(full.rate, border = "#4000C0", col = "#4000C048", ylab = "Evolutionary rate rev.")

# NOTE(review): the next barplot reads avg.rate right after this load; if the
# file contains an object called avg.rate it replaces the table built in the
# loop above -- confirm that this is intended.
load("avg.evol.rate.mapsep.RData")
barplot(
  as.numeric(avg.rate[, 2]),
  border = "#4000C0", col = "#4000C048",
  ylab = "Evolutionary rate", ylim = c(-0.6, 0.6)
)

# nonzero.perCluster is not created in the visible code; presumably it comes
# from one of the loaded .RData files.
plot(nonzero.perCluster, full.rate, main = cor(nonzero.perCluster, full.rate))
dev.off()


# Box plots, one panel per region, of log2(HS) - log2(PS) for the "CB" calls of
# every cluster across bootstrap iterations; the dashed line marks a ratio of 1.
pdf(paste0("nomito.ratio.integrated.", balance, ".pdf"), width = 9, height = 3.5)
par(mfcol = c(1, 3))

# Element-wise log2 difference of two lists, matched by the names of `num`.
log2ratio <- function(num, den) {
  sapply(names(num), simplify = FALSE, function(k) log2(num[[k]]) - log2(den[[k]]))
}

for (r in regions) {
  hs.region <- HS[[r]]
  ps.region <- PS[[r]]

  Bh <- lapply(hs.region, function(entry) entry[["B"]])
  Ch <- lapply(hs.region, function(entry) entry[["C"]])
  Bp <- lapply(ps.region, function(entry) entry[["B"]])
  Cp <- lapply(ps.region, function(entry) entry[["C"]])
  CBh <- lapply(hs.region, function(entry) entry[["CB"]])
  CBp <- lapply(ps.region, function(entry) entry[["CB"]])

  # Only `ratio` (the "CB" comparison) is plotted; the C and B ratios are
  # computed but not used further in this section.
  Cratio <- log2ratio(Ch, Cp)
  Bratio <- log2ratio(Bh, Bp)
  ratio <- log2ratio(CBh, CBp)

  boxplot(
    ratio,
    names = ct.names[[r]], las = 2, notch = TRUE, outpch = ".",
    ylim = c(-0.2, 7), main = r,
    ylab = "log2( #hum-spec / #pan-spec )"
  )
  abline(h = 0, lty = 2)
}
dev.off()

# Store the bootstrap results; the file name carries the cells-per-draw setting.
save(HS, PS, genes, file = paste0("nomito.HS.PS.", balance, ".RData"))

##########

# Reload the saved bootstrap results (HS, PS, genes) so that this section can
# be run on its own.
load("nomito.HS.PS.1.RData")

##### heterogeneity #####

# For every cluster, compute the Jaccard index (overlap / union) of the "CB"
# gene calls between every pair of bootstrap iterations.
# heterog[[region]][[cluster]] holds one value per unordered pair (i < j),
# ordered by i and then by j. A pair with an empty union yields NaN (0/0).
#
# The number of iterations is taken from the stored matrix instead of a
# hard-coded 1000, so this section keeps working when the bootstrap count in
# the section above is changed.
heterog <- list()
for (r in regions){
  print(r)
  heterog[[r]] <- list()
  for (ct in names(genes[[r]])){
    print(ct)
    # Rows are bootstrap iterations, columns are genes.
    calls <- genes[[r]][[ct]][["CB"]]
    print(dim(calls))
    # Work on 0/1 values so the pairwise overlaps are plain matrix products.
    storage.mode(calls) <- "double"
    # overlap[i, j] = number of genes called in both iteration i and j.
    overlap <- tcrossprod(calls)
    # |A or B| = |A| + |B| - |A and B|
    set.size <- rowSums(calls)
    either <- outer(set.size, set.size, "+") - overlap
    jacc.mat <- overlap / either
    # The matrix is symmetric, so the lower triangle read column by column
    # lists the pairs as (1,2), (1,3), ..., (1,n), (2,3), ...
    heterog[[r]][[ct]] <- jacc.mat[lower.tri(jacc.mat)]
  }
}

save(heterog,file="heterogeneity.nomito.Rdata")

# Reload the pairwise similarities so that the plots can be redrawn without
# recomputing them.
load("heterogeneity.nomito.Rdata")

# One line colour per cluster, in the order of ct.names[[region]].
colors <- list()
colors[["acc"]] <- c("#CE6928","#B02325","#208C43", "#9F5DA5","#F16EAA","#4A87C6")
colors[["cn"]] <- c("#CE6928","#9F5DA5","#208C43", "#B02325","#F16EAA","#4A87C6")
colors[["cer"]] <- c("#CE6928","#B02325","#208C43", "#9F5DA5")

# One panel per region: histogram (as lines, log10 scale) of the pairwise
# overlap/union values of every cluster. avg.heterog collects one row per
# cluster: name, mean and median of the values above 0.5 (a character matrix,
# because the name is combined with the numbers).
# NOTE(review): heterog[[r]] is indexed by position here, so its clusters are
# assumed to be in the same order as ct.names[[r]] -- confirm.
avg.heterog <- c()
pdf("heterogeneity.nomito.pdf",width=12,height=4)
par(mfrow=c(1,3))
for (r in regions){
  # Empty frame; the curves are added below.
  plot(0:1,c(0,6),col="white",xlab="overlap / union",ylab="log10 count",main=r)
  for (i in seq_along(ct.names[[r]])){
    sim <- heterog[[r]][[i]]
    h <- hist(sim,breaks=0:100/100,plot=FALSE)
    # which() drops NaN similarities (pairs with an empty union), which would
    # otherwise turn the mean and median into NA; hist() ignores them as well.
    high <- sim[which(sim > 0.5)]
    avg.heterog <- rbind(avg.heterog, c(ct.names[[r]][i],mean(high),median(high)))
    # NOTE(review): counts are halved before the log although each pair is
    # stored only once in heterog -- confirm the /2 is still intended.
    lines(h$mids,log10(h$counts/2+1),col=colors[[r]][i],lwd=2)
  }
  legend("topright",ct.names[[r]],col=colors[[r]],lwd=2,bty="n")
}
dev.off()
avg.heterog

# The code below reads `avg.rate` after this load; presumably the file provides
# it (column 1: cluster label, column 2: rate) -- TODO confirm, the file is not
# written anywhere in the visible code.
load("avg.evol.rate.nomito.RData")
avg.rate

# Scatter plot of the evolutionary rate against the mean overlap/union per
# cluster, with each point labelled by the first column of avg.rate.
het.mean <- as.numeric(avg.heterog[, 2])
evo.rate <- as.numeric(avg.rate[, 2])

pdf("heterogeneity.corr.2nd.nomito.pdf", width = 5, height = 5)
plot(
  het.mean, evo.rate,
  pch = 16, xlim = c(0.69, 0.97),
  ylab = "Evolutionary rate", xlab = "Overlap/Union"
)
text(het.mean, evo.rate, avg.rate[, 1], pos = 4, offset = 0.25, cex = 0.7)
# Correlation test between the two series; the result is only shown when
# top-level values are auto-printed.
cor.test(as.numeric(avg.rate[, 2]), as.numeric(avg.heterog[, 2]))
dev.off()

##########

