setwd("~/brainmap/seurat")
library(Seurat)
library(squash)
library(dplyr)
library(clusterProfiler)

# Load the saved workspace. It is expected to provide `rlist`, which is indexed
# by region below; keep it under another name so that `rlist` can be rebuilt
# as a species -> region nested list.
load("integrated.nomito.RData")
rlist.all <- rlist

regions <- c("acc","cn","cer")
species <- c("H","C","B","M")

# One empty per-species container, filled region by region in the loop below.
rlist <- setNames(rep(list(list()), length(species)), species)

for (r in regions){
  cell.ids <- names(rlist.all[[r]]@active.ident)
  for (s in species){
    # Cells are assigned to a species when their name contains "<species>_".
    # NOTE(review): the pattern is not anchored, so e.g. "C_" would also match
    # a name that merely contains "C_" somewhere - confirm against the real
    # cell-name format (the printed head/length are the sanity check).
    is.species <- grepl(paste0(s,"_"), cell.ids)
    sel <- cell.ids[is.species]
    print(head(sel))
    print(length(sel))
    rlist[[s]][[r]] <- SubsetData(rlist.all[[r]], cells = sel)
  }
}
# Drop the combined objects (and the scratch name vector) once split.
rm(rlist.all, cell.ids)

# mtx <- as.matrix(read.delim("ortho.length.txt",header=T,row.names=1))
# head(mtx)
# ids <- list()
# for (s in c("c","b","m")){
#   ids[[toupper(s)]] <- rownames(mtx)
#   names(ids[[toupper(s)]]) <- mtx[,s]
# }
# ids[["H"]] <- rownames(mtx)
# names(ids[["H"]]) <- rownames(mtx)
# head(ids)
# 
# len <- list()
# for (s in species){
#   mtx <- as.matrix(read.delim(paste0(s,".v93.txt"),header=T,row.names=1))
#   mtx <- mtx[rownames(mtx) %in% names(ids[[s]]),]
#   len[[s]] <- as.numeric(mtx[,"mean"])
#   names(len[[s]]) <- ids[[s]][rownames(mtx)]
# }
# head(len)


# genes <- list()
# for (r in regions){
#   expr <- as.matrix(read.delim(paste0(r,".counts.txt"),header=T,row.names=1))
#   for (s in species){
#     expr <- expr[rownames(expr) %in% rownames(rlist[[s]][[r]]@assays$RNA@data),]
#     expr <- expr[rownames(expr) %in% names(len[[s]]),]
#   }
#   for (s in species){
#     expr[,s] <- expr[,s] - log10(len[[s]][rownames(expr)]) + 3
#   }
#   hist(rowMeans(expr))
#   
#   bulk <- list()
#   for (s in species){
#     bulk[[s]] <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data)
#     bulk[[s]] <- log10(bulk[[s]]+0.001)
#     bulk[[s]] <- bulk[[s]][rownames(expr)] - median(bulk[[s]]) - log10(len[[s]][rownames(expr)]) + 3
#     print(sum(is.na(bulk[[s]])))
#   }
#   avg <- (bulk[["H"]]+bulk[["B"]]+bulk[["C"]]+bulk[["M"]])/4
#   hist(avg)
#   print(length(avg))
#   print(dim(expr))
#   
#   genes[[r]] <- names(avg)[avg>(-1)&rowMeans(expr)>(-1)]
#   print(length(genes[[r]]))
# }


# Figure: one panel per species (columns) and region (rows). Each panel plots
# the median-centred log10 row means of the Seurat RNA `data` slot ("nuc-seq",
# x axis) against the matching column of <region>.counts.txt ("rna-seq",
# y axis), coloured by local point density, with the correlation from cor()
# printed in the corner.
png("bulk_corr_byspec.mapsep.png",width=4800,height=3600,res=600,pointsize=7.92)
par(mfcol=c(3,4),mar=c(4,4,2,2)+0.3,cex.lab=1.8)

# The per-region table does not depend on the species, so read every file once
# here instead of once per species/region pair inside the loops.
region.counts <- list()
for (r in regions){
  region.counts[[r]] <- as.matrix(read.delim(paste0(r,".counts.txt"),header=TRUE,row.names=1)) # these are log-transformed counts
}

blist <- list()
elist <- list()
for (s in species){
  blist[[s]] <- list()
  elist[[s]] <- list()
  for (r in regions){
    # Column `s` of the region table, stored unfiltered in elist.
    expr <- region.counts[[r]][,s]
    elist[[s]][[r]] <- expr
    print(c(s,r))
    print(length(expr))
    # Restrict to genes that are also rows of the single-cell matrix.
    expr <- expr[names(expr) %in% rownames(rlist[[s]][[r]]@assays$RNA@data)]
    print(length(expr))

    # Per-gene mean over cells, log10-transformed and centred on its median.
    # No pseudo-count is added, so genes with a zero mean become -Inf; they
    # are excluded from the correlation below via is.finite().
    bulk <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data)
    bulk <- log10(bulk)
    bulk <- bulk - median(bulk)
    blist[[s]][[r]] <- bulk
    bulk <- bulk[names(expr)]
    print(c(median(expr),median(bulk)))
    
    #bulk <- bulk[names(bulk) %in% names(len[[s]])]
    #expr <- expr[names(expr) %in% names(len[[s]])]
    #print(length(expr))
    x1 <- bulk #- log10(len[[s]][names(bulk)]) + 3 # counts -> RPKM
    x2 <- expr #- log10(len[[s]][names(bulk)]) + 3 # counts -> RPKM
    df <- data.frame(x1,x2)

    # Both vectors are masked with the same is.finite(x1) index so the pairs
    # stay aligned.
    finite.x1 <- is.finite(x1)
    corr <- cor(x1[finite.x1],x2[finite.x1],use="pairwise.complete.obs")
    
    ## Use densCols() output to get density at each point
    x <- densCols(x1,x2, colramp=colorRampPalette(c("black", "white")))
    df$dens <- col2rgb(x)[1,] + 1L
    
    ## Map densities to colors
    cols <-  colorRampPalette(c("#000099", "#00FEFF", "#45FE4F", 
                                "#FCFF00", "#FF9400", "#FF3100"))(256)
    df$col <- cols[df$dens]
    
    ## Plot it, reordering rows so that densest points are plotted on top
    plot(x2~x1, data=df[order(df$dens),], pch=20, col=col, xlab="Expression, nuc-seq", ylab="Expression, rna-seq")
    legend("bottomright",paste("R =",round(corr,digits=2)),bty="n",cex=1.4)
    legend("topleft",s,bty="n",cex=1.4)
  }
}
dev.off()


##### select species-specific genes using ANOVA #####

# Expected to provide `pvals`, indexed by region below.
load("anova.v2.Rdata")

# Keep, in every species' RNA `data` matrix, only the rows whose p-value for
# the region is below 0.05.
# NOTE(review): rows are matched to p-values by position, so pvals[[r]] is
# assumed to be in the same order as the rows of the data matrix - confirm.
for (r in regions){
  # which() turns the comparison into row indices and silently drops NA
  # p-values; a raw logical mask containing NA would produce NA subscripts.
  keep <- which(pvals[[r]] < 0.05, useNames = FALSE)
  for (s in species){
    # drop = FALSE keeps a matrix even if a single row survives.
    rlist[[s]][[r]]@assays$RNA@data <- rlist[[s]][[r]]@assays$RNA@data[keep, , drop = FALSE]
  }
}

##########


# Figure: one panel per region comparing the two data sets on the statistic
# log10(|H-M| / |C-M|). `expr` comes from <region>.counts.txt, `bulk` from the
# per-gene row means of the Seurat RNA `data` slot. The panel title carries the
# Spearman correlation and its p-value.
pdf("bulk_corr_H-C.H-M.mapsep.pdf",width=11,height=4)
par(mfrow=c(1,3))
for (r in regions){
  expr <- as.matrix(read.delim(paste0(r,".counts.txt"),header=TRUE,row.names=1))
  # Keep only genes that are rows of the human single-cell matrix.
  expr <- expr[rownames(expr) %in% rownames(rlist[["H"]][[r]]@assays$RNA@data),]
  # expr <- expr[genes[[r]],]
  # for (s in species){
  #   expr <- expr[rownames(expr) %in% names(len[[s]]),]
  # }
  # for (s in species){
  #   expr[,s] <- expr[,s] - log10(len[[s]][rownames(expr)]) + 3
  # }
  expr.HC <- abs(expr[,"H"]-expr[,"C"])
  #hist(expr.HC,breaks=100)
  expr <- abs(expr[,"H"]-expr[,"M"]) / abs(expr[,"C"]-expr[,"M"])
  expr <- log10(expr)
  
  # Per species: mean over cells, log10 with a 0.001 pseudo-count, centred on
  # the median, then reordered to the genes of `expr`.
  bulk <- list()
  for (s in species){
    bulk[[s]] <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data)
    bulk[[s]] <- log10(bulk[[s]]+0.001)
    bulk[[s]] <- bulk[[s]] - median(bulk[[s]])
    bulk[[s]] <- bulk[[s]][names(expr)] # - log10(len[[s]][names(expr)]) + 3
    #hist(bulk[[s]])
  }
  bulk.HC <- abs(bulk[["H"]]-bulk[["C"]])
  #hist(bulk.HC,breaks=100)
  bulk <- abs(bulk[["H"]]-bulk[["M"]]) / abs(bulk[["C"]]-bulk[["M"]])
  bulk <- log10(bulk)
  
  print(c(median(expr,na.rm=TRUE),median(bulk,na.rm=TRUE)))
  
  # Select genes whose H-C difference exceeds 2 in either data set and whose
  # `bulk` ratio is finite. The index is computed once and applied to both
  # vectors: filtering `bulk` first and then masking `expr` with
  # is.finite(<filtered bulk>) would leave `expr` unfiltered and the two
  # vectors misaligned. which() also drops NA entries of the mask.
  keep <- which((bulk.HC>2|expr.HC>2) & is.finite(bulk))
  bulk <- bulk[keep]
  expr <- expr[keep]
  
  corr <- cor(bulk,expr,use="pairwise.complete.obs",method="spearman")
  # cor.test() has no `use` argument; it drops incomplete pairs itself.
  corr.test <- cor.test(bulk,expr,method="spearman")
  plot(bulk,expr,main=c(r,paste(corr,corr.test$p.value)),pch=21,bg=rgb(0.5,0.5,0.5,0.5),cex=2)
}
dev.off()

# Figure: one panel per region comparing the two data sets on the statistic
# log10(|H-M| / |B-M|). `expr` comes from <region>.counts.txt, `bulk` from the
# per-gene row means of the Seurat RNA `data` slot. The panel title carries the
# Spearman correlation and its p-value.
pdf("bulk_corr_H-B.H-M.mapsep.pdf",width=11,height=4)
par(mfrow=c(1,3))
for (r in regions){
  expr <- as.matrix(read.delim(paste0(r,".counts.txt"),header=TRUE,row.names=1))
  # Keep only genes that are rows of the human single-cell matrix.
  expr <- expr[rownames(expr) %in% rownames(rlist[["H"]][[r]]@assays$RNA@data),]
  # expr <- expr[genes[[r]],]
  # for (s in species){
  #   expr <- expr[rownames(expr) %in% names(len[[s]]),]
  # }
  # for (s in species){
  #   expr[,s] <- expr[,s] - log10(len[[s]][rownames(expr)])
  # }
  expr.HB <- abs(expr[,"H"]-expr[,"B"])
  #hist(expr.HB,breaks=100)
  expr <- abs(expr[,"H"]-expr[,"M"]) / abs(expr[,"B"]-expr[,"M"])
  expr <- log10(expr)
  
  # Per species: mean over cells, log10 with a 0.001 pseudo-count, centred on
  # the median, then reordered to the genes of `expr`.
  bulk <- list()
  for (s in species){
    bulk[[s]] <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data)
    bulk[[s]] <- log10(bulk[[s]]+0.001)
    bulk[[s]] <- bulk[[s]] - median(bulk[[s]])
    # Number of `expr` genes that have no row in this species' matrix.
    print(sum(!(names(expr) %in% names(bulk[[s]]))))
    bulk[[s]] <- bulk[[s]][names(expr)] # - log10(len[[s]][names(expr)])
    #hist(bulk[[s]])
  }
  bulk.HB <- abs(bulk[["H"]]-bulk[["B"]])
  #hist(bulk.HB,breaks=100)
  bulk <- abs(bulk[["H"]]-bulk[["M"]]) / abs(bulk[["B"]]-bulk[["M"]])
  bulk <- log10(bulk)
  
  print(c(median(expr,na.rm=TRUE),median(bulk,na.rm=TRUE)))
  
  # Select genes whose H-B difference exceeds 2 in either data set and whose
  # `bulk` ratio is finite. The index is computed once and applied to both
  # vectors: filtering `bulk` first and then masking `expr` with
  # is.finite(<filtered bulk>) would leave `expr` unfiltered and the two
  # vectors misaligned. which() also drops NA entries of the mask.
  keep <- which((bulk.HB>2|expr.HB>2) & is.finite(bulk))
  bulk <- bulk[keep]
  expr <- expr[keep]
  
  corr <- cor(bulk,expr,use="pairwise.complete.obs",method="spearman")
  # cor.test() has no `use` argument; it drops incomplete pairs itself.
  corr.test <- cor.test(bulk,expr,method="spearman")
  plot(bulk,expr,main=c(r,paste(corr,corr.test$p.value)),pch=21,bg=rgb(0.5,0.5,0.5,0.5),cex=2)
}
dev.off()

# Figure: per region, a 4x4 heatmap of gene counts shared between the four
# gene sets exprHS / exprPS (from <region>.counts.txt) and bulkHS / bulkPS
# (from the row means of the Seurat RNA `data` slot). The diagonal holds each
# set's own size.
pdf("bulk_corr_newMetric.nomito.n.pdf",width=11,height=3.5)
par(mfrow=c(1,3))
heatmaps <- list()
hcol <- blues9
for (r in regions){
  expr <- as.matrix(read.delim(paste0(r,".counts.txt"),header=TRUE,row.names=1))
  # Keep only genes that are rows of the human single-cell matrix.
  expr <- expr[rownames(expr) %in% rownames(rlist[["H"]][[r]]@assays$RNA@data),]
  # Absolute distance of each species to M.
  expr.HM <- abs(expr[,"H"]-expr[,"M"]) 
  expr.CM <- abs(expr[,"C"]-expr[,"M"]) 
  expr.BM <- abs(expr[,"B"]-expr[,"M"])
  # HS: the H-M distance is more than twice both the B-M and C-M distances.
  # PS: the H-M distance is less than half of both.
  # The trailing terms exclude genes where the compared distances are all 0.
  exprHS <- expr.HM > 2*expr.BM & expr.HM > 2*expr.CM & (expr.HM>0|expr.CM>0) & (expr.HM>0|expr.BM>0)
  exprPS <- expr.HM*2 < expr.BM & expr.HM*2 < expr.CM & (expr.HM>0|expr.CM>0) & (expr.HM>0|expr.BM>0)

  # Same classification on the per-gene means over cells, aligned to `expr`.
  bulk <- list()
  for (s in species){
    bulk[[s]] <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data)
    bulk[[s]] <- bulk[[s]][rownames(expr)] # - log10(len[[s]][names(expr)]) + 3
  }
  bulk.HM <- abs(bulk[["H"]]-bulk[["M"]]) 
  bulk.CM <- abs(bulk[["C"]]-bulk[["M"]]) 
  bulk.BM <- abs(bulk[["B"]]-bulk[["M"]])
  bulkHS <- bulk.HM > 2*bulk.BM & bulk.HM > 2*bulk.CM & (bulk.HM>0|bulk.CM>0) & (bulk.HM>0|bulk.BM>0)
  bulkPS <- bulk.HM*2 < bulk.BM & bulk.HM*2 < bulk.CM & (bulk.HM>0|bulk.CM>0) & (bulk.HM>0|bulk.BM>0)
  
  # Genes x 4 logical matrix; column names are the variable names.
  all <- cbind(exprHS,exprPS,bulkHS,bulkPS)
  print(colSums(all))
  # Pairwise intersection sizes between the four gene sets.
  overlap <- matrix(NA,ncol(all),ncol(all))
  dimnames(overlap)[[1]] <- colnames(all)
  dimnames(overlap)[[2]] <- colnames(all)
  for (i in colnames(all)){
    for (j in colnames(all)){
        overlap[i,j] <- sum(all[,i]&all[,j])
    }
  }
  # pheatmap is not attached by any library() call in this file, so the call
  # is namespace-qualified to work in a fresh session.
  # NOTE(review): the top break is hard-coded to 440 and na_col is the darkest
  # palette colour; presumably this shows larger counts (e.g. the diagonal)
  # in the darkest shade - confirm.
  heatmaps[[r]] <- pheatmap::pheatmap(overlap,
                               color=hcol,
                               breaks=seq(min(overlap,na.rm=TRUE),440,length.out=10),
                               border_color=NA,
                               na_col=hcol[length(hcol)],
                               cluster_rows=FALSE,
                               cluster_cols=FALSE,
                               show_rownames=TRUE,
                               display_numbers = TRUE,
                               number_format = "%i", 
                               number_color = rgb(195,0,0,maxColorValue = 255),
                               silent=TRUE,
                               main=r)
}
# Element 4 of each pheatmap result is laid out side by side, one per region.
gridExtra::grid.arrange(grobs=lapply(heatmaps, function (x) x[[4]]),ncol=3)
dev.off()

# Figure: per region, a 4x4 heatmap of Jaccard coefficients (intersection size
# over union size) between the four gene sets exprHS / exprPS (from
# <region>.counts.txt) and bulkHS / bulkPS (from the row means of the Seurat
# RNA `data` slot). The diagonal is left NA.
pdf("bulk_corr_newMetric.nomito.jacc.pdf",width=11,height=3.5)
par(mfrow=c(1,3))
heatmaps <- list()
hcol <- blues9
for (r in regions){
  expr <- as.matrix(read.delim(paste0(r,".counts.txt"),header=TRUE,row.names=1))
  # Keep only genes that are rows of the human single-cell matrix.
  expr <- expr[rownames(expr) %in% rownames(rlist[["H"]][[r]]@assays$RNA@data),]
  # Absolute distance of each species to M.
  expr.HM <- abs(expr[,"H"]-expr[,"M"]) 
  expr.CM <- abs(expr[,"C"]-expr[,"M"]) 
  expr.BM <- abs(expr[,"B"]-expr[,"M"])
  # HS: the H-M distance is more than twice both the B-M and C-M distances.
  # PS: the H-M distance is less than half of both.
  # The trailing terms exclude genes where the compared distances are all 0.
  exprHS <- expr.HM > 2*expr.BM & expr.HM > 2*expr.CM & (expr.HM>0|expr.CM>0) & (expr.HM>0|expr.BM>0)
  exprPS <- expr.HM*2 < expr.BM & expr.HM*2 < expr.CM & (expr.HM>0|expr.CM>0) & (expr.HM>0|expr.BM>0)
  
  # Same classification on the per-gene means over cells, aligned to `expr`.
  bulk <- list()
  for (s in species){
    bulk[[s]] <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data)
    bulk[[s]] <- bulk[[s]][rownames(expr)] # - log10(len[[s]][names(expr)]) + 3
  }
  bulk.HM <- abs(bulk[["H"]]-bulk[["M"]]) 
  bulk.CM <- abs(bulk[["C"]]-bulk[["M"]]) 
  bulk.BM <- abs(bulk[["B"]]-bulk[["M"]])
  bulkHS <- bulk.HM > 2*bulk.BM & bulk.HM > 2*bulk.CM & (bulk.HM>0|bulk.CM>0) & (bulk.HM>0|bulk.BM>0)
  bulkPS <- bulk.HM*2 < bulk.BM & bulk.HM*2 < bulk.CM & (bulk.HM>0|bulk.CM>0) & (bulk.HM>0|bulk.BM>0)
  
  # Genes x 4 logical matrix; column names are the variable names.
  all <- cbind(exprHS,exprPS,bulkHS,bulkPS)
  print(colSums(all))
  # Pairwise Jaccard coefficients; the diagonal is skipped and stays NA.
  overlap <- matrix(NA,ncol(all),ncol(all))
  dimnames(overlap)[[1]] <- colnames(all)
  dimnames(overlap)[[2]] <- colnames(all)
  for (i in colnames(all)){
    for (j in colnames(all)){
      if(j!=i){
        overlap[i,j] <- sum(all[,i]&all[,j]) /sum(all[,i]|all[,j])
      }
    }
  }
  # pheatmap is not attached by any library() call in this file, so the call
  # is namespace-qualified to work in a fresh session.
  # NA cells (the diagonal) are drawn in na_col, the darkest palette colour.
  heatmaps[[r]] <- pheatmap::pheatmap(overlap,
                            color=hcol,
                            breaks=seq(min(overlap,na.rm=TRUE),max(overlap,na.rm=TRUE),length.out=10),
                            border_color=NA,
                            na_col=hcol[length(hcol)],
                            cluster_rows=FALSE,
                            cluster_cols=FALSE,
                            show_rownames=TRUE,
                            display_numbers = TRUE,
                            number_format = "%.2f", # %i
                            number_color = "darkred",
                            silent=TRUE,
                            main=r)
}
# Element 4 of each pheatmap result is laid out side by side, one per region.
gridExtra::grid.arrange(grobs=lapply(heatmaps, function (x) x[[4]]),ncol=3)
dev.off()


# Expected to provide `pbmc.markers` (indexed by region below). Nothing in
# this section refers by name to the contents of the second file.
load("pbmc.markers.nomito.RData")
load("../bulk/fc.th.dynamic.RData")

# Display names per region; the loop below picks entry (cluster id + 1).
ct.names <- list(
  acc = c("AC Ex","AC In","AC Ast","AC OD","AC OPC","AC MG"),
  cn  = c("CN Neu","CN OD","CN Ast","CN In","CN OPC","CN MG"),
  cer = c("CB Neu","CB In","CB Ast","CB OD")
)

# HS / PS flags from the three absolute distances to M.
# HS: H-M is more than twice both B-M and C-M; PS: H-M is less than half of
# both. Genes where the compared distances are all 0 are excluded.
spec.flags <- function(hm, cm, bm){
  informative <- (hm>0|cm>0) & (hm>0|bm>0)
  list(HS = hm > 2*bm & hm > 2*cm & informative,
       PS = hm*2 < bm & hm*2 < cm & informative)
}

# Jaccard coefficient of two named logical vectors restricted to `genes`.
set.jaccard <- function(a, b, genes){
  a.in <- a[names(a) %in% genes]
  b.in <- b[names(b) %in% genes]
  sum(a.in&b.in) / sum(a.in|b.in)
}

# Each accumulator gains one row c(jaccard, cell-type name, region) per
# marker cluster.
corrs <- c()
ctrHP <- c()
ctrPH <- c()
for (r in regions){
  expr <- as.matrix(read.delim(paste0(r,".counts.txt"),header=T,row.names=1))
  in.sc <- rownames(expr) %in% rownames(rlist[["H"]][[r]]@assays$RNA@data)
  expr <- expr[in.sc,]
  expr.HM <- abs(expr[,"H"]-expr[,"M"])
  expr.CM <- abs(expr[,"C"]-expr[,"M"])
  expr.BM <- abs(expr[,"B"]-expr[,"M"])
  expr.flags <- spec.flags(expr.HM, expr.CM, expr.BM)
  exprHS <- expr.flags$HS
  exprPS <- expr.flags$PS

  # Per-gene means over cells for every species, aligned to the rows of expr.
  bulk <- list()
  for (s in species){
    gene.means <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data)
    bulk[[s]] <- gene.means[rownames(expr)]
  }
  bulk.HM <- abs(bulk[["H"]]-bulk[["M"]])
  bulk.CM <- abs(bulk[["C"]]-bulk[["M"]])
  bulk.BM <- abs(bulk[["B"]]-bulk[["M"]])
  bulk.flags <- spec.flags(bulk.HM, bulk.CM, bulk.BM)
  bulkHS <- bulk.flags$HS
  bulkPS <- bulk.flags$PS

  for (i in unique(pbmc.markers[[r]]$cluster)){
    print(i)
    # Marker genes of cluster i with adjusted p-value below 0.05.
    is.marker <- pbmc.markers[[r]]$p_val_adj<0.05 & pbmc.markers[[r]]$cluster==i
    markers <- pbmc.markers[[r]][is.marker,"gene"]
    name <- ct.names[[r]][as.numeric(i)+1]

    # bulkHS vs. exprHS
    jacc <- set.jaccard(bulkHS, exprHS, markers)
    print(jacc)
    corrs <- rbind(corrs, c(jacc,name,r))

    # bulkHS vs. exprPS
    jacc <- set.jaccard(bulkHS, exprPS, markers)
    print(jacc)
    ctrHP <- rbind(ctrHP, c(jacc,name,r))

    # bulkPS vs. exprHS
    jacc <- set.jaccard(bulkPS, exprHS, markers)
    print(jacc)
    ctrPH <- rbind(ctrPH, c(jacc,name,r))
  }
}


# Reduce each label in column 2 to its second space-separated token
# (e.g. "AC Ex" -> "Ex").
second.token <- function(labels){
  vapply(strsplit(labels," "), function (parts) parts[[2]], character(1))
}
corrs[,2] <- second.token(corrs[,2])
ctrHP[,2] <- second.token(ctrHP[,2])
ctrPH[,2] <- second.token(ctrPH[,2])

# One colour per region, looked up by the region name stored in column 3.
cols <- setNames(c("#FF9801","#1665C0","#26A59A"), regions)

# Append the region colour as a fourth column of every table.
corrs <- cbind(corrs,cols[corrs[,3]])
ctrHP <- cbind(ctrHP,cols[ctrHP[,3]])
ctrPH <- cbind(ctrPH,cols[ctrPH[,3]])

# Draw one panel from a table whose columns are: 1 = Jaccard coefficient,
# 2 = cell-type label, 4 = fill colour. The first plot() call only sets up the
# axes (everything is drawn in white); the triangles are added by points().
jaccard.panel <- function(tab, title, legend.pos){
  cell.type <- as.factor(tab[,2])
  jaccard <- as.numeric(tab[,1])
  plot(cell.type,jaccard,
       col="white",border="white",ylab="Jaccard coefficient",ylim=c(0,0.3),las=1,main=title)
  points(cell.type,jaccard,
         bg=tab[,4],col="lightgray",pch=24,cex=2,lwd=0.5)
  legend(legend.pos,names(cols),pt.bg=cols,col="lightgray",pch=24,bty="n",pt.cex=2,pt.lwd=0.5)
}

# Three panels side by side, one per comparison table.
pdf("corrs.nomito.pdf",width=10,height=3.5)
par(mfrow=c(1,3))
jaccard.panel(corrs, "H-spec. scRNA-seq vs. H-spec. RNA-seq", "bottomright")
jaccard.panel(ctrHP, "H-spec. scRNA-seq vs. P-spec. RNA-seq", "topright")
jaccard.panel(ctrPH, "P-spec. scRNA-seq vs. H-spec. RNA-seq", "topright")
dev.off()

