# All relative input/output paths used by this script resolve against this
# directory.
setwd("~/brainmap/seurat")
# Seurat supplies the object class and SubsetData() used when splitting cells.
library(Seurat)
library(squash)
library(dplyr)
library(clusterProfiler)

# Load the integrated per-region objects. The .RData file is expected to define
# `rlist` (indexed by region); keep it under another name so that `rlist` can
# be rebuilt below as species -> region.
load("integrated.nomito.RData")
rlist.all <- rlist

regions <- c("acc", "cn", "cer")
species <- c("H", "C", "B", "M")

# One empty per-region container for every species.
rlist <- setNames(rep(list(list()), length(species)), species)

for (r in regions) {
  cell_names <- names(rlist.all[[r]]@active.ident)
  for (s in species) {
    # Cells whose name contains "<species>_".
    sel <- grep(paste0(s, "_"), cell_names, value = TRUE)
    print(head(sel))
    print(length(sel))
    rlist[[s]][[r]] <- SubsetData(rlist.all[[r]], cells = sel)
  }
}
# Drop the combined objects (and the scratch name vector) once split.
rm(rlist.all, cell_names)

# Ortholog table: row names are the reference identifiers; columns "c", "b"
# and "m" hold the matching identifier in each of the other species.
mtx <- as.matrix(read.delim("ortho.length.txt", header = TRUE, row.names = 1))
head(mtx)

# ids[[S]] is a lookup vector: names = identifier in species S,
# values = reference (row-name) identifier.
row_ids <- rownames(mtx)
ids <- list()
for (s in c("c", "b", "m")) {
  ids[[toupper(s)]] <- setNames(row_ids, mtx[, s])
}
# "H" uses the row identifiers directly, so it maps onto itself.
ids[["H"]] <- setNames(row_ids, row_ids)
head(ids)

# Per-species gene lengths, re-keyed by the reference identifier from `ids`.
len <- list()
for (s in species) {
  mtx <- as.matrix(read.delim(paste0(s, ".v93.txt"), header = TRUE, row.names = 1))
  # Keep only genes that have an entry in the ortholog lookup.
  has_ortholog <- rownames(mtx) %in% names(ids[[s]])
  mtx <- mtx[has_ortholog, ]
  # The "mean" column is taken as the gene length; names are translated from
  # the species' own identifiers to the reference identifiers.
  len[[s]] <- setNames(as.numeric(mtx[, "mean"]), ids[[s]][rownames(mtx)])
}
head(len)

# For each region, collect the genes that pass the expression threshold (> -1)
# both in the tabulated per-species values from "<region>.counts.txt" and in
# the mean over the single-nucleus data.
genes <- list()
for (r in regions) {
  expr <- as.matrix(read.delim(paste0(r, ".counts.txt"), header = TRUE, row.names = 1))

  # Restrict to genes present in every species' RNA data and length table.
  for (s in species) {
    in_assay <- rownames(expr) %in% rownames(rlist[[s]][[r]]@assays$RNA@data)
    expr <- expr[in_assay, ]
    has_len <- rownames(expr) %in% names(len[[s]])
    expr <- expr[has_len, ]
  }

  # Length-normalise each species column: subtract log10(length) and add 3.
  # NOTE(review): assumes the table is already on a log10 scale -- confirm.
  gene_ids <- rownames(expr)
  for (s in species) {
    expr[, s] <- expr[, s] - log10(len[[s]][gene_ids]) + 3
  }
  hist(rowMeans(expr))

  # Per-species mean over cells, log10 with a 0.001 pseudo-count, centred on
  # the median over all genes, then length-normalised like `expr`.
  bulk <- list()
  for (s in species) {
    log_mean <- log10(Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data) + 0.001)
    bulk[[s]] <- log_mean[gene_ids] - median(log_mean) -
      log10(len[[s]][gene_ids]) + 3
    print(sum(is.na(bulk[[s]])))
  }

  # Average of the four species.
  avg <- (bulk[["H"]] + bulk[["B"]] + bulk[["C"]] + bulk[["M"]]) / 4
  hist(avg)
  print(length(avg))
  print(dim(expr))

  expressed <- avg > (-1) & rowMeans(expr) > (-1)
  genes[[r]] <- names(avg)[expressed]
  print(length(genes[[r]]))
}

# Sanity check for the ACC region: correlate the human ("H") column of the TPM
# table with the human counts after subtracting log10(gene length).
tpm <- as.matrix(read.delim("acc.txt", header = TRUE, row.names = 1))
cnt <- as.matrix(read.delim("acc.counts.txt", header = TRUE, row.names = 1))

# Genes present in both tables and with a known human gene length.
sel <- rownames(tpm)[rownames(tpm) %in% rownames(cnt)]
sel <- sel[sel %in% names(len[["H"]])]
length(sel)

# Use the human lengths explicitly. The previous code indexed `len` with the
# leftover loop variable `s`, which still holds the last species iterated
# ("M"), so human counts were normalised by another species' gene lengths.
cor(tpm[sel, "H"], cnt[sel, "H"] - log10(len[["H"]][sel]))


# Compare the human single-nucleus mean expression with the averaged values in
# "pollen.avg.v2.txt", drawing one density-coloured scatter per dataset column
# suffix (".B" for brain data, ".O" for organoid data) and region: 2 x 3 panels.

# The reference table does not depend on the dataset or region, so it is read
# once here instead of on every inner iteration. Reading it before opening the
# device also avoids leaving a dangling png device if the file is missing.
pollen <- as.matrix(read.delim("pollen.avg.v2.txt", header = TRUE, row.names = 6))
pollen_genes <- rownames(pollen)

## Palette used to map point densities to colours (loop-invariant)
cols <- colorRampPalette(c("#000099", "#00FEFF", "#45FE4F",
                           "#FCFF00", "#FF9400", "#FF3100"))(256)

png("pollen_corr.v2.nomito.png", width = 3600, height = 2600, res = 600, pointsize = 7.92)
par(mfrow = c(2, 3))
blist <- list()
elist <- list()
s <- "H"
for (d in c(".B", ".O")) {
  # NOTE(review): both containers are reset for every dataset, so after the
  # loop `elist` only holds the values from the last (".O") pass -- confirm
  # that this is intended.
  blist[[s]] <- list()
  elist[[s]] <- list()
  for (r in regions) {
    # Reference expression: log10 with a 0.001 pseudo-count, median-centred.
    expr <- log10(as.numeric(pollen[, paste0(s, d)]) + 0.001) # .O for organoid data, .B for brain data
    expr <- expr - median(expr)
    names(expr) <- pollen_genes
    elist[[s]][[r]] <- expr
    print(c(s, r))
    print(length(expr))
    print(head(expr))
    print(sum(is.na(expr)))
    # Keep only genes that are also present in the single-nucleus data.
    expr <- expr[names(expr) %in% rownames(rlist[[s]][[r]]@assays$RNA@data)]
    print(length(expr))

    # Mean over cells, log10 (no pseudo-count: genes with a zero mean become
    # -Inf and are excluded from the correlation below), median-centred.
    bulk <- Matrix::rowMeans(rlist[[s]][[r]]@assays$RNA@data)
    bulk <- log10(bulk)
    bulk <- bulk - median(bulk)
    blist[[s]][[r]] <- bulk
    bulk <- bulk[names(expr)]
    print(c(median(expr), median(bulk)))

    # Optional length normalisation, currently disabled:
    #bulk <- bulk[names(bulk) %in% names(len[[s]])]
    #expr <- expr[names(expr) %in% names(len[[s]])]
    #print(length(expr))
    x1 <- bulk #- log10(len[[s]][names(bulk)]) + 3 # counts -> RPKM
    x2 <- expr #- log10(len[[s]][names(bulk)]) + 3 # counts -> RPKM
    df <- data.frame(x1, x2)

    # Correlate only the points where the single-nucleus value is finite.
    finite_x1 <- is.finite(x1)
    corr <- cor(x1[finite_x1], x2[finite_x1], use = "pairwise.complete.obs")

    ## Use densCols() output to get density at each point
    x <- densCols(x1, x2, colramp = colorRampPalette(c("black", "white")))
    df$dens <- col2rgb(x)[1, ] + 1L

    ## Map densities to colors
    df$col <- cols[df$dens]

    ## Plot it, reordering rows so that densest points are plotted on top
    plot(x2 ~ x1, data = df[order(df$dens), ], pch = 20, col = col,
         xlab = "Expression, nuc-seq", ylab = "Expression, rna-seq", main = r)
    legend("bottomright", paste("R =", round(corr, digits = 2)), bty = "n", cex = 1.4)
    legend("topleft", s, bty = "n", cex = 1.4)
  }
}
dev.off()
