# Work inside the project directory; every relative path below resolves here.
setwd("~/brainmap/bulk")

# Load the saved workspace, list the objects now available, and show the
# length of each element of hs.HC as a quick sanity check.
load("human-spec_ProtCoding_2.0.pdf.RData")
objects()
lapply(hs.HC, length)

# Header-less, tab-delimited region table. Its second column becomes the row
# names; the second of the remaining columns is kept as a vector that is
# named by those row names.
region_table <- read.delim("order_EBnames.txt", row.names = 2, header = FALSE)
colors <- as.matrix(region_table)[, 2]
regions <- names(colors)

# Region-by-region overlap of the gene sets selected from hs.HC and hs.HB.
#
# For every region the selected genes are the names of the entries whose
# value exceeds log2(2). Four matrices are produced, each with `regions` on
# both axes and NA on the diagonal:
#   ovl.C / ovl.B : shared genes divided by the size of the union (Jaccard)
#   avg.C / avg.B : number of shared genes

# Names of the entries of scores[[region]] that exceed `cutoff`, returned as
# a list with one character vector per region (named by region).
genes_above <- function(scores, region_names, cutoff = log2(2)) {
  sets <- lapply(region_names, function(region) {
    values <- scores[[region]]
    names(values)[values > cutoff]
  })
  names(sets) <- region_names
  sets
}

# Pairwise comparison of the gene sets in `sets`. With jaccard = TRUE each
# cell is |A and B| / |A or B|; otherwise it is the raw count |A and B|.
# The diagonal is left as NA.
pairwise_overlap <- function(sets, jaccard = FALSE) {
  region_names <- names(sets)
  n_regions <- length(region_names)
  result <- matrix(NA, n_regions, n_regions,
                   dimnames = list(region_names, region_names))
  for (i in region_names) {
    for (j in region_names) {
      if (i != j) {
        n_shared <- sum(sets[[i]] %in% sets[[j]])
        if (jaccard) {
          n_union <- length(unique(c(sets[[i]], sets[[j]])))
          result[i, j] <- n_shared / n_union
        } else {
          result[i, j] <- n_shared
        }
      }
    }
  }
  result
}

# Build each region's gene set once instead of once per matrix cell.
sets.C <- genes_above(hs.HC, regions)
sets.B <- genes_above(hs.HB, regions)

ovl.C <- pairwise_overlap(sets.C, jaccard = TRUE)
ovl.B <- pairwise_overlap(sets.B, jaccard = TRUE)

avg.C <- pairwise_overlap(sets.C)
avg.B <- pairwise_overlap(sets.B)

library(pheatmap)

# overlap.pdf: six region-by-region heatmaps. The first three show the
# Jaccard matrices (C, B and their mean); the last three show the shared-gene
# counts (C, B and their mean) after normalisation.

# Draw one unclustered heatmap of `mat` with region annotations on both axes
# (colours taken from `colors`), then add `label` with title().
draw_region_heatmap <- function(mat, main, label) {
  region_annotation <- data.frame(row.names = names(colors),
                                  clusters = names(colors))
  pheatmap(mat,
           cluster_rows = FALSE,
           cluster_cols = FALSE,
           annotation_colors = list(clusters = colors),
           annotation_row = region_annotation,
           annotation_col = region_annotation,
           annotation_legend = FALSE,
           annotation_names_row = FALSE,
           annotation_names_col = FALSE,
           main = main)
  title(main = label, cex = 0.8, adj = 1)
}

# Divide every column by its mean, then every row by its mean (NAs ignored),
# and cap the result at `cap`.
# sweep() is used rather than apply(m, 1, f): apply() over rows returns each
# row's result as a COLUMN, so the previous code handed pheatmap a matrix
# that was transposed relative to its row/column labels.
normalize_overlap <- function(counts, cap = 1.3) {
  scaled <- sweep(counts, 2, colMeans(counts, na.rm = TRUE), "/")
  scaled <- sweep(scaled, 1, rowMeans(scaled, na.rm = TRUE), "/")
  scaled[scaled > cap] <- cap
  scaled
}

pdf("overlap.pdf", width = 8.5, height = 8)
plot.new()

draw_region_heatmap(ovl.C, "C", "Jaccard coeff.")
draw_region_heatmap(ovl.B, "B", "Jaccard coeff.")
draw_region_heatmap((ovl.C + ovl.B) / 2, "(C+B)/2", "Jaccard coeff.")

draw_region_heatmap(normalize_overlap(avg.C), "C", "Overlap")
draw_region_heatmap(normalize_overlap(avg.B), "B", "Overlap")
draw_region_heatmap(normalize_overlap((avg.C + avg.B) / 2), "(C+B)/2", "Overlap")

dev.off()

# shared_genes.pdf: for every gene, in how many regions does its value exceed
# log2(2)? One histogram for C, one for B, and one for the mean of the two
# counts.
hs_cutoff <- log2(2)
scores_C <- as.matrix(as.data.frame(hs.HC))
scores_B <- as.matrix(as.data.frame(hs.HB))

pdf("shared_genes.pdf", width = 6, height = 6)
hist(rowSums(scores_C > hs_cutoff),
     xlab = "Number of regions", ylab = "Number of genes",
     breaks = 0:33, main = "C", right = FALSE)
hist(rowSums(scores_B > hs_cutoff),
     xlab = "Number of regions", ylab = "Number of genes",
     breaks = 0:33, main = "B", right = FALSE)
# Count across the C and B columns together, halved to give the C/B mean.
h <- rowSums(cbind(scores_C, scores_B) > hs_cutoff) / 2
hist(h,
     xlab = "Number of regions", ylab = "Number of genes",
     breaks = 0:33, main = "(C+B)/2", right = FALSE)
dev.off()

# Two tab-separated columns (gene name, mean region count), highest first.
ranked <- rbind(names(h), h)[, order(h, decreasing = TRUE)]
write(ranked, "shared_genes.txt", sep = "\t", ncolumns = 2)

library(pheatmap)
# shared_genes_regions.pdf: k-means-aggregated heatmaps (k = 2..10) of the
# genes with h > 10, first on the mean C/B values, then on their
# above-threshold indicator, followed by a per-region count plot.
pdf("shared_genes_regions.pdf", width = 7, height = 4)

# Mean of the C and B values, columns ordered as `regions`.
mtx <- (as.data.frame(hs.HC, optional = TRUE)[, regions] +
          as.data.frame(hs.HB, optional = TRUE)[, regions]) / 2
mtx[1:5, 1:5]
mtx <- mtx[names(h)[h > 10], ]

# pheatmap's kmeans_k aggregates rows with k-means, which starts from random
# centres. Seeding before every call makes each clustering reproducible;
# ph$k3 in particular is reused further down for GO enrichment.
kmeans_seed <- 1

ph <- list()
for (k in 2:10) {
  set.seed(kmeans_seed)
  ph[[paste0("k", k)]] <- pheatmap(mtx,
                                   kmeans_k = k,
                                   border_color = NA,
                                   cluster_cols = FALSE,
                                   gaps_col = c(10, 18, 21, 23, 29, 30),
                                   show_rownames = TRUE)
}

# Replace the values by a 0/1 indicator of exceeding log2(2).
mtx <- apply(mtx, 2, function(x) as.numeric(x > log2(2)))
for (k in 2:10) {
  set.seed(kmeans_seed)
  pheatmap(mtx,
           kmeans_k = k,
           border_color = NA,
           cluster_cols = FALSE,
           gaps_col = c(10, 18, 21, 23, 29, 30),
           show_rownames = TRUE)
}

# Number of the selected genes above threshold in each region; the wide
# bottom margin leaves room for the perpendicular region labels.
par(mar = c(13, 4, 0, 4) + 0.1, las = 3)
plot(seq_len(ncol(mtx)), colSums(mtx), pch = 16, xaxt = "n", xlab = "",
     ylab = "Number of shared genes per region")
axis(1, at = seq_len(ncol(mtx)), labels = colnames(mtx))
dev.off()


# Re-read the region table keyed by its first column, so that a numeric
# position can be turned into the value stored in the next column (used
# below to index hs.HC).
colors <- as.matrix(read.delim("order_EBnames.txt", row.names = 1, header = FALSE))[, 1]

# Genes whose hs.HC value exceeds log2(2) in every one of `region_names`:
# pool the per-region selections and keep the names that were tallied exactly
# once per region.
genes_in_every_region <- function(region_names) {
  pooled <- unlist(
    lapply(region_names, function(region) {
      names(hs.HC[[region]])[hs.HC[[region]] > log2(2)]
    }),
    use.names = FALSE
  )
  tally <- table(pooled)
  names(tally)[tally == length(region_names)]
}

# Modules are defined by row positions in the region table.
M1 <- genes_in_every_region(colors[c(9, 10, 28)])
M2 <- genes_in_every_region(colors[c(24, 26, 28)])
M3 <- genes_in_every_region(colors[c(32)])

# Background gene set: the names of the first element of hs.HB.
bg <- names(hs.HB[[1]])
length(bg)
head(bg)
anyDuplicated(bg)  # 0 when every background gene occurs once

# Keep only module genes that are part of the background.
# NOTE(review): this used to test against names(bg)[bg == k]. bg is an
# unnamed character vector, so names(bg) is NULL and all three modules were
# always emptied. Membership in bg is assumed to be the intended filter --
# confirm against the original analysis.
M1 <- M1[M1 %in% bg]
M2 <- M2[M2 %in% bg]
M3 <- M3[M3 %in% bg]
length(M1)
length(M2)
length(M3)

# One gene per line.
write(M1, file = "M1.txt", ncolumns = 1)
write(M2, file = "M2.txt", ncolumns = 1)
write(M3, file = "M3.txt", ncolumns = 1)

library(DOSE)
library(clusterProfiler)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(ggplot2)

#library(enrichR)
#dbs <- listEnrichrDbs()
#dbs

# Symbols of the genes with h > 10, one per line.
symbols <- bitr(names(h)[h > 10], fromType = "ENSEMBL", toType = "SYMBOL",
                OrgDb = "org.Hs.eg.db")$SYMBOL
write(symbols, "symbols.shared.txt", ncolumns = 1)
#enriched <- enrichr(symbols, dbs[,1])
#printEnrich(enriched, "enrichr.shared.txt" , sep = "\t")

# Symbols of the genes with h < 6, one per line.
symbols <- bitr(names(h)[h < 6], fromType = "ENSEMBL", toType = "SYMBOL",
                OrgDb = "org.Hs.eg.db")$SYMBOL
write(symbols, "symbols.unique.txt", ncolumns = 1)
#enriched <- enrichr(symbols, dbs[,1])
#printEnrich(enriched, "enrichr.unique.txt" , sep = "\t")

# One label per entry of h, in the same order: the first symbol returned for
# the gene, or the original identifier when the lookup raises an error.
# The vector is preallocated and every slot is filled with exactly one value,
# so the labels cannot drift out of step with h.
gene_ids <- names(h)
symbols <- character(length(gene_ids))
for (idx in seq_along(gene_ids)) {
  symbols[idx] <- tryCatch(
    as.character(
      bitr(gene_ids[idx], fromType = "ENSEMBL", toType = "SYMBOL",
           OrgDb = "org.Hs.eg.db")$SYMBOL
    )[1],
    error = function(e) gene_ids[idx]
  )
}

# Two tab-separated columns (label, mean region count), highest first.
write(rbind(symbols, h)[, order(h, decreasing = TRUE)],
      "shared_genes.symbols.txt", sep = "\t", ncolumns = 2)

# GO (biological process) enrichment compared across the k = 3 k-means
# clusters stored in ph$k3.
# split() returns a list NAMED by cluster id, so every cluster carries a
# label into compareCluster().
# NOTE: the variable name `kmeans` is kept for compatibility; it masks
# stats::kmeans only as a variable, function calls still find the function.
cluster_of_gene <- ph$k3$kmeans$cluster
kmeans <- split(names(cluster_of_gene), cluster_of_gene)

# keyType is passed explicitly, as in the enrichGO() calls elsewhere in this
# script (which convert these identifiers from ENSEMBL); without it
# enrichGO() assumes Entrez ids.
ego <- compareCluster(kmeans,
                      fun = "enrichGO",
                      keyType = "ENSEMBL",
                      ont = "BP",
                      OrgDb = "org.Hs.eg.db",
                      universe = bg,
                      pvalueCutoff = 0.05,
                      pAdjustMethod = "BH")

pdf("shared_genes.kmeans.GO.pdf", width = 8, height = 4)
# Explicit print() so the plot is drawn even when the script is source()d.
print(dotplot(ego, showCategory = 30))
dev.off()

# GO (biological process) enrichment for "shared" genes (h above a cutoff)
# and "unique" genes (h below a cutoff), over several cutoffs.

# enrichGO() on ENSEMBL ids against the background `bg`.
# `bg` is itself the character vector of gene ids; names(bg) is NULL, so
# passing names(bg) as the universe did not supply the intended background.
go_bp_enrichment <- function(gene_ids) {
  enrichGO(gene          = gene_ids,
           universe      = bg,
           OrgDb         = org.Hs.eg.db,
           keyType       = "ENSEMBL",
           ont           = "BP",
           pAdjustMethod = "BH",
           qvalueCutoff  = 0.1,
           readable      = TRUE)
}

shared_cutoffs <- c(8, 9, 10, 11, 12)
unique_cutoffs <- c(4, 5, 6)

# Genes with h strictly above each cutoff; the cutoff and the number of
# selected genes are printed as progress output.
shared.ego <- list()
for (i in shared_cutoffs) {
  print(i)
  print(sum(h > i))
  shared.ego[[as.character(i)]] <- go_bp_enrichment(names(h)[h > i])
}

# Genes with h strictly below each cutoff.
unique.ego <- list()
for (i in unique_cutoffs) {
  print(i)
  print(sum(h < i))
  unique.ego[[as.character(i)]] <- go_bp_enrichment(names(h)[h < i])
}

# Gather every result in one list labelled ">cutoff" / "<cutoff".
all.ego <- list()
for (i in shared_cutoffs) {
  all.ego[[paste0(">", i)]] <- shared.ego[[as.character(i)]]
}
for (i in unique_cutoffs) {
  all.ego[[paste0("<", i)]] <- unique.ego[[as.character(i)]]
}

# Quick look at one result; drawn on whatever graphics device is current.
print(dotplot(unique.ego$`6`, showCategory = 30))

pdf("shared_genes.GO.pdf", width = 10, height = 8)
# Explicit print() so the plot is drawn even when the script is source()d.
print(dotplot(merge_result(all.ego), by = "count", showCategory = 30))
dev.off()

# Ends the R session here. Everything below only runs when it is executed
# by hand (for example line by line in an interactive session).
q()

# GO (biological process) enrichment of one module against the background
# `bg`, with permissive cutoffs (p <= 0.25, q <= 1).
# `bg` is passed directly: it is an unnamed character vector, so names(bg)
# is NULL and did not supply a background.
module_go <- function(module_genes) {
  enrichGO(gene          = module_genes,
           universe      = bg,
           OrgDb         = org.Hs.eg.db,
           keyType       = "ENSEMBL",
           ont           = "BP",
           pAdjustMethod = "BH",
           pvalueCutoff  = 0.25,
           qvalueCutoff  = 1,
           readable      = TRUE)
}

ego <- module_go(M1)
ego
ego <- module_go(M2)
ego
ego <- module_go(M3)
ego

