# gene.ensembl = read.table("~/Downloads/antibody.ensembl.txt",sep=";",header=T)
# row.names(gene.ensembl) = gene.ensembl$ensembl_gene_id
# 
# human.spec = read.table("~/Downloads/human-spec_ProtCoding_2.0.pdf.txt",sep="\t",header=T)
# human.spec.sub = human.spec[which(human.spec$Type=="H<B<M or M<B<H"| human.spec$Type == "H<C<M or M<C<H"),]
# human.spec.sub.selected = human.spec.sub[human.spec.sub$Gene %in% gene.ensembl$ensembl_gene_id,]
# 
# human.spec.lipid = read.table("~/Downloads/human-spec_Lipid_2.0.pdf.txt",sep="\t",header=T)
# human.spec.sub.selected.v2 = human.spec.sub.selected[human.spec.sub.selected$Gene %in% human.spec.lipid$Gene,]
# 
# df = data.frame(GENE = as.character(gene.ensembl[as.character(human.spec.sub.selected$Gene),2]), REGION = as.character(human.spec.sub.selected[,3]))
# write.table(unique(df),"~/Downloads/new.antibody.table.txt",col.names = T,row.names = F,quote=F,sep="\t")
# 
# df2 = data.frame(GENE = as.character(gene.ensembl[as.character(human.spec.sub.selected.v2$Gene),2]), REGION = as.character(human.spec.sub.selected.v2[,3]))
# write.table(unique(df2),"~/Downloads/new.antibody.lipid.table.txt",col.names = T,row.names = F,quote=F,sep="\t")
# 
# tpm.table = read.table("~/Downloads/brain.map.tpm.tab")
# 
# count.table = read.table("~/Downloads/brainmap.counts",sep=",",header=T)
# rownames(count.table) = count.table[,1]
# count.table = count.table[,-1]
# 
# human.length = read.table("~/Downloads/human.v93.txt",header=T)
# 
# count.table.v2 = count.table[rownames(count.table) %in% human.length$gene, ]
# tpm.table.v2 = tpm.table[rownames(tpm.table) %in% human.length$gene, ]
# intersect.genes = intersect(rownames(tpm.table.v2),rownames(count.table.v2))
# tpm.table.v2 = tpm.table.v2[intersect.genes,]
# count.table.v2 = count.table.v2[intersect.genes,]
# rownames(human.length) = human.length$gene
# human.length.v2 = human.length[intersect.genes,]
# 
# pdf("~/Downloads/human.correlation.pdf")
# for (j in 1:4){
#   new.tpm = apply(count.table.v2,2,function(x){x/(human.length.v2[,(j+1)]/10^3)})
#   new.tpm.v2 = apply(new.tpm,2,function(x){x/(sum(x)/10^6)})
#   
#   human.check = colnames(tpm.table.v2)[grepl("H",colnames(tpm.table.v2))]
#   correlation.list = c()
#   for (i in human.check){
#     two.cors = cor(tpm.table.v2[,i],new.tpm.v2[,i],method="pearson")
#     correlation.list = c(correlation.list,two.cors)
#   }
#   
#   hist(correlation.list, xlab = "Cor", main = paste0("Usage of ",j, " length"))
# }
# 
# dev.off()

# ---- Run settings ----
# norm.app   : "brain" selects the ".bybrain" expression matrix and
#              median-centres every Allen donor matrix; any other value
#              uses the plain matrix and skips the centring.
# probe.cols : how probes sharing an Entrez id are collapsed ("mean" / "max").
# cls.num    : number of clusters cut from the Allen region dendrogram;
#              a value <= 1 falls back to the groups in new.assignments.txt.
norm.app <- "no"
probe.cols <- "mean"
cls.num <- 3

# Region-to-cluster assignment table. Later code uses column 1 as the region
# names matched against the sample annotation, column 2 (V2) as the cluster
# label (a colour code) and column 3 as the Allen column names.
# comment.char = "" keeps the "#..." colour codes from being read as comments.
our.clusters <- read.table("~/Downloads/Expression/allen/new.assignments.txt",
                           sep = "\t", comment.char = "")

# Remove the regions labelled "#26A59A".
# Guard the negative index: our.clusters[-integer(0), ] selects *zero* rows,
# so without the length check the whole table would be emptied whenever the
# label is absent from the file.
idx <- which(our.clusters$V2 %in% "#26A59A")
if (length(idx) > 0) {
  our.clusters <- our.clusters[-idx, ]
}

# Pick the expression matrix that matches the requested normalisation.
expression.file <- if (norm.app == "brain") {
  "~/Downloads/Expression/ProtCoding.34regions_mtx.txt.bybrain"
} else {
  "~/Downloads/Expression/ProtCoding.34regions_mtx.txt"
}
our.expression <- read.table(expression.file)

# Sample annotation (no header; columns are addressed as V1, V2, ...).
our.meta <- read.table("~/Downloads/Expression/annot_samples.txt", sep = "\t")

# Split every expression column name on "_" and take elements 4, 10, 16, ...
# of the flattened result, i.e. the 4th piece of each name.
# NOTE(review): this assumes every column name splits into exactly six
# pieces - confirm against the matrix header.
name.pieces <- unlist(strsplit(colnames(our.expression), "_"))
sample.ids <- name.pieces[seq(4, 6 * ncol(our.expression), 6)]

# Keep annotation rows whose V5 is one of those ids and whose V6 is one of
# the four codes HA / HB / HC / HD.
id.is.present <- our.meta$V5 %in% sample.ids
code.is.wanted <- our.meta$V6 %in% c("HA", "HB", "HC", "HD")
our.meta.human <- our.meta[id.is.present & code.is.wanted, ]

# Prefix the row names with "X" so they can be used directly as column
# names of the expression matrix in the subset below.
rownames(our.meta.human) <- paste0("X", rownames(our.meta.human))
our.expression.sub <- our.expression[, rownames(our.meta.human)]


# Per-cluster mean expression of our own samples.
# For every cluster label (column 2 of our.clusters) collect its regions
# (column 1), find the samples annotated with one of those regions (column 7
# of our.meta.human) and average their expression per row. The result
# our.my has one column per cluster label, in order of first appearance.
cluster.labels <- as.character(unique(our.clusters$V2))
cluster.means <- lapply(cluster.labels, function(label) {
  regions <- as.character(our.clusters[which(our.clusters[, 2] == label), 1])
  samples <- rownames(our.meta.human)[which(our.meta.human[, 7] %in% regions)]
  # drop = FALSE keeps a two-dimensional object even when exactly one sample
  # matches; without it the subset collapses to a vector and rowMeans() fails.
  rowMeans(our.expression.sub[, samples, drop = FALSE], na.rm = TRUE)
})
# Bind once instead of growing the matrix inside the loop.
our.my <- do.call(cbind, cluster.means)

# One expression table per Allen donor.
micro.list <- c(
  "~/Downloads/Expression/allen/normalized_microarray_donor9861/OurExpression.txt",
  "~/Downloads/Expression/allen/normalized_microarray_donor10021/OurExpression.txt",
  "~/Downloads/Expression/allen/normalized_microarray_donor12876/OurExpression.txt",
  "~/Downloads/Expression/allen/normalized_microarray_donor14380/OurExpression.txt",
  "~/Downloads/Expression/allen/normalized_microarray_donor15496/OurExpression.txt",
  "~/Downloads/Expression/allen/normalized_microarray_donor15697/OurExpression.txt"
)

# Read every donor table, keep only the columns named in column 3 of
# our.clusters, and coerce each kept column to numeric. With norm.app set to
# "brain" the donor matrix is additionally centred on its overall median.
gene.express <- lapply(micro.list, function(donor.file) {
  donor.table <- read.table(donor.file, sep = "\t", header = TRUE)
  is.wanted <- colnames(donor.table) %in% as.character(our.clusters[, 3])
  donor.matrix <- apply(donor.table[, is.wanted], 2, as.numeric)
  if (norm.app == "brain") {
    donor.matrix <- donor.matrix - median(donor.matrix, na.rm = TRUE)
  }
  donor.matrix
})


# Column-bind the matrices of all donors into one wide matrix. Columns with
# the same name (the same region in several donors) are kept side by side.
# do.call() binds however many donors gene.express holds, instead of the
# previous hard-coded 2:6 that silently depended on micro.list having
# exactly six entries.
new.genes.cb <- do.call(cbind, gene.express)


# Average the columns that share a name, giving one column per distinct
# column name of new.genes.cb.
new.names.cols <- unique(colnames(new.genes.cb))
region.means <- lapply(new.names.cols, function(region) {
  # drop = FALSE: a name that occurs in only one column would otherwise
  # collapse to a plain vector and make rowMeans() fail.
  # NA values are deliberately not removed here (no na.rm), as before.
  rowMeans(new.genes.cb[, colnames(new.genes.cb) %in% region, drop = FALSE])
})
combined.regions <- do.call(cbind, region.means)
colnames(combined.regions) <- new.names.cols

# NOTE(review): column 24 is removed by position; the reason is not visible
# in this script - confirm which region this is meant to exclude.
combined.regions <- combined.regions[, -24]

# Hierarchical clustering of the Allen regions on 1 - correlation of their
# mean profiles (Ward linkage, "ward.D2").
d.cols <- as.dist(1 - cor(combined.regions))
hcl.cols <- hclust(d.cols, method = "ward.D2")

# Decide how regions are grouped:
#   cls.num > 1 : cut the dendrogram into cls.num clusters;
#   otherwise   : reuse the groups from our.clusters (column 2 = label,
#                 column 3 = Allen column name).
if (cls.num > 1) {
  cls.cols <- cutree(hcl.cols, k = cls.num)
  region.groups <- lapply(seq_len(cls.num), function(k) {
    names(which(cls.cols == k))
  })
} else {
  region.groups <- lapply(as.character(unique(our.clusters$V2)), function(label) {
    unique(as.character(our.clusters[which(our.clusters[, 2] == label), 3]))
  })
}

# Row means over all donor columns belonging to each group; our.d2 gets one
# column per group. The averaging, previously duplicated in both branches,
# is done once here.
group.means <- lapply(region.groups, function(regions) {
  members <- which(colnames(new.genes.cb) %in% regions)
  # drop = FALSE keeps a matrix when a group has a single column; otherwise
  # the subset becomes a vector and rowMeans() fails.
  rowMeans(new.genes.cb[, members, drop = FALSE], na.rm = TRUE)
})
our.d2 <- do.call(cbind, group.means)

#' Map human Entrez gene ids to Ensembl gene ids through biomaRt.
#'
#' Queries the "hsapiens_gene_ensembl" dataset of the "ensembl" mart with
#' getLDS(), linking the human mart to itself, and prints the first rows of
#' the result as a quick visual check.
#'
#' @param x Vector of Entrez gene ids, used as the filter values.
#' @return The data frame returned by getLDS(), with unique rows. The calling
#'   code reads its columns as `NCBI.gene.ID` and `Gene.stable.ID`.
convertHumanEntrez <- function(x) {
  # library() stops with an error if biomaRt is missing; require() would only
  # return FALSE and let the call fail later with a less helpful message.
  library("biomaRt")
  human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
  # The mouse mart that used to be created here was never used; dropping it
  # saves one remote connection.
  # NOTE(review): the attribute/filter name "entrezgene" depends on the
  # Ensembl release being queried - confirm it is still valid for the
  # release in use.
  genesV2 <- getLDS(attributes = c("entrezgene"), filters = "entrezgene",
                    values = x, mart = human,
                    attributesL = c("ensembl_gene_id"), martL = human,
                    uniqueRows = TRUE)
  # Print the first 6 mappings found to the screen
  print(head(genesV2))
  genesV2
}

# Probe annotation of one Allen donor; its entrez_id column is used for
# every donor matrix further down.
all.probes <- read.csv("~/Downloads/Expression/allen/normalized_microarray_donor9861/Probes.csv")

# Entrez -> Ensembl mapping for all probe genes.
entrez.human <- convertHumanEntrez(all.probes$entrez_id)

# Gene ids of our own expression matrix (single column, no header).
genes.our <- read.table("~/Downloads/Expression/genes.names.txt", header = FALSE)

# Keep only the mappings whose Ensembl id occurs in our gene list and index
# the remaining rows by their Entrez id.
is.shared.gene <- entrez.human$Gene.stable.ID %in% genes.our$V1
entrez.human.v2 <- entrez.human[is.shared.gene, ]
rownames(entrez.human.v2) <- entrez.human.v2$NCBI.gene.ID


# Collapse probes to genes: append the Entrez id of every probe to the
# per-group Allen means, drop probes without an id, and summarise all probes
# of the same gene with mean or max (chosen by probe.cols).
#
# Column names follow the number of groups in our.d2 (V1..Vk for the values,
# V(k+1) for the Entrez id) instead of being hard-coded for three groups.
n.groups <- NCOL(our.d2)
value.cols <- paste0("V", seq_len(n.groups))
id.col <- paste0("V", n.groups + 1)

our.d3 <- as.data.frame(cbind(our.d2, all.probes$entrez_id))
colnames(our.d3) <- c(value.cols, id.col)

# Logical mask instead of -which(...): with no NA ids, which() returns
# integer(0) and negative indexing with it would remove *all* rows.
our.d3 <- our.d3[!is.na(our.d3[[id.col]]), ]

# The previous "max" branch referred to columns V5..V7 that do not exist
# with the four-column naming; choosing only the summary function here keeps
# both modes in step with the actual columns.
collapse.fun <- switch(probe.cols,
  mean = mean,
  max = max,
  stop("probe.cols must be \"mean\" or \"max\", got: ", probe.cols)
)

# Base aggregate() replaces plyr::ddply(), which this script never loads.
# The layout is the same: id column first, then V1..Vk, rows sorted by id.
our.d4 <- aggregate(our.d3[value.cols], by = our.d3[id.col], FUN = collapse.fun)
rownames(our.d4) <- our.d4[, 1]

# Put both data sets on the same genes, in the same order: one row per
# mapping in entrez.human.v2.
# Allen side: rows are looked up by Entrez id (the row names of our.d4).
our.d5 <- our.d4[rownames(entrez.human.v2), ]

# Our side: label the rows with our gene ids, then look them up by Ensembl id.
# as.character() guards against factor columns: indexing a matrix with a
# factor would use its integer codes and silently pick the wrong rows.
rownames(our.my) <- as.character(genes.our$V1)
our.my <- our.my[as.character(entrez.human.v2$Gene.stable.ID), ]


# Pearson correlation between each of the first six columns of our.my (rows
# of the result) and each Allen group column of our.d5 (columns of the
# result). Column 1 of our.d5 holds the Entrez id, so the group columns are
# 2 .. cls.num + 1.
our.columns <- 1:6
allen.columns <- 2:(cls.num + 1)
all.test.cor <- vapply(allen.columns, function(allen.col) {
  vapply(our.columns, function(our.col) {
    cor(our.my[, our.col], our.d5[, allen.col], method = "pearson")
  }, numeric(1))
}, numeric(6))

library(pheatmap)

# Both outputs share one file stem that encodes the run settings.
output.stem <- paste0("~/Downloads/Expression/corr.", norm.app, ".norm.",
                      probe.cols, ".", cls.num)

# Heatmap of the correlation matrix, rows and columns in their given order
# (no clustering of either axis).
pdf(paste0(output.stem, ".pdf"), width = 6, height = 6)
pheatmap(all.test.cor, cluster_rows = FALSE, cluster_cols = FALSE)
dev.off()

# The same matrix as a bare tab-separated table (no row or column names).
write.table(all.test.cor, paste0(output.stem, ".txt"),
            sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)