library(RColorBrewer)
library(gplots)
library(ape)
#library(viridis)

# Load the tab-delimited input table named by the 5th element of commandArgs().
# It is read without a header, so row 1 carries the sample names and column 1
# the peak identifiers; every cell is still a character string at this point.
mtx <- as.matrix(read.delim(commandArgs()[5], header = FALSE, row.names = NULL))
dim(mtx)

# Peel off the label row, then the label column.
samples <- mtx[1, ]
mtx <- mtx[seq(2, nrow(mtx)), ]
peaks <- mtx[, 1]
mtx <- mtx[, seq(2, ncol(mtx))]

# Keep only the columns whose sample name contains "_MS".
# NOTE(review): `samples` still has one entry per column of the table as read
# (column 1 included) while `mtx` has already lost that column, so these
# indices line up only if the header row is shifted by one field relative to
# the data rows (e.g. a write.table file with row names) -- confirm against
# the actual input.
ms.cols <- grep("_MS", samples)
mtx <- mtx[, ms.cols]
samples <- samples[ms.cols]

# Convert the character matrix to numeric, column by column.
mtx.num <- apply(mtx, 2, as.numeric)
dim(mtx.num)

# Split every sample name on "_" and pick fields counted from the end:
# last field = id, second to last = brain, third to last = ms.
name.parts <- strsplit(samples, "_")
field.from.end <- function(parts, offset) {
  unlist(lapply(parts, function(p) p[length(p) - offset]))
}
brain <- field.from.end(name.parts, 1)
id <- field.from.end(name.parts, 0)
ms <- field.from.end(name.parts, 2)
# The species code is the first character of the brain label.
species <- unlist(lapply(strsplit(brain, ""), function(ch) ch[1]))

# Drop the "CHA" samples, optionally restrict to one species, and remove peaks
# that contain missing / non-finite values.
# `brain` itself is not filtered here; it is re-derived from `samples` right
# after this section. drop = FALSE keeps mtx.num a matrix even when a single
# column or row survives a filter.
keep.cols <- brain != "CHA"
mtx.num <- mtx.num[, keep.cols, drop = FALSE]
samples <- samples[keep.cols]
species <- species[keep.cols]

# Optional species filter: applied only when commandArgs() has more than
# eight elements, in which case the 9th one is the species code to keep.
if (length(commandArgs()) > 8) {
   keep.sp <- species == commandArgs()[9]
   mtx.num <- mtx.num[, keep.sp, drop = FALSE]
   samples <- samples[keep.sp]
}
#mtx.num <- log10(mtx.num)

# Keep only the rows (peaks) in which every value is finite. `peaks` is
# filtered with the same mask: it is assigned later as the row names of the
# per-region averages, so it must stay aligned with the rows of mtx.num
# (otherwise that assignment fails as soon as one row is dropped here).
finite.rows <- rowSums(!is.finite(mtx.num)) == 0
mtx.num <- mtx.num[finite.rows, , drop = FALSE]
peaks <- peaks[finite.rows]
dim(mtx.num)

# Re-derive the per-sample annotations from the (now filtered) sample names so
# that they match the current columns of mtx.num. Fields are counted from the
# end of the "_"-separated name: last = id, second to last = brain,
# third to last = ms.
name.parts <- strsplit(samples, "_")
field.from.end <- function(parts, offset) {
  unlist(lapply(parts, function(p) p[length(p) - offset]))
}
brain <- field.from.end(name.parts, 1)
id <- field.from.end(name.parts, 0)
ms <- field.from.end(name.parts, 2)
# Species code = first character of the brain label.
species <- unlist(lapply(strsplit(brain, ""), function(ch) ch[1]))

# Sample annotation table (path = 6th element of commandArgs()): tab-separated,
# no header, short lines padded, with the second file column used as row names
# so that rows can be looked up by sample name.
batches <- as.matrix(read.table(commandArgs()[6], header = FALSE, fill = TRUE,
                                row.names = 2, sep = "\t"))

# Discard every sample whose entry in column 7 of `batches` is empty; that
# column is used below as the region label of each sample. The mask is
# computed once, before `samples` itself is shortened.
annotated <- batches[samples, 7] != ""
mtx.num <- mtx.num[, annotated]
id <- id[annotated]
ms <- ms[annotated]
brain <- brain[annotated]
species <- species[annotated]
samples <- samples[annotated]

# Start from the unnormalised values and print their range as a sanity check.
norm <- mtx.num
min(norm)
max(norm)

# Label the columns of a working copy with each sample's region (column 7 of
# `batches`) and derive the per-sample region vector from those labels.
clust <- norm
dim(clust)
region.labels <- batches[samples, 7]
dimnames(clust)[[2]] <- region.labels
length(region.labels)
regions <- colnames(clust)
regions <- regions[regions != ""]
length(regions)
table(regions)

##### normalization by median (for expression dataset) #####
# Centre every sample (column) on its own median, then rebuild the
# region-labelled working copy from the centred values.
subtract.median <- function(v) v - median(v)
dim(mtx.num)
norm <- apply(mtx.num, 2, subtract.median)
clust <- norm
dim(clust)
dimnames(clust)[[2]] <- batches[samples, 7]
##########

# Per-brain offset removal: within each brain, average the samples of every
# region that is present, average those region means into one value per peak,
# and subtract that value from all samples of the brain.
all.regions <- unique(regions)
for (b in unique(brain)) {
   in.brain <- brain == b
   avg.b <- NULL
   for (r in all.regions) {
      sel <- regions == r & in.brain
      n.sel <- sum(sel)
      if (n.sel == 0) {
         next
      }
      # A single matching sample is used as is; several are averaged per peak.
      region.avg <- if (n.sel > 1) rowMeans(clust[, sel]) else clust[, sel]
      avg.b <- cbind(avg.b, region.avg)
   }
   # One offset per peak; it is recycled down each column of the brain's block.
   coeff <- rowMeans(avg.b)
   norm[, in.brain] <- norm[, in.brain] - coeff
}
dim(norm)
#norm <- mtx.num

########## ANOVA ###########

# Fit all peaks at once as a multi-response model on region, species and
# their interaction, then take, for every peak, the p-value in the third row
# of its univariate ANOVA table (with this formula: regions:species).
length(regions)
df <- t(norm)
res.man <- manova(df ~ regions * species)
aov.tables <- summary.aov(res.man)
pr.f <- lapply(aov.tables, function(tab) tab[["Pr(>F)"]])
pvals.rs <- as.matrix(as.data.frame(pr.f))[3, ]
length(pvals.rs)

# Print how many peaks pass each threshold (BH-adjusted, then raw).
BH <- p.adjust(pvals.rs, method = "BH")
sum(BH < 0.001)
sum(pvals.rs < 0.00001)

############################

# Region-labelled copy of the fully normalised matrix.
clust <- norm
dim(clust)
dimnames(clust)[[2]] <- batches[samples, 7]

# Diverging purple-white-orange palette: build 94 shades and drop the four
# around the midpoint (46-49), which leaves 90 colours.
#hcol <- colorRampPalette(c("#1565C0","white","#C62828"))(100)
ramp <- colorRampPalette(c("#6A1B9A", "white", "#EF6C00"))
hcol <- ramp(94)[c(1:45, 50:94)]
length(hcol)
#hcol <- plasma(90)

# For every species, average the normalised values of each of its regions
# (one column per region, rows named by peak) and keep the peaks whose raw
# p-value is below 1e-5. The stored matrix is transposed: regions in rows,
# peaks in columns.
dat <- list()
for (sp in unique(species)) {
   sp.regions <- unique(regions[species == sp])
   avg <- NULL
   for (r in sp.regions) {
      sel <- regions == r & species == sp
      # One sample: take its column as is; several: average them per peak.
      col.r <- if (sum(sel) > 1) rowMeans(clust[, sel]) else clust[, sel]
      avg <- cbind(avg, col.r)
   }
   print(dim(avg))
   dimnames(avg) <- list(peaks, sp.regions)
#   dat[[sp]] <- t(avg[BH<=0.001,])
   dat[[sp]] <- t(avg[pvals.rs < 0.00001, ])
}

# Put the rows (regions) of every species matrix in the same order as "H".
ref.order <- rownames(dat[["H"]])
for (sp in unique(species)) {
   dat[[sp]] <- dat[[sp]][ref.order, ]
}

# Log2 ratio of the |H - M| difference to the |C - M| and to the |B - M|
# difference; the 0.001 keeps the argument of log2 strictly positive.
# "HBM" keeps the B-based ratio, "HCM" ends up as the mean of the two ratios.
log.gap <- function(a, b) log2(abs(a - b) + 0.001)
gap.HM <- log.gap(dat[["H"]], dat[["M"]])
dat[["HCM"]] <- gap.HM - log.gap(dat[["C"]], dat[["M"]])
dat[["HBM"]] <- gap.HM - log.gap(dat[["B"]], dat[["M"]])
dat[["HCM"]] <- (dat[["HCM"]] + dat[["HBM"]]) / 2

# Hierarchical clustering of the selected peaks (the columns of dat[["HCM"]])
# under three dissimilarities and five linkage methods. For each combination
# the cluster memberships for k = 3, 4, 5 are written to
# P0.00001/<prefix>_<method>.<tag>.txt (tag = 7th element of commandArgs(),
# prefix = S / P / E) and the dendrogram is drawn once per k with the
# clusters boxed in red.
out.dir <- "P0.00001"
# pdf() and write.table() fail when the directory is missing, so make sure it
# exists; this does nothing (and stays silent) when it is already there.
dir.create(out.dir, showWarnings = FALSE)
run.tag <- commandArgs()[7]
link.methods <- c("ward.D", "ward.D2", "complete", "average", "mcquitty")
hcm <- dat[["HCM"]]
# Each entry: plot title plus the dissimilarity between peaks.
dissims <- list(
  S = list(title = "Spearman's R",
           d = as.dist(1 - cor(hcm, method = "spearman"))),
  P = list(title = "Pearson's R",
           d = as.dist(1 - cor(hcm, method = "pearson"))),
  E = list(title = "Euclidean",
           d = dist(t(hcm)))
)

pdf(paste0(out.dir, "/hclust_", run.tag, ".pdf"), width = 15, height = 50)
par(mfcol = c(12, 1), mar = c(2, 2, 2, 1))
for (prefix in names(dissims)) {
  for (m in link.methods) {
    h <- hclust(dissims[[prefix]]$d, method = m)
    g <- cutree(h, k = 3:5)
    write.table(g, paste0(out.dir, "/", prefix, "_", m, ".", run.tag, ".txt"),
                sep = "\t", quote = FALSE)
    for (i in 3:5) {
      plot(h, main = c(dissims[[prefix]]$title, paste(m, i)))
      rect.hclust(h, k = i, border = "red")
    }
  }
}
dev.off()


# Row order used for the plots: the row names of the order file, taken from
# its second column.
o <- as.matrix(read.table("../ourexpression/order_EBnames.txt",
                          comment.char = "", header = FALSE, sep = "\t",
                          row.names = 2))
o.ks <- rownames(o)

# One panel per (dissimilarity, linkage, k): the mean HCM value of every
# cluster along the ordered rows of dat[["HCM"]], one coloured line per
# cluster. Cluster memberships are read back from the tables written earlier.
pdf(paste0("P0.00001/HCM.lines_", commandArgs()[7], ".pdf"),
    width = 8, height = 48, pointsize = 12)
par(mfcol = c(12, 1), mar = c(14, 4, 4, 2), las = 2)
for (d in c("S", "P", "E")) {
  for (m in c("ward.D", "ward.D2", "complete", "average", "mcquitty")) {
    memb.file <- paste0("P0.00001/", d, "_", m, ".", commandArgs()[7], ".txt")
    g <- as.matrix(read.delim(memb.file, header = TRUE, row.names = 1))
    for (k in seq_len(ncol(g))) {
      # Rows = clusters, columns = ordered rows of dat[["HCM"]].
      cl.mean <- apply(dat[["HCM"]][o.ks, ], 1,
                       function(x) tapply(x, g[, k], mean))
      # Row labels: "<cluster id> <cluster size>".
      cl.size <- table(g[, k])
      dimnames(cl.mean)[[1]] <- paste(names(cl.size), cl.size)
      cl.col <- rainbow(nrow(cl.mean))
      plot(cl.mean[1, ], xlab = "", ylab = "Mean value",
           ylim = c(min(cl.mean), max(cl.mean)), xaxt = "n",
           main = paste(d, m, colnames(g)[k]), type = "l", pch = 16,
           col = cl.col[1], lwd = 2)
      for (j in 2:nrow(cl.mean)) {
        lines(cl.mean[j, ], col = cl.col[j], lwd = 2)
      }
      axis(1, 1:ncol(cl.mean), labels = colnames(cl.mean))
    }
  }
}
dev.off()
# Same cluster means as the line plots, but one panel per cluster with the
# x positions sorted by increasing mean value of that cluster.
pdf(paste0("P0.00001/HCM.sortedlines_", commandArgs()[7], ".pdf"),
    width = 6, height = 12, pointsize = 12)
par(mfcol = c(3, 1), mar = c(14, 4, 4, 2), las = 2)
for (d in c("S", "P", "E")) {
  for (m in c("ward.D", "ward.D2", "complete", "average", "mcquitty")) {
    memb.file <- paste0("P0.00001/", d, "_", m, ".", commandArgs()[7], ".txt")
    g <- as.matrix(read.delim(memb.file, header = TRUE, row.names = 1))
    # NOTE(review): k = 3 picks the THIRD column of the membership table. The
    # tables are written from cutree(h, k = 3:5), so that column is the
    # 5-cluster cut, while the page layout above has 3 rows -- confirm whether
    # the 3-cluster cut (column 1) was intended.
    for (k in 3) {
      cl.mean <- apply(dat[["HCM"]][o.ks, ], 1,
                       function(x) tapply(x, g[, k], mean))
      cl.size <- table(g[, k])
      dimnames(cl.mean)[[1]] <- paste(names(cl.size), cl.size)
      print(rownames(cl.mean))
      y.lim <- c(min(cl.mean), max(cl.mean))
      for (i in 1:nrow(cl.mean)) {
        by.value <- order(cl.mean[i, ])
        plot(cl.mean[i, by.value], xlab = "", xaxt = "n", ylim = y.lim,
             main = paste(d, m), type = "l", lwd = 2)
        axis(1, 1:ncol(cl.mean), labels = colnames(cl.mean[, by.value]))
      }
    }
  }
}
dev.off()


library(pheatmap)
# One PDF per dissimilarity (S / P / E); inside it, one heatmap of the
# per-cluster mean HCM values for every linkage method and every cut stored
# in the membership table. Rows and columns are shown in their given order
# (no clustering), on a fixed -2..2 colour scale with 91 break points.
for (d in c("S", "P", "E")) {
  pdf(paste0("P0.00001/", d, ".HCM.mean_", commandArgs()[7], ".pdf"),
      width = 10, height = 5, pointsize = 24)
  for (m in c("ward.D", "ward.D2", "complete", "average", "mcquitty")) {
    memb.file <- paste0("P0.00001/", d, "_", m, ".", commandArgs()[7], ".txt")
    g <- as.matrix(read.delim(memb.file, header = TRUE, row.names = 1))
    for (k in seq_len(ncol(g))) {
      cl.mean <- apply(dat[["HCM"]][o.ks, ], 1,
                       function(x) tapply(x, g[, k], mean))
      # Row labels: "<cluster id> <cluster size>".
      cl.size <- table(g[, k])
      dimnames(cl.mean)[[1]] <- paste(names(cl.size), cl.size)
      pheatmap(cl.mean,
               color = hcol,
               breaks = seq(-2, 2, length.out = 91),
               border_color = NA,
               na_col = "white",
               cluster_rows = FALSE,
               cluster_cols = FALSE,
               show_rownames = TRUE,
               main = paste(d, m, colnames(g)[k]))
    }
  }
  dev.off()
}


#library(squash)
#hcol <- rev(jet(100))
#breaks <- seq(0.75,1.25,length.out=101)
#breaks[1] <- 0
#breaks[length(breaks)] <- 2
# Colour scale for the dissimilarity heatmap: 101 break points over 0..2 and
# a 100-step purple -> black -> yellow ramp (end colours repeated three times).
breaks <- seq(0, 2, length.out = 101)
#hcol <- colorRampPalette(c("#C62828","white","#1565C0"))(100)
#hcol <- colorRampPalette(c("#1A1970","white","#ED7601"))(100)
hcol <- colorRampPalette(rep(c("purple", "black", "yellow"),
                             times = c(3, 1, 3)))(100)

# Heatmap of 1 - Spearman correlation between the columns of dat[["HCM"]],
# with rows and columns clustered on that same dissimilarity and cut into
# three groups. `mtx` holds the full matrix that is drawn, `d` the same
# values as a dist object for the clustering.
mtx <- 1 - cor(dat[["HCM"]], method = "spearman")
d <- as.dist(mtx)
for (m in "average") {
  tiff(paste0("P0.00001/S_", m, ".ProtCoding.tiff"), compression = "lzw",
       res = 300, pointsize = 5.75, width = 2400, height = 2400)
  pheatmap(mtx,
           clustering_distance_rows = d,
           clustering_distance_cols = d,
           clustering_method = m,
           cutree_rows = 3,
           cutree_cols = 3,
           show_rownames = FALSE,
           show_colnames = FALSE,
           color = hcol,
           border_color = FALSE,
           breaks = breaks,
           na_col = "black",
           main = paste("1 - Spearman's R,", m))
  dev.off()
}
