setwd("~/brainmap/bulk")

library(gplots)
library(pheatmap)

mtx <- as.matrix(read.delim("ProtCoding.34regions_mtx.txt",header=T,check.names = F))
dim(mtx)
mtx[1:10,1:10]
samples <- colnames(mtx)
peaks <- rownames(mtx)
# remove QS samples if any
samples <- samples[grep("_MS",samples)]
mtx <- mtx[,samples]
mtx.num <- apply(mtx,2,as.numeric)
dimnames(mtx.num)[[1]] <- peaks
dim(mtx.num)

brain <- unlist(lapply(strsplit(samples,"_"),function (x) x[length(x)-1]))
species <- unlist(lapply(strsplit(brain,""),function (x) x[1]))

# remove bad chimpanzee A samples if any
mtx.num <- mtx.num[,brain!="CHA"]
samples <- samples[brain!="CHA"]
mtx.num <- mtx.num[apply(mtx.num,1,function (x) sum(!is.finite(x))==0),]
dim(mtx.num)

brain <- unlist(lapply(strsplit(samples,"_"),function (x) x[length(x)-1]))
species <- unlist(lapply(strsplit(brain,""),function (x) x[1]))

batches <- as.matrix(read.table("batches_34groups_4ctx_EBnames.txt",header=F,fill=T,row.names=2,sep="\t"))
col <- as.matrix(read.delim("order_EBnames.txt",header=F,row.names=2))[,2]

mtx.num <- mtx.num[,!(batches[samples,7]=="")]
brain <- brain[!(batches[samples,7]=="")]
species <- species[!(batches[samples,7]=="")]
samples <- samples[!(batches[samples,7]=="")]

unique(brain)
meta <- as.matrix(read.delim("meta.txt",header=F,row.names=1))
meta
sex <- meta[brain,1]
head(sex)
age <- as.numeric(meta[brain,3])
head(age)
rin <- as.numeric(meta[brain,4])
head(rin)
hemi <- meta[brain,5]
head(hemi)

dimnames(mtx.num)[[2]] <- batches[samples,7]
length(batches[samples,7])
regions <- colnames(mtx.num)
regions <- regions[!(regions=="")]
length(regions)
table(regions)

hists <- list()
for (i in unique(regions)){
  mtx <- mtx.num[,regions==i]
  hists[[i]] <- hist(rowMeans(mtx),breaks=0:50/10,plot=F)
}
par(mar=c(5,4,4,2))
plot(0,0,type="n",xlim=c(0,3),ylim=c(0,1000))
lapply(names(col), function (x) lines(hists[[x]]$mids,hists[[x]]$counts,col=col[x]))
mtx.num[1:5,1:5]


##### normalization by median #####
mtx.num <- apply(mtx.num,2,function (x) x-median(x))

hists <- list()
for (i in unique(regions)){
  mtx <- mtx.num[,regions==i]
  hists[[i]] <- hist(rowMeans(mtx),breaks=-50:50/10,plot=F)
}
par(mar=c(5,4,4,2))
plot(0,0,type="n",xlim=c(-1,1),ylim=c(0,1000))
lapply(names(col), function (x) lines(hists[[x]]$mids,hists[[x]]$counts,col=col[x]))
mtx.num[1:5,1:5]
##########


##### normalization by brain #####
norm <- mtx.num
for (b in unique(brain)){
  avg.b <- c()
  for (i in unique(regions)){
    m <- mtx.num[,regions==i&brain==b]
    s <- sum(regions==i&brain==b)
    if(s>1){
      m <- rowMeans(m)
    }
    if(s>0){
      avg.b <- cbind(avg.b,m)
    }
  }
  coeff <- rowMeans(avg.b)
  norm[,brain==b] <- norm[,brain==b] - coeff
}
dim(norm)

hists <- list()
for (i in unique(regions)){
  mtx <- norm[,regions==i]
  hists[[i]] <- hist(rowMeans(mtx),breaks=-50:50/10,plot=F)
}
par(mar=c(5,4,4,2))
plot(0,0,type="n",xlim=c(-1,1),ylim=c(0,1000))
lapply(names(col), function (x) lines(hists[[x]]$mids,hists[[x]]$counts,col=col[x]))
norm[1:5,1:5]

write.table(norm,"ProtCoding.34regions_mtx.txt.bybrain",quote=F,sep="\t")
##########


##### lm normalization by brain #####
lms <- lapply(1:nrow(mtx.num), function(x){
  lm(mtx.num[x,]~as.factor(brain))
})
residuals <- lapply(lms, function(x)residuals(summary(x)))
residuals <- do.call(rbind, residuals)
adj.residuals <- residuals+matrix(apply(mtx.num, 1, mean), nrow=nrow(residuals), ncol=ncol(residuals))
rownames(adj.residuals) <- rownames(mtx.num)
rownames(residuals) <- rownames(mtx.num)
norm <- adj.residuals
dimnames(norm)[[1]] <- rownames(mtx.num)
dimnames(norm)[[2]] <- colnames(mtx.num)
dim(norm)

hists <- list()
for (i in unique(regions)){
  mtx <- norm[,regions==i]
  hists[[i]] <- hist(rowMeans(mtx),breaks=-50:50/10,plot=F)
}
par(mar=c(5,4,4,2))
plot(0,0,type="n",xlim=c(-1,1),ylim=c(0,1000))
lapply(names(col), function (x) lines(hists[[x]]$mids,hists[[x]]$counts,col=col[x]))
norm[1:5,1:5]

write.table(norm,"ProtCoding.34regions_mtx.txt.lm.bybrain",quote=F,sep="\t")
###########

lm.norm <- as.matrix(read.delim("ProtCoding.34regions_mtx.txt.lm.bybrain",header=T,row.names=1,check.names=F))
norm <- as.matrix(read.delim("ProtCoding.34regions_mtx.txt.bybrain",header=T,row.names=1,check.names=F))

##### PCA #####
pca <- prcomp(t(norm),center=F,scale=F)
x <- pca$x[,1]
y <- pca$x[,2]
z <- pca$x[,3]
pov <- pca$sdev[1:10]/sum(pca$sdev)*100
pov <- round(pov,digits=2)

pch <- rep(16,length.out=length(samples))
names(pch) <- samples

pdf("PCA.pdf")
plot(x, y, xlab=paste0("Coordinate 1, ",pov[1],"%"), ylab=paste0("Coordinate 2, ",pov[2],"%"), main="Species", type="n")
col8 <- c("magenta","blue","red","forestgreen")[as.factor(species)]
text(x, y, labels = brain, col=col8, cex=.5)
plot(x, z, xlab=paste0("Coordinate 1, ",pov[1],"%"), ylab=paste0("Coordinate 3, ",pov[3],"%"), main="Species", type="n")
col8 <- c("magenta","blue","red","forestgreen")[as.factor(species)]
text(x, z, labels = brain, col=col8, cex=.5)

plot(x, y, xlab=paste0("Coordinate 1, ",pov[1],"%"), ylab=paste0("Coordinate 2, ",pov[2],"%"), main="Regions", type="n")
col8 <- col[as.factor(regions)]
text(x, y, labels = brain, col=col8, cex=.5)
plot(x, z, xlab=paste0("Coordinate 1, ",pov[1],"%"), ylab=paste0("Coordinate 3, ",pov[3],"%"), main="Regions", type="n")
col8 <- col[as.factor(regions)]
text(x, z, labels = brain, col=col8, cex=.5)
dev.off()
##########

##### clustering #####
avg <- c()
for (i in unique(regions)){
  m <- norm[,regions==i]
  s <- sum(regions==i)
  if(s>1){
    m <- rowMeans(m)
  }
  avg <- cbind(avg,m)
}
dimnames(avg)[[2]] <- unique(regions)
dimnames(avg)[[1]] <- peaks
write.table(avg,"ProtCoding.34regions_mtx.txt.avg",quote=F,sep="\t")

pdf("clustering.pdf",width=30,height=20)
par(mfrow=c(2,6))
d <- dist(t(avg))
for (m in c("ward.D", "ward.D2", "single", "complete", "average", "mcquitty")){
  hcl <- hclust(d,method=m)
  plot(hcl,main=c(m,"Euclidean"))
}
d <- as.dist(1-cor(avg,method="pearson"))
for (m in c("ward.D", "ward.D2", "single", "complete", "average", "mcquitty")){
  hcl <- hclust(d,method=m)
  plot(hcl,main=c(m,"1-cor"))
}
dev.off()

d <- as.dist(1-cor(avg,method="pearson"))
hcl <- hclust(d,method="complete")

modules <- as.matrix(read.delim("7clusters.txt",header=F,row.names=1))
modules <- modules[,1]
modules
##########

##### closest cluster by ind. #####
avg <- avg[,names(modules)]

means <- c()
for (i in unique(modules)){
  avg.i <- avg[,modules==i]
  s <- sum(modules==i)
  if(s>1){
    avg.i <- rowMeans(avg.i)
  }
  means <- cbind(means,avg.i)
}
dimnames(means)[[2]] <- unique(modules)
dim(means)

out <- matrix(0,length(unique(brain)),length(modules))
dimnames(out)[[1]] <- unique(brain)
dimnames(out)[[2]] <- names(modules)
colnames(out)
for (sp in unique(brain)){
  clust <- norm[,brain==sp]
  dimnames(clust)[[2]] <- batches[samples[brain==sp],7]
  regions.sp <- colnames(clust)
  
  avg.sp <- c()
  for (i in unique(regions.sp)){
    m <- clust[,regions.sp==i]
    s <- sum(regions.sp==i)
    if(s>1){
      m <- rowMeans(m)
    }
    avg.sp <- cbind(avg.sp,m)
  }
  dimnames(avg.sp)[[2]] <- unique(regions.sp)
  
  print(sp)
  print(dim(avg.sp))
  
  d <- 1-cor(cbind(means,avg.sp),method="pearson")

  d <- d[colnames(avg.sp),colnames(d) %in% unique(modules)]
  print(d)
  
  min.d <- apply(d,1,which.min)
  min.d <- colnames(d)[min.d]
  names(min.d) <- rownames(d)
  out[sp,names(min.d)] <- min.d
}

print(modules)
print(out)

out.num2 <- rbind(modules,apply(out,2,as.numeric))
dimnames(out.num2)[[1]] <- c("33All",rownames(out))
out.num2

pdf("clustering_byInd.pdf",width=8,height=12,pointsize=18)
heatmap.2(t(out.num2),
          density.info="none",  # turns off density plot inside color legend
          trace="none",         # turns off trace lines inside the heat map
          col=unique(col),       # use on color palette defined earlier
          #distfun=function(c) as.dist(1 - cor(t(c))),
          key=F,
          rowsep=1:ncol(out.num2),
          sepwidth=c(0.001,0.001),
          margins=c(4,16),
          cexCol=0.8,
          adjCol=c(NA,0.5),
          dendrogram="none",
          # RowSideColors=col[names(modules)],
          Colv=F,
          Rowv=F
)
dev.off()
###########

##### closest cluster #####
avg <- avg[,names(modules)]

means <- c()
for (i in unique(modules)){
  avg.i <- avg[,modules==i]
  s <- sum(modules==i)
  if(s>1){
    avg.i <- rowMeans(avg.i)
  }
  means <- cbind(means,avg.i)
}
dimnames(means)[[2]] <- unique(modules)
dim(means)

out <- matrix(0,length(unique(species)),length(modules))
dimnames(out)[[1]] <- unique(species)
dimnames(out)[[2]] <- names(modules)
colnames(out)
for (sp in unique(species)){
  clust <- norm[,species==sp]
  dimnames(clust)[[2]] <- batches[samples[species==sp],7]
  regions.sp <- colnames(clust)
  
  avg.sp <- c()
  for (i in unique(regions.sp)){
    m <- clust[,regions.sp==i]
    s <- sum(regions.sp==i)
    if(s>1){
      m <- rowMeans(m)
    }
    avg.sp <- cbind(avg.sp,m)
  }
  dimnames(avg.sp)[[2]] <- unique(regions.sp)
  
  print(sp)
  print(dim(avg.sp))
  
  d <- 1-cor(cbind(means,avg.sp),method="pearson")
  
  d <- d[colnames(avg.sp),colnames(d) %in% unique(modules)]
  print(d)
  
  min.d <- apply(d,1,which.min)
  min.d <- colnames(d)[min.d]
  names(min.d) <- rownames(d)
  out[sp,names(min.d)] <- min.d
}

print(modules)
print(out)

out.num2 <- rbind(modules,apply(out,2,as.numeric))
dimnames(out.num2)[[1]] <- c("33All",paste0(33,unique(species)))
out.num2 <- out.num2[c("33All","33B","33C","33H","33M"),]
out.num2

pdf("clustering_bySpecies.pdf",width=8,height=12,pointsize=18)
heatmap.2(t(out.num2),
          density.info="none",  # turns off density plot inside color legend
          trace="none",         # turns off trace lines inside the heat map
          col=unique(col),       # use on color palette defined earlier
          #distfun=function(c) as.dist(1 - cor(t(c))),
          key=F,
          rowsep=1:ncol(out.num2),
          sepwidth=c(0.001,0.001),
          margins=c(4,16),
          cexCol=0.8,
          adjCol=c(NA,0.5),
          dendrogram="none",
          # RowSideColors=col[names(modules)],
          Colv=F,
          Rowv=F
)
dev.off()
##########

##########
# ANOVA  #
##########

avg.reg <- list()
for (i in unique(regions)){
  avg.reg[[i]] <- c()
  names <- c()
  for (b in unique(species)[order(unique(species))]){
    m <- norm[,regions==i&species==b]
    s <- sum(regions==i&species==b)
    if(s>1){
      m <- rowMeans(m)
    }
    if(s>0){
      avg.reg[[i]] <- cbind(avg.reg[[i]],m)
      names <- c(names,b)
    }
  }
  dimnames(avg.reg[[i]])[[1]] <- rownames(norm)
  dimnames(avg.reg[[i]])[[2]] <- names
}

df <- t(norm)
res.man <- manova(df ~ regions)
pvals.r <- unlist(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]][[1]])) 
length(pvals.r)
BH <- p.adjust(pvals.r,method="BH")
sum(pvals.r<0.00001)
sum(BH<0.05)

res.man <- manova(df ~ sex)
pvals.r <- unlist(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]][[1]])) 
length(pvals.r)
BH <- p.adjust(pvals.r,method="BH")
sum(pvals.r<0.00001)
sum(BH<0.05)

res.man <- manova(df ~ age)
pvals.r <- unlist(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]][[1]])) 
length(pvals.r)
BH <- p.adjust(pvals.r,method="BH")
sum(pvals.r<0.00001)
sum(BH<0.05)

res.man <- manova(df ~ species)
pvals.s <- unlist(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]][[1]])) 
length(pvals.s)
BH <- p.adjust(pvals.s,method="BH")
sum(pvals.s<0.00001)
sum(BH<0.05)

res.man <- manova(df ~ rin)
pvals.r <- unlist(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]][[1]])) 
length(pvals.r)
BH <- p.adjust(pvals.r,method="BH")
sum(pvals.r<0.00001)
sum(BH<0.05)

res.man <- manova(df ~ hemi)
pvals.r <- unlist(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]][[1]])) 
length(pvals.r)
BH <- p.adjust(pvals.r,method="BH")
sum(pvals.r<0.00001)
sum(BH<0.05)

res.man <- manova(df ~ regions*species)
pvals.rs <- as.matrix(as.data.frame(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]])))[3,]
length(pvals.rs)
BH <- p.adjust(pvals.rs,method="BH")
sum(pvals.rs<0.00001)
sum(BH<0.05)


##### branch length #####
dat <- list()
for (sp in unique(species)){
  avg <- c()
  for (i in unique(regions[species==sp])){
    avg.i <- c()
    n.i <- 0
    m <- norm[,regions==i&species==sp]
    s <- sum(regions==i&species==sp)
    if(s>1){
      m <- rowMeans(m)
    }
    if(s>0){
      avg.i <- cbind(avg.i,m)
      n.i <- n.i + 1
    }
    if(n.i>1){
      avg.i <- rowMeans(avg.i)
    }
    avg <- cbind(avg,avg.i)
  }
  print(dim(avg))
  dimnames(avg)[[1]] <- peaks
  dimnames(avg)[[2]] <- unique(regions[species==sp])
  dat[[sp]] <- t(avg[pvals.rs<0.00001,])
}

clu <- as.matrix(read.delim("7clusters.txt",header=F,row.names=1))
clu

branch <- c()
pdf("hclust_branchLength.pdf",width=18,height=12)
par(mfrow=c(5,7))
d.mean <- 0
for (r in rownames(clu)){
  print(r)
  avg <- c()
  for (sp in unique(species)){
    avg <- rbind(avg,dat[[sp]][r,])
  }
  dimnames(avg)[[1]] <- unique(species)
  d <- as.dist(1-cor(t(avg),method="p"))
  d.mean <- d + d.mean
  hcl <- hclust(d, method="average")
  plot(as.dendrogram(hcl),main=r,ylim=c(0,1))
  print(hcl)
  for (i in 1:100){
    subavg <- avg[,sample(1:ncol(avg),0.5*ncol(avg))]
    d <- as.dist(1-cor(t(subavg),method="p"))
    hcl <- hclust(d, method="average")
    branch <- rbind(branch,c(sum(hcl$height),r))
  }
}
d.mean <- d.mean/length(unique(regions))
hcl <- hclust(d.mean, method="average")
plot(as.dendrogram(hcl),main="All regions",ylim=c(0,1))
dev.off()

pdf("boxplot_branchLength.pdf")
par(mar=c(18,4,4,2))
#o <- order(as.numeric(unlist(lapply(strsplit(names(val)," "),function (x) x[1]))))
num <- as.numeric(unlist(lapply(strsplit(branch[,2]," "),function (x) x[1])))
boxplot(as.numeric(branch[,1])~num,las=2,outline=F)
abline(h=median(as.numeric(branch[,1])),lty=2)
dev.off()

branch <- cbind(branch,clu[branch[,2],1])
pdf("boxplot_7clusters_branchLength.pdf",width=5,height=4)
#o <- order(as.numeric(unlist(lapply(strsplit(names(val)," "),function (x) x[1]))))
boxplot(as.numeric(branch[,1])~branch[,3],outline=F)
abline(h=median(as.numeric(branch[,1])),lty=2)
dev.off()
###########


###########################
# Species-specific genes  #
###########################

p.th <- 0.00001
qu <- quantile(unlist(lapply(avg.reg, function (x) x[pvals.rs<p.th,])),0.1)
qu
ref.hist <- hist(unlist(lapply(avg.reg, function (x) x[pvals.rs<p.th,])),breaks=-50:50/10,plot=F)

hs.HC <- list()
hs.HB <- list()
sum.sel <- list()
above0 <- list()
hists <- list()
for (i in names(avg.reg)){
  print(i)
  mtx <- avg.reg[[i]][pvals.rs<p.th,]
  fc.th <- quantile(c(
    abs(mtx[,"H"]-mtx[,"M"]),
    abs(mtx[,"C"]-mtx[,"M"]),
    abs(mtx[,"B"]-mtx[,"M"])
  ),0.1)
  print(fc.th)
  
  above0[[i]] <- apply(mtx,2, function (x) sum(x>qu))
  hists[[i]] <- apply(mtx,2, function (x) hist(x,breaks=-50:50/10,plot=F))
  
  sel <- apply(mtx,1, function (x) abs(x["H"]-x["M"])>fc.th | abs(x["C"]-x["M"])>fc.th )
  sum.sel[[i]] <- sel
  ratio <- apply(mtx,1, function (x) log2(abs(x["H"]-x["M"])+0.001) - log2(abs(x["C"]-x["M"])+0.001))
  hs.HC[[i]] <- ratio*sel
  
  sel <- apply(mtx,1, function (x) abs(x["H"]-x["M"])>fc.th | abs(x["B"]-x["M"])>fc.th )
  sum.sel[[i]] <- sum(sum.sel[[i]] & sel)
  ratio <- apply(mtx,1, function (x) log2(abs(x["H"]-x["M"])+0.001) - log2(abs(x["B"]-x["M"])+0.001))
  hs.HB[[i]] <- ratio*sel
}

above0 <- as.matrix(as.data.frame(above0,check.names=F))

pdf("evolution_rate_per_region.pdf",width=6,height=12)
col.sp <- c("magenta","blue","red","forestgreen")
names(col.sp) <- colnames(mtx)
par(mfrow=c(3,1))
for (r in names(col)){
  plot(ref.hist$mids,ref.hist$counts/33/4,xlim=c(-1.2,1.2),ylim=c(0,250),type="l",lwd=3,xlab="Expression",ylab="Number of genes",col="gray",main=r)
  for (s in unique(species)){
    lapply(names(col), function (x) lines(hists[[r]][[s]]$mids,hists[[r]][[s]]$counts,col=col.sp[s]))
  }
}
plot(unlist(sum.sel)[names(col)], colMeans(above0[,names(col)]), cex=2, pch=16, col=col)
par(mar=c(15,4,4,2))
barplot(unlist(sum.sel)[names(col)],las=2,main="|Ape - Mac| > th",col=col)
barplot(above0[,names(col)],beside=T,las=2,main="Expressed (B,C,H,M)",col=rep(col,each=4))
dev.off()

save(avg.reg,hs.HC,hs.HB,file=paste0("fc.th.dynamic.RData"))

##############################
# Quantitaive vs qualitative #
##############################
load("human-spec_ProtCoding_2.0.pdf.RData")
tails.HB <- lapply(hs.HB,function (x) x>log2(2) )
tails.HC <- lapply(hs.HC,function (x) x>log2(2) )

mtx <- as.matrix(read.delim("counts.34regions_mtx.txt",header=T,check.names = F))
dim(mtx)
mtx[1:10,1:10]
samples <- colnames(mtx)
peaks <- rownames(mtx)
# remove QS samples if any
samples <- samples[grep("_MS",samples)]
mtx <- mtx[,samples]
mtx.num <- apply(mtx,2,as.numeric)
dimnames(mtx.num)[[1]] <- peaks
dim(mtx.num)

brain <- unlist(lapply(strsplit(samples,"_"),function (x) x[length(x)-1]))
species <- unlist(lapply(strsplit(brain,""),function (x) x[1]))

# remove bad chimpanzee A samples if any
mtx.num <- mtx.num[,brain!="CHA"]
samples <- samples[brain!="CHA"]
mtx.num <- mtx.num[apply(mtx.num,1,function (x) sum(!is.finite(x))==0),]
dim(mtx.num)

brain <- unlist(lapply(strsplit(samples,"_"),function (x) x[length(x)-1]))
species <- unlist(lapply(strsplit(brain,""),function (x) x[1]))

batches <- as.matrix(read.table("batches_34groups_4ctx_EBnames.txt",header=F,fill=T,row.names=2,sep="\t"))
col <- as.matrix(read.delim("order_EBnames.txt",header=F,row.names=2))[,2]

mtx.num <- mtx.num[,!(batches[samples,7]=="")]
brain <- brain[!(batches[samples,7]=="")]
species <- species[!(batches[samples,7]=="")]
samples <- samples[!(batches[samples,7]=="")]

dimnames(mtx.num)[[2]] <- batches[samples,7]
length(batches[samples,7])
regions <- colnames(mtx.num)
regions <- regions[!(regions=="")]
length(regions)
table(regions)

norm <- mtx.num

avg.reg <- list()
for (i in unique(regions)){
  avg.reg[[i]] <- c()
  names <- c()
  for (b in unique(species)[order(unique(species))]){
    m <- norm[,regions==i&species==b]
    s <- sum(regions==i&species==b)
    if(s>1){
      m <- rowMeans(m)
    }
    if(s>0){
      avg.reg[[i]] <- cbind(avg.reg[[i]],m)
      names <- c(names,b)
    }
  }
  dimnames(avg.reg[[i]])[[1]] <- rownames(norm)
  dimnames(avg.reg[[i]])[[2]] <- names
}

qualit.HB <- list()
qualit.HC <- list()
for (r in names(avg.reg)){
  genes <- names(tails.HB[[r]])[names(tails.HB[[r]]) %in% rownames(avg.reg[[r]])]
  avg.r <- avg.reg[[r]][genes,]
  tails.HB.r <- tails.HB[[r]][genes]
  tails.HC.r <- tails.HC[[r]][genes]
  
  print(dim(avg.r))
  qualit.HB[[r]] <- sum(avg.r[tails.HB.r,"B"]==0 | avg.r[tails.HB.r,"H"]==0)
  qualit.HC[[r]] <- sum(avg.r[tails.HC.r,"C"]==0 | avg.r[tails.HC.r,"H"]==0)
}

qualit.HB <- unlist(qualit.HB)
qualit.HC <- unlist(qualit.HC)

n <- unlist(lapply(strsplit(names(qualit.HB)," "), function (x) as.numeric(x[[1]])))
o <- order(n)
qualit.HB <- qualit.HB[names(qualit.HB)[o]]
qualit.HC <- qualit.HC[names(qualit.HC)[o]]
print(names(qualit.HB))
print(names(qualit.HC))

pdf("qualitative.pdf",width=20,height=20)
par(mar=c(15,4,4,2),las=2,mfrow=c(4,4))
min <- min(c(qualit.HB,qualit.HC))
max <- max(c(qualit.HB,qualit.HC))
bars <- barplot((qualit.HB+qualit.HC)/2,ylim=c(min,max),xlab="",ylab="Number of human-specific genes")
arrows(bars,qualit.HB,bars,qualit.HC,length=0)
dev.off()


##########################
# Compare two procedures #
##########################


pdf("compare_metrics_fc.th.dynamic.pdf",width=20,height=20)
par(mar=c(15,4,4,2),las=2,mfrow=c(4,4))
load("human-spec_ProtCoding_2.0.pdf.RData")
print(names(hs.HB))
print(names(hs.HC))
n <- unlist(lapply(strsplit(names(hs.HB)," "), function (x) as.numeric(x[[1]])))
o <- order(n)
hs.HB <- hs.HB[names(hs.HB)[o]]
hs.HC <- hs.HC[names(hs.HC)[o]]
print(names(hs.HB))
print(names(hs.HC))

tails.HB <- unlist(lapply(hs.HB,function (x) sum(x>log2(2)) ))
tails.HC <- unlist(lapply(hs.HC,function (x) sum(x>log2(2)) ))
min <- min(c(tails.HB,tails.HC))
max <- max(c(tails.HB,tails.HC))
bars <- barplot((tails.HB+tails.HC)/2,ylim=c(min,max),xlab="",ylab="Number of human-specific genes")
arrows(bars,tails.HB,bars,tails.HC,length=0)
old.number <- (tails.HB+tails.HC)/2

th <- quantile(unlist(avg.reg),0.1)
above0 <- unlist(lapply(avg.reg, function (x) sum(rowMeans(x)>th)))
above0.old <- above0[names(tails.HB)]

old.and <- unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>log2(2) & hs.HC[[x]]>log2(2)) ))
old.and1.5 <- unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>log2(1.5) & hs.HC[[x]]>log2(1.5)) ))
old.and0 <- unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>0 & hs.HC[[x]]>0) ))

tails.HB <- unlist(lapply(hs.HB,function (x) sum(x>log2(2)) / sum(x<(-1*log2(2))) ))
tails.HC <- unlist(lapply(hs.HC,function (x) sum(x>log2(2)) / sum(x<(-1*log2(2))) ))
tails.HB <- log2(tails.HB)
tails.HC <- log2(tails.HC)
min <- min(c(tails.HB,tails.HC))
max <- max(c(tails.HB,tails.HC))
plot((tails.HB+tails.HC)/2,pch=16,cex=2,ylim=c(min,max),xlab="",ylab="Human-specificity",xaxt="n",col=col)
axis(1,1:length(tails.HB),names(tails.HB))
arrows(1:length(tails.HB),tails.HB,1:length(tails.HC),tails.HC,length=0)
old.ratio <- (tails.HB+tails.HC)/2

old.ratio.and <- log2( unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>log2(2) & hs.HC[[x]]>log2(2)) / sum(hs.HB[[x]]<(-1*log2(2)) & hs.HC[[x]]<(-1*log2(2))) )) )
old.ratio.and1.5 <- log2( unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>log2(1.5) & hs.HC[[x]]>log2(1.5)) / sum(hs.HB[[x]]<(-1*log2(1.5)) & hs.HC[[x]]<(-1*log2(1.5))) )) )
old.ratio.and0 <- log2( unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>0 & hs.HC[[x]]>0) / sum(hs.HB[[x]]<0 & hs.HC[[x]]<0) )) )

load("fc.th.dynamic.RData")
print(names(hs.HB))
print(names(hs.HC))
n <- unlist(lapply(strsplit(names(hs.HB)," "), function (x) as.numeric(x[[1]])))
o <- order(n)
hs.HB <- hs.HB[names(hs.HB)[o]]
hs.HC <- hs.HC[names(hs.HC)[o]]
print(names(hs.HB))
print(names(hs.HC))

tails.HB <- unlist(lapply(hs.HB,function (x) sum(x>log2(2)) ))
tails.HC <- unlist(lapply(hs.HC,function (x) sum(x>log2(2)) ))
min <- min(c(tails.HB,tails.HC))
max <- max(c(tails.HB,tails.HC))
bars <- barplot((tails.HB+tails.HC)/2,ylim=c(min,max),xlab="",ylab="Number of human-specific genes, adjusted")
arrows(bars,tails.HB,bars,tails.HC,length=0)
new.number <- (tails.HB+tails.HC)/2

th <- quantile(unlist(avg.reg),0.1)
above0 <- unlist(lapply(avg.reg, function (x) sum(rowMeans(x)>th)))
above0.new <- above0[names(tails.HB)]

new.and <- unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>log2(2) & hs.HC[[x]]>log2(2)) ))
new.and1.5 <- unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>log2(1.5) & hs.HC[[x]]>log2(1.5)) ))
new.and0 <- unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>0 & hs.HC[[x]]>0) ))

tails.HB <- unlist(lapply(hs.HB,function (x) sum(x>log2(2)) / sum(x<(-1*log2(2))) ))
tails.HC <- unlist(lapply(hs.HC,function (x) sum(x>log2(2)) / sum(x<(-1*log2(2))) ))
tails.HB <- log2(tails.HB)
tails.HC <- log2(tails.HC)
min <- min(c(tails.HB,tails.HC))
max <- max(c(tails.HB,tails.HC))
plot((tails.HB+tails.HC)/2,pch=16,cex=2,ylim=c(min,max),xlab="",ylab="Human-specificity, adjusted",xaxt="n",col=col)
axis(1,1:length(tails.HB),names(tails.HB))
arrows(1:length(tails.HB),tails.HB,1:length(tails.HC),tails.HC,length=0)
new.ratio <- (tails.HB+tails.HC)/2

new.ratio.and <- log2( unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>log2(2) & hs.HC[[x]]>log2(2)) / sum(hs.HB[[x]]<(-1*log2(2)) & hs.HC[[x]]<(-1*log2(2))) )) )
new.ratio.and1.5 <- log2( unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>log2(1.5) & hs.HC[[x]]>log2(1.5)) / sum(hs.HB[[x]]<(-1*log2(1.5)) & hs.HC[[x]]<(-1*log2(1.5))) )) )
new.ratio.and0 <- log2( unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>0 & hs.HC[[x]]>0) / sum(hs.HB[[x]]<0 & hs.HC[[x]]<0) )) )

bars <- barplot(old.and,xlab="",ylab="Number of human-specific genes, AND, th=2")
bars <- barplot(new.and,xlab="",ylab="Number of human-specific genes, AND, th=2, adjusted")
plot(old.ratio.and,pch=16,cex=2,xlab="",ylab="Human-specificity, AND, th=2",xaxt="n",col=col)
axis(1,1:length(tails.HB),names(tails.HB))
plot(new.ratio.and,pch=16,cex=2,xlab="",ylab="Human-specificity, AND, th=2, adjusted",xaxt="n",col=col)
axis(1,1:length(tails.HB),names(tails.HB))
bars <- barplot(old.and1.5,xlab="",ylab="Number of human-specific genes, AND, th=1.5")
bars <- barplot(new.and1.5,xlab="",ylab="Number of human-specific genes, AND, th=1.5, adjusted")
plot(old.ratio.and1.5,pch=16,cex=2,xlab="",ylab="Human-specificity, AND, th=1.5",xaxt="n",col=col)
axis(1,1:length(tails.HB),names(tails.HB))
plot(new.ratio.and1.5,pch=16,cex=2,xlab="",ylab="Human-specificity, AND, th=1.5, adjusted",xaxt="n",col=col)
axis(1,1:length(tails.HB),names(tails.HB))
bars <- barplot(old.and0,xlab="",ylab="Number of human-specific genes, AND, th=0")
bars <- barplot(new.and0,xlab="",ylab="Number of human-specific genes, AND, th=0, adjusted")
plot(old.ratio.and0,pch=16,cex=2,xlab="",ylab="Human-specificity, AND, th=0",xaxt="n",col=col)
axis(1,1:length(tails.HB),names(tails.HB))
plot(new.ratio.and0,pch=16,cex=2,xlab="",ylab="Human-specificity, AND, th=0, adjusted",xaxt="n",col=col)
axis(1,1:length(tails.HB),names(tails.HB))

par(mfrow=c(4,6))
plot(new.number,old.number,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes, adjusted",ylab="Number of human-specific genes",main=c("Pearson's R",round(cor(new.number,old.number),digits=2)))
plot(new.ratio,old.ratio,pch=21,cex=2,col="darkgray",bg=col,xlab="Human-specificity, adjusted",ylab="Human-specificity",main=c("Pearson's R",round(cor(new.ratio,old.ratio),digits=2)))
plot.new()

plot(new.and,old.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes (AND, th=2), adjusted",ylab="Number of human-specific genes (AND, th=2)",main=c("Pearson's R",round(cor(new.and,old.and),digits=2)))
plot(new.and1.5,old.and1.5,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes (AND, th=1.5), adjusted",ylab="Number of human-specific genes (AND, th=1.5)",main=c("Pearson's R",round(cor(new.and1.5,old.and1.5),digits=2)))
plot(new.and0,old.and0,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes (AND, th=0), adjusted",ylab="Number of human-specific genes (AND, th=0)",main=c("Pearson's R",round(cor(new.and0,old.and0),digits=2)))

plot(old.and1.5,old.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes (AND, th=1.5)",ylab="Number of human-specific genes (AND, th=2)",main=c("Pearson's R",round(cor(old.and1.5,old.and),digits=2)))
plot(old.and0,old.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes (AND, th=0)",ylab="Number of human-specific genes (AND, th=2)",main=c("Pearson's R",round(cor(old.and0,old.and),digits=2)))
plot(new.and1.5,new.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes (AND, th=1.5), adjusted",ylab="Number of human-specific genes (AND, th=2), adjusted",main=c("Pearson's R",round(cor(new.and1.5,new.and),digits=2)))
plot(new.and0,new.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes (AND, th=0), adjusted",ylab="Number of human-specific genes (AND, th=2), adjusted",main=c("Pearson's R",round(cor(new.and0,new.and),digits=2)))

plot(old.ratio.and1.5,old.ratio.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Human-specificity (AND, th=1.5)",ylab="Human-specificity (AND, th=2)",main=c("Pearson's R",round(cor(old.ratio.and1.5,old.ratio.and),digits=2)))
plot(old.ratio.and0,old.ratio.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Human-specificity (AND, th=0)",ylab="Human-specificity (AND, th=2)",main=c("Pearson's R",round(cor(old.ratio.and0,old.ratio.and),digits=2)))
plot(new.ratio.and1.5,new.ratio.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Human-specificity (AND, th=1.5), adjusted",ylab="Human-specificity (AND, th=2), adjusted",main=c("Pearson's R",round(cor(new.ratio.and1.5,new.ratio.and),digits=2)))
plot(new.ratio.and0,new.ratio.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Human-specificity (AND, th=0), adjusted",ylab="Human-specificity (AND, th=2), adjusted",main=c("Pearson's R",round(cor(new.ratio.and0,new.ratio.and),digits=2)))

plot(new.number,new.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes, adjusted",ylab="Number of human-specific genes, AND, adjusted",main=c("Pearson's R",round(cor(new.number,new.and),digits=2)))
plot(old.number,old.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes",ylab="Number of human-specific genes, AND",main=c("Pearson's R",round(cor(old.number,old.and),digits=2)))
plot(new.ratio,new.ratio.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Human-specificity, adjusted",ylab="Human-specificity, AND, adjusted",main=c("Pearson's R",round(cor(new.ratio,new.ratio.and),digits=2)))
plot(old.ratio,old.ratio.and,pch=21,cex=2,col="darkgray",bg=col,xlab="Human-specificity",ylab="Human-specificity, AND",main=c("Pearson's R",round(cor(old.ratio,old.ratio.and),digits=2)))

corr <- round(cor(new.number, above0.new), digits=3)
plot(new.number, above0.new, pch=16, xlab="Number of human-specific genes", ylab="Number of highly expressed genes", main=paste("adjusted, Pearson's R =",corr), col=col, cex=2)
corr <- round(cor(old.number, above0.old), digits=3)
plot(old.number, above0.old, pch=16, xlab="Number of human-specific genes", ylab="Number of highly expressed genes", main=paste("Pearson's R =",corr), col=col, cex=2)
corr <- round(cor(new.and, above0.new), digits=3)
plot(new.and, above0.new, pch=16, xlab="Number of human-specific genes, AND, th=2", ylab="Number of highly expressed genes", main=paste("adjusted, Pearson's R =",corr), col=col, cex=2)
corr <- round(cor(old.and, above0.old), digits=3)
plot(old.and, above0.old, pch=16, xlab="Number of human-specific genes, AND, th=2", ylab="Number of highly expressed genes", main=paste("Pearson's R =",corr), col=col, cex=2)
corr <- round(cor(new.and1.5, above0.new), digits=3)
plot(new.and1.5, above0.new, pch=16, xlab="Number of human-specific genes, AND, th=1.5", ylab="Number of highly expressed genes", main=paste("adjusted, Pearson's R =",corr), col=col, cex=2)
corr <- round(cor(old.and1.5, above0.old), digits=3)
plot(old.and1.5, above0.old, pch=16, xlab="Number of human-specific genes, AND, th=1.5", ylab="Number of highly expressed genes", main=paste("Pearson's R =",corr), col=col, cex=2)

plot(new.and,old.number,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes, AND, th=2, adjusted",ylab="Number of human-specific genes",main=c("Pearson's R",round(cor(new.and,old.number),digits=2)))
plot(new.and1.5,old.number,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes, AND, th=1.5, adjusted",ylab="Number of human-specific genes",main=c("Pearson's R",round(cor(new.and1.5,old.number),digits=2)))
plot(new.and0,old.number,pch=21,cex=2,col="darkgray",bg=col,xlab="Number of human-specific genes, AND, th=0, adjusted",ylab="Number of human-specific genes",main=c("Pearson's R",round(cor(new.and0,old.number),digits=2)))

plot(new.ratio.and,old.ratio,pch=21,cex=2,col="darkgray",bg=col,xlab="Human-specificity, AND, th=2, adjusted",ylab="Human-specificity",main=c("Pearson's R",round(cor(new.ratio.and,old.ratio),digits=2)))
plot(new.ratio.and1.5,old.ratio,pch=21,cex=2,col="darkgray",bg=col,xlab="Human-specificity, AND, th=1.5, adjusted",ylab="Human-specificity",main=c("Pearson's R",round(cor(new.ratio.and1.5,old.ratio),digits=2)))
plot(new.ratio.and0,old.ratio,pch=21,cex=2,col="darkgray",bg=col,xlab="Human-specificity, AND, th=0, adjusted",ylab="Human-specificity",main=c("Pearson's R",round(cor(new.ratio.and0,old.ratio),digits=2)))
dev.off()


########################
# ANOVA leave-one-out  #
########################

for (loo in unique(brain[species=="H"])){
  
  print(loo)
  norm.loo <- norm[,brain!=loo]
  print(dim(norm.loo))
  
  avg.reg <- list()
  for (i in unique(regions)){
    avg.reg[[i]] <- c()
    names <- c()
    for (b in unique(species)[order(unique(species))]){
      m <- norm.loo[,regions[brain!=loo]==i&species[brain!=loo]==b]
      s <- sum(regions[brain!=loo]==i&species[brain!=loo]==b)
      if(s>1){
        m <- rowMeans(m)
      }
      if(s>0){
        avg.reg[[i]] <- cbind(avg.reg[[i]],m)
        names <- c(names,b)
      }
    }
    dimnames(avg.reg[[i]])[[1]] <- rownames(norm.loo)
    dimnames(avg.reg[[i]])[[2]] <- names
  }
  
  df <- t(norm.loo)
  res.man <- manova(df ~ regions[brain!=loo])
  pvals.r <- unlist(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]][[1]])) 
  
  res.man <- manova(df ~ regions[brain!=loo]*species[brain!=loo])
  pvals.rs <- as.matrix(as.data.frame(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]])))[3,]
  
  
  ##### branch length #####
  dat <- list()
  for (sp in unique(species)){
    avg <- c()
    for (i in unique(regions[species==sp])){
      avg.i <- c()
      n.i <- 0
      m <- norm.loo[,regions[brain!=loo]==i&species[brain!=loo]==sp]
      s <- sum(regions[brain!=loo]==i&species[brain!=loo]==sp)
      if(s>1){
        m <- rowMeans(m)
      }
      if(s>0){
        avg.i <- cbind(avg.i,m)
        n.i <- n.i + 1
      }
      if(n.i>1){
        avg.i <- rowMeans(avg.i)
      }
      avg <- cbind(avg,avg.i)
    }
    print(dim(avg))
    dimnames(avg)[[1]] <- peaks
    dimnames(avg)[[2]] <- unique(regions[species==sp])
    dat[[sp]] <- t(avg[pvals.rs<0.00001,])
  }
  
  clu <- as.matrix(read.delim("7clusters.txt",header=F,row.names=1))
  clu
  
  branch <- c()
  pdf(paste0(loo,".hclust_branchLength.pdf"),width=18,height=12)
  par(mfrow=c(5,7))
  d.mean <- 0
  for (r in rownames(clu)){
    print(r)
    avg <- c()
    for (sp in unique(species)){
      avg <- rbind(avg,dat[[sp]][r,])
    }
    dimnames(avg)[[1]] <- unique(species)
    d <- as.dist(1-cor(t(avg),method="p"))
    d.mean <- d + d.mean
    hcl <- hclust(d, method="average")
    plot(as.dendrogram(hcl),main=r,ylim=c(0,1))
    print(hcl)
    for (i in 1:100){
      subavg <- avg[,sample(1:ncol(avg),0.5*ncol(avg))]
      d <- as.dist(1-cor(t(subavg),method="p"))
      hcl <- hclust(d, method="average")
      branch <- rbind(branch,c(sum(hcl$height),r))
    }
  }
  d.mean <- d.mean/length(unique(regions))
  hcl <- hclust(d.mean, method="average")
  plot(as.dendrogram(hcl),main="All regions",ylim=c(0,1))
  dev.off()
  
  pdf(paste0(loo,".boxplot_branchLength.pdf"))
  par(mar=c(18,4,4,2))
  #o <- order(as.numeric(unlist(lapply(strsplit(names(val)," "),function (x) x[1]))))
  num <- as.numeric(unlist(lapply(strsplit(branch[,2]," "),function (x) x[1])))
  boxplot(as.numeric(branch[,1])~num,las=2,outline=F)
  abline(h=median(as.numeric(branch[,1])),lty=2)
  dev.off()
  
  branch <- cbind(branch,clu[branch[,2],1])
  pdf(paste0(loo,".boxplot_7clusters_branchLength.pdf"),width=5,height=4)
  #o <- order(as.numeric(unlist(lapply(strsplit(names(val)," "),function (x) x[1]))))
  boxplot(as.numeric(branch[,1])~branch[,3],outline=F)
  abline(h=median(as.numeric(branch[,1])),lty=2)
  dev.off()
  ###########
  
  
  #########################################
  # Species-specific genes leave-one-out  #
  #########################################
  
  p.th <- 0.00001
  qu <- quantile(unlist(lapply(avg.reg, function (x) x[pvals.rs<p.th,])),0.1)
  ref.hist <- hist(unlist(lapply(avg.reg, function (x) x[pvals.rs<p.th,])),breaks=-50:50/10,plot=F)
  
  hs.HC <- list()
  hs.HB <- list()
  sum.sel <- list()
  above0 <- list()
  hists <- list()
  for (i in names(avg.reg)){
    print(i)
    mtx <- avg.reg[[i]][pvals.rs<p.th,]
    print(dim(mtx))
    fc.th <- quantile(c(
      abs(mtx[,"H"]-mtx[,"M"]),
      abs(mtx[,"C"]-mtx[,"M"]),
      abs(mtx[,"B"]-mtx[,"M"])
    ),0.1)
    print(fc.th)
    
    above0[[i]] <- apply(mtx,2, function (x) sum(x>qu))
    hists[[i]] <- apply(mtx,2, function (x) hist(x,breaks=-50:50/10,plot=F))
    
    sel <- apply(mtx,1, function (x) abs(x["H"]-x["M"])>fc.th | abs(x["C"]-x["M"])>fc.th )
    sum.sel[[i]] <- sel
    ratio <- apply(mtx,1, function (x) log2(abs(x["H"]-x["M"])+0.001) - log2(abs(x["C"]-x["M"])+0.001))
    hs.HC[[i]] <- ratio*sel
    
    sel <- apply(mtx,1, function (x) abs(x["H"]-x["M"])>fc.th | abs(x["B"]-x["M"])>fc.th )
    sum.sel[[i]] <- sum(sum.sel[[i]] & sel)
    ratio <- apply(mtx,1, function (x) log2(abs(x["H"]-x["M"])+0.001) - log2(abs(x["B"]-x["M"])+0.001))
    hs.HB[[i]] <- ratio*sel
  }
  
  above0 <- as.matrix(as.data.frame(above0,check.names=F))
  
  pdf(paste0(loo,"evolution_rate_per_region.pdf"),width=6,height=12)
  col.sp <- c("magenta","blue","red","forestgreen")
  names(col.sp) <- colnames(mtx)
  par(mfrow=c(3,1))
  for (r in names(col)){
    plot(ref.hist$mids,ref.hist$counts/33/4,xlim=c(-1.2,1.2),ylim=c(0,250),type="l",lwd=3,xlab="Expression",ylab="Number of genes",col="gray",main=r)
    for (s in unique(species)){
      lapply(names(col), function (x) lines(hists[[r]][[s]]$mids,hists[[r]][[s]]$counts,col=col.sp[s]))
    }
  }
  plot(unlist(sum.sel)[names(col)], colMeans(above0[,names(col)]), cex=2, pch=16, col=col)
  par(mar=c(15,4,4,2))
  barplot(unlist(sum.sel)[names(col)],las=2,main="|Ape - Mac| > th",col=col)
  barplot(above0[,names(col)],beside=T,las=2,main="Expressed (B,C,H,M)",col=rep(col,each=4))
  dev.off()
  
  save(avg.reg,hs.HC,hs.HB,file=paste0(loo,".fc.th.dynamic.RData"))

}

pdf("leave-one-out.HSratio.pdf",width=15,height=25)
par(mar=c(15,4,4,2),las=2,mfrow=c(5,3))
out <- c()
out1.5 <- c()
out0 <- c()
for (loo in unique(brain[species=="H"])){
  load(paste0(loo,".fc.th.dynamic.RData"))

  n <- unlist(lapply(strsplit(names(hs.HB)," "), function (x) as.numeric(x[[1]])))
  o <- order(n)
  hs.HB <- hs.HB[names(hs.HB)[o]]
  hs.HC <- hs.HC[names(hs.HC)[o]]
  
  new.ratio.and <- log2( unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>log2(2) & hs.HC[[x]]>log2(2)) / sum(hs.HB[[x]]<(-1*log2(2)) & hs.HC[[x]]<(-1*log2(2))) )) )
  new.ratio.and1.5 <- log2( unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>log2(1.5) & hs.HC[[x]]>log2(1.5)) / sum(hs.HB[[x]]<(-1*log2(1.5)) & hs.HC[[x]]<(-1*log2(1.5))) )) )
  new.ratio.and0 <- log2( unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>0 & hs.HC[[x]]>0) / sum(hs.HB[[x]]<0 & hs.HC[[x]]<0) )) )
  
  plot(new.ratio.and0,pch=16,cex=2,xlab="",ylab="Human-specificity, AND, th=0",xaxt="n",col=col)
  axis(1,1:length(hs.HB),names(hs.HB))
  plot(new.ratio.and1.5,pch=16,cex=2,xlab="",ylab="Human-specificity, AND, th=1.5",xaxt="n",col=col)
  axis(1,1:length(hs.HB),names(hs.HB))
  plot(new.ratio.and,pch=16,cex=2,xlab="",ylab="Human-specificity, AND, th=2",xaxt="n",col=col)
  axis(1,1:length(hs.HB),names(hs.HB))
  
  out <- rbind(out, new.ratio.and)
  out1.5 <- rbind(out1.5, new.ratio.and1.5)
  out0 <- rbind(out0, new.ratio.and0)
}
means <- apply(out0,2,mean)
sds <- apply(out0,2,sd)
plot(means,pch=16,cex=2,xlab="",ylab="Human-specificity, AND, th=0",xaxt="n",col=col,ylim=c(min(out0),max(out0)))
arrows(1:length(hs.HB),means-sds,1:length(hs.HB),means+sds,length=0)
axis(1,1:length(hs.HB),names(hs.HB))
means <- apply(out1.5,2,mean)
sds <- apply(out1.5,2,sd)
plot(means,pch=16,cex=2,xlab="",ylab="Human-specificity, AND, th=1.5",xaxt="n",col=col,ylim=c(min(out1.5),max(out1.5)))
arrows(1:length(hs.HB),means-sds,1:length(hs.HB),means+sds,length=0)
axis(1,1:length(hs.HB),names(hs.HB))
means <- apply(out,2,mean)
sds <- apply(out,2,sd)
plot(means,pch=16,cex=2,xlab="",ylab="Human-specificity, AND, th=2",xaxt="n",col=col,ylim=c(min(out),max(out)))
arrows(1:length(hs.HB),means-sds,1:length(hs.HB),means+sds,length=0)
axis(1,1:length(hs.HB),names(hs.HB))
dev.off()

##########


########################
#     Downsampling     #
########################

df <- t(norm)
res.man <- manova(df ~ regions)
pvals.r <- unlist(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]][[1]])) 
length(pvals.r)
BH <- p.adjust(pvals.r,method="BH")
sum(pvals.r<0.00001)
sum(BH<0.05)

res.man <- manova(df ~ regions*species)
pvals.rs <- as.matrix(as.data.frame(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]])))[3,]
length(pvals.rs)
BH <- p.adjust(pvals.rs,method="BH")
sum(pvals.rs<0.00001)
sum(BH<0.05)

  
  #########################################
  # Species-specific genes downsampling   #
  #########################################
  
  p.th <- 0.00001

  hs.HC <- list()
  hs.HB <- list()
  for (i in unique(regions)){
    print(i)
    norm.i <- norm[pvals.rs<p.th,regions==i]
    species.i <- species[regions==i]
    
    hs.HC[[i]] <- c()
    hs.HB[[i]] <- c()
    for(p in 1:100){
      mtx <- c()
      for (s in unique(species)){
        n1 <- (1:ncol(norm.i))[species.i==s]
        if(length(n1)>1){
          n1 <- sample(n1,1)
        }
        mtx <- cbind(mtx, norm.i[,n1])
      }
      dimnames(mtx)[[2]] <- unique(species)
      
      fc.th <- quantile(c(
        abs(mtx[,"H"]-mtx[,"M"]),
        abs(mtx[,"C"]-mtx[,"M"]),
        abs(mtx[,"B"]-mtx[,"M"])
      ),0.1)
    
      sel.HC <- apply(mtx,1, function (x) abs(x["H"]-x["M"])>fc.th | abs(x["C"]-x["M"])>fc.th )
      ratio.HC <- apply(mtx,1, function (x) log2(abs(x["H"]-x["M"])+0.001) - log2(abs(x["C"]-x["M"])+0.001))
      sel.HB <- apply(mtx,1, function (x) abs(x["H"]-x["M"])>fc.th | abs(x["B"]-x["M"])>fc.th )
      ratio.HB <- apply(mtx,1, function (x) log2(abs(x["H"]-x["M"])+0.001) - log2(abs(x["B"]-x["M"])+0.001))
      
      sel <- sel.HB*sel.HC
      print(sum(sel))
      
      hs.HB[[i]] <- rbind( hs.HB[[i]], ratio.HB*sel)
      hs.HC[[i]] <- rbind( hs.HC[[i]], ratio.HC*sel)
    }
  }
  
  save(avg.reg,hs.HC,hs.HB,file="n1.fc.th.dynamic.RData")
  
th <- 2

n <- unlist(lapply(strsplit(names(hs.HB)," "), function (x) as.numeric(x[[1]])))
o <- order(n)
hs.HB <- hs.HB[names(hs.HB)[o]]
hs.HC <- hs.HC[names(hs.HC)[o]]

perm <- list()
for (x in names(hs.HC)){
  perm[[x]] <- c()
  for (p in 1:nrow(hs.HC[[x]])){
    ratio <- log2( sum(hs.HB[[x]][p,]>log2(th) & hs.HC[[x]][p,]>log2(th)) / sum(hs.HB[[x]][p,]<(-1*log2(th)) & hs.HC[[x]][p,]<(-1*log2(th))) )
    perm[[x]] <- c(perm[[x]], ratio)
  }
}

library(vioplot)

pdf("downsampling.HSratio.pdf",width=15,height=10)
par(mar=c(15,4,4,2),las=2)
vioplot(perm,xlab="",ylab=paste0("Human-specificity, AND, th=",th),col="lightgray",border="gray",lineCol="gray",rectCol="gray",ylim=c(min(unlist(perm)),max(unlist(perm))))
dev.off()

##########


#######################################
#  ANOVA permutations + downsampling  #
#######################################

p.th <- 0.00001

hs.HC <- list()
hs.HB <- list()
for (i in unique(regions)){
  print(i)
  norm.i <- norm[pvals.rs<p.th,regions==i]
  species.i <- species[regions==i]
  
  hs.HC[[i]] <- c()
  hs.HB[[i]] <- c()
  for(p in 1:100){
    mtx <- c()
    for (s in c("B","C","H","M")){
      n1 <- (1:ncol(norm.i))[species.i==s]
      if(length(n1)>1){
        n1 <- sample(n1,1)
      }
      mtx <- cbind(mtx, norm.i[,n1])
    }
    dimnames(mtx)[[2]] <- sample(c("B","C","H","M"),4,replace=F)
    
    fc.th <- quantile(c(
      abs(mtx[,"H"]-mtx[,"M"]),
      abs(mtx[,"C"]-mtx[,"M"]),
      abs(mtx[,"B"]-mtx[,"M"])
    ),0.1)
  
    if(sum(colnames(mtx)==c("B","C","H","M"))!=4 & sum(colnames(mtx)==c("C","B","H","M"))!=4){
      sel.HC <- apply(mtx,1, function (x) abs(x["H"]-x["M"])>fc.th | abs(x["C"]-x["M"])>fc.th )
      ratio.HC <- apply(mtx,1, function (x) log2(abs(x["H"]-x["M"])+0.001) - log2(abs(x["C"]-x["M"])+0.001))
      sel.HB <- apply(mtx,1, function (x) abs(x["H"]-x["M"])>fc.th | abs(x["B"]-x["M"])>fc.th )
      ratio.HB <- apply(mtx,1, function (x) log2(abs(x["H"]-x["M"])+0.001) - log2(abs(x["B"]-x["M"])+0.001))
      
      sel <- sel.HB*sel.HC
      print(sum(sel))
      
      hs.HB[[i]] <- rbind( hs.HB[[i]], ratio.HB*sel)
      hs.HC[[i]] <- rbind( hs.HC[[i]], ratio.HC*sel)
    }
  }
}

save(hs.HC,hs.HB,file="permut.n1.fc.th.dynamic.RData")

load("permut.n1.fc.th.dynamic.RData")

th <- 2

n <- unlist(lapply(strsplit(names(hs.HB)," "), function (x) as.numeric(x[[1]])))
o <- order(n)
hs.HB <- hs.HB[names(hs.HB)[o]]
hs.HC <- hs.HC[names(hs.HC)[o]]

perm <- list()
for (x in names(hs.HC)){
  perm[[x]] <- c()
  for (p in 1:nrow(hs.HC[[x]])){
    ratio <- log2( sum(hs.HB[[x]][p,]>log2(th) & hs.HC[[x]][p,]>log2(th)) / sum(hs.HB[[x]][p,]<(-1*log2(th)) & hs.HC[[x]][p,]<(-1*log2(th))) )
    perm[[x]] <- c(perm[[x]], ratio)
  }
}

load("n1.fc.th.dynamic.RData")

n <- unlist(lapply(strsplit(names(hs.HB)," "), function (x) as.numeric(x[[1]])))
o <- order(n)
hs.HB <- hs.HB[names(hs.HB)[o]]
hs.HC <- hs.HC[names(hs.HC)[o]]

down <- list()
for (x in names(hs.HC)){
  down[[x]] <- c()
  for (p in 1:nrow(hs.HC[[x]])){
    ratio <- log2( sum(hs.HB[[x]][p,]>log2(th) & hs.HC[[x]][p,]>log2(th)) / sum(hs.HB[[x]][p,]<(-1*log2(th)) & hs.HC[[x]][p,]<(-1*log2(th))) )
    down[[x]] <- c(down[[x]], ratio)
  }
}

library(vioplot)

perm.med <- lapply(perm, median)
pval <- 1 - sapply(names(perm), function (x) sum(down[[x]]>perm.med[[x]])/length(down[[x]]))
star <- pval
star[pval<0.01] <- "***"
star[pval<0.05&pval>=0.01] <- "**"
star[pval<0.1&pval>=0.05] <- "*"
star[pval>=0.1] <- ""

pdf("permut.n1.HSratio.pdf",width=15,height=10)
par(mar=c(15,4,4,2),las=2)
vioplot(perm,wex=0.5,xlab="",ylab=paste0("Human-specificity, AND, th=",th),col="lightgray",border="dimgray",lineCol="dimgray",rectCol="dimgray",colMed="black")
vioplot(down,at=1:33+0.4,wex=0.5,add=T,col="gold",border="dimgray",lineCol="dimgray",rectCol="dimgray")
text(1:33+0.2,max(unlist(perm))+0.1,labels = pval,col="darkblue",srt=90)
text(1:33+0.2,max(unlist(perm))-0.1,labels = star,col="darkblue",srt=90)
for (i in 0:33){
  abline(v=i+0.7,lwd=0.4)
}
dev.off()


##########


################################
#  ANOVA extreme permutations  #
################################

avg.reg <- list()
for (i in unique(regions)){
  avg.reg[[i]] <- c()
  names <- c()
  for (b in unique(species)[order(unique(species))]){
    m <- norm[,regions==i&species==b]
    s <- sum(regions==i&species==b)
    if(s>1){
      m <- rowMeans(m)
    }
    if(s>0){
      avg.reg[[i]] <- cbind(avg.reg[[i]],m)
      names <- c(names,b)
    }
  }
  dimnames(avg.reg[[i]])[[1]] <- rownames(norm)
  dimnames(avg.reg[[i]])[[2]] <- names
}

df <- t(norm)
res.man <- manova(df ~ regions)
pvals.r <- unlist(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]][[1]])) 
length(pvals.r)
BH <- p.adjust(pvals.r,method="BH")
sum(pvals.r<0.00001)
sum(BH<0.05)

res.man <- manova(df ~ regions*species)
pvals.rs <- as.matrix(as.data.frame(lapply(summary.aov(res.man),function (x) x[["Pr(>F)"]])))[3,]
length(pvals.rs)
BH <- p.adjust(pvals.rs,method="BH")
sum(pvals.rs<0.00001)
sum(BH<0.05)

p.th <- 0.00001

hs.HC <- list()
hs.HB <- list()
for (i in names(avg.reg)){
  print(i)
  mtx <- avg.reg[[i]][pvals.rs<p.th,]
  fc.th <- quantile(c(
    abs(mtx[,"H"]-mtx[,"M"]),
    abs(mtx[,"C"]-mtx[,"M"]),
    abs(mtx[,"B"]-mtx[,"M"])
  ),0.1)
  print(fc.th)
  
  hs.HC[[i]] <- c()
  hs.HB[[i]] <- c()
  for(p in 1:100){
    mtx <- avg.reg[[i]][pvals.rs<p.th,]
    dimnames(mtx)[[2]] <- colnames(avg.reg[[i]])[sample(1:4,4,replace=F)]
    if(sum(colnames(mtx)==c("B","C","H","M"))!=4 & sum(colnames(mtx)==c("C","B","H","M"))!=4){
      sel.HC <- apply(mtx,1, function (x) abs(x["H"]-x["M"])>fc.th | abs(x["C"]-x["M"])>fc.th )
      ratio.HC <- apply(mtx,1, function (x) log2(abs(x["H"]-x["M"])+0.001) - log2(abs(x["C"]-x["M"])+0.001))
      sel.HB <- apply(mtx,1, function (x) abs(x["H"]-x["M"])>fc.th | abs(x["B"]-x["M"])>fc.th )
      ratio.HB <- apply(mtx,1, function (x) log2(abs(x["H"]-x["M"])+0.001) - log2(abs(x["B"]-x["M"])+0.001))
      
      sel <- sel.HB*sel.HC
      print(sum(sel))
      
      hs.HB[[i]] <- rbind( hs.HB[[i]], ratio.HB*sel)
      hs.HC[[i]] <- rbind( hs.HC[[i]], ratio.HC*sel)
    }
  }
}

save(hs.HC,hs.HB,file="permut.fc.th.dynamic.RData")

load("permut.fc.th.dynamic.RData")

th <- 2

n <- unlist(lapply(strsplit(names(hs.HB)," "), function (x) as.numeric(x[[1]])))
o <- order(n)
hs.HB <- hs.HB[names(hs.HB)[o]]
hs.HC <- hs.HC[names(hs.HC)[o]]

perm <- list()
for (x in names(hs.HC)){
  perm[[x]] <- c()
  for (p in 1:nrow(hs.HC[[x]])){
    ratio <- log2( sum(hs.HB[[x]][p,]>log2(th) & hs.HC[[x]][p,]>log2(th)) / sum(hs.HB[[x]][p,]<(-1*log2(th)) & hs.HC[[x]][p,]<(-1*log2(th))) )
    perm[[x]] <- c(perm[[x]], ratio)
  }
}

load("fc.th.dynamic.RData")
n <- unlist(lapply(strsplit(names(hs.HB)," "), function (x) as.numeric(x[[1]])))
o <- order(n)
hs.HB <- hs.HB[names(hs.HB)[o]]
hs.HC <- hs.HC[names(hs.HC)[o]]
new.ratio.and <- log2( unlist(sapply(names(hs.HB),function (x) sum(hs.HB[[x]]>log2(th) & hs.HC[[x]]>log2(th)) / sum(hs.HB[[x]]<(-1*log2(th)) & hs.HC[[x]]<(-1*log2(th))) )) )

library(vioplot)

pdf("permut.HSratio.pdf",width=7,height=7)
par(mar=c(15,4,4,2),las=2)
vioplot(perm,xlab="",ylab=paste0("Human-specificity, AND, th=",th),col="lightgray",border="gray",lineCol="gray",rectCol="gray",ylim=c(min(out),max(out)))

means <- apply(out,2,mean)
sds <- apply(out,2,sd)
points(means,pch=16,cex=2,col=col)
arrows(1:length(hs.HB),means-sds,1:length(hs.HB),means+sds,length=0)
dev.off()

for (i in 1:33){
  print(c(
    sum(perm[[i]]>mean(out[,i]))/length(perm[[i]]),
    sum(perm[[i]]>new.ratio.and[i])/length(perm[[i]])
    ))
}

##########

