# Named hex colours used throughout the script.
# In the code below: black / brown / blue / gray colour the gene labels
# (class I / class II / KIR / other), gray and orange fill the bars.
yzbrown  <- "#815733"
yzpurple <- "#7a28a8"
yzpink   <- "#e00077"
yzblack  <- "#000000"
yzblue   <- "#004182"
yzgray   <- "#aaaaaa"
yzorange <- "#ff7d00"

# Trailing command-line arguments; captured here but not read anywhere in the
# code visible in this file.
args <- commandArgs(trailingOnly = TRUE)

# Path of the PDF produced by the plotting section.
pdffile <- "novel.allele.counts.pdf"

# HLA-region genes in the order they appear as rows of the count tables and as
# bars in the plot. "HLA-DRB345" is the merged HLA-DRB3/4/5 entry.
hlagenes <- c(
  "HLA-F", "HLA-V", "HLA-P", "HLA-G", "HLA-H", "HLA-T", "HLA-K", "HLA-U",
  "HLA-A", "HLA-W", "HLA-Y", "HLA-J", "HLA-L", "HLA-N", "HLA-E", "HLA-C",
  "HLA-B", "HLA-S", "MICA", "MICB", "HLA-DRA", "HLA-DRB345", "HLA-DRB1",
  "HLA-DQA1", "HLA-DQB1", "HLA-DQA2", "HLA-DOB", "TAP2", "TAP1", "HLA-DMB",
  "HLA-DMA", "HLA-DOA", "HLA-DPA1", "HLA-DPB1", "HLA-DPA2", "HLA-DPB2"
)

# Genes whose labels are drawn in the class I colour.
classI <- c(
  "HLA-A", "HLA-B", "HLA-C", "HLA-E", "HLA-F", "HLA-G", "HLA-H", "HLA-HFE",
  "HLA-J", "HLA-K", "HLA-L", "HLA-N", "HLA-P", "HLA-S", "HLA-T", "HLA-U",
  "HLA-V", "HLA-W", "HLA-Y"
)

# Genes whose labels are drawn in the class II colour.
classII <- c(
  "HLA-DMA", "HLA-DMB", "HLA-DOA", "HLA-DOB", "HLA-DPA1", "HLA-DPA2",
  "HLA-DPB1", "HLA-DPB2", "HLA-DQA1", "HLA-DQA2", "HLA-DQB1", "HLA-DRA",
  "HLA-DRB1", "HLA-DRB2", "HLA-DRB3", "HLA-DRB4", "HLA-DRB5", "HLA-DRB6",
  "HLA-DRB7", "HLA-DRB8", "HLA-DRB9", "HLA-DRB345"
)

# KIR genes, appended after the HLA genes in every table and plot.
kirgenes <- c(
  "KIR3DL3", "KIR2DS2", "KIR2DL3", "KIR2DL2", "KIR2DL5B", "KIR2DP1",
  "KIR2DL1", "KIR3DP1", "KIR2DL4", "KIR3DS1", "KIR2DL5A", "KIR2DS3",
  "KIR2DS5", "KIR3DL1", "KIR2DS1", "KIR2DS4", "KIR3DL2"
)

# Subset reported as the "hlaclassic" category in the summary table.
classic <- c("HLA-A", "HLA-B", "HLA-C", "HLA-DRB1", "HLA-DPB1", "HLA-DQB1")



# Row order shared by the count tables and the plot: HLA genes, then KIR genes.
allgenes <- c(hlagenes, kirgenes)

# Label colour per gene. Everything starts gray ("other"); the class I,
# class II and KIR assignments are applied in that order, so a later one wins
# if a gene were listed in more than one group.
genecol <- rep(yzgray, length(allgenes))
genecol[is.element(allgenes, classI)] <- yzblack
genecol[is.element(allgenes, classII)] <- yzbrown
genecol[is.element(allgenes, kirgenes)] <- yzblue

# Known pseudogenes, restricted to the genes actually handled by this script.
pseudogene <- c(
  "HLA-DPA2", "HLA-DPA3", "HLA-DPB2", "HLA-DRB2", "HLA-DRB6", "HLA-DRB7",
  "HLA-DRB8", "HLA-DRB9", "HLA-H", "HLA-J", "HLA-K", "HLA-L", "HLA-N",
  "HLA-P", "HLA-S", "HLA-T", "HLA-U", "HLA-V", "HLA-W", "HLA-X", "HLA-Y",
  "HLA-Z", "KIR2DP1", "KIR3DP1"
)
pseudogene <- pseudogene[is.element(pseudogene, allgenes)]

# Coding / non-coding split of each family, keeping the family's gene order.
hlacoding <- setdiff(hlagenes, pseudogene)
hlanoncoding <- intersect(hlagenes, pseudogene)
kircoding <- setdiff(kirgenes, pseudogene)
kirnoncoding <- intersect(kirgenes, pseudogene)

# Axis labels: pseudogenes are flagged with a leading "*".
genelabels <- ifelse(
  is.element(allgenes, pseudogene),
  paste0("*", allgenes),
  allgenes
)

# overall
# Load the unique-sequence counts (gene / CDS / protein; all vs novel alleles)
# and assemble them into two per-gene tables:
#   damat0 - columns "all-gene", "all-cds", "all-prot"
#   damat1 - columns "novel-gene", "novel-cds", "novel-prot"
# Both have one row per entry of `allgenes`; a gene missing from an input file
# gets NA in the corresponding cell.
if (TRUE) {

  # Merge the HLA-DRB3 / HLA-DRB4 / HLA-DRB5 rows of a (gene, count) table into
  # a single "HLA-DRB345" row appended at the end, and index the table by gene.
  #
  # counts: data frame whose first column holds gene names and whose second
  #         column holds counts.
  # Returns a two-column data frame (same column names as the input's first
  # two columns) with a character gene column, a numeric count column and row
  # names equal to the gene names.
  #
  # The row is built as a data frame rather than appended with
  # rbind(df, c(name, value)): appending a character vector silently turns the
  # count column into character, which then propagates into damat0 / damat1.
  merge_drb345 <- function(counts) {
    drb345 <- c("HLA-DRB3", "HLA-DRB4", "HLA-DRB5")
    genes <- as.character(counts[, 1])
    n <- as.numeric(counts[, 2])
    is_drb <- genes %in% drb345
    merged <- data.frame(
      c(genes[!is_drb], "HLA-DRB345"),
      c(n[!is_drb], sum(n[is_drb])),
      stringsAsFactors = FALSE
    )
    names(merged) <- names(counts)[1:2]
    rownames(merged) <- merged[, 1]
    merged
  }

  # Look up the counts of every gene in `allgenes` in three tables and return
  # them as a data frame with the given column labels. match() is used instead
  # of row-name indexing because data-frame row names are partially matched
  # (e.g. a missing "HLA-H" could otherwise pick up an "HLA-HFE" row).
  collect_counts <- function(gene_tab, cds_tab, prot_tab, labels) {
    pick <- function(tab) tab[match(allgenes, tab[, 1]), 2]
    m <- cbind(pick(gene_tab), pick(cds_tab), pick(prot_tab))
    dimnames(m) <- list(allgenes, labels)
    as.data.frame(m)
  }

  # load unique gene seq data (tab-separated: gene, count)
  da00 <- merge_drb345(
    read.csv(file = "all.gene.uniq.counts.txt", header = FALSE, sep = "\t")
  )
  da01 <- merge_drb345(
    read.csv(file = "novel.gene.uniq.counts.txt", header = FALSE, sep = "\t")
  )

  # load unique cds data (space-separated; columns 2 and 3 are gene, count)
  da10 <- merge_drb345(
    read.csv(file = "all.cds.summary", header = FALSE, sep = " ")[, 2:3]
  )
  da11 <- merge_drb345(
    read.csv(file = "novel.cds.summary", header = FALSE, sep = " ")[, 2:3]
  )

  # load unique prot data (space-separated; columns 2 and 3 are gene, count)
  da20 <- merge_drb345(
    read.csv(file = "all.prot.summary", header = FALSE, sep = " ")[, 2:3]
  )
  da21 <- merge_drb345(
    read.csv(file = "novel.prot.summary", header = FALSE, sep = " ")[, 2:3]
  )

  damat0 <- collect_counts(
    da00, da10, da20, c("all-gene", "all-cds", "all-prot")
  )
  damat1 <- collect_counts(
    da01, da11, da21, c("novel-gene", "novel-cds", "novel-prot")
  )
}

# Write a three-panel PDF (gene / CDS / protein): for each panel the "all"
# counts are drawn as gray bars and the "novel" counts are overlaid in orange.
# Gene labels are rotated 45 degrees and coloured by gene group.
if (TRUE) {
  pdf(pdffile, width = 10, height = 8)
  par(mfrow = c(3, 1), mar = c(6, 5, 2, 2))

  tags <- c("gene", "CDS", "protein")
  fig_index <- c("A", "B", "C")
  ymaxs <- c(200, 80, 80)  # fixed y-axis upper limit of each panel

  for (i in seq_along(tags)) {
    panel_top <- ymaxs[i]

    # Base layer: all sequences. The return value holds the bar midpoints.
    barplot0 <- barplot(
      t(damat0[, i]),
      main = paste0("Unique ", tags[i], " sequence"),
      xlab = "", names.arg = NULL, axisnames = FALSE,
      col = yzgray, ylim = c(0, panel_top),
      xlim = c(1, 67), axes = FALSE, ylab = "Count",
      cex.lab = 1.8, cex.main = 2
    )
    # Overlay: novel sequences, drawn into the same coordinate system.
    barplot1 <- barplot(
      t(damat1[, i]),
      main = "",
      xlab = "", names.arg = NULL, axisnames = FALSE,
      col = yzorange, axes = FALSE, add = TRUE
    )

    # Tick marks only; the labels are drawn manually so they can be rotated
    # and coloured per gene group.
    axis(1, labels = FALSE, at = barplot0)
    label_y <- par("usr")[3] - panel_top * 0.05
    for (group_col in c(yzblack, yzbrown, yzblue, yzgray)) {
      in_group <- genecol == group_col
      text(
        x = barplot0[in_group],
        y = label_y,
        labels = genelabels[in_group],
        xpd = NA, col = group_col,
        srt = 45,
        adj = 1,
        cex = 1.2
      )
    }

    # Panel letter in the top-left margin.
    text(x = -5, y = panel_top * 1.25, labels = fig_index[i], cex = 2, xpd = NA)
    axis(2, las = 1, cex.axis = 1.5)

    legend(
      "bottomright", bty = "n", cex = 1.5,
      c("Class I", "Class II", "Other", "KIR"),
      text.col = c(yzblack, yzbrown, yzgray, yzblue)
    )
    legend(
      "topright", bty = "n", cex = 1.5,
      c("novel", "IPD"),
      col = c(yzorange, yzgray),
      pch = 15
    )
  }

  dev.off()

}

# Disabled section (guarded by FALSE): builds a per-gene summary table
# ("alleles.smry.csv") and a per-category summary ("alleles.category.smry.csv")
# from five per-gene CSV files plus the damat0 / damat1 count tables.
if (FALSE) {
  # overall table
  da1 <- read.csv(file = "../1.new.allele.list/pub/hprc_per_gene.csv", header = TRUE, sep = "\t")
  da2 <- read.csv(file = "../1.new.allele.list/pub/cpc_per_gene.csv", header = TRUE, sep = "\t")
  da3 <- read.csv(file = "../1.new.allele.list/pub/asm6_per_gene.csv", header = TRUE, sep = "\t")
  da4 <- read.csv(file = "../1.new.allele.list/pub/ref_per_gene.csv", header = TRUE, sep = "\t")
  da5 <- read.csv(file = "../1.new.allele.list/pub/grckir_per_gene.csv", header = TRUE, sep = "\t")

  rownames(da1) <- da1$gene
  rownames(da2) <- da2$gene
  rownames(da3) <- da3$gene
  rownames(da4) <- da4$gene
  rownames(da5) <- da5$gene

  # HLA-DRB3/4/5 get temporary rows so they can be summed into "HLA-DRB345".
  drb345_parts <- c("HLA-DRB3", "HLA-DRB4", "HLA-DRB5")
  table_genes <- c(allgenes, drb345_parts)

  damat2 <- matrix(0, nrow = length(table_genes), ncol = 12)
  rownames(damat2) <- table_genes
  colnames(damat2) <- c(colnames(da1)[2:7], colnames(damat0), colnames(damat1))

  # Columns 1-6: sum of columns 2-7 over every source table that lists the
  # gene. The sum starts from zero so that a gene absent from the first table
  # no longer turns the whole total into NA (which was later reset to 0,
  # discarding the counts found in the other tables).
  for (gene in table_genes) {
    x <- numeric(6)
    for (tab in list(da1, da2, da3, da4, da5)) {
      if (gene %in% tab$gene) {
        x <- x + as.numeric(tab[gene, 2:7])
      }
    }
    damat2[gene, 1:6] <- x
  }

  # Replace the HLA-DRB345 row by the sum of its three parts, then drop them.
  damat2["HLA-DRB345", ] <- colSums(damat2[drb345_parts, ])
  damat2 <- damat2[!rownames(damat2) %in% drb345_parts, ]

  # Columns 7-9 / 10-12: the all / novel unique-sequence counts.
  for (gene in allgenes) {
    for (i in seq_len(3)) {
      damat2[gene, i + 6] <- as.numeric(damat0[gene, i])
      damat2[gene, i + 9] <- as.numeric(damat1[gene, i])
    }
  }
  damat2[is.na(damat2)] <- 0

  write.table(damat2, file = "alleles.smry.csv", quote = FALSE, sep = "\t")

  # Per-category column totals. drop = FALSE keeps a one-gene category as a
  # matrix so colSums() still works.
  category_genes <- list(
    hlacoding = hlacoding,
    hlanoncoding = hlanoncoding,
    hlaclassic = classic,
    hlaall = hlagenes,
    kircoding = kircoding,
    kirnoncoding = kirnoncoding,
    kirall = kirgenes,
    all = rownames(damat2)
  )
  smry_out <- do.call(rbind, lapply(names(category_genes), function(category) {
    rows <- category_genes[[category]]
    c(category, as.numeric(colSums(damat2[rows, , drop = FALSE])))
  }))

  colnames(smry_out) <- c("category", colnames(damat2))

  write.table(smry_out, file = "alleles.category.smry.csv", quote = FALSE, sep = "\t", row.names = FALSE)
}
