
# Named hex colours used by the plots further down in this script.
yzbrown  <- "#815733"
yzpurple <- "#7a28a8"
yzpink   <- "#e00077"
yzblack  <- "#000000"
yzblue   <- "#004182"
yzgray   <- "#aaaaaa"
yzorange <- "#ff7d00"

# Input handling.
#
# The first trailing command-line argument, when present, is the path of the
# space-separated table to read; without an argument the script falls back to
# "output.pairup.csv". The PDF written later is named after the input file.
args <- commandArgs(trailingOnly = TRUE)

default_inpfile <- "output.pairup.csv"
inpfile <- if (length(args) >= 1 && !is.na(args[1]) && nzchar(args[1])) {
  args[1]
} else {
  default_inpfile
}
#pngfile=paste0(args[1], 'bar.png')
pdffile <- paste0(inpfile, 'bar.pdf')

#da<-read.csv(file="output.pairup.curated.csv", header=F,sep=" ")
#da<-read.csv(file="output.pairup-nocnv.curated.csv", header=F,sep=" ")
#da<-read.csv(file="output.geneseq-only.pairup.csv", header=F,sep=" ")
# The file has no header row, so columns get the default names V1, V2, ...
# Text columns are kept as character (not factor) because the allele names
# are later passed to strsplit().
da <- read.csv(file = inpfile, header = FALSE, sep = " ",
               stringsAsFactors = FALSE)


# Find which field of an allele name carries the "new" marker.
#
# allele_name: a single allele string of the form "<gene>*<fields>", or NA.
#
# Names whose gene part starts with "HLA" have their fields split on ':' and
# the index of the first field equal to "new" is returned.
# For every other name the field string is scanned character by character
# for the first 'n'. Character positions 4 and 6 are translated to 2 and 3
# (presumably field numbers of a 3-2-2 digit layout -- TODO confirm); any
# other position is returned as is.
#
# Returns NA when the input is NA, has nothing after a '*', or contains no
# marker.
new_field <- function(allele_name)
{
  if (is.na(allele_name)) return(NA_integer_)

  parts <- unlist(strsplit(allele_name, '*', fixed = TRUE))
  # No "<gene>*<fields>" structure (e.g. empty string or missing '*'):
  # there is no field that could be flagged.
  if (length(parts) < 2) return(NA_integer_)

  gene <- parts[1]
  fields <- parts[2]

  if (substr(gene, 1, 3) == 'HLA') {
    return(match('new', unlist(strsplit(fields, ':', fixed = TRUE))))
  }

  # split = '' is the documented way to break a string into single
  # characters; a bare '*' regular expression is not a portable way to do so.
  pos <- match('n', unlist(strsplit(fields, '')))
  if (is.na(pos)) return(NA_integer_)
  if (pos == 4) {
    2L
  } else if (pos == 6) {
    3L
  } else {
    pos
  }
}

# Annotate every row with the result of new_field() applied to its V3 value
# and store it in column V6. vapply() handles a table with zero rows cleanly
# (a 1:nrow loop would iterate over c(1, 0)); numeric(1) accepts the NA,
# integer and double results new_field() can produce.
da$V6 <- vapply(da$V3, new_field, numeric(1), USE.NAMES = FALSE)

# Distinct values of the first two columns.
ids <- unique(da$V1)
genes <- unique(da$V2)

# Gene sets. The order of hlagenes followed by kirgenes (= allgenes) is the
# row order of the count matrix and therefore the bar order in the plots.

#hlagenes<-c("HLA-HFE","HLA-F","HLA-V","HLA-P","HLA-G","HLA-H","HLA-T","HLA-K","HLA-U","HLA-A","HLA-W", "HLA-Y", "HLA-J","HLA-L","HLA-N","HLA-E","HLA-C","HLA-B","HLA-S","MICA","MICB","HLA-DRA","HLA-DRB345","HLA-DRB1","HLA-DQA1","HLA-DQB1","HLA-DQA2","HLA-DOB","TAP2","TAP1","HLA-DMB","HLA-DMA","HLA-DOA","HLA-DPA1","HLA-DPB1","HLA-DPA2","HLA-DPB2")
# Active list: same as the commented-out variant above minus "HLA-HFE".
hlagenes <- c(
  "HLA-F", "HLA-V", "HLA-P", "HLA-G", "HLA-H", "HLA-T", "HLA-K", "HLA-U",
  "HLA-A", "HLA-W", "HLA-Y", "HLA-J", "HLA-L", "HLA-N", "HLA-E", "HLA-C",
  "HLA-B", "HLA-S", "MICA", "MICB", "HLA-DRA", "HLA-DRB345", "HLA-DRB1",
  "HLA-DQA1", "HLA-DQB1", "HLA-DQA2", "HLA-DOB", "TAP2", "TAP1", "HLA-DMB",
  "HLA-DMA", "HLA-DOA", "HLA-DPA1", "HLA-DPB1", "HLA-DPA2", "HLA-DPB2"
)

# Membership lists used only to pick a label colour per gene.
classI <- c(
  "HLA-A", "HLA-B", "HLA-C", "HLA-E", "HLA-F", "HLA-G", "HLA-H", "HLA-HFE",
  "HLA-J", "HLA-K", "HLA-L", "HLA-N", "HLA-P", "HLA-S", "HLA-T", "HLA-U",
  "HLA-V", "HLA-W", "HLA-Y"
)

classII <- c(
  "HLA-DMA", "HLA-DMB", "HLA-DOA", "HLA-DOB", "HLA-DPA1", "HLA-DPA2",
  "HLA-DPB1", "HLA-DPB2", "HLA-DQA1", "HLA-DQA2", "HLA-DQB1", "HLA-DRA",
  "HLA-DRB1", "HLA-DRB2", "HLA-DRB3", "HLA-DRB4", "HLA-DRB5", "HLA-DRB6",
  "HLA-DRB7", "HLA-DRB8", "HLA-DRB9", "HLA-DRB345"
)

kirgenes <- c(
  "KIR3DL3", "KIR2DS2", "KIR2DL3", "KIR2DL2", "KIR2DL5B", "KIR2DP1",
  "KIR2DL1", "KIR3DP1", "KIR2DL4", "KIR3DS1", "KIR2DL5A", "KIR2DS3",
  "KIR2DS5", "KIR3DL1", "KIR2DS1", "KIR2DS4", "KIR3DL2"
)

#keygenes<-c("HLA-A", "HLA-B", "HLA-C", "HLA-DRB1", "HLA-DQB1", "HLA-DQA1", "HLA-DPA1","HLA-DPB1")
# Subset reported separately as "classic HLA genes" in the printed summary.
keygenes <- c(
  "HLA-A", "HLA-B", "HLA-C",
  "HLA-DRB1", "HLA-DQB1", "HLA-DPB1"
)

allgenes <- c(hlagenes, kirgenes)

# Label colour per gene. Precedence mirrors a gray default that is then
# overwritten by class I, class II and finally KIR membership.
genecol <- ifelse(allgenes %in% kirgenes, yzblue,
           ifelse(allgenes %in% classII, yzbrown,
           ifelse(allgenes %in% classI, yzblack, yzgray)))

# Genes treated as pseudogenes, restricted to those actually in allgenes.
pseudogene <- c(
  "HLA-DPA2", "HLA-DPA3", "HLA-DPB2", "HLA-DRB2", "HLA-DRB6", "HLA-DRB7",
  "HLA-DRB8", "HLA-DRB9", "HLA-H", "HLA-J", "HLA-K", "HLA-L", "HLA-N",
  "HLA-P", "HLA-S", "HLA-T", "HLA-U", "HLA-V", "HLA-W", "HLA-X", "HLA-Y",
  "HLA-Z", "KIR2DP1", "KIR3DP1"
)
pseudogene <- intersect(pseudogene, allgenes)

# Axis labels: the gene name, prefixed with "*" for pseudogenes.
is_pseudo <- allgenes %in% pseudogene
genelabels <- allgenes
genelabels[is_pseudo] <- paste0("*", allgenes[is_pseudo])


# overall
# Build damat: one row per gene in allgenes, with these columns
#   hprc_all   rows of that gene with a non-NA V3
#   t1k_all    rows of that gene with a non-NA V4
#   matched / consistent / mismatched
#              rows of that gene whose V5 equals that word
#   t1k_mis    hprc_all - matched - consistent
#   hprc_mis   t1k_all  - matched - consistent
# Comparisons use %in% rather than == so that an NA in V2 or V5 counts as
# "no" instead of turning the whole sum into NA.
if (TRUE) {
  damat <- matrix(NA, nrow = length(allgenes), ncol = 7)
  colnames(damat) <- c("hprc_all", "t1k_all", "matched", "consistent",
                       "mismatched", "t1k_mis", "hprc_mis")
  rownames(damat) <- allgenes

  # Row masks that do not depend on the gene are computed once.
  has_v3 <- !is.na(da$V3)
  has_v4 <- !is.na(da$V4)
  is_matched <- da$V5 %in% 'matched'
  is_consistent <- da$V5 %in% 'consistent'
  is_mismatched <- da$V5 %in% 'mismatched'

  for (g in allgenes) {
    in_gene <- da$V2 %in% g

    damat[g, "hprc_all"] <- sum(in_gene & has_v3)
    damat[g, "t1k_all"] <- sum(in_gene & has_v4)
    damat[g, "matched"] <- sum(in_gene & is_matched)
    damat[g, "consistent"] <- sum(in_gene & is_consistent)
    damat[g, "mismatched"] <- sum(in_gene & is_mismatched)

    agreeing <- damat[g, "matched"] + damat[g, "consistent"]
    damat[g, "t1k_mis"] <- damat[g, "hprc_all"] - agreeing
    damat[g, "hprc_mis"] <- damat[g, "t1k_all"] - agreeing
  }
}

#print(damat)

# Draw one stacked per-gene bar chart on the current graphics device.
#
# counts:       matrix with one row per gene and three columns; the columns
#               are stacked bottom-to-top in column order.
# title:        main title of the panel.
# stack_names:  legend labels for the three stacked segments.
# gene_labels:  x-axis label for each row of counts.
# gene_cols:    colour of each x-axis label.
# class_legend: when TRUE, also draw the key explaining the label colours.
#
# Returns the bar midpoints invisibly.
draw_gene_bars <- function(counts, title, stack_names, gene_labels, gene_cols,
                           class_legend = FALSE) {
  stack_cols <- c('darkblue', 'lightblue', 'red2')
  # xlim is fixed and wider than the bars themselves, presumably to leave
  # room for the legends on the right -- TODO confirm.
  mids <- barplot(t(counts),
                  main = title,
                  xlab = "", names.arg = NULL, axisnames = FALSE,
                  col = stack_cols,
                  xlim = c(0, 75), axes = FALSE, ylab = 'Count', cex.main = 2,
                  cex.lab = 1.5)
  # Tick marks only; the rotated, coloured labels are drawn with text().
  axis(1, labels = FALSE, at = mids)
  text(x = mids,
       y = par("usr")[3] - 5,
       labels = gene_labels,
       xpd = NA, col = gene_cols,
       srt = 45,
       adj = 1,
       cex = 0.8)
  axis(2, las = 1, cex.axis = 1.5)
  if (class_legend) {
    legend("bottomright", cex = 1.3, bty = "n",
           c("Class I", "Class II", "Other", "KIR"),
           text.col = c(yzblack, yzbrown, yzgray, yzblue))
  }
  legend("topright", cex = 1.3, bty = "n",
         stack_names,
         fill = stack_cols)
  invisible(mids)
}

# Write the two-panel PDF (sensitivity on top, precision below).
# The device is closed via on.exit() so it is released even when a plotting
# call fails.
plot_accuracy_pdf <- function(path, counts, gene_labels, gene_cols) {
  #png(pngfile, width=3100, height=2200, res=250)
  pdf(path, width = 12, height = 8)
  on.exit(dev.off(), add = TRUE)
  par(mfrow = c(2, 1))
  draw_gene_bars(counts[, c("matched", "consistent", "t1k_mis")],
                 "T1K genotyping sensitivity",
                 c("matched", "consistent", "T1K FN"),
                 gene_labels, gene_cols, class_legend = TRUE)
  draw_gene_bars(counts[, c("matched", "consistent", "hprc_mis")],
                 "T1K genotyping precision",
                 c("matched", "consistent", "T1K FP"),
                 gene_labels, gene_cols)
  invisible(path)
}

# Print sensitivity and the two precision bounds for a set of genes.
#
# counts:        the per-gene count matrix (rows named by gene).
# genes:         row names to aggregate over.
# header:        text printed before the gene count, e.g. "all HLA genes n=".
# metric_prefix: word that starts each metric line ("HLA" or "KIR").
# list_genes:    when TRUE, also print the gene names after the header.
report_accuracy <- function(counts, genes, header, metric_prefix,
                            list_genes = FALSE) {
  totals <- colSums(counts[genes, , drop = FALSE])
  agreeing <- totals['matched'] + totals['consistent']
  sensitivity <- agreeing / totals['hprc_all']
  precision <- c(totals['matched'] / totals['t1k_all'],
                 agreeing / totals['t1k_all'])

  print(paste0(header, length(genes)))
  if (list_genes) print(genes)
  print(paste0(metric_prefix,
               " overall sensitivity (matched + consistent / hprc_all) :",
               round(sensitivity, digits = 3)))
  print(paste0(metric_prefix,
               " overall precision (lower bound: matched / t1k_all) :",
               round(precision[1], digits = 3)))
  print(paste0(metric_prefix,
               " overall precision (upper bound :matched + consistent / t1k_all) :",
               round(precision[2], digits = 3)))
  invisible(NULL)
}

# Print a table of V5 outcomes broken down by the V6 value.
#
# tbl:       data frame with columns V5 and V6.
# in_group:  logical mask selecting the rows to tabulate.
# max_field: largest V6 value to report; row "field-0" holds the rows whose
#            V6 is NA.
# Only the three known outcomes are counted, so an unexpected V5 value cannot
# cause an out-of-bounds assignment.
print_new_allele_table <- function(tbl, in_group, max_field) {
  outcomes <- c("matched", "consistent", "mismatched")
  tab <- matrix(0L, nrow = max_field + 1, ncol = length(outcomes),
                dimnames = list(paste0('field-', 0:max_field), outcomes))
  for (f in 0:max_field) {
    rows <- if (f == 0) {
      is.na(tbl$V6) & in_group
    } else {
      !is.na(tbl$V6) & tbl$V6 == f & in_group
    }
    for (k in outcomes) {
      tab[f + 1, k] <- sum(tbl$V5[rows] == k, na.rm = TRUE)
    }
  }
  print(tab)
  invisible(tab)
}

if (TRUE) {
  plot_accuracy_pdf(pdffile, damat, genelabels, genecol)

  print("Summary:")
  ## overall
  report_accuracy(damat, hlagenes, "all HLA genes n=", "HLA")
  ## classic genes
  report_accuracy(damat, keygenes, "classic HLA genes n=", "HLA",
                  list_genes = TRUE)
  ## Kir
  report_accuracy(damat, kirgenes, "all KIR genes n=", "KIR")

  ## new allele
  print("HLA all")
  print_new_allele_table(da, da$V2 %in% hlagenes, 4)

  print("KIR all")
  print_new_allele_table(da, da$V2 %in% kirgenes, 3)
}
