
# Colour palette (hex RGB strings).
# Used below for the gene-label colours and the class legend.
yzblack <- "#000000"  # class I gene labels
yzbrown <- "#815733"  # class II gene labels
yzblue  <- "#004182"  # KIR gene labels
yzgray  <- "#aaaaaa"  # default label colour for every other gene

# Remaining palette entries (not referenced in the code visible in this file).
yzpurple <- "#7a28a8"
yzpink   <- "#e00077"
yzorange <- "#ff7d00"

# Trailing command-line arguments (everything after the script name).
args <- commandArgs(trailingOnly = TRUE)

# Input table path: the first command-line argument when one is supplied,
# otherwise the historical default "output.nocnv.csv". Previously the
# argument was read and then unconditionally overwritten by the default,
# so it never had any effect.
inpfile <- if (length(args) >= 1L && !is.na(args[1]) && nzchar(args[1])) {
  args[1]
} else {
  "output.nocnv.csv"
}

# The PDF is written next to the input as "<input>.bar.pdf".
pdffile <- paste0(inpfile, ".bar.pdf")

#da<-read.csv(file="output.pairup.curated.csv", header=F,sep=" ")
#da<-read.csv(file="output.pairup-nocnv.curated.csv", header=F,sep=" ")
#da<-read.csv(file="output.geneseq-only.pairup.csv", header=F,sep=" ")
# Load the comparison table: tab-separated text with a header row.
# The code below reads the columns gene, match1/consist1/miss1 and
# match2/consist2/miss2 from it.
da <- read.csv(inpfile, header = TRUE, sep = "\t")

# Distinct values of the gene and sample columns.
genes <- unique(da$gene)
ids <- unique(da$sample)

#hlagenes<-c("HLA-HFE","HLA-F","HLA-V","HLA-P","HLA-G","HLA-H","HLA-T","HLA-K","HLA-U","HLA-A","HLA-W", "HLA-Y", "HLA-J","HLA-L","HLA-N","HLA-E","HLA-C","HLA-B","HLA-S","MICA","MICB","HLA-DRA","HLA-DRB345","HLA-DRB1","HLA-DQA1","HLA-DQB1","HLA-DQA2","HLA-DOB","TAP2","TAP1","HLA-DMB","HLA-DMA","HLA-DOA","HLA-DPA1","HLA-DPB1","HLA-DPA2","HLA-DPB2")
# HLA-region genes to report. The order of this vector is the order of the
# rows in the count matrices and therefore of the bars in the plots.
hlagenes <- c(
  "HLA-F", "HLA-V", "HLA-P", "HLA-G", "HLA-H", "HLA-T", "HLA-K", "HLA-U",
  "HLA-A", "HLA-W", "HLA-Y", "HLA-J", "HLA-L", "HLA-N", "HLA-E", "HLA-C",
  "HLA-B", "HLA-S", "MICA", "MICB", "HLA-DRA", "HLA-DRB345", "HLA-DRB1",
  "HLA-DQA1", "HLA-DQB1", "HLA-DQA2", "HLA-DOB", "TAP2", "TAP1", "HLA-DMB",
  "HLA-DMA", "HLA-DOA", "HLA-DPA1", "HLA-DPB1", "HLA-DPA2", "HLA-DPB2"
)

# Membership sets used only to pick a label colour per gene. They may name
# genes that are not in hlagenes (e.g. "HLA-HFE"); such entries have no effect.
classI <- c(
  "HLA-A", "HLA-B", "HLA-C", "HLA-E", "HLA-F", "HLA-G", "HLA-H", "HLA-HFE",
  "HLA-J", "HLA-K", "HLA-L", "HLA-N", "HLA-P", "HLA-S", "HLA-T", "HLA-U",
  "HLA-V", "HLA-W", "HLA-Y"
)

classII <- c(
  "HLA-DMA", "HLA-DMB", "HLA-DOA", "HLA-DOB", "HLA-DPA1", "HLA-DPA2",
  "HLA-DPB1", "HLA-DPB2", "HLA-DQA1", "HLA-DQA2", "HLA-DQB1", "HLA-DRA",
  "HLA-DRB1", "HLA-DRB2", "HLA-DRB3", "HLA-DRB4", "HLA-DRB5", "HLA-DRB6",
  "HLA-DRB7", "HLA-DRB8", "HLA-DRB9", "HLA-DRB345"
)

# KIR genes to report; plotted after the HLA genes.
kirgenes <- c(
  "KIR3DL3", "KIR2DS2", "KIR2DL3", "KIR2DL2", "KIR2DL5B", "KIR2DP1",
  "KIR2DL1", "KIR3DP1", "KIR2DL4", "KIR3DS1", "KIR2DL5A", "KIR2DS3",
  "KIR2DS5", "KIR3DL1", "KIR2DS1", "KIR2DS4", "KIR3DL2"
)

# Subset of HLA genes summarised separately as the "classic" genes.
#keygenes<-c("HLA-A", "HLA-B", "HLA-C", "HLA-DRB1", "HLA-DQB1", "HLA-DQA1", "HLA-DPA1","HLA-DPB1")
keygenes <- c("HLA-A", "HLA-B", "HLA-C", "HLA-DRB1", "HLA-DQB1", "HLA-DPB1")

# Every gene that gets a row in the count matrices: HLA first, then KIR.
allgenes <- c(hlagenes, kirgenes)

# Label colour for each entry of allgenes. Precedence mirrors a sequence of
# overrides: KIR wins over class II, which wins over class I; everything
# else stays grey.
genecol <- ifelse(
  allgenes %in% kirgenes, yzblue,
  ifelse(
    allgenes %in% classII, yzbrown,
    ifelse(allgenes %in% classI, yzblack, yzgray)
  )
)

# Names flagged as pseudogenes, then restricted to those present in allgenes.
pseudogene <- c(
  "HLA-DPA2", "HLA-DPA3", "HLA-DPB2", "HLA-DRB2", "HLA-DRB6", "HLA-DRB7",
  "HLA-DRB8", "HLA-DRB9", "HLA-H", "HLA-J", "HLA-K", "HLA-L", "HLA-N",
  "HLA-P", "HLA-S", "HLA-T", "HLA-U", "HLA-V", "HLA-W", "HLA-X", "HLA-Y",
  "HLA-Z", "KIR2DP1", "KIR3DP1"
)
in_plot <- pseudogene %in% allgenes
pseudogene <- pseudogene[in_plot]

# Axis labels: the gene name, prefixed with "*" when it is a pseudogene.
genelabels <- ifelse(allgenes %in% pseudogene, paste0("*", allgenes), allgenes)


# overall
if (TRUE) {
  # One row per gene in allgenes, one column per outcome. Cells start as NA
  # and every row is filled in the loop below.
  hprc_damat <- matrix(
    NA,
    nrow = length(allgenes), ncol = 3,
    dimnames = list(allgenes, c("match", "consist", "mismatch"))
  )
  t1k_damat <- hprc_damat

  for (g in allgenes) {
    # Rows of the input table belonging to this gene.
    dax <- da[da$gene == g, ]

    # Columns suffixed "1" feed the hprc matrix (match1, consist1, miss1).
    hprc_damat[g, ] <- c(
      sum(dax$match1, na.rm = TRUE),
      sum(dax$consist1, na.rm = TRUE),
      sum(dax$miss1, na.rm = TRUE)
    )

    # Columns suffixed "2" feed the t1k matrix (match2, consist2, miss2).
    t1k_damat[g, ] <- c(
      sum(dax$match2, na.rm = TRUE),
      sum(dax$consist2, na.rm = TRUE),
      sum(dax$miss2, na.rm = TRUE)
    )
  }
}

#print(damat)

# Draw one stacked-bar panel of per-gene counts on the current device.
#
# counts       : matrix with one row per gene (same order as genecol and
#                genelabels) and the three outcome columns; it is transposed
#                so that each gene becomes one stacked bar.
# title        : panel title.
# missed_label : legend text for the third (red) stack segment.
# class_legend : when TRUE, also draw the gene-class colour key at the
#                bottom right.
#
# Returns (invisibly) the bar midpoints reported by barplot().
draw_gene_panel <- function(counts, title, missed_label, class_legend = FALSE) {
  stack_fill <- c('darkblue', 'lightblue', 'red2')
  mids <- barplot(t(counts),
                  main = title,
                  xlab = "", names.arg = NULL, axisnames = FALSE,
                  col = stack_fill,
                  # x range is wider than the bars, leaving room for legends
                  xlim = c(0, 75), axes = FALSE, ylab = 'Count')

  # Tick marks only; the labels are drawn with text() so that they can be
  # rotated and coloured per gene class.
  axis(1, labels = FALSE, at = mids)
  for (label_col in c(yzblack, yzbrown, yzblue, yzgray)) {
    sel <- genecol == label_col
    text(x = mids[sel],
         y = par("usr")[3] - 5,  # fixed offset below the bottom of the plot
         labels = genelabels[sel],
         xpd = NA, col = label_col,  # xpd = NA: allow drawing in the margin
         srt = 45,
         adj = 1,
         cex = 0.8)
  }
  axis(2, las = 1)

  if (class_legend) {
    legend("bottomright",
           c("Class I", "Class II", "Other", "KIR"),
           text.col = c(yzblack, yzbrown, yzgray, yzblue))
  }
  legend("topright",
         c("matched", "consistent", missed_label),
         fill = stack_fill)

  invisible(mids)
}

# Write both panels (stacked vertically) to the PDF named by pdffile.
if (TRUE) {
  pdf(pdffile, width = 12, height = 8)
  par(mfrow = c(2, 1))

  # Top panel: hprc-side counts; also carries the gene-class colour key.
  barplot1 <- draw_gene_panel(hprc_damat, "T1K genotyping sensitivity",
                              "t1k missed", class_legend = TRUE)

  # Bottom panel: t1k-side counts.
  barplot2 <- draw_gene_panel(t1k_damat, "T1K genotyping precision",
                              "hprc missed")

  dev.off()
}

# Print sensitivity and precision for three gene sets: all HLA genes, the
# classic HLA genes, and all KIR genes.
#
#   sensitivity           = (match + consist) / all hprc-side counts
#   precision, lower bound = match / all t1k-side counts
#   precision, upper bound = (match + consist) / all t1k-side counts
#
# A gene set whose counts are all zero yields NaN.
if (TRUE) {
  print("Summary:")

  # header     : text printed before the gene count
  # label      : prefix of the metric lines (the classic set keeps "HLA")
  # genes      : row names selected from the count matrices
  # list_genes : whether to print the gene names after the header
  report_sets <- list(
    list(header = "all HLA genes n=", label = "HLA",
         genes = hlagenes, list_genes = FALSE),
    list(header = "classic HLA genes n=", label = "HLA",
         genes = keygenes, list_genes = TRUE),
    list(header = "all KIR genes n=", label = "KIR",
         genes = kirgenes, list_genes = FALSE)
  )

  for (rs in report_sets) {
    x <- colSums(hprc_damat[rs[["genes"]], ])
    y <- colSums(t1k_damat[rs[["genes"]], ])
    sensitivity <- (x['match'] + x['consist']) / sum(x)
    precision <- c(y['match'] / sum(y),
                   (y['match'] + y['consist']) / sum(y))

    print(paste0(rs[["header"]], length(rs[["genes"]])))
    if (rs[["list_genes"]]) {
      print(rs[["genes"]])
    }
    print(paste0(rs[["label"]],
                 " overall sensitivity (matched + consistent / hprc_all) :",
                 round(sensitivity, digits = 3)))
    print(paste0(rs[["label"]],
                 " overall precision (lower bound: matched / t1k_all) :",
                 round(precision[1], digits = 3)))
    print(paste0(rs[["label"]],
                 " overall precision (upper bound :matched + consistent / t1k_all) :",
                 round(precision[2], digits = 3)))
  }
}
