## Convert the variant table to a VCF-style, tab-delimited table

library(tidyverse)

# Load the per-sample variant call table. The columns are (re)named
# positionally below, so the file must have exactly the ten columns listed.
dvmp <- read.table("MpileupDeepvariant_RepeatRemoved_35bpRemoved_BlacklListGenes_ART_NAT_FINAL.tab")

# Fail loudly on an unexpected layout: `names<-` would otherwise pad missing
# names with NA (or raise a length error that does not mention the file).
if (ncol(dvmp) != 10L) {
  stop(
    "Expected 10 columns in the input table, found ", ncol(dvmp),
    call. = FALSE
  )
}

# Constant tag; it is used later as the prefix of the per-sample column names.
dvmp <- dvmp %>% mutate(type = "dvmp")
names(dvmp) <- c("CHROM",
                 "POS",
                 "ID",
                 "QUAL",
                 "SAMPLE",
                 "GT",
                 "group",
                 # The meaning of the next three input columns is not evident
                 # from this script; they are dropped before reshaping.
                 "unknown1",
                 "unknown2",
                 "unknown3",
                 "type")
# Reshape the call table into VCF-style records and return it in long form
# (one row per variant x genotype column, in columns `name` / `value`).
#
# Steps:
#   1. Re-derive CHROM/POS/REF/ALT from ID (the input CHROM/POS/GT columns are
#      discarded) and rebuild ID as CHROM_POS_REF_ALT.
#   2. Add constant FILTER/INFO/FORMAT placeholder fields.
#   3. Spread samples into columns: a listed call becomes "0/1:0:0:0:0:0",
#      every other sample is filled with "0/0:0:0:0:0:0".
#   4. Add three summary genotype columns (`overall`, `ART`, `Natural`).
#   5. Pivot everything after the nine fixed VCF columns back to long form.
#
# NOTE(review): QUAL remains part of the variant key, so the same variant
# reported with different QUAL values in different samples still yields
# separate rows -- confirm whether QUAL is constant per variant in the input.
dvmp <- dvmp %>%
  dplyr::select(-CHROM, -POS, -GT) %>%
  # Default `sep` splits ID on any run of non-alphanumeric characters.
  separate(ID, into = c("CHROM", "POS", "REF", "ALT"), remove = FALSE) %>%
  mutate(
    FILTER = "PASS",
    ID = paste(CHROM, POS, REF, ALT, sep = "_"),
    FORMAT = "GT:DP:AD:GQ:PL:RNC",
    INFO = "AF=0.013889;AQ=44",
    Sample = paste(type, SAMPLE, sep = ".")
  ) %>%
  # Keep only the variant key and the sample label. `group` and `type` are
  # never used downstream; leaving them in would make them implicit id columns
  # of pivot_wider() and split a variant seen under several `group` values
  # into multiple rows that later collide as duplicate cells.
  dplyr::select(
    CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, Sample
  ) %>%
  mutate(value = "0/1:0:0:0:0:0") %>%
  # A (variant, sample) pair listed more than once must be collapsed first,
  # otherwise pivot_wider() would produce list-columns.
  distinct() %>%
  pivot_wider(
    names_from = Sample,
    values_from = value,
    values_fill = "0/0:0:0:0:0:0"
  ) %>%
  # Every variant in the table was called in at least one sample.
  mutate(overall = "0/1:0:0:0:0:0") %>%
  rowwise() %>%
  mutate(
    # "-8.$": column names ending in "-8" plus one character;
    # "-5..$": column names ending in "-5" plus two characters.
    # Presumably the sample-naming convention of the two cohorts -- confirm.
    ART = if (any(grepl("^0/1", c_across(matches("-8.$"))))) {
      "0/1:0:0:0:0:0"
    } else {
      "0/0:0:0:0:0:0"
    },
    Natural = if (any(grepl("^0/1", c_across(matches("-5..$"))))) {
      "0/1:0:0:0:0:0"
    } else {
      "0/0:0:0:0:0:0"
    }
  ) %>%
  # Drop the rowwise grouping so later steps run vectorised.
  ungroup() %>%
  # Name the fixed columns rather than relying on their position.
  pivot_longer(
    cols = -c(CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT)
  )

# Positions to exclude from the final table. The `ID` column of the CSV is
# split on ":" into CHROM and POS (both stay character, matching `dvmp`).
# Presumably these are homopolymer-associated variants, judging by the
# variable name -- confirm against the CSV.
homopolymer_remove <- separate(
  data = read.csv("../../remove_variants.csv"),
  col = ID,
  into = c("CHROM", "POS"),
  sep = ":"
)

# Build the final wide table and write it out tab-delimited:
#   - drop exact duplicate long-form rows,
#   - spread the genotype columns back out (one column per `name`),
#   - remove every variant whose CHROM/POS pair is listed in
#     `homopolymer_remove`.
dvmp %>%
  distinct() %>%
  pivot_wider(
    names_from = name,
    values_from = value,
    values_fill = "0/0:0:0:0:0:0"
  ) %>%
  filter(
    !(paste(CHROM, POS) %in% with(homopolymer_remove, paste(CHROM, POS)))
  ) %>%
  write.table(
    file = "../vcf_style_05_08_dvmp_hp_remove.txt",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )
