library(vcfR)
library(dplyr)
library(tidyverse)

#########################
# SET WORKING DIRECTORY
#########################
# NOTE(review): "/work/directory/" looks like a placeholder; point it at the
# folder that holds the input VCF before running. The relative path passed to
# read.vcfR() further down is resolved against this directory.
setwd("/work/directory/")

# Sample IDs of the parents. They are matched by exact name against the
# column names of the genotype matrix (colnames(vcf@gt)).
parent_columns <- c(
  "492", "493", "517", "518", "519",
  "121586", "121587", "121588"
)

# Sample IDs of the offspring, matched the same way. One line per shared
# prefix (presumably <group><sex>-<parent id>; confirm against the sample
# sheet).
offspring_columns <- c(
  "1F-517", "1F-519", "1F-87", "1F-88",
  "1M-517", "1M-519", "1M-87", "1M-88",
  "2F-517", "2F-519",
  "2M-517", "2M-519", "2M-87", "2M-88",
  "3F-517", "3F-519",
  "3M-517", "3M-519", "3M-87", "3M-88",
  "4M-517", "4M-519", "4M-87", "4M-88",
  "5M-519", "5M-87", "5M-88",
  "6M-519"
)

# Read the VCF file
vcf <- read.vcfR("RAW_VCF.vcf")

# Locate the parent and offspring samples among the columns of vcf@gt.
# The positions are relative to vcf@gt itself (in vcfR this matrix also
# carries the FORMAT column), so they can be compared directly with positions
# obtained from rows of vcf@gt later on.
gt_colnames <- colnames(vcf@gt)

# %in% silently ignores IDs that are absent from the VCF; a misspelled ID
# would otherwise just shrink the index vectors without any hint. Warn rather
# than stop so that a run with a deliberately reduced VCF still completes.
missing_samples <- setdiff(c(parent_columns, offspring_columns), gt_colnames)
if (length(missing_samples) > 0) {
  warning(
    "Sample IDs not found in the VCF: ",
    paste(missing_samples, collapse = ", "),
    call. = FALSE
  )
}

parent_columns_idx <- which(gt_colnames %in% parent_columns)
offspring_columns_idx <- which(gt_colnames %in% offspring_columns)

# Remove homozygous alternative
# A sample is homozygous alternative when its genotype (GT) is 1/1 or 1|1.
# GT is the leading colon-separated field of each sample string, so the
# pattern is anchored to the start and must be followed by ':' or the end of
# the string. An unanchored match would also fire on genotype-like values in
# later fields (e.g. a phasing field such as PGT), dropping variants whose
# actual GT is not 1/1.
hom_alt_pattern <- "^1[/|]1(:|$)"

# grepl() drops the matrix shape, so rebuild it (column-major, same as the
# input). Missing entries (NA) yield FALSE and therefore never count.
gt_fields <- vcf@gt
is_hom_alt <- matrix(
  grepl(hom_alt_pattern, gt_fields),
  nrow = nrow(gt_fields),
  ncol = ncol(gt_fields)
)

# TRUE for variants in which no column holds a homozygous-alternative call.
tmp <- rowSums(is_hom_alt) == 0

# Keep only the variants without any homozygous-alternative call
vcf_filtered <- vcf[tmp, ]

# Heterozygous ref/alt genotype in either allele order, unphased or phased:
# 0/1, 1/0, 0|1, 1|0. Phased calls are frequently written as 1|0; without
# that form a second carrier would go unnoticed and a shared variant would
# pass as unique. The match is anchored to the leading GT field (followed by
# ':' or end of string) so genotype-like values in later fields of the sample
# string are not mistaken for the genotype.
het_pattern <- "^(0[/|]1|1[/|]0)(:|$)"

# Find variants with exactly one heterozygous call
# grepl() drops the matrix shape, so rebuild it (column-major, same as the
# input). Missing entries (NA) yield FALSE and therefore never count.
gt_fields <- vcf_filtered@gt
is_het <- matrix(
  grepl(het_pattern, gt_fields),
  nrow = nrow(gt_fields),
  ncol = ncol(gt_fields)
)
tmp <- rowSums(is_het) == 1

# Keep only the variants carried by a single sample
vcf_filtered <- vcf_filtered[tmp, ]

# Keep only unique variants that are in the offspring
# Every remaining row has exactly one heterozygous column, so the variant
# belongs to an offspring exactly when that column is one of
# offspring_columns_idx (positions within the columns of the gt matrix).
is_het <- is_het[tmp, , drop = FALSE]
tmp <- rowSums(is_het[, offspring_columns_idx, drop = FALSE]) == 1

# Filter VCF to find unique variants that are only in the offspring
vcf_filtered <- vcf_filtered[tmp, ]


