# Load necessary libraries
library(vcfR)
library(dplyr)
library(tidyverse)
library(dplyr)


#####################################
# REMOVE SNPS ON REPEATS
#####################################
# Read the SNP table. Columns are accessed by name below (CHROM, POS, ID), so
# the header row must be parsed explicitly: without `header = TRUE`,
# read.table() only detects a header when the first line has one field fewer
# than the remaining lines.
snp_distances_filtered <- read.table(
  "snp_distances_filtered.bed",
  header = TRUE
)

# Fail early with a clear message instead of building ranges from NULL columns.
missing_cols <- setdiff(c("CHROM", "POS"), names(snp_distances_filtered))
if (length(missing_cols) > 0) {
  stop(
    "snp_distances_filtered.bed is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Create GRanges from the SNP data frame: one width-1 range per SNP.
# Bioconductor functions are called with an explicit namespace because the
# library() calls at the top of this script do not attach GenomicRanges,
# IRanges, or S4Vectors.
gr_snps <- GenomicRanges::GRanges(
  seqnames = snp_distances_filtered$CHROM,
  ranges = IRanges::IRanges(
    start = snp_distances_filtered$POS,
    end = snp_distances_filtered$POS
  ),
  ID = snp_distances_filtered$ID
)

# Read BED file of repeats (no header line; first three columns are named here)
bed <- read_tsv("mm39_repeats.bed",
                col_names = c("chr", "start", "end"))

# BED to GRanges (BED is 0-based, half-open; convert to 1-based, closed)
gr_bed <- GenomicRanges::GRanges(
  seqnames = bed$chr,
  ranges = IRanges::IRanges(start = bed$start + 1, end = bed$end)
)

# Find overlaps between SNP positions (query) and repeat intervals (subject)
hits <- GenomicRanges::findOverlaps(gr_snps, gr_bed)

# Row indices of SNPs that overlap at least one repeat interval
overlap_idx <- unique(S4Vectors::queryHits(hits))

# Remove overlapping SNPs.
# A logical mask is used instead of `[-overlap_idx, ]`: negative indexing with
# an empty index vector selects zero rows, which would discard every SNP
# whenever no SNP falls on a repeat.
on_repeat <- seq_len(nrow(snp_distances_filtered)) %in% overlap_idx
snp_distances_filtered_cleaned <- snp_distances_filtered[!on_repeat, ]
