source(paste0(Sys.getenv("HOME"), "/code/malawi_transposon/script/R_SCRIPT/modules/frequently_used_items.R"))

#' Import a species' transposable-element annotation as a GRanges object
#'
#' Reads the tab-delimited `<assembly_name>.simple` annotation table for one
#' species, drops non-TE repeat classes, harmonises a few class labels and
#' returns the remaining records as genomic ranges.
#'
#' Relies on objects defined outside this function (presumably by the sourced
#' `frequently_used_items.R` module -- confirm): `color_schema_species`,
#' `color_schema_te`, `chrom_sizes_list`, `dir_te` and `dir_te_own`.
#'
#' @param species Species abbreviation. Matched against
#'   `color_schema_species$species_abrv` to find the assembly name, and used
#'   as the key into `chrom_sizes_list`.
#' @param annoType Which annotation to load: `"calliptera"` reads from
#'   `dir_te`, `"own"` reads from `{dir_te_own}/{assembly_name}`.
#' @return A `GRanges` object sorted by `chr`, `start`, `end`, with metadata
#'   columns `name`, `score`, `repeat_color`, `repeat_class` (factor),
#'   `repeat_subclass`, `repmask_id`, `te_id`, `split` and `species`.
import_transposons_into_granges <- function(species, annoType = "calliptera") {
  # Fail early on an unknown annotation type: indexing a named lookup vector
  # with an unknown name yields NA and a misleading "NA/<assembly>.simple" path.
  anno_types_known <- c("calliptera", "own")
  if (!is.character(annoType) || length(annoType) != 1L ||
      !annoType %in% anno_types_known) {
    stop(
      "`annoType` must be one of: ",
      paste0("\"", anno_types_known, "\"", collapse = ", "),
      call. = FALSE
    )
  }

  # `%in%` never returns NA, and `$` extraction yields a plain vector whether
  # the lookup table is a data.frame or a tibble.
  is_species <- color_schema_species$species_abrv %in% species
  assembly_name <- as.character(color_schema_species$species[is_species])
  if (length(assembly_name) != 1L) {
    stop(
      "Expected exactly one entry in `color_schema_species` for species '",
      paste(species, collapse = ", "), "', found ", length(assembly_name),
      call. = FALSE
    )
  }

  chrom_sizes <- chrom_sizes_list[[species]]
  if (is.null(chrom_sizes)) {
    stop(
      "No chromosome sizes in `chrom_sizes_list` for species '", species, "'",
      call. = FALSE
    )
  }
  # First element is passed as sequence names, second as sequence lengths.
  chrom_info <- Seqinfo(chrom_sizes[[1]], chrom_sizes[[2]])

  input_path <- switch(
    annoType,
    calliptera = dir_te,
    own = glue("{dir_te_own}/{assembly_name}")
  )
  filepath_te_species <- glue("{input_path}/{assembly_name}.simple")

  non_te_classes <- c(
    'Simple_repeat', 'Low_complexity', 'tRNA', 'snRNA', 'rRNA', 'Satellite'
  )

  tmp_df <- readr::read_delim(
    filepath_te_species,
    delim = '\t',
    col_types = 'idciiccccccc'
  ) %>%
    mutate(repeat_class_broad = gsub(" ", "", repeat_class_broad)) %>%
    filter(!repeat_class_broad %in% non_te_classes) %>%
    # Expressions inside one mutate() are evaluated in order, so the second
    # recode sees the result of the first.
    mutate(
      repeat_class_broad = if_else(
        repeat_class_broad == 'RC', 'Helitron', repeat_class_broad
      ),
      repeat_class_broad = if_else(
        repeat_class_broad == 'SINE?', 'SINE', repeat_class_broad
      )
    ) %>%
    arrange(chr, start, end)

  # Re-level the TE class so it follows the order of color_schema_te.
  # Labels absent from `color_schema_te$te_family` become NA here.
  tmp_df$repeat_class_broad <- factor(
    tmp_df$repeat_class_broad,
    levels = color_schema_te$te_family
  )

  # Keep only the relevant columns as ranges plus metadata.
  # NOTE(review): assumes `complement` already holds valid strand values
  # ("+", "-", "*") -- confirm against the .simple file format.
  GenomicRanges::GRanges(
    seqnames = tmp_df$chr,
    ranges = IRanges(start = tmp_df$start, end = tmp_df$end),
    strand = tmp_df$complement,
    name = tmp_df$repeat_family,
    score = tmp_df$perc_div,
    repeat_color = tmp_df$color,
    repeat_class = tmp_df$repeat_class_broad,
    repeat_subclass = tmp_df$repeat_class,
    repmask_id = tmp_df$repmask_id,
    te_id = tmp_df$id,
    split = tmp_df$split,
    species = species,
    seqinfo = chrom_info
  )
}
