#!/usr/bin/env bash

# This script extracts information about the path taken by each chromosome in each species from the GFA graph.
# Produces 4 files for each species:
# .nodes
# .nodes.core.bed
# .nodes.flex.bed
# .pseudoref.paths

# Abort on unset variables and failed pipeline stages. Per-sample failures of
# the extraction step are handled explicitly below so that one bad sample does
# not prevent the remaining samples from being processed.
set -euo pipefail

# Working directory for the run: the per-sample input paths and the output
# directory below are relative and resolve against it after the `cd`.
GRAPHDIR="$HOME/code/malawi_transposon/cloud/graph_genome/malawi_haplochromines/"
# Prefix of the "<GRAPHNAME>-variants.nodesbybubble" file passed to the extractor.
GRAPHNAME="malawi_haplochromines"
EXTRACT_SCRIPT="$HOME/code/malawi_transposon/modules/minigraph/extract_genome_path.py"
OUTPUT_DIR="pseudoreference/nodes_and_paths/"

# Parallel arrays: SAMPLES[i] is the prefix of the "<sample>.variants" and
# "<sample>.coverage" inputs; SAMPLES_ABRV[i] is the matching short name
# handed to the extractor as its fourth positional argument.
SAMPLES=(astCal1.2_ensembl mayZeb2.0_ensembl troMau2.0 aulStu5.0 otoArg1.0 copChr1.0 rhaChi1.0 rhaChi2.0)
SAMPLES_ABRV=(astCal mayZeb troMau aulStu otoArg copChr rhaChi rhaChi2)

# The loop indexes both arrays with the same subscript, so a length mismatch
# would silently pair a sample with an empty/wrong abbreviation.
if (( ${#SAMPLES[@]} != ${#SAMPLES_ABRV[@]} )); then
    echo "ERROR: SAMPLES and SAMPLES_ABRV must have the same number of entries" >&2
    exit 1
fi

date
echo "INFERRING PATHS OF EACH SPECIES..."
# Without this check a missing directory would leave the script running in the
# caller's working directory, creating outputs and reading inputs in the wrong place.
cd "$GRAPHDIR" || { echo "ERROR: cannot cd to $GRAPHDIR" >&2; exit 1; }
mkdir -p "$OUTPUT_DIR"

failed=0
for i in "${!SAMPLES[@]}"
do
    SAMPLE=${SAMPLES[i]}
    SAMPLE_ABRV=${SAMPLES_ABRV[i]}
    echo "processing..." "$SAMPLE"
    if ! python3 "$EXTRACT_SCRIPT" \
       "$SAMPLE.variants" "$SAMPLE.coverage" "$GRAPHNAME-variants.nodesbybubble" "$SAMPLE_ABRV" \
       --output_dir "$OUTPUT_DIR"
    then
        # Keep going so the remaining samples are still processed, but remember
        # the failure so the script does not report success.
        echo "ERROR: path extraction failed for $SAMPLE" >&2
        failed=$((failed + 1))
    fi
done
date

if (( failed > 0 )); then
    echo "ERROR: $failed sample(s) failed" >&2
    exit 1
fi

