# please source the `source_alias.sh` file in the repo root before running

# Assemblies whose minigraph variant calls are processed by this script.
SAMPLES=(
    astCal1.2_ensembl
    mayZeb2.0_ensembl
    troMau2.0
    aulStu5.0
    otoArg1.0
    copChr1.0
    rhaChi1.0
    rhaChi2.0
)

# Graph name; used as the prefix of the combined output files.
GRAPHNAME="malawi_haplochromines"
# Directory holding the per-sample inputs and all outputs.
# REPO_DIR is not set here -- presumably it comes from source_alias.sh (confirm).
GRAPHDIR="$REPO_DIR/cloud/graph_genome/$GRAPHNAME/"

# Individual samples: run each sample's .variants file through the
# minigraph parser and keep fields 7-12 of its output as a per-sample BED file.
for sample in "${SAMPLES[@]}"
do
    echo "processing..." "$sample"
    # All expansions are quoted so paths containing spaces or glob characters
    # are passed through intact; the input is redirected directly instead of
    # being piped through `cat`.
    python "$MODULE_DIR/minigraph/parse_minigraph_variants6col.py" \
        < "$GRAPHDIR/$sample.variants" \
        | cut -f7-12 \
        > "$GRAPHDIR/$sample.variants6col.bed"
done

# Combine the per-sample BED files into one large matrix.
echo "combining everything into summary"
# The per-sample files are passed as bare file names, so the combiner must run
# from inside $GRAPHDIR. If the directory cannot be entered, skip the step
# rather than run against the wrong directory (no `exit`, in case this script
# is being sourced into an interactive shell).
if cd "$GRAPHDIR"; then
    # "${SAMPLES[@]/%/.variants6col.bed}" appends the suffix to every sample
    # name and yields exactly one argument per sample.
    python "$MODULE_DIR/minigraph/combine_variants6col_files.py" \
        "$GRAPHDIR/$GRAPHNAME-variants.bed" \
        "${SAMPLES[@]/%/.variants6col.bed}"
else
    echo "cannot cd to '$GRAPHDIR'; skipping the combine step" >&2
fi

# Get the longest and shortest sequence columns (fields 13 and 14 of the
# summary) into a separate file.
# NOTE(review): $GRAPHNAME-summary.csv is not written by any command visible in
# this script -- presumably combine_variants6col_files.py produces it; confirm.
# NOTE(review): `cut` splits on TAB by default, so this assumes the summary is
# tab-separated despite its .csv extension -- confirm.
cut -f13,14 "$GRAPHDIR/$GRAPHNAME-summary.csv" > "$GRAPHDIR/$GRAPHNAME-summary_sequences.txt"
