# Parse RepeatMasker outputs into human-readable, IGV-compatible files
# for the Malawi haplochromine samples.

# Root of the graph-genome working tree for the Malawi haplochromine samples.
GRAPHDIR="$HOME/code/malawi_transposon/cloud/graph_genome/malawi_haplochromines/"

# SAMPLES and SIZES are parallel arrays: SIZES[i] is passed to the landscape
# plotting script together with the data of SAMPLES[i].
# NOTE(review): the values look like genome sizes in bp -- confirm.
SAMPLES=(astCal aulStu mayZeb rhaChi troMau copChr otoArg rhaChi2)
SIZES=(880445564 889966833 957485262 901598841 912692380 855116986 871152569 846032830)

# MODULE_DIR is not defined in this script; it has to come from the caller's
# environment. Without it the helper scripts would be looked up under
# /repeatmasker/, so say so loudly instead of failing obscurely later on.
if [ -z "${MODULE_DIR:-}" ]; then
    echo "warning: MODULE_DIR is unset or empty; helper scripts will be looked up in /repeatmasker/" >&2
fi
SUBMODULE_DIR="$MODULE_DIR/repeatmasker/"

# All inputs and outputs below are relative to this directory. Stop if it
# cannot be entered, otherwise fig/ and every output file would be created in
# whatever directory the caller happened to be in.
# `return` covers the case where this file is sourced, `exit` the case where
# it is executed.
cd "$GRAPHDIR/pseudoreference/repeatmasker_astCalLib/" || {
    echo "error: cannot cd to $GRAPHDIR/pseudoreference/repeatmasker_astCalLib/" >&2
    return 1 2>/dev/null || exit 1
}

# -p: do not fail when fig/ already exists from a previous run.
mkdir -p fig/

# Run the RepeatMasker post-processing pipeline once per sample:
#   raw .fa.out -> .simple -> .bedready.txt -> .bedviz.bed -> .bedviz_filtered.bed
# followed by a landscape plot written into fig/.
for i in "${!SAMPLES[@]}"
do
    # get sample information
    SAMPLE=${SAMPLES[i]}
    SIZE=${SIZES[i]}
    SAMPLE_PATH=path_$SAMPLE.fa.out

    echo "processing... $SAMPLE"

    # Without the raw RepeatMasker output every later stage would only
    # produce empty files, so skip the sample instead.
    if [[ ! -r "raw_output/$SAMPLE_PATH" ]]; then
        echo "warning: raw_output/$SAMPLE_PATH is missing or unreadable; skipping $SAMPLE" >&2
        continue
    fi

    # NOTE(review): the original comment here said "certain species need to
    # have chromosomes renamed"; presumably that happens inside the parsing
    # scripts -- confirm.
    echo "repmask to simple"
    python "$SUBMODULE_DIR/parse_repmask_to_simple.py" < "raw_output/$SAMPLE_PATH" \
        | python "$SUBMODULE_DIR/parse_repeatfamily_column.py" > "path_$SAMPLE.simple"

    echo "simple to bedready"
    python "$SUBMODULE_DIR/parse_simple_to_bedready.py" \
        < "path_$SAMPLE.simple" > "path_$SAMPLE.bedready.txt"

    echo "bedready to viz"
    python "$SUBMODULE_DIR/parse_bedready_for_viz.py" \
        < "path_$SAMPLE.bedready.txt" > "path_$SAMPLE.bedviz.bed"

    # Drop Simple_repeat / Low_complexity records and keep only rows whose
    # fifth field is <= 11.
    # NOTE(review): the meaning of column 5 is defined by
    # parse_bedready_for_viz.py -- confirm.
    echo "filter viz"
    grep -E -v "Simple_repeat|Low_complexity" "path_$SAMPLE.bedviz.bed" \
        | awk '$5<=11' > "path_$SAMPLE.bedviz_filtered.bed"

    echo "landscape plots"
    python "$SUBMODULE_DIR/plot_repmask_landscape.py" "path_$SAMPLE.bedready.txt" "$SIZE" fig/

    echo

done
