# Script to parse RepeatMasker outputs into human-readable and IGV-compatible files
# for the astCal runs in which various RepeatModeler parameters were tested.

# Repository layout. REPO_DIR keeps its trailing slash, so every path derived
# from it contains a doubled "/" (harmless for path resolution).
REPO_DIR="${HOME}/code/malawi_transposon/"
META_DIR="${REPO_DIR}/metadata/"
SCRIPT_DIR="${REPO_DIR}/script/"
MODULE_DIR="${REPO_DIR}/modules/"
SUBMODULE_DIR="${MODULE_DIR}/repeatmasker/"

# Handed to plot_repmask_landscape.py as its second argument
# (presumably the astCal genome size in bp -- TODO confirm).
SIZE=880445564
# Directory that receives every file written by this script.
OUT_DIR="${HOME}/code/malawi_transposon/storage/cloud/out/repmask_calliptera_reparse_jun23/"

################################################################################

# calliptera (Repbase + LTR run): remake the landscape plot after removing the
# artefactual family rnd-1_family-37 from the previously generated bedready file.

SAMPLE="astCal_Repbase_LTR_remake"

echo "processing... $SAMPLE"

# The output redirections in this script fail if the directory is missing.
mkdir -p -- "$OUT_DIR"

echo "filtering previous bedready"
# -F: treat the family name as a literal string.
# -w: whole-word match, so only family-37 is dropped and not family-370,
#     family-371, ... which merely share the same prefix.
grep -vwF -- "rnd-1_family-37" \
    "$HOME/code/malawi_transposon/storage/cloud/out/repmask/calliptera_repbase_jun21/astCal1.2_ensembl.bedready.txt" \
    > "$OUT_DIR/$SAMPLE.bedready.txt"

echo "landscape plots"
# Arguments: filtered bedready file, $SIZE, output directory for figures.
python "$SUBMODULE_DIR/plot_repmask_landscape.py" "$OUT_DIR/$SAMPLE.bedready.txt" "$SIZE" "$OUT_DIR/fig/"

################################################################################

# calliptera, repbase + no LTR module (out/repmask/calliptera_repbase_noLTR_jun21/ originally)
#
# Pipeline: raw RepeatMasker .out -> .simple -> .bedready.txt -> .bedviz.bed
#           -> .bedviz_filtered.bed, followed by the landscape plot.
# All expansions are quoted so paths containing whitespace survive intact.

SAMPLE_PATH="$HOME/code/malawi_transposon/storage/cloud/out/repmask/calliptera_repbase_noLTR_jun21/raw_output/astCal_v1.2.fa.out"
SAMPLE="astCal_RepBase_noLTR"

echo "processing... $SAMPLE"

echo "repmask to simple"
python "$SUBMODULE_DIR/parse_repmask_to_simple.py" < "$SAMPLE_PATH" | \
    python "$SUBMODULE_DIR/parse_repeatfamily_column.py" > \
    "$OUT_DIR/$SAMPLE.simple"

echo "simple to bedready"
python "$SUBMODULE_DIR/parse_simple_to_bedready.py" < "$OUT_DIR/$SAMPLE.simple" > "$OUT_DIR/$SAMPLE.bedready.txt"

echo "bedready to viz"
python "$SUBMODULE_DIR/parse_bedready_for_viz.py" < "$OUT_DIR/$SAMPLE.bedready.txt" > "$OUT_DIR/$SAMPLE.bedviz.bed"

echo "filter viz"
# Drop Simple_repeat / Low_complexity rows, then keep rows whose 5th field is <= 11.
# grep -E replaces the obsolescent egrep.
grep -Ev "Simple_repeat|Low_complexity" "$OUT_DIR/$SAMPLE.bedviz.bed" | awk '$5<=11' > "$OUT_DIR/$SAMPLE.bedviz_filtered.bed"

echo "landscape plots"
python "$SUBMODULE_DIR/plot_repmask_landscape.py" "$OUT_DIR/$SAMPLE.bedready.txt" "$SIZE" "$OUT_DIR/fig/"

echo

################################################################################

# calliptera, no repbase + LTR module (astCal_alone in repmask_2021apr)
#
# Same pipeline as the other runs, with an extra modify_chr_column.py stage
# that is given the chr->ensembl alias table and --drop-mito
# (presumably renames chromosomes and drops the mitochondrial contig -- TODO confirm).

SAMPLE_PATH="$HOME/code/malawi_transposon/storage/cloud/repeatmodeler_old/2021.02.26_calliptera/repeatmasker_output/astCal1.2.ref.fa.out"
SAMPLE="astCal_noRepBase_LTR"

echo "processing... $SAMPLE"

echo "repmask to simple"
python "$SUBMODULE_DIR/parse_repmask_to_simple.py" < "$SAMPLE_PATH" | \
    python "$MODULE_DIR/misc/modify_chr_column.py" --alias "$META_DIR/alias_astCal1.2_chr-ensembl.txt" --drop-mito | \
    python "$SUBMODULE_DIR/parse_repeatfamily_column.py" > \
    "$OUT_DIR/$SAMPLE.simple"

echo "simple to bedready"
python "$SUBMODULE_DIR/parse_simple_to_bedready.py" < "$OUT_DIR/$SAMPLE.simple" > "$OUT_DIR/$SAMPLE.bedready.txt"

echo "bedready to viz"
python "$SUBMODULE_DIR/parse_bedready_for_viz.py" < "$OUT_DIR/$SAMPLE.bedready.txt" > "$OUT_DIR/$SAMPLE.bedviz.bed"

echo "filter viz"
# Drop Simple_repeat / Low_complexity rows, then keep rows whose 5th field is <= 11.
# grep -E replaces the obsolescent egrep.
grep -Ev "Simple_repeat|Low_complexity" "$OUT_DIR/$SAMPLE.bedviz.bed" | awk '$5<=11' > "$OUT_DIR/$SAMPLE.bedviz_filtered.bed"

echo "landscape plots"
python "$SUBMODULE_DIR/plot_repmask_landscape.py" "$OUT_DIR/$SAMPLE.bedready.txt" "$SIZE" "$OUT_DIR/fig/"

echo

################################################################################

# calliptera, no repbase + no LTR module (astCal_noLTR in repmask_2021apr)
#
# Raw RepeatMasker .out -> .simple (with chromosome aliasing) -> .bedready.txt
# -> .bedviz.bed -> .bedviz_filtered.bed, then the landscape plot.

SAMPLE_PATH="$HOME/code/malawi_transposon/storage/cloud/repeatmodeler_old/2021.03.05_noltr/repeatmasker_output/astCal1.2.ref.fa.out"
SAMPLE="astCal_noRepBase_noLTR"

echo "processing... $SAMPLE"

echo "repmask to simple"
python "$SUBMODULE_DIR/parse_repmask_to_simple.py" < "$SAMPLE_PATH" | \
    python "$MODULE_DIR/misc/modify_chr_column.py" --alias "$META_DIR/alias_astCal1.2_chr-ensembl.txt" --drop-mito | \
    python "$SUBMODULE_DIR/parse_repeatfamily_column.py" > \
    "$OUT_DIR/$SAMPLE.simple"

echo "simple to bedready"
python "$SUBMODULE_DIR/parse_simple_to_bedready.py" < "$OUT_DIR/$SAMPLE.simple" > "$OUT_DIR/$SAMPLE.bedready.txt"

echo "bedready to viz"
python "$SUBMODULE_DIR/parse_bedready_for_viz.py" < "$OUT_DIR/$SAMPLE.bedready.txt" > "$OUT_DIR/$SAMPLE.bedviz.bed"

echo "filter viz"
# Drop Simple_repeat / Low_complexity rows, then keep rows whose 5th field is <= 11.
# grep -E replaces the obsolescent egrep.
grep -Ev "Simple_repeat|Low_complexity" "$OUT_DIR/$SAMPLE.bedviz.bed" | awk '$5<=11' > "$OUT_DIR/$SAMPLE.bedviz_filtered.bed"

echo "landscape plots"
python "$SUBMODULE_DIR/plot_repmask_landscape.py" "$OUT_DIR/$SAMPLE.bedready.txt" "$SIZE" "$OUT_DIR/fig/"

echo

################################################################################

# calliptera, originally run by Greg (astCal_greg in repmask_2021apr)
#
# Same pipeline as the other runs; this input uses the archived
# genbank->ensembl alias table for the chromosome-column rewrite.

SAMPLE_PATH="$HOME/code/malawi_transposon/storage/cloud/repeatmodeler_old/2021_greg_repeatmodeller/astcal_repeatmodeller.out"
SAMPLE="astCal_greg"

echo "processing... $SAMPLE"

echo "repmask to simple"
python "$SUBMODULE_DIR/parse_repmask_to_simple.py" < "$SAMPLE_PATH" | \
    python "$MODULE_DIR/misc/modify_chr_column.py" --alias "$META_DIR/alias_archive/alias_astCal1.2_genbank-ensembl.txt" --drop-mito | \
    python "$SUBMODULE_DIR/parse_repeatfamily_column.py" > \
    "$OUT_DIR/$SAMPLE.simple"

echo "simple to bedready"
python "$SUBMODULE_DIR/parse_simple_to_bedready.py" < "$OUT_DIR/$SAMPLE.simple" > "$OUT_DIR/$SAMPLE.bedready.txt"

echo "bedready to viz"
python "$SUBMODULE_DIR/parse_bedready_for_viz.py" < "$OUT_DIR/$SAMPLE.bedready.txt" > "$OUT_DIR/$SAMPLE.bedviz.bed"

echo "filter viz"
# Drop Simple_repeat / Low_complexity rows, then keep rows whose 5th field is <= 11.
# grep -E replaces the obsolescent egrep.
grep -Ev "Simple_repeat|Low_complexity" "$OUT_DIR/$SAMPLE.bedviz.bed" | awk '$5<=11' > "$OUT_DIR/$SAMPLE.bedviz_filtered.bed"

echo "landscape plots"
python "$SUBMODULE_DIR/plot_repmask_landscape.py" "$OUT_DIR/$SAMPLE.bedready.txt" "$SIZE" "$OUT_DIR/fig/"

echo

