# Script to parse RepeatMasker outputs into human-readable, IGV-compatible files
# for Malawi haplochromines.

# Root directory of the RepeatMasker results; each repeat library has its own
# subdirectory underneath it.
IN_DIR="${HOME}/code/malawi_transposon/cloud/out/repmask/"
# Directory holding the RepeatMasker parsing scripts.
# NOTE(review): MODULE_DIR is not assigned in this script as shown; it is
# presumably exported by the caller's environment - confirm.
SUBMODULE_DIR="${MODULE_DIR}/repeatmasker/"

################################################################################

#  Malawi species 

# Assembly identifiers to process. SAMPLES and SIZES are parallel arrays:
# the entry at index i of SIZES belongs to the sample at index i of SAMPLES.
SAMPLES=(
    astCal1.2
    aulStu5.0
    mayZeb2.0
    rhaChi1.0
    troMau2.0
    copChr1.0
    otoArg1.0
    rhaChi2.0
)

# Size passed to the landscape plotting script for each sample
# (presumably the assembly length in bp - TODO confirm).
SIZES=(
    880445564
    889966833
    957485262
    901598841
    912692380
    855116986
    871152569
    846032830
)

# Repeat-library result directories (relative to IN_DIR) to process.
LIB_DIR_ARRAY=(
    calliptera_repbase_jun21
)

# Convert the raw RepeatMasker output of every sample, for every repeat library,
# into .simple / .bedready.txt / .bedviz.bed / .bedviz_filtered.bed files and
# draw the landscape plots.
#
# Read from the enclosing script/environment: IN_DIR, SUBMODULE_DIR, MODULE_DIR,
# META_DIR, SAMPLES, SIZES, LIB_DIR_ARRAY.

# Without pipefail a pipeline reports only the status of its last stage, so a
# parser crashing mid-pipeline would go unnoticed.
set -o pipefail

#######################################
# Tell whether a sample needs its chromosomes renamed via an alias file.
# Arguments: $1 - sample (assembly) name
# Returns:   0 if the sample is renamed, 1 otherwise
#######################################
needs_ensembl_rename() {
    case "$1" in
        astCal1.2 | mayZeb2.0) return 0 ;;
        *) return 1 ;;
    esac
}

#######################################
# Filter a bedviz stream: drop lines mentioning Simple_repeat or
# Low_complexity and keep only lines whose 5th column is <= 11.
# Reads stdin, writes stdout. A single awk is used so that "nothing left after
# filtering" is not reported as a failure (grep -v exits 1 in that case).
#######################################
filter_bedviz() {
    awk '!/Simple_repeat|Low_complexity/ && $5 <= 11'
}

#######################################
# Run the whole conversion chain for one sample of one repeat library.
# Globals:   IN_DIR, SUBMODULE_DIR, MODULE_DIR, META_DIR (all read)
# Arguments: $1 - repeat library directory name (relative to IN_DIR)
#            $2 - sample (assembly) name
#            $3 - size handed to the landscape plotting script
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 on success, 1 as soon as any step fails
#######################################
process_sample() {
    local lib_dir=$1 sample=$2 size=$3
    local out_dir="$IN_DIR/$lib_dir"
    local fig_dir="$out_dir/fig/"
    local raw_path="$out_dir/raw_output/$sample.ref.fa.out"
    # Base name of every output file; renamed samples get an "_ensembl" suffix.
    local out_name=$sample
    local -a rename_args=(--drop-mito)

    echo "processing... $sample"

    mkdir -p -- "$fig_dir" || return 1

    if [[ ! -r "$raw_path" ]]; then
        printf 'error: cannot read RepeatMasker output: %s\n' "$raw_path" >&2
        return 1
    fi

    # certain species need to have chromosomes renamed
    if needs_ensembl_rename "$sample"; then
        out_name="${sample}_ensembl"
        rename_args=(--alias "$META_DIR/alias_${sample}_chr-ensembl.txt" --drop-mito)
    fi

    local simple="$out_dir/$out_name.simple"
    local bedready="$out_dir/$out_name.bedready.txt"
    local bedviz="$out_dir/$out_name.bedviz.bed"
    local bedviz_filtered="$out_dir/$out_name.bedviz_filtered.bed"

    echo "repmask to simple"
    python "$SUBMODULE_DIR/parse_repmask_to_simple.py" < "$raw_path" \
        | python "$MODULE_DIR/misc/modify_chr_column.py" "${rename_args[@]}" \
        | python "$SUBMODULE_DIR/parse_repeatfamily_column.py" \
        > "$simple" || return 1

    echo "simple to bedready"
    python "$SUBMODULE_DIR/parse_simple_to_bedready.py" \
        < "$simple" > "$bedready" || return 1

    echo "bedready to viz"
    python "$SUBMODULE_DIR/parse_bedready_for_viz.py" \
        < "$bedready" > "$bedviz" || return 1

    echo "filter viz"
    filter_bedviz < "$bedviz" > "$bedviz_filtered" || return 1

    echo "landscape plots"
    python "$SUBMODULE_DIR/plot_repmask_landscape.py" \
        "$bedready" "$size" "$fig_dir" || return 1
}

failed_samples=0

if (( ${#SAMPLES[@]} != ${#SIZES[@]} )); then
    # The two arrays are paired by index; a mismatch would silently pass an
    # empty or wrong size to the plotting script.
    printf 'error: SAMPLES has %d entries but SIZES has %d\n' \
        "${#SAMPLES[@]}" "${#SIZES[@]}" >&2
    failed_samples=1
else
    for LIB_DIR in "${LIB_DIR_ARRAY[@]}"; do
        echo "processing... $LIB_DIR"

        for i in "${!SAMPLES[@]}"; do
            # A failing sample is reported and skipped so that the remaining
            # samples are still processed.
            if ! process_sample "$LIB_DIR" "${SAMPLES[i]}" "${SIZES[i]}"; then
                printf 'error: processing failed for %s (%s)\n' \
                    "${SAMPLES[i]}" "$LIB_DIR" >&2
                failed_samples=$((failed_samples + 1))
            fi

            echo
        done
    done
fi

# Leave a non-zero status behind when at least one sample failed.
(( failed_samples == 0 ))
    
