# README: Generic scripts for parsing RepeatMasker outputs

## Representative example (for *Astatotilapia calliptera*, `astCal`)
```sh
# Requires $MODULE_DIR (parser scripts) and $META_DIR (alias tables) to be set.

# simplify the RepeatMasker output and rename chromosomes based on the alias table
# (the positional "2" is passed to modify_chr_column.py unchanged; presumably the
#  index of the chromosome column -- confirm against that script's usage)
python "$MODULE_DIR/parse_repmask_to_simple.py" < astCal_longread.ref.fa.out \
  | python "$MODULE_DIR/modify_chr_column.py" 2 \
      --alias "$META_DIR/alias_astCal1.2_chr-ensembl.txt" --drop-mito \
  > astCal_longread.ref.fa.out.simple

# make the output bedready
python "$MODULE_DIR/parse_simple_to_bedready.py" \
  < astCal_longread.ref.fa.out.simple \
  > astCal_longread.ref.fa.bedready.txt

# convert to BED12 (keep the first 12 columns, tab-separated)
awk -v last=12 '{ for (i = 1; i < last; i++) printf("%s\t", $i); print $last }' \
  astCal_longread.ref.fa.bedready.txt > astCal_longread.ref.fa.bed12.bed

# get BED6 file for overall start and end of transposons (first 6 columns)
awk -v last=6 '{ for (i = 1; i < last; i++) printf("%s\t", $i); print $last }' \
  astCal_longread.ref.fa.bedready.txt > astCal_longread.ref.fa.bed6.bed

# get BED6 file for fragments
# (-i needs an input file argument; a bare "-i" leaves the tool without input)
bedtools bed12tobed6 -i astCal_longread.ref.fa.bed12.bed \
  > astCal_longread.ref.fa.bed6_frag.bed

# get BEDviz formatted file
python "$MODULE_DIR/parse_bedready_for_viz.py" \
  < astCal_longread.ref.fa.bedready.txt \
  > astCal_longread.ref.fa.bedviz.bed
```

## Example to filter the BEDviz files (using AWK)
```sh
# entries where column 1 equals 1 and column 2 is below 100000
# (presumably chromosome 1, start < 100 kb -- confirm against the bedviz column layout)
awk '$1 == 1 && $2 < 100000' astCal_longread.ref.fa.bedviz.bed | less -S

# scratch: concatenate several streams with process substitution (bash only)
cat <(echo snowy) <(echo) <(echo bibby) | less

# filter by transposon name (column 4)
head -n 1000 astCal_longread.ref.fa.bedviz.bed | awk '$4 ~ /DNA|LTR/' | less -S
head -n 1000 astCal_longread.ref.fa.bedviz.bed | awk '$4 ~ /DNA\/Tc/' | less -S

# exclude simple repeats, or exclude unknown repeats
head -n 1000 astCal_longread.ref.fa.bedviz.bed | awk '$4 !~ /Simple_repeat/' | less -S
head -n 1000 astCal_longread.ref.fa.bedviz.bed | awk '$4 !~ /Unknown/' | less -S

# get unique entries (sort -u already de-duplicates; no extra uniq needed)
head -n 1000 astCal_longread.ref.fa.bedviz.bed | awk '{ print $4 }' | sort -u | less -S

# filter by Kimura and length
# (presumably column 5 = Kimura divergence and column 14 = length -- confirm)
head -n 1000 astCal_longread.ref.fa.bedviz.bed | awk '$5 < 20 && $14 > 1000' | less -S
```

## Misc commands
```sh
# easier viewing: align tab-separated columns into a table
# (-s takes the separator as its argument, so it must be followed by the tab
#  character; -t is the flag that enables table mode)
column -t -s "$(printf '\t')" astCal_longread.ref.fa.bedviz.bed | less -S

# create symbolic links (requires $REPO_DIR to be set)
ln -s "$REPO_DIR/output_repmask/2021.02.17_longread/repeatmasker_output/astCal1.2.ref.fa.out" astCal
```