#!/bin/bash

#### manual ####
## -n) $nu_fasta            nuclear genome FASTA file
## -f) $fname               genome name, used as the prefix of the output files
## -o) $org_fasta           organellar genome FASTA file
## -m) $mummer_bin_dir      path to the MUMmer bin directory, default: $HOME/biosoft/mummer4.0.0beta2/bin
## -p) $org_fasta_for_plot  organellar genome FASTA file for mummerplot, default: same as -o
####end####

# Called without arguments: print a one-line usage summary and stop.
# -m and -p are optional (both have defaults further down), so they are
# shown in brackets; -p was previously missing from the summary.
if [ -z "$1" ]
then
    echo "Help: $0 -n nu_fasta_dir -f genome_name -o organelle_fasta_dir [-m mummer_bin_dir] [-p organelle_fasta_for_plot]"
    exit 0
fi

# Walk the positional parameters and record the value of each recognised
# option. A value-taking option consumes two words: the shift inside its
# branch plus the shared shift at the bottom of the loop.
# Parsing stops at the first empty argument or at an explicit "--".
until [ -z "$1" ]; do
    case "$1" in
        -n)
            nu_fasta="$2"
            echo "nu_fasta_dir = $nu_fasta"
            shift
            ;;
        -f)
            fname="$2"
            echo "genome_name = $fname"
            shift
            ;;
        -o)
            org_fasta="$2"
            echo "organelle_fasta_dir = $org_fasta"
            shift
            ;;
        -m)
            mummer_bin_dir="$2"
            echo "mummer_bin_dir = $mummer_bin_dir"
            shift
            ;;
        -p)
            org_fasta_for_plot="$2"
            echo "organelle_fasta_for_plot = $org_fasta_for_plot"
            shift
            ;;
        --)
            # End-of-options marker: drop it and leave the loop.
            shift
            break
            ;;
        *)
            # Unrecognised words are reported and skipped, not fatal.
            echo "Warning: $1 is not an option"
            ;;
    esac
    shift
done

# Validate the mandatory inputs and fill in defaults for the optional ones.
# Both FASTA paths are required; a missing one is reported on stderr and the
# script stops with a non-zero status so callers can detect the failure.
if [ -z "$org_fasta" ]
then
    echo "No organelle fasta file!" >&2
    exit 1
fi

if [ -z "$nu_fasta" ]
then
    echo "No nuclear fasta file!" >&2
    exit 1
fi

#mummer4
# Fall back to the default MUMmer location when -m was not given.
if [ -z "$mummer_bin_dir" ]
then
    mummer_bin_dir=$HOME/biosoft/mummer4.0.0beta2/bin
    echo "default mummer_bin_dir = $mummer_bin_dir"
fi
# Prepend the chosen directory to PATH in both cases. Previously this only
# happened for the default, so a directory passed with -m was never used.
export PATH="$mummer_bin_dir:$PATH"

# Without -p, mummerplot uses the same organelle FASTA as the main alignment.
if [ -z "$org_fasta_for_plot" ]
then
    org_fasta_for_plot=$org_fasta
fi

# Create one output directory per pipeline stage in the current directory.
# -p makes this a no-op for directories that already exist.
mkdir -p \
    01.1.maxdelta \
    01.2.maxcoords \
    02.bed \
    03.1.discBED \
    03.2.merge5000 \
    03.3.merge5000fa \
    03.4.cluster5000 \
    03.5.cluster5000fa \
    04.1.1kbed \
    04.2.1kfa \
    04.3.1kmaxdelta \
    04.4.1kmaxcoords \
    04.5.1kmaxplot

#nucmer
# Main pipeline: align the nuclear genome against the organelle genome,
# convert the hits to BED intervals, merge/cluster them, extract FASTA for
# the merged regions, re-align those regions and plot each one.
# Needs nucmer, show-coords, mummerplot, bedtools, seqkit and perl on PATH,
# and the helper scripts under ./scripts relative to the current directory.

# Report a failed step on stderr and stop; every step below feeds the next,
# so continuing after a failure would only process missing or empty files.
abort_pipeline() {
    echo "Error: $*" >&2
    exit 1
}

# Make a pipeline fail when any stage fails, not only the last one.
# NOTE: this shell option stays enabled for the remainder of the script.
set -o pipefail

# Name every output after the basename of the genome name, so the nucmer
# prefix and the delta file read by show-coords always agree (they diverged
# when the name contained a "/").
base=${fname##*/}
delta=$base.max.delta
coords=$delta.coords

echo "$delta"
echo "$coords"


nucmer -c 50 --maxmatch -p "01.1.maxdelta/$base.max" "$nu_fasta" "$org_fasta" \
    || abort_pipeline "nucmer failed for $nu_fasta vs $org_fasta"
show-coords -rclT "01.1.maxdelta/$delta" > "01.2.maxcoords/$coords" \
    || abort_pipeline "show-coords failed for 01.1.maxdelta/$delta"
# Skip the first four lines (presumably the show-coords -T header) and let
# scripts/coord2bed.pl convert the remaining rows; uniq drops adjacent duplicates.
sed '1,4d' "01.2.maxcoords/$coords" | perl scripts/coord2bed.pl | uniq > "02.bed/$base.bed" \
    || abort_pipeline "could not build 02.bed/$base.bed"

#bedtools merge
# Sort by chromosome and start, then merge overlapping intervals.
sort -k1,1 -k2,2n "02.bed/$base.bed" | bedtools merge -i - > "03.1.discBED/$base.bed" \
    || abort_pipeline "bedtools merge failed for 02.bed/$base.bed"

#bedtools merge 5000bp
# Merge again, this time joining intervals up to 5000 bp apart, and extract their sequence.
bedtools merge -d 5000 -i "03.1.discBED/$base.bed" > "03.2.merge5000/$base.bed" \
    || abort_pipeline "bedtools merge -d 5000 failed"
bedtools getfasta -fi "$nu_fasta" -bed "03.2.merge5000/$base.bed" -fo "03.3.merge5000fa/$base.fa" \
    || abort_pipeline "bedtools getfasta failed for 03.2.merge5000/$base.bed"

# Cluster intervals within 5000 bp using bedtools cluster
bedtools cluster -d 5000 -i "03.1.discBED/$base.bed" | perl scripts/rename_cluster.pl > "03.4.cluster5000/$base.bed" \
    || abort_pipeline "bedtools cluster failed"
bedtools getfasta -fi "$nu_fasta" -bed "03.4.cluster5000/$base.bed" -name | perl scripts/merge_cluster_fasta.pl > "03.5.cluster5000fa/$base.fa" \
    || abort_pipeline "could not build 03.5.cluster5000fa/$base.fa"

#get 1kb bed and fasta
# scripts/get1kbed.pl derives the 1 kb BED from the merged intervals
# (presumably by filtering or padding on length - see the script).
perl scripts/get1kbed.pl "03.2.merge5000/$base.bed" > "04.1.1kbed/$base.bed" \
    || abort_pipeline "get1kbed.pl failed"
bedtools getfasta -fi "$nu_fasta" -bed "04.1.1kbed/$base.bed" -fo "04.2.1kfa/$base.fa" \
    || abort_pipeline "bedtools getfasta failed for 04.1.1kbed/$base.bed"

#mummer for 1kb fasta
# Here the organelle genome is the reference and the extracted regions are the queries.
nucmer -c 50 --maxmatch -p "04.3.1kmaxdelta/$base" "$org_fasta_for_plot" "04.2.1kfa/$base.fa" \
    || abort_pipeline "nucmer failed for the 1kb regions"
show-coords -rclT "04.3.1kmaxdelta/$base.delta" > "04.4.1kmaxcoords/$base.coords" \
    || abort_pipeline "show-coords failed for 04.3.1kmaxdelta/$base.delta"

#mummerplot
# Create the per-genome plot directory directly instead of cd-ing in and out,
# which would have moved the script one level up if the first cd failed.
mkdir -p "04.5.1kmaxplot/$base" \
    || abort_pipeline "cannot create 04.5.1kmaxplot/$base"

# One plot per sequence of the 1 kb FASTA. Only the first whitespace-separated
# field of each header (the sequence ID) is used. The names are read on file
# descriptor 3 so mummerplot keeps the script's own stdin.
while read -r name _ <&3
do
    [ -n "$name" ] || continue
    echo "$name"
    # The first ":" of the ID is replaced by "_" in the output prefix.
    # A failing plot does not stop the remaining ones.
    mummerplot -p "04.5.1kmaxplot/$base/${name/:/_}" -q "$name" "04.3.1kmaxdelta/$base.delta" --png
done 3< <(seqkit seq --name "04.2.1kfa/$base.fa")