# see annote_IES.R


# ---------------------------------------------------------------------------
# Build the reference "MAC+IES genome without internal IESs" and index it.
#
# Inputs : GENOME - FASTA of the MAC genome with IESs
#          IES    - GFF3 of the internal IESs annotated on GENOME
# Outputs: OUT_GENOME (FASTA + bowtie2 index + .fai) and OUT_IES (GFF3),
#          both written in the current directory.
# ---------------------------------------------------------------------------
GENOME=/data/PARAMECIUM/REFERENCES/tetraurelia/ptetraurelia_mac_51_with_ies.fa
REF_PREF=pt_51_with_ies

IES=internal_IES.pt_51_with_ies.gff3
OUT_GENOME=ptetraurelia_mac_51_with_ies.without_internal_IES.fa

# Strip the literal ".<REF_PREF>.gff3" suffix from the IES file name.
# basename removes an exact trailing suffix, whereas the former perl
# substitution used unescaped dots (any character) and was not anchored.
OUT_IES=$(basename "$IES" ".$REF_PREF.gff3")_sites.pt_51_with_ies_without_internal_IES.gff3

# NOTE(review): create_genome_without_ies.pl is not visible here; presumably it
# writes GENOME minus the IESs to -out_genome and the resulting IES sites to
# -out_ies -- confirm against the script.
if perl ../create_genome_without_ies.pl \
        -genome "$GENOME" \
        -ies "$IES" \
        -out_genome "$OUT_GENOME" \
        -out_genome_suffix _without_internal_IES \
        -out_ies "$OUT_IES"; then
    # Drop empty lines from the generated FASTA (edited in place).
    perl -pi -e 's/^\n//' "$OUT_GENOME"
    # The FASTA name doubles as the bowtie2 index basename.
    bowtie2-build "$OUT_GENOME" "$OUT_GENOME"
    samtools faidx "$OUT_GENOME"
else
    # Do not index a missing or partial genome.
    echo "WARNING: create_genome_without_ies.pl failed; $OUT_GENOME was not indexed" >&2
fi


# GFF3 whose features are annotated on the MAC+IES genome ($GENOME).
IES_ON_MAC_IES=/data/PARAMECIUM/ANALYSIS/tetraurelia/ptetraurelia_mac_51//PGM/SAMPLES/ABK_COSP/ParTIES_DeNovo/Insert/Insert.gff3

# Feed that GFF3 (-gff) together with the genome and the internal IESs to
# gff_to_genome_without_ies.pl and capture its standard output as a new GFF3.
# NOTE(review): presumably this converts the coordinates to the genome without
# internal IESs -- the helper script is not visible here, confirm.
perl ../gff_to_genome_without_ies.pl \
    -genome "$GENOME" \
    -ies "$IES" \
    -out_genome_suffix _without_internal_IES \
    -gff "$IES_ON_MAC_IES" \
    > internal_eliminated_sequence_PGM_IES51_pt_51_with_ies_without_putative_internal_IESs.gff3


# see MAPPING





# Shared project settings.
# NOTE(review): the PREFIXES sample array used further down is not defined in
# this file; presumably it comes from headers.bash -- confirm.
source ../../headers.bash

# Number of threads handed to `parties MIRET`.
THREADS=15

# Root directory of all ParTIES results (created if missing).
BASE_OUT_DIR="ParTIES/"
mkdir -p "${BASE_OUT_DIR}"

# --- MAC genome -------------------------------------------------------------
MAC_GENOME="/data/PARAMECIUM/REFERENCES/tetraurelia/ptetraurelia_mac_51.fa"
MAC_REF_PREF="pt_51"
IES_ON_MAC="/data/PARAMECIUM/ANALYSIS/tetraurelia/ptetraurelia_mac_51//PGM/SAMPLES/ABK_COSP/ParTIES_DeNovo/MICA/MICA.gff3"

# --- MAC+IES genome ---------------------------------------------------------
MAC_IES_GENOME="/data/PARAMECIUM/REFERENCES/tetraurelia/ptetraurelia_mac_51_with_ies.fa"
MAC_IES_REF_PREF="pt_51_with_ies"
INT_IES_ON_MAC_IES="internal_IES.pt_51_with_ies.gff3"

# --- MAC+IES genome without internal IESs (files in the current directory) ---
MAC_IES_woINT_GENOME="ptetraurelia_mac_51_with_ies.without_internal_IES.fa"
MAC_IES_woINT_REF_PREF="pt_51_with_ies_without_internal_IES"
INT_IES_ON_MAC_IES_woINT="internal_IES_sites.pt_51_with_ies_without_internal_IES.gff3"
IES_ON_MAC_IES_woINT="internal_eliminated_sequence_PGM_IES51_pt_51_with_ies_without_putative_internal_IESs.gff3"



# ---------------------------------------------------------------------------
# Run `parties MIRET` twice for every sample prefix in PREFIXES:
#   1. internal IESs     : MAC+IES genome (germline side) vs. MAC+IES genome
#                          without internal IESs
#   2. encompassing IESs : MAC+IES genome without internal IESs (germline
#                          side) vs. MAC genome
# An analysis is skipped when its MIRET/MIRET.tab already exists.
# For analysis 2 every sample other than KLEB is given the KLEB MIRET.gff3 as
# -control, so KLEB has to be processed before the other samples.
# ---------------------------------------------------------------------------

# Run `parties MIRET` for the analysis described by the globals
# GENOME_WITH_IES, IES_ON_GENOME_WITH_IES, BAM_WITH_IES, GENOME_WITHOUT_IES,
# IES_ON_GENOME_WITHOUT_IES, BAM_WITHOUT_IES, OUT_DIR, PREFIX and THREADS.
# Arguments: $1 - optional MIRET.gff3 passed to parties as -control
# Outputs:   prints the path of the result table before running parties;
#            warnings go to stderr
# Returns:   0 when the analysis is skipped, otherwise the status of parties
run_miret() {
    local control=${1:-}
    local result_tab=$OUT_DIR/MIRET/MIRET.tab

    # Result already present: nothing to do.
    if [[ -f "$result_tab" ]]; then
        return 0
    fi

    # `find ... | head -1` yields an empty string when no BAM matches; running
    # parties then would leave -bam / -germline_bam without a value.
    if [[ -z "$BAM_WITHOUT_IES" || -z "$BAM_WITH_IES" ]]; then
        echo "WARNING: $PREFIX: BAM file not found, skipping $result_tab" >&2
        return 0
    fi

    # The control sample must have been processed first.
    if [[ -n "$control" && ! -f "$control" ]]; then
        echo "WARNING: $PREFIX: control $control not found, skipping $result_tab" >&2
        return 0
    fi

    echo "$result_tab"

    local -a miret_args=(MIRET -tab -trace -threads "$THREADS")
    if [[ -n "$control" ]]; then
        miret_args+=(-control "$control")
    fi
    miret_args+=(
        -genome "$GENOME_WITHOUT_IES"
        -ies "$IES_ON_GENOME_WITHOUT_IES"
        -germline_genome "$GENOME_WITH_IES"
        -germline_ies "$IES_ON_GENOME_WITH_IES"
        -out_dir "$OUT_DIR"
        -bam "$BAM_WITHOUT_IES"
        -germline_bam "$BAM_WITH_IES"
    )
    parties "${miret_args[@]}"
}

# Run both MIRET analyses for one sample.
# Arguments: $1 - sample prefix (an element of PREFIXES)
# Globals:   the per-analysis variables are deliberately left global, as in
#            the original inline loop.
process_sample() {
    local -r control_prefix=KLEB

    PREFIX=$1
    echo "$PREFIX"

    # Internal IESs

    GENOME_WITH_IES=$MAC_IES_GENOME
    WITH_IES_REF_PREF=$MAC_IES_REF_PREF
    IES_ON_GENOME_WITH_IES=$INT_IES_ON_MAC_IES
    BAM_WITH_IES=$(find /data/PARAMECIUM/MAPPING/tetraurelia/ptetraurelia_mac_51_with_ies/DNAseq/ -name "$PREFIX.BOWTIE.$MAC_IES_REF_PREF.pe.sorted.bam" | head -1)

    GENOME_WITHOUT_IES=$MAC_IES_woINT_GENOME
    WITHOUT_IES_REF_PREF=$MAC_IES_woINT_REF_PREF
    IES_ON_GENOME_WITHOUT_IES=$INT_IES_ON_MAC_IES_woINT
    BAM_WITHOUT_IES=mapping/$PREFIX.BOWTIE.$WITHOUT_IES_REF_PREF.pe.sorted.bam

    # Results live under <base>/<IES file name without .gff3>/<sample>/
    OUT_DIR=$BASE_OUT_DIR/$(basename "$IES_ON_GENOME_WITH_IES" .gff3)/$PREFIX/
    mkdir -p "$OUT_DIR"

    run_miret

    # Encompassing IESs

    GENOME_WITH_IES=$MAC_IES_woINT_GENOME
    WITH_IES_REF_PREF=$MAC_IES_woINT_REF_PREF
    IES_ON_GENOME_WITH_IES=$IES_ON_MAC_IES_woINT
    # Uses the prefix of this section's germline-side reference; the former
    # $WITHOUT_IES_REF_PREF only held the right value as a leftover of the
    # internal-IES section above.
    BAM_WITH_IES=mapping/$PREFIX.BOWTIE.$WITH_IES_REF_PREF.pe.sorted.bam

    GENOME_WITHOUT_IES=$MAC_GENOME
    WITHOUT_IES_REF_PREF=$MAC_REF_PREF
    IES_ON_GENOME_WITHOUT_IES=$IES_ON_MAC
    BAM_WITHOUT_IES=$(find /data/PARAMECIUM/MAPPING/tetraurelia/ptetraurelia_mac_51/DNAseq/ -name "$PREFIX.BOWTIE.$MAC_REF_PREF.pe.sorted.bam" | head -1)

    OUT_DIR=$BASE_OUT_DIR/$(basename "$IES_ON_GENOME_WITH_IES" .gff3)/$PREFIX/
    mkdir -p "$OUT_DIR"

    if [[ "$PREFIX" == "$control_prefix" ]]; then
        # The control sample itself is scored without -control.
        run_miret
    else
        run_miret "$BASE_OUT_DIR/$(basename "$IES_ON_GENOME_WITH_IES" .gff3)/$control_prefix/MIRET/MIRET.gff3"
    fi
}

NB_PREF=${#PREFIXES[@]}
for (( N=0; N<NB_PREF; N++ )); do
    process_sample "${PREFIXES[$N]}"
done

# load_MIRET.R













