source ~/PROJECTS/CALCULATION/MAPPING/DNAseq/tetraurelia/prefixes/anlagen.sh

# Build MAX_FRAG_LENGTHs with one entry per element of PREFIXES; every entry
# is set to the same value, 500.
NB_PREF=${#PREFIXES[@]}
MAX_FRAG_LENGTHs=()
N=0
while (( N < NB_PREF )); do
   MAX_FRAG_LENGTHs[N]=500
   N=$(( N + 1 ))
done

# --- Settings for the read-mapping step ---------------------------------------
# Number of threads handed to bowtie2.
THREADS=20

# Reference the reads are aligned to, and the tag used for it in BAM file names.
REF_PREF='mic2'
REF='/data/PARAMECIUM/REFERENCES/tetraurelia/assembly/Mic2/ptetraurelia_mic2.fa'

# Directory searched for already existing mappings.
BASE_MAPPING_DIR='/data/PARAMECIUM/MAPPING/tetraurelia/'

# Directory holding the input FASTQ files (left unquoted so that ~ is expanded).
FASTQ_DIR=~/PROJECTS/CALCULATION/MAPPING/DNAseq/tetraurelia/data/

# Output root, relative to the current directory.
BASE_RESULT_DIR='mapping/'


# Align, for every sample of PREFIXES, the "not well mapped" read pairs onto
# $REF with bowtie2 (--local) and store the mapped reads as a sorted, indexed
# BAM:
#   $BASE_RESULT_DIR/<prefix>/<prefix>.BOWTIE.$REF_PREF.pe.sorted.bam
#
# Reads: PREFIXES, BASES1, BASES2, MAX_FRAG_LENGTHs (all indexed alike),
#        FASTQ_DIR, BASE_MAPPING_DIR, BASE_RESULT_DIR, REF, REF_PREF, THREADS.
# A sample whose BAM already exists is skipped, so the loop can be re-run.
NB_PREF=${#PREFIXES[@]}

for (( N=0; N<NB_PREF; N++ )); do

   PREFIX=${PREFIXES[$N]}

   # Input FASTQ pair. A missing file is only reported: when the BAM or the
   # selected reads already exist the originals are not needed any more.
   FASTQ1=$FASTQ_DIR/${BASES1[$N]}
   FASTQ2=$FASTQ_DIR/${BASES2[$N]}
   if [ ! -f "$FASTQ1" ]; then
      echo "Error $FASTQ1"
   fi
   if [ ! -f "$FASTQ2" ]; then
      echo "Error $FASTQ2"
   fi

   MAX_FRAG_LENGTH=${MAX_FRAG_LENGTHs[$N]}

   # Existing mappings of this sample on the two MAC references. Only the first
   # hit is kept so that each variable holds at most one path.
   BAM_MAC=$(find "$BASE_MAPPING_DIR/ptetraurelia_mac_51/" -name "$PREFIX.BOWTIE.pt_51.pe.sorted.bam" | head -n 1)
   BAM_MAC_IES=$(find "$BASE_MAPPING_DIR/ptetraurelia_mac_51_with_ies/" -name "$PREFIX.BOWTIE.pt_51_with_ies.pe.sorted.bam" | head -n 1)

   RESULT_DIR=$BASE_RESULT_DIR/$PREFIX/
   mkdir -p "$RESULT_DIR"

   # Reads given to bowtie2: <fastq name without .fastq>.not_well_mapped.fastq
   # in the result directory (presumably what select_not_well_mapped_reads.pl
   # writes into -outdir; its naming is not visible from this script).
   FASTQ1_OUT_FILE=$(basename "$FASTQ1")
   FASTQ1_OUT_FILE=$RESULT_DIR/${FASTQ1_OUT_FILE%.fastq}.not_well_mapped.fastq
   FASTQ2_OUT_FILE=$(basename "$FASTQ2")
   FASTQ2_OUT_FILE=$RESULT_DIR/${FASTQ2_OUT_FILE%.fastq}.not_well_mapped.fastq

   BAM=$RESULT_DIR/$PREFIX.BOWTIE.$REF_PREF.pe.sorted.bam
   if [ ! -f "$BAM" ]; then
      echo "$BAM"

      if [ ! -f "$FASTQ1_OUT_FILE" ]; then
         echo "$FASTQ1_OUT_FILE"
         if [ -z "$BAM_MAC" ] || [ -z "$BAM_MAC_IES" ]; then
            echo "Error: MAC mapping(s) of $PREFIX not found under $BASE_MAPPING_DIR" >&2
            continue
         fi
         select_not_well_mapped_reads.pl -bam "$BAM_MAC" -bam "$BAM_MAC_IES" -fastq "$FASTQ1" -fastq "$FASTQ2" -outdir "$PWD/$RESULT_DIR"
      fi

      # Write to a temporary name and rename only when every stage of the
      # pipeline succeeded; otherwise a truncated BAM would satisfy the
      # "already done" test above on the next run. pipefail is enabled in a
      # subshell so that the rest of the script is not affected.
      BAM_PARTIAL=${BAM%.bam}.partial.bam
      if ( set -o pipefail
           bowtie2 -X "$MAX_FRAG_LENGTH" --threads "$THREADS" --local -x "$REF" -1 "$FASTQ1_OUT_FILE" -2 "$FASTQ2_OUT_FILE" \
              | samtools view -F 4 -uS - \
              | samtools sort -o "$BAM_PARTIAL" - ) \
         && mv -- "$BAM_PARTIAL" "$BAM"; then
         samtools index "$BAM"
      else
         echo "Error: mapping of $PREFIX failed, $BAM was not created" >&2
         rm -f -- "$BAM_PARTIAL"
      fi
   fi

done
#nohup bzip2 results/PTET_*/*fastq&


# --- Settings for the ParTIES MILORD step -------------------------------------
# Number of threads handed to parties.
THREADS=20

# Where the BAM files are read from and where the results are written
# (both relative to the current directory).
BASE_RESULT_DIR='results/'
BASE_MAPPING_DIR='mapping/'

source ../../headers.bash

# Run "parties MILORD" for every sample of PREFIXES on
#   $BASE_MAPPING_DIR/<prefix>/<prefix>.BOWTIE.$REF_PREF.pe.sorted.bam
# and write into $BASE_RESULT_DIR/<prefix>/ParTIES.
# A sample is skipped when its MILORD/MILORD.gff3 already exists.
#
# Reads: PREFIXES, BASE_MAPPING_DIR, BASE_RESULT_DIR, REF, REF_PREF, THREADS.
# Side effect: chr.lst in the current directory is overwritten for each
# processed sample.
NB_PREF=${#PREFIXES[@]}
for (( N=0; N<NB_PREF; N++ )); do

    PREFIX=${PREFIXES[$N]}

    RESULT_DIR=$BASE_RESULT_DIR/$PREFIX/
    mkdir -p "$RESULT_DIR"

    BAM=$BASE_MAPPING_DIR/$PREFIX/$PREFIX.BOWTIE.$REF_PREF.pe.sorted.bam

    if [ ! -f "$RESULT_DIR/ParTIES/MILORD/MILORD.gff3" ]; then
        # Without the BAM the sequence list would be empty and parties would be
        # started on a file that does not exist: report and go to the next one.
        if [ ! -f "$BAM" ]; then
            echo "$BAM  does not exist"
            continue
        fi
        echo "$BAM"

        # Restrict MILORD to the sequences (SAM column 3) that carry at least
        # one alignment whose CIGAR (column 6) contains an S operation.
        samtools view "$BAM" | awk '$6 ~/S/ { print $3 }' | sort -u > chr.lst

        parties MILORD -tab -list_of_seq chr.lst -min_seq_length 1000 -threads "$THREADS" -genome "$REF" -out_dir "$RESULT_DIR/ParTIES" -bam "$BAM" -max_size 5000 -force
    fi

done
        
# Convert every MILORD GFF3 found under $BASE_RESULT_DIR/<sample>/ParTIES/MILORD/
# into a .tab file with the same base name, unless that .tab already exists.
# The files are enumerated with a glob (not by parsing ls), so paths with
# whitespace are handled.
for GFF in "$BASE_RESULT_DIR"/*/ParTIES/MILORD/*.gff3; do
  # When nothing matches, the pattern itself is left as the only word: skip it.
  [ -f "$GFF" ] || continue

  TAB=${GFF%.gff3}.tab
  if [ ! -f "$TAB" ]; then
     echo "Processing $GFF file to $TAB..."
     # Convert into a temporary file first: a failed conversion must not leave
     # a .tab behind, or it would never be regenerated.
     if gff2tab.pl -gff "$GFF" > "$TAB.partial"; then
        mv -- "$TAB.partial" "$TAB"
     else
        echo "Error: gff2tab.pl failed on $GFF" >&2
        rm -f -- "$TAB.partial"
     fi
  fi
done


# see annote_MIC-limited_IES.R


# Build the reference from which the IES listed in $IES have been removed
# (create_genome_without_ies.pl), then prepare it for mapping:
# bowtie2 index and samtools faidx index.
GENOME=/data/PARAMECIUM/REFERENCES/tetraurelia/assembly/Mic2/ptetraurelia_mic2.fa
REF_PREF=mic2

IES=IES_on_Imprecisely_Eliminated_Regions.mic2.gff3
OUT_GENOME=ptetraurelia_mic2.without_IES_on_IER.fa
# <IES file name without directory and ".mic2.gff3">_sites.mic2_without_IES_on_IER.gff3
OUT_IES=$(basename "$IES" .mic2.gff3)_sites.mic2_without_IES_on_IER.gff3

# Each step only makes sense when the previous one succeeded; in particular the
# indexes must not be (re)built from a missing or stale FASTA.
if ! { perl ../create_genome_without_ies.pl -genome "$GENOME" -ies "$IES" -out_genome "$OUT_GENOME" -genome_suffix '' -out_genome_suffix _without_IES -out_ies "$OUT_IES" \
       && perl -pi -e 's/^\n//' "$OUT_GENOME" \
       && bowtie2-build "$OUT_GENOME" "$OUT_GENOME" \
       && samtools faidx "$OUT_GENOME"; }; then
  # The perl -pi step above strips the empty lines from the FASTA in place.
  echo "Error: could not build and index $OUT_GENOME" >&2
fi


# see MAPPING



  


# --- Settings for the ParTIES MIRET and coverage steps ------------------------
# Number of threads handed to parties.
THREADS=15
# Size of the flanks, passed as both -before and -after to coverage.pl.
AROUND_REGION=100

# Root of the output tree, created right away.
BASE_OUT_DIR='results/'
mkdir -p "$BASE_OUT_DIR"

# Inputs given to parties as the germline genome / IES annotation.
WITH_IES_REF_PREF='mic2'
GENOME_WITH_IES='/data/PARAMECIUM/REFERENCES/tetraurelia/assembly/Mic2/ptetraurelia_mic2.fa'
IES_ON_GENOME_WITH_IES='IES_on_Imprecisely_Eliminated_Regions.mic2.gff3'

# Inputs given to parties as the (non-germline) genome / IES annotation.
WITHOUT_IES_REF_PREF='mic2_without_IES_on_IER'
GENOME_WITHOUT_IES='ptetraurelia_mic2.without_IES_on_IER.fa'
IES_ON_GENOME_WITHOUT_IES='IES_on_Imprecisely_Eliminated_Regions_sites.mic2_without_IES_on_IER.gff3'

# chr.lst: the distinct values found in the first column of that annotation.
awk '{ print $1 }' "$IES_ON_GENOME_WITHOUT_IES" | sort -u > chr.lst

# For every sample of PREFIXES:
#   1. run "parties MIRET" using the BAM on the genome without IES
#      (mapping/<prefix>.BOWTIE.$WITHOUT_IES_REF_PREF.pe.sorted.bam) and, as
#      germline BAM, the one found for $WITH_IES_REF_PREF under /data/...;
#      output in $BASE_OUT_DIR/<IES file name without .gff3>/<prefix>/;
#   2. compute, in the background, the coverage around the IES with
#      ../coverage.pl; output in .../coverage/<prefix>.coverage.tsv.
# Each step is skipped when its result file exists or when one of its input
# BAM files is missing (missing files are reported on standard output).
#
# Reads: PREFIXES, BASE_OUT_DIR, THREADS, AROUND_REGION, GENOME_WITH_IES,
#        GENOME_WITHOUT_IES, IES_ON_GENOME_WITH_IES, IES_ON_GENOME_WITHOUT_IES,
#        WITH_IES_REF_PREF, WITHOUT_IES_REF_PREF, and chr.lst in the current
#        directory.
NB_PREF=${#PREFIXES[@]}

# Name of the result sub-directory; it is the same for every sample.
IES_NAME=$(basename "$IES_ON_GENOME_WITH_IES" .gff3)

for (( N=0; N<NB_PREF; N++ )); do

    PREFIX=${PREFIXES[$N]}
    echo "$PREFIX"

    # WITHOUT IES
    BAM_WITHOUT_IES=mapping/$PREFIX.BOWTIE.$WITHOUT_IES_REF_PREF.pe.sorted.bam
    if [ ! -f "$BAM_WITHOUT_IES" ]; then
        echo "$BAM_WITHOUT_IES  does not exist"
    fi

    # WITH IES. The value is empty when find reports nothing, hence the quotes
    # in the tests below: an unquoted empty value makes "[ ! -f ]" false.
    BAM_WITH_IES=$(find /data/PARAMECIUM/MAPPING/tetraurelia/ptetraurelia_mic2/DNAseq/ -name "$PREFIX.BOWTIE.$WITH_IES_REF_PREF.pe.sorted.bam" | head -n 1)
    if [ ! -f "$BAM_WITH_IES" ]; then
        echo "$PREFIX.BOWTIE.$WITH_IES_REF_PREF.pe.sorted.bam  does not exist"
    fi

    # --- MIRET (needs both BAM files) ---
    OUT_DIR=$BASE_OUT_DIR/$IES_NAME/$PREFIX/
    mkdir -p "$OUT_DIR"

    if [ ! -f "$OUT_DIR/MIRET/MIRET.tab" ] && [ -f "$BAM_WITHOUT_IES" ] && [ -f "$BAM_WITH_IES" ]; then
        echo "$OUT_DIR/MIRET/MIRET.tab"

        parties MIRET -tab -list_of_seq chr.lst -trace -threads "$THREADS" -genome "$GENOME_WITHOUT_IES" -ies "$IES_ON_GENOME_WITHOUT_IES" -germline_genome "$GENOME_WITH_IES" -germline_ies "$IES_ON_GENOME_WITH_IES" -out_dir "$OUT_DIR" -bam "$BAM_WITHOUT_IES" -germline_bam "$BAM_WITH_IES"
    fi

    # --- Coverage (needs the germline BAM only) ---
    OUT_DIR=$BASE_OUT_DIR/$IES_NAME/coverage/
    mkdir -p "$OUT_DIR"

    COVERAGE=$OUT_DIR/$PREFIX.coverage.tsv
    if [ ! -f "$COVERAGE" ] && [ -f "$BAM_WITH_IES" ]; then
        # Runs in the background. The result gets its final name only once
        # coverage.pl has succeeded, so an interrupted or failed job is redone
        # on the next run instead of being taken for a finished one.
        {
            if perl ../coverage.pl -bam "$BAM_WITH_IES" -gff "$IES_ON_GENOME_WITH_IES" -before "$AROUND_REGION" -after "$AROUND_REGION" -min_quality 30 > "$COVERAGE.partial"; then
                mv -- "$COVERAGE.partial" "$COVERAGE"
            else
                echo "Error: coverage.pl failed for $PREFIX" >&2
                rm -f -- "$COVERAGE.partial"
            fi
        } &
    fi

done

# Do not go on before every background coverage job has finished.
wait


# see load_MIRET.R

# see heatmap_coverage.R
