#!/bin/sh

# Per-sample paired-end pipeline:
#   BWA aln/sampe -> sorted BAM -> mapped/MAPQ filtering -> read groups ->
#   duplicate marking -> indel realignment -> GATK HaplotypeCaller gVCF.
# All outputs are written to the current working directory, prefixed with the
# sample name. Progress is appended to <sample>.calling.log.
#
# Abort on the first failing command or unset variable. Without this, a failed
# step was still followed by the cleanup `rm`s and a "finished" log entry, and
# the downstream tools ran on missing or truncated input. Note that a failure
# in one sample therefore stops the whole batch.
set -eu

# Whitespace-separated list of sample name prefixes. For every prefix the
# reads are read from ${clean_dir}/<prefix>_1.fq.gz and <prefix>_2.fq.gz.
file="
file_name
"

# Tool and data locations, defined once instead of being repeated per command.
ref=/home/dell/reference/duck_ref_zju.fa
clean_dir=/home/dell/cleandata
samtools=/home/dell/softtools/samtools-1.3.1/bin/samtools
picard_dir=/home/share/bin
gatk_jar=/home/share/bin/GenomeAnalysisTK-3.5.jar
threads=8

# Print a message to stderr and terminate the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# log_step LOGFILE MESSAGE
# Append "MESSAGE finished at YYYYmmdd_HH:MM:SS" to LOGFILE. The message and
# timestamp are passed as printf arguments, never as the format string.
log_step() {
  printf '%s finished at %s\n' "$2" "$(date +%Y%m%d_%H:%M:%S)" >>"$1"
}

# Word splitting of $file is intentional here: it holds the sample list.
# shellcheck disable=SC2086
for d in $file; do
  reads1="${clean_dir}/${d}_1.fq.gz"
  reads2="${clean_dir}/${d}_2.fq.gz"
  log="${d}.calling.log"

  # Fail early with a clear message instead of a tool-specific error later.
  [ -r "$reads1" ] || die "cannot read input file: $reads1"
  [ -r "$reads2" ] || die "cannot read input file: $reads2"

  # BWA align reads1
  bwa aln -t "$threads" "$ref" "$reads1" >"${d}_1.sai"
  # BWA align reads2
  bwa aln -t "$threads" "$ref" "$reads2" >"${d}_2.sai"
  # BWA generate sam file
  bwa sampe "$ref" \
    "${d}_1.sai" \
    "${d}_2.sai" \
    "$reads1" \
    "$reads2" \
    >"${d}.sam"
  # Convert sam to bam
  "$samtools" view -bS "${d}.sam" >"${d}.bam"
  # Bam file sort
  "$samtools" sort -o "${d}.sorted.bam" -T "${d}.sorted.tmp" \
    -@ "$threads" -O bam "${d}.bam"
  # Basic statistic for bam file
  "$samtools" flagstat "${d}.sorted.bam" >"${d}.flagstat.txt"
  # Remove intermediate files; only reached when every step above succeeded.
  rm -f -- "${d}_1.sai" "${d}_2.sai" "${d}.sam" "${d}.bam"

  # Step 1: Remove unmapped and multihit reads.
  # -F 4 drops records with the "unmapped" flag; -q 1 then keeps only records
  # with mapping quality >= 1.
  "$samtools" view -h -F 4 -b "${d}.sorted.bam" >"${d}.mapped.sorted.bam"
  "$samtools" view -bq 1 "${d}.mapped.sorted.bam" >"${d}.sorted.uniqe.bam"
  "$samtools" index "${d}.sorted.uniqe.bam"
  log_step "$log" "Step 1: remove unmapped and multihit reads"

  # Step 2: Add bam reads group
  # JVM options are placed before -jar (documented launcher syntax).
  java -Xmx100g -jar "${picard_dir}/AddOrReplaceReadGroups.jar" \
    I="${d}.sorted.uniqe.bam" \
    O="${d}.sorted.uniqe.rg.bam" \
    LB="$d" \
    PL=illumina \
    PU=IAS \
    SM="$d" \
    VALIDATION_STRINGENCY=SILENT
  "$samtools" index "${d}.sorted.uniqe.rg.bam"
  log_step "$log" "Step 2: add bam reads group"

  # Step 3: Mark Duplicates
  java -Xmx4g -jar "${picard_dir}/MarkDuplicates.jar" \
    INPUT="${d}.sorted.uniqe.rg.bam" \
    OUTPUT="${d}.sorted.uniqe.rg.dedup.bam" \
    METRICS_FILE="${d}.dedup.metrics" \
    VALIDATION_STRINGENCY=SILENT
  "$samtools" index "${d}.sorted.uniqe.rg.dedup.bam"
  log_step "$log" "Step 3: Mark Duplicates"

  ##################### Confidence SNP calling #################
  ## Step 4: Create INDEL positions (realignment targets)
  java -Xmx100g -jar "$gatk_jar" \
    -R "$ref" \
    -T RealignerTargetCreator \
    -o "${d}.realn.intervals" \
    -I "${d}.sorted.uniqe.rg.dedup.bam"
  log_step "$log" "Step 4: Create INDEL position(realigne target)"

  ## Step 5: INDEL realigner
  java -Xmx100g -jar "$gatk_jar" \
    -R "$ref" \
    -T IndelRealigner \
    -targetIntervals "${d}.realn.intervals" \
    -o "${d}.sorted.uniqe.rg.dedup.realn.bam" \
    -I "${d}.sorted.uniqe.rg.dedup.bam"
  log_step "$log" "Step 5: INDEL realigner"

  ## Step 6: calling
  java -Xmx200g -jar "$gatk_jar" \
    -R "$ref" \
    -T HaplotypeCaller -nct "$threads" \
    -I "${d}.sorted.uniqe.rg.dedup.realn.bam" \
    -o "${d}.gvcf" \
    --genotyping_mode DISCOVERY \
    -stand_emit_conf 30 \
    -stand_call_conf 30 \
    -ERC GVCF \
    -variant_index_type LINEAR \
    -variant_index_parameter 128000
  log_step "$log" "Step 6: calling"

  ## Step 7: file compression
  bgzip "${d}.gvcf"
  tabix -p vcf "${d}.gvcf.gz"
  log_step "$log" "Step 7: file compression"

  # Final cleanup of intermediates. ${d}.dedup.metrics, ${d}.sorted.bam,
  # ${d}.flagstat.txt and the realigned BAM are deliberately kept.
  rm -f -- \
    "${d}.mapped.sorted.bam" \
    "${d}.realn.intervals" \
    "${d}.sorted.uniqe.bam" \
    "${d}.sorted.uniqe.bam.bai" \
    "${d}.sorted.uniqe.rg.bam" \
    "${d}.sorted.uniqe.rg.bam.bai" \
    "${d}.sorted.uniqe.rg.dedup.bam" \
    "${d}.sorted.uniqe.rg.dedup.bam.bai"
done
