### assemble to contigs using PopIns2
# Run `popins2 assemble` once for every BAM path listed (one per line) in
# wpcdyb_67_bampath.txt. The value passed to -s is the name of the directory
# that contains the BAM file; stdout of each run is appended to
# <that name>.popins.log in the current directory.
# The list is loaded with mapfile (bash >= 4) instead of $(cat ...), so a path
# containing whitespace or glob characters is passed through as one argument.
mapfile -t bam_paths < wpcdyb_67_bampath.txt
for sample in "${bam_paths[@]}"
do
    [[ -n "${sample}" ]] || continue   # ignore blank lines in the list
    prefix=$(dirname -- "${sample}")
    index=$(basename -- "${prefix}")
    popins2 assemble -t 10 -s "${index}" "${sample}" >> "${index}.popins.log"
done
###filter contig with length <1000 bp
# filter_contigs_by_length FASTA [MIN_LEN]
#   Write to stdout every record of FASTA whose sequence is at least MIN_LEN
#   bases long (default 1000). Each kept record is printed as its header line
#   followed by the whole sequence joined onto a single line.
#   Records are emitted in the order their headers first appear in the input;
#   sequence lines under a repeated header are appended to the first one.
#   Lines that precede the first header are ignored.
filter_contigs_by_length() {
    local fasta=$1
    local min_len=${2:-1000}
    awk -v min_len="${min_len}" '
        /^>/ {
            name = $0
            if (!(name in seen)) {
                seen[name] = 1
                order[++n] = name
            }
            next
        }
        name != "" {
            seq[name] = seq[name] $0
        }
        END {
            for (i = 1; i <= n; i++) {
                name = order[i]
                if (length(seq[name]) >= min_len)
                    print name "\n" seq[name]
            }
        }
    ' "${fasta}"
}
# NOTE(review): nothing visible here assigns workdir, and sample still holds
# whatever the assembly loop above left in it - confirm both are set to the
# intended values before running this line.
filter_contigs_by_length "${sample}_assembly_final.contigs.fa" > "${workdir}/filter_contigs/${sample}.assembly_final_filter.contigs.fa"

###filter contaminations using FCS-Genome
# run_docker SAMPLE
#   Prepare one sample's filtered contigs and run FCS-Genome's run_gx on them
#   in a docker container.
#   Globals read:
#     workdir - must be non-empty; input is read from ${workdir}/filter_contigs
#               and the re-wrapped FASTA is written under ${workdir}/FCS-Genome
#     seqkit  - path to the seqkit executable; falls back to `seqkit` on PATH
#               when unset or empty (an empty value would otherwise make the
#               shell run coreutils `seq`)
#   Side effects: creates ./SAMPLE in the current directory, writes
#   SAMPLE.fix.fa.gz, and appends "SAMPLE over" to the run log on success.
#   Returns: 0 on success, otherwise the status of the first failing step.
#   NOTE(review): the container mounts /sdb2_48T/hanxiaotao/FCS-Genome as its
#   sample volume, so this presumably expects ${workdir}/FCS-Genome and the
#   current directory to both be that directory - confirm.
function run_docker()
{
    local sample=$1
    local seqkit_bin=${seqkit:-seqkit}
    local status=0

    if [[ -z "${sample}" || -z "${workdir}" ]]; then
        echo "run_docker: need a sample argument and a non-empty workdir" >&2
        return 1
    fi
    local contigs=${workdir}/filter_contigs/${sample}.assembly_final_filter.contigs.fa
    local fixed=${workdir}/FCS-Genome/${sample}.fix.fa

    # Re-wrap sequence lines to 70 columns, then compress. -f and -p keep a
    # rerun of the same sample from failing on files left by an earlier run.
    "${seqkit_bin}" seq -w 70 "${contigs}" > "${fixed}" || return
    gzip -f "${fixed}" || return
    mkdir -p "${sample}" || return

    # No -i/-t: this function is launched by GNU parallel, where the job has
    # no terminal, and `docker run -t` refuses to start without one.
    docker run \
        -v /dev/shm/gxdb:/app/db/gxdb \
        -v /sdb2_48T/hanjiazheng/FCS-Genome/gxdb:/db-disk-volume \
        -v /sdb2_48T/hanxiaotao/FCS-Genome:/sample-volume \
        -v "/sdb2_48T/hanxiaotao/FCS-Genome/${sample}:/output-volume" \
        ncbi/cgr-fcs-genome:v1alpha1-latest \
        python3 \
        /app/bin/run_gx \
        --fasta "/sample-volume/${sample}.fix.fa.gz" \
        --out-dir /output-volume/ \
        --gx-db /app/db/gxdb/all \
        --gx-db-disk /db-disk-volume/all \
        --tax-id 9913 \
        --debug \
        --split-fasta || status=$?

    # Only record the sample as finished when the container succeeded.
    if (( status == 0 )); then
        echo "${sample} over" >> /sdb2_48T/hanxiaotao/FCS-Genome/run_docker_20220625.log
    else
        echo "run_docker: ${sample} failed with status ${status}" >&2
    fi
    return "${status}"
}
# Export the function so the shells started by GNU parallel can call it, then
# launch it over the sample list with up to 20 jobs at a time (-j 20) and one
# list entry per invocation (-N 1).
export -f run_docker
parallel -j 20 -N 1 \
    run_docker \
    :::: /sdb2_48T/hanxiaotao/FCS-Genome/200_sample.list
printf '%s\n' 'all over'
###merge contigs to supercontigs using PopIns2
# Command executed inside the container: popins2 merge reading from /root/test.
merge_cmd=(popins2 merge -r /root/test -di -t 50)
# The host results directory is bind-mounted at /root/test/ in the
# centos7_popins2 container, which is where the merge command looks for input.
docker run -i -t \
    -v /sdb2_48T/hanxiaotao/popins2_results:/root/test/ \
    centos7_popins2 \
    "${merge_cmd[@]}"
### filter contaminations using FCS-Genome for supercontigs
export seqkit="/home/hanjiazheng/miniconda3/bin/seqkit"
# Re-wrap supercontigs.fa to 70 columns per sequence line and gzip the result.
# seqkit reads the file directly (no pager in the pipeline); gzip -f and
# mkdir -p let this section be rerun without failing on earlier output.
# NOTE(review): the files are written to the current directory while the
# container mounts /sdb2_48T/hanxiaotao/FCS-Genome_filter_supercontigs, so this
# presumably has to be run from that directory - confirm.
"${seqkit}" seq -w 70 supercontigs.fa > supercontigs.fix.fa
gzip -f supercontigs.fix.fa
mkdir -p results

# Run FCS-Genome's run_gx on the compressed FASTA; results land in the
# results/ directory through the /output-volume mount.
docker run -i -t \
    -v /dev/shm/gxdb:/app/db/gxdb \
    -v /sdb2_48T/hanjiazheng/FCS-Genome/gxdb:/db-disk-volume \
    -v /sdb2_48T/hanxiaotao/FCS-Genome_filter_supercontigs:/sample-volume \
    -v /sdb2_48T/hanxiaotao/FCS-Genome_filter_supercontigs/results:/output-volume \
    ncbi/cgr-fcs-genome:v1alpha1-latest \
    python3 \
    /app/bin/run_gx \
    --fasta /sample-volume/supercontigs.fix.fa.gz \
    --out-dir /output-volume/ \
    --gx-db /app/db/gxdb/all \
    --gx-db-disk /db-disk-volume/all \
    --tax-id 9913 \
    --debug \
    --split-fasta
### validation using megablast
# Search the filtered supercontigs against the local NCBI nt database and
# write tabular (outfmt 6) hits to filter_supercontig_nt.out.
# -num_descriptions does not apply to tabular output formats, so the limit of
# 10 subject sequences per query is expressed with -max_target_seqs instead.
# NOTE(review): filter_super_contigs is not assigned anywhere visible here;
# set it to the filtered supercontig FASTA before running.
blastn \
    -query "${filter_super_contigs}" \
    -out filter_supercontig_nt.out \
    -db /home/hanxiaotao/data/NCBI_nt/nt.fa \
    -task megablast \
    -outfmt 6 \
    -evalue 1e-5 \
    -max_target_seqs 10 \
    -num_threads 15
echo all over
## 7. contigmap in popins2
# Open an interactive shell in the PopIns2 container with the results
# directory mounted at /root/test. The host side is given as the same absolute
# path used for the merge step: a bare name such as `popins2_results` is
# treated by docker as a named volume, not as a directory on the host.
# -w makes the container shell start in /root/test.
docker run -it -v /sdb2_48T/hanxiaotao/popins2_results:/root/test -w /root/test centos7_popins2 /bin/bash
# NOTE(review): this cd looks like it is meant to be typed inside the
# container shell started above; when the file is run as a script it executes
# on the host after that shell exits.
cd /root/test
#!/bin/bash
# Run `popins2 contigmap` for every sample listed in 898_sample.list, keeping
# at most thread_num samples running at the same time, and report the total
# elapsed wall-clock seconds at the end.
start_time=$(date +%s)
# The job limit is a token pool held in a FIFO on fd 6. The FIFO lives in a
# private mktemp directory (not a predictable /tmp name) and is unlinked as
# soon as fd 6 is open; the open descriptor keeps it usable.
fifo_dir=$(mktemp -d) || exit 1
tmp_fifofile="${fifo_dir}/pool.fifo"
mkfifo "${tmp_fifofile}"
exec 6<>"${tmp_fifofile}"
rm -rf -- "${fifo_dir:?}"
thread_num=6
# Seed the pool with one empty line (token) per allowed concurrent job.
for ((i=0;i<thread_num;i++));do
    echo
done >&6

# Load the sample list without word-splitting or globbing (bash >= 4).
mapfile -t samples < 898_sample.list
for sample in "${samples[@]}"
do
    [[ -n "${sample}" ]] || continue   # ignore blank lines in the list
    # Take a token; blocks here while thread_num jobs are already running.
    read -u6
    {
    # Start/over lines go to both stdout and the run log.
    echo "${sample} start $(date)" | tee -a run_contigmap_20220626.log
    popins2 contigmap -r ARS-UCD.1.2.fa -c supercontigs.final.fa -t 10 "${sample}"
    echo "${sample} over $(date)" | tee -a run_contigmap_20220626.log
    # Hand the token back so the next sample can start.
    echo >&6
    } &
done
wait
stop_time=$(date +%s)
echo "TIME:$((stop_time - start_time))"
exec 6>&-
echo "all over"
## 8.placed contig in PopIns2
# step1
popins2 place-refalign --minReads 5 -r ARS-UCD.1.2.fa -c supercontigs.final.fa
# step2
#!/bin/bash
# Run `popins2 place-splitalign` for every sample listed in 898_sample.list,
# keeping at most thread_num samples running at the same time, and report the
# total elapsed wall-clock seconds at the end.
start_time=$(date +%s)
# The job limit is a token pool held in a FIFO on fd 6. The FIFO lives in a
# private mktemp directory (not a predictable /tmp name) and is unlinked as
# soon as fd 6 is open; the open descriptor keeps it usable.
fifo_dir=$(mktemp -d) || exit 1
tmp_fifofile="${fifo_dir}/pool.fifo"
mkfifo "${tmp_fifofile}"
exec 6<>"${tmp_fifofile}"
rm -rf -- "${fifo_dir:?}"

thread_num=10
# Seed the pool with one empty line (token) per allowed concurrent job.
for ((i=0;i<thread_num;i++));do
    echo
done >&6
# Load the sample list without word-splitting or globbing (bash >= 4).
mapfile -t samples < 898_sample.list
for sample in "${samples[@]}"
do
    [[ -n "${sample}" ]] || continue   # ignore blank lines in the list
    # Take a token; blocks here while thread_num jobs are already running.
    read -u6
    {
    popins2 place-splitalign -r ARS-UCD.1.2.fa -c supercontigs.final.fa "${sample}"
    echo "${sample} over $(date)" >> run_placed_20220702.log
    # Hand the token back so the next sample can start.
    echo >&6
    } &
done
wait
stop_time=$(date +%s)
echo "TIME:$((stop_time - start_time))"
exec 6>&-
echo "part2 over"

# step3
popins2 place-finish -r ARS-UCD.1.2.fa -i insertions.vcf
echo "all over"
