# 1. Contig assembly
# Assemble all CCS read sets with hifiasm (output prefix LM.asm, -t 48);
# hifiasm's stderr is captured in LM.asm.log.
hifiasm -o LM.asm -t 48 \
  02.ccs.ccs.fasta.gz \
  G-1.ccs.fasta.gz G-2.ccs.fasta.gz \
  M-1.ccs.fasta.gz M-2.ccs.fasta.gz \
  AB856-6/ccs.fasta.gz AB856-7/ccs.fasta.gz AB856-8/ccs.fasta.gz \
  AB856-9/ccs.fasta.gz AB856-10/ccs.fasta.gz \
  2> LM.asm.log

# Convert the primary-contig GFA to FASTA: every record whose line starts with
# "S" becomes a ">" header built from column 2 followed by column 3.
# NOTE(review): some hifiasm releases name this graph LM.asm.bp.p_ctg.gfa --
# confirm against the installed version.
awk '/^S/ { printf ">%s\n%s\n", $2, $3 }' LM.asm.p_ctg.gfa > LM.asm.p_ctg.fa

# 2. Repeat analysis
# Build a RepeatModeler database from the assembly, model repeat families,
# then mask the assembly with the resulting family library (rmblast engine).
# NOTE(review): this step reads LM.asm.fa, whereas the assembly step writes
# LM.asm.p_ctg.fa -- presumably renamed or copied outside this script; confirm.
repeat_db=locustdb
repeat_genome=LM.asm.fa

BuildDatabase -name "$repeat_db" -engine ncbi "$repeat_genome"
RepeatModeler -database "$repeat_db" -pa 32
RepeatMasker -e rmblast -lib "${repeat_db}-families.fa" -pa 32 "$repeat_genome"


# 3. Genome annotation
# Prepare the two evidence files handed to EVidenceModeler (EVM) below:
# gene predictions (TransDecoder + BRAKER GFF3 concatenated) and the PASA
# assemblies as transcript alignments.
cat all.fasta.transdecoder.genome.gff3 braker_prot.gff3 > gene_predictions.gff3
cp pasa.trinity.pasa_assemblies.gff3 transcript_alignments.gff3

# "$EVM_HOME" is quoted everywhere so an install path containing whitespace
# is not word-split by the shell.

# Partition the genome into 1,000,000 bp segments overlapping by 200,000 bp.
"$EVM_HOME"/EvmUtils/partition_EVM_inputs.pl \
  --genome ../Lmig_bmk.fa \
  --gene_predictions gene_predictions.gff3 \
  --transcript_alignments transcript_alignments.gff3 \
  --segmentSize 1000000 --overlapSize 200000 \
  --partition_listing partitions_list.out

# Emit one EVM command per partition. The weights file is passed as an
# absolute path; the command substitution is quoted so a working directory
# with spaces stays a single argument.
"$EVM_HOME"/EvmUtils/write_EVM_commands.pl \
  --genome ../Lmig_bmk.fa --weights "$(pwd)/weights.txt" \
  --gene_predictions gene_predictions.gff3 \
  --transcript_alignments transcript_alignments.gff3 \
  --output_file_name evm.out --partitions partitions_list.out > commands.list

# Run the generated commands, 40 at a time.
parallel -j 40 < commands.list

"$EVM_HOME"/EvmUtils/recombine_EVM_partial_outputs.pl \
  --partitions partitions_list.out --output_file_name evm.out

"$EVM_HOME"/EvmUtils/convert_EVM_outputs_to_GFF3.pl \
  --partitions partitions_list.out --output evm.out --genome ../Lmig_bmk.fa

# Concatenate every per-partition evm.out.gff3 into a single annotation file.
# The dots are escaped so they match literally rather than any character.
find . -regex '.*evm\.out\.gff3' -exec cat {} \; > EVM.all.gff3


# Functional annotation: InterProScan (Pfam + PANTHER, with GO terms, InterPro
# lookup and pathway annotation) followed by a DIAMOND blastp search whose XML
# output (outfmt 5) is converted to CSV by blast_xml_parse.py.
# NOTE(review): InterProScan reads EVM_locust_pep.fa while DIAMOND reads
# EVM.all.pep; neither file is produced in this script -- confirm both refer
# to the intended protein set.
./interproscan-5.52-86.0/interproscan.sh \
  -appl Pfam,PANTHER \
  -i EVM_locust_pep.fa \
  -b EVM_locust \
  -cpu 32 \
  -goterms -iprlookup -pa

diamond blastp \
  --db ../best/invertebrate_refseq \
  -q EVM.all.pep \
  --evalue 1e-5 \
  --outfmt 5 \
  -o EVM_refseq.xml
python2 blast_xml_parse.py -i EVM_refseq.xml -o EVM_all.csv -q EVM.all.pep

# Intron analysis
# add_introns.py is given the annotation (Lmig.gff3) and the prefix "Lmig";
# the next command reads Lmig_introns.gff3, presumably its output.
python2 add_introns.py Lmig.gff3 Lmig

# Mean intron length on LG1. GFF3 coordinates are 1-based and inclusive, so a
# feature covers end - start + 1 bases. When no LG1 intron is present nothing
# is printed on stdout and a notice goes to stderr instead of awk aborting
# with a division by zero.
awk '
  $1 == "LG1" && $3 == "intron" { total += $5 - $4 + 1; count++ }
  END {
    if (count > 0)
      print "Average = ", total / count
    else
      print "Average = NA (no LG1 introns found)" > "/dev/stderr"
  }
' Lmig_introns.gff3

# 4. BUSCO analysis
# Assess the assembly Lmig_new.fa in genome mode against the insecta_odb10
# lineage, without downloading anything (--offline); results go to
# busco_genome.
busco --offline \
  -m geno \
  -l insecta_odb10 \
  -i Lmig_new.fa \
  -o busco_genome \
  -c 20

# 5. SNP calling

# Convert every .sra archive found below the current directory to FASTQ
# (--split-3). find hands each path to fastq-dump as a single argument, so
# paths containing spaces or glob characters are not word-split the way a
# `for i in $(find ...)` loop would split them.
find . -type f -name '*.sra' -exec fastq-dump --split-3 {} \;

# Trim each read pair with fastp. Mates are paired by file name:
# <prefix>_1.fastq with <prefix>_2.fastq, written as <prefix>_{1,2}.fp.fastq.gz.
# The loop globs directly instead of parsing `ls`, and every expansion is
# quoted, so file names containing whitespace stay intact.
for r1 in *_1.fastq; do
  # With no match the glob stays literal; skip instead of running fastp on it.
  [ -e "$r1" ] || continue
  prefix=${r1%_1.fastq}
  fastp --thread 16 -l 20 -f 5 \
    -i "$r1" -I "${prefix}_2.fastq" \
    -o "${prefix}_1.fp.fastq.gz" -O "${prefix}_2.fp.fastq.gz"
done

# Map one resequencing sample to the reference, drop duplicates, measure
# depth, call variants and compute SNP density.
# NOTE(review): mapping reads the *_1/_2.fastq.gz files under the absolute
# path below, not the *.fp.fastq.gz files written by the fastp step --
# confirm which input is intended.
# NOTE(review): the read group sets LB (library) to ILLUMINA; the platform
# tag is PL -- confirm this is deliberate.
# NOTE(review): samtools documents rmdup as obsolete in favour of markdup.
snp_ref=Lmig_new.fa
snp_sample=SRR6906455
snp_reads=/public/home/lixinghua99/lixinghua99/ncbi/WGS/reseq/${snp_sample}/${snp_sample}

bwa index -a bwtsw "$snp_ref"

bwa mem -t 16 \
    -R "@RG\tID:${snp_sample}\tSM:${snp_sample}\tLB:ILLUMINA" \
    "$snp_ref" "${snp_reads}_1.fastq.gz" "${snp_reads}_2.fastq.gz" \
  | samtools sort - -@ 16 -o "${snp_sample}.sorted.bam"

samtools rmdup "${snp_sample}.sorted.bam" "${snp_sample}.rmdup.bam"
# -c requests a CSI index.
samtools index -@ 16 -c "${snp_sample}.rmdup.bam"

# Depth summarised in 500,000 bp windows; -n skips per-base output.
mosdepth -t 3 -n --fast-mode --by 500000 "$snp_sample" "${snp_sample}.rmdup.bam"

bcftools mpileup -Ou -f "$snp_ref" "${snp_sample}.rmdup.bam" \
  | bcftools call -vmO z -o "${snp_sample}.raw.vcf.gz"

# SNP density in 100,000 bp bins.
vcftools --gzvcf "${snp_sample}.raw.vcf.gz" --SNPdensity 100000 --out "${snp_sample}_SNPdensity"

# X-linked conservation analysis
# Collect IDs of "gene" rows on lines mentioning HG996560 (presumably the
# R. fulva X scaffold -- confirm): column 9 with ".1" appended and every
# "ID=" removed.
awk '/HG996560/ && $3 == "gene" {
  id = $9 ".1"
  gsub(/ID=/, "", id)
  print id
}' Rhagonycha_fulva.gff3 > Rful_X_genes.txt

# Pull those proteins and search for reciprocal best hits against Lmig.faa.
seqtk subseq Rhagonycha_fulva.anno.pep.fa Rful_X_genes.txt > Rful_X.pep
./getRBH.pl -q Lmig.faa -t Rful_X.pep

# Keep the first two columns of lines containing "RBH", then reduce column 1
# to the part before its first ".".
grep RBH Rful_X.pep.rbh \
  | cut -f1,2 > Rful_RBH.txt
cut -f1 Rful_RBH.txt \
  | sed 's/\..*$//' > Rful_RBH_id.txt

# Attach column 2 of Lmig_gene_chr.txt to each ID ("None" when the ID is
# absent from that table) and count how often each value occurs.
awk '
  NR == FNR { chrom_of[$1] = $2; next }
  { print $0 "\t" (($1 in chrom_of) ? chrom_of[$1] : "None") }
' Lmig_gene_chr.txt Rful_RBH_id.txt \
  | cut -f2 \
  | sort \
  | uniq -c

# Synteny analysis
# NOTE(review): MCScanX is run on the prefix "lm_ap" while every file read
# below is named lm_hix.* -- confirm the intended prefix.
MCScanX -s 2 -k 100 -m 50 lm_ap

# Columns 2 and 3 of each collinearity record hold the two anchor genes of a
# pair; header lines contain "#" and are dropped.
cut -f2 lm_hix.collinearity | grep -v "#" > lm_hix_genes1.txt
cut -f3 lm_hix.collinearity | grep -v "#" > lm_hix_genes2.txt

# Look up columns 1, 3 and 4 of lm_hix.gff for both genes of every pair.
# Genes are matched exactly on gff column 2: a substring grep would let "g1"
# also hit "g10", adding rows and shifting the two link files against each
# other before they are pasted side by side. A pair is written only when both
# genes are found, so row N of link1 always belongs to row N of link2.
# Both files are truncated first so a rerun does not append duplicate links.
: > lm_hix_link1.txt
: > lm_hix_link2.txt
paste lm_hix_genes1.txt lm_hix_genes2.txt |
  awk -F'\t' '
    FILENAME == ARGV[1] { coords[$2] = $1 "\t" $3 "\t" $4; next }
    ($1 in coords) && ($2 in coords) {
      print coords[$1] > "lm_hix_link1.txt"
      print coords[$2] > "lm_hix_link2.txt"
    }
  ' lm_hix.gff -
paste lm_hix_link1.txt lm_hix_link2.txt > lm_hix_link.txt

# Append the link colour as a final column.
awk '{print $0"\t""color=131,178,210"}' lm_hix_link.txt > lm_hix_link_color.txt

circos -conf circos.conf
