# Build the FASTA inputs for the variant analysis: sequences carrying the
# reference and alternative alleles of each SNP, plus the peak sequences.
variants_dir=/hpc/home/imk11/HeartEnhancerProject/HeartDiseaseVariants
variant_prefix="${variants_dir}/UKB_WGS_ATACseq_SingleVariant_Analysis"

# Allele sequences are written to the *_refAllele.fa and *_altAllele.fa files.
# NOTE(review): this step reads resources/hg38_UCSC.fa while the peak step
# below reads HumanGenome/hg38.fa -- confirm both are the same assembly.
python /hpc/home/imk11/src/IMKHeartEnhancerScripts/getSequencesSurroundingSNPs.py \
  --SNPFILENAME "${variant_prefix}.tsv" \
  --GENOMEFILENAME /hpc/group/gersbachlab/imk11/sei-framework/resources/hg38_UCSC.fa \
  --usePeak \
  --BINDALLELESEQFILENAME "${variant_prefix}_refAllele.fa" \
  --NONBINDALLELEFILENAME "${variant_prefix}_altAllele.fa" \
  --chromCol 5 \
  --positionCol 1 \
  --alleleCol 2 \
  --onlyAlternateAllele \
  --peakStartCol 6 \
  --peakEndCol 7

# Tab-separated fields 6-8 of the variant table become the peaks BED file
# (presumably chromosome, start, end -- confirm against the table layout).
cut -f6-8 "${variant_prefix}.tsv" > "${variant_prefix}_peaks.bed"

# Sort the peaks by their first field and extract their sequences.
sort -k1,1 "${variant_prefix}_peaks.bed" \
  | fastaFromBed \
      -fi /hpc/home/imk11/HeartEnhancerProject/HumanGenome/hg38.fa \
      -bed stdin \
      -fo "${variant_prefix}_peaks.fa"

# Score the reference- and alternative-allele sequences with the CHIR open
# versus CHIR closed SVM: one gkmpredict run per allele, reference first.
variants_dir=/hpc/home/imk11/HeartEnhancerProject/HeartDiseaseVariants
variant_prefix="${variants_dir}/UKB_WGS_ATACseq_SingleVariant_Analysis"

for allele in refAllele altAllele; do
  # Arguments: input FASTA, trained model file, predictions output file.
  /hpc/home/imk11/src/lsgkm/bin/gkmpredict \
    "${variant_prefix}_${allele}.fa" \
    /hpc/home/imk11/HeartEnhancerProject/CHIRAtac/SVMOutputs/CHIROpenVsClosed_lsgkmt2l10k5w.9.model.txt \
    "${variant_prefix}_${allele}_lsgkmt2l10k5w.9Predictions.txt"
done

# Propagate reference and alternative alleles through the SEI model
# (requires 16G). For each allele: drop duplicate sequences, then run the
# SEI sequence-prediction script on the de-duplicated FASTA with --cuda.
variants_dir=/hpc/home/imk11/HeartEnhancerProject/HeartDiseaseVariants
variant_prefix="${variants_dir}/UKB_WGS_ATACseq_SingleVariant_Analysis"
sei_dir=/hpc/group/gersbachlab/imk11/sei-framework

# Reference allele: "seqkit rmdup -s" removes records with duplicate sequences.
seqkit rmdup -s < "${variant_prefix}_refAllele.fa" > "${variant_prefix}_refAllele_noDup.fa"

# The prediction script is launched from the sei-framework directory
# (presumably it resolves resources relative to it -- confirm). Abort if the
# directory is unavailable rather than running the predictions from an
# unrelated working directory.
cd "${sei_dir}/" || {
  printf 'error: cannot cd to %s\n' "${sei_dir}/" >&2
  exit 1
}

# Reference-allele predictions are written under SEIOutputs.
sh "${sei_dir}/1_sequence_prediction.sh" \
  "${variant_prefix}_refAllele_noDup.fa" \
  hg38 \
  "${variants_dir}/SEIOutputs" \
  --cuda

# Alternative allele: same two steps, with outputs under SEIOutputsAlt.
seqkit rmdup -s < "${variant_prefix}_altAllele.fa" > "${variant_prefix}_altAllele_noDup.fa"
sh "${sei_dir}/1_sequence_prediction.sh" \
  "${variant_prefix}_altAllele_noDup.fa" \
  hg38 \
  "${variants_dir}/SEIOutputsAlt" \
  --cuda
