#!/bin/bash

# Work inside ./new. Stop immediately if the directory cannot be created or
# entered: every later step reads and writes relative to the current
# directory, so continuing would scatter files in the wrong place.
# -p keeps a re-run from failing just because ./new already exists.
mkdir -p new || { echo "error: cannot create directory 'new'" >&2; exit 1; }
cd new || { echo "error: cannot enter directory 'new'" >&2; exit 1; }

# Get annotations
# Abort if the download or decompression fails; everything below reads the
# unpacked GFF3 and would otherwise only produce empty or misleading output.
wget https://github.com/mcstitzer/maize_TEs/raw/master/B73.structuralTEv2.disjoined.2018-09-19.gff3.gz \
  || { echo "error: download of TE annotation failed" >&2; exit 1; }
gunzip B73.structuralTEv2.disjoined.2018-09-19.gff3.gz \
  || { echo "error: could not decompress TE annotation" >&2; exit 1; }

# List the distinct values of column 3 (the feature type), one per line.
# The first two values of the sorted list are discarded.
# NOTE(review): presumably these are the blank / header-derived entries that
# comment lines produce -- confirm against the actual file before relying on it.
awk '{ print $3 }' B73.structuralTEv2.disjoined.2018-09-19.gff3 \
  | sort \
  | uniq \
  | sed '1,2d' > TE_types.txt

# Make one directory per TE type listed in TE_types.txt
while IFS= read -r te_type || [[ -n "$te_type" ]]; do
  [[ -n "$te_type" ]] || continue   # ignore blank lines
  # -p: a directory that already exists (e.g. on a re-run) is not an error
  mkdir -p -- "$te_type"
done < TE_types.txt

# Make gff for each type

# extract_type_gff TYPE GFF OUT
# Write to OUT every record of GFF whose third whitespace-separated field is
# exactly TYPE. Field splitting matches the way TE_types.txt was derived
# (awk's default splitting on column 3). An exact comparison is used instead
# of a line-wide grep, which would also select records whose type or
# attributes merely contain TYPE as a substring.
extract_type_gff() {
  local te_type=$1 gff=$2 out=$3
  # Appending "" forces a string comparison even for numeric-looking values.
  awk -v want="$te_type" '($3 "") == (want "")' "$gff" > "$out"
}

while IFS= read -r te_type || [[ -n "$te_type" ]]; do
  [[ -n "$te_type" ]] || continue   # ignore blank lines
  extract_type_gff "$te_type" \
    B73.structuralTEv2.disjoined.2018-09-19.gff3 \
    "${te_type}/${te_type}.gff3"
done < TE_types.txt

# Get sequences for TEs
# For every TE type, extract the genomic sequence of each record in
# <type>/<type>.gff3 into <type>/<type>.fa.
# Paths are given relative to the work directory instead of cd-ing into each
# type directory, so a missing directory cannot shift the working directory
# for the rest of the script.
# The list is read on file descriptor 3 so the command inside the loop cannot
# consume it through stdin.
while IFS= read -r -u 3 te_type || [[ -n "$te_type" ]]; do
  [[ -n "$te_type" ]] || continue   # ignore blank lines
  bedtools getfasta \
    -fi /gpool/galentm/structure/Zm/genome.fa \
    -bed "${te_type}/${te_type}.gff3" \
    -fo "${te_type}/${te_type}.fa"
done 3< TE_types.txt

# Make subdirectories for TE types
# For every entry in the work directory whose name does not contain "fa",
# strip any ".filtered.bed" from the name and make sure a directory of that
# name exists. Names that already exist (as a file or a directory) are left
# alone rather than passed to mkdir, which would only print an error.
# NOTE(review): nothing visible in this script creates *.filtered.bed files,
# and the TE type directories are already created earlier, so this step looks
# like a no-op here -- confirm whether it is still needed.
for entry in *; do
  [[ -e "$entry" ]] || continue            # unmatched glob in an empty directory
  if [[ "$entry" == *fa* ]]; then
    continue
  fi
  name=${entry//.filtered.bed/}
  [[ -n "$name" ]] || continue
  [[ -e "$name" ]] || mkdir -- "$name"
done

# Add sequences and IDs to separate files in subdirectories

# split_te_fasta TYPE
# Split TYPE/TYPE.fa into two files inside the TYPE directory:
#   uq_TYPE.seq   - every line that does not contain ">" (the sequences)
#   TYPE_IDs.txt  - every line that contains ">", with all ">" removed (the IDs
#                   to add to the base pair probability files)
# Works on paths relative to the current directory instead of cd-ing into
# TYPE, so a missing directory cannot change where later steps run.
split_te_fasta() {
  local te_type=$1
  local fasta="${te_type}/${te_type}.fa"

  ## only sequences
  grep -v ">" "$fasta" > "${te_type}/uq_${te_type}.seq"

  ## IDs
  grep ">" "$fasta" | sed 's/>//g' > "${te_type}/${te_type}_IDs.txt"
}

while IFS= read -r te_type || [[ -n "$te_type" ]]; do
  [[ -n "$te_type" ]] || continue   # ignore blank lines
  split_te_fasta "$te_type"
done < TE_types.txt

# Move beds to subdirectories

# move_into_type_dirs
# For every entry in the current directory, strip any ".filtered.bed" from its
# name; if a directory of that name exists, move every non-directory entry
# whose name starts with it into that directory.
# Directories are never moved: this avoids "mv X X" on the target itself and
# keeps a sibling directory that merely shares the prefix (X vs X_other) from
# being nested inside X.
move_into_type_dirs() {
  local entry name item
  for entry in *; do
    name=${entry//.filtered.bed/}
    [[ -d "$name" ]] || continue
    for item in "$name"*; do
      if [[ -d "$item" ]]; then
        continue
      fi
      [[ -e "$item" || -L "$item" ]] || continue   # unmatched glob
      mv -- "$item" "$name"/
    done
  done
}

move_into_type_dirs

# Edit template LP script for each TE type

# render_lp_scripts
# For every directory D in the current directory whose name does not contain
# "LinearPartition", write D/LinearPartition_D.sh: a copy of
# ./LinearPartition_template.sh with every occurrence of "gene" replaced by D.
# Only directories are visited and no cd is performed, so plain files in the
# work directory can neither receive a script nor shift the working directory.
# NOTE: D is inserted into the sed replacement as-is; names containing ",",
# "&" or "\" would need escaping.
# Returns non-zero without writing anything if the template is missing.
render_lp_scripts() {
  local template=LinearPartition_template.sh
  local dir te_type

  if [[ ! -f "$template" ]]; then
    echo "error: ${template} not found in ${PWD}" >&2
    return 1
  fi

  for dir in */; do
    te_type=${dir%/}
    [[ -d "$te_type" ]] || continue          # unmatched glob
    if [[ "$te_type" == *LinearPartition* ]]; then
      continue
    fi
    sed "s,gene,${te_type},g" "$template" \
      > "${te_type}/LinearPartition_${te_type}.sh"
  done
}

render_lp_scripts

# Submit
# Submit LinearPartition_<dir>.sh from inside every directory whose name does
# not contain "LinearPartition". Only directories are visited, and the cd
# happens in a subshell: a failed cd skips that submission and can never move
# the main script out of the work directory.
for dir in */; do
  te_type=${dir%/}
  [[ -d "$te_type" ]] || continue          # unmatched glob
  if [[ "$te_type" == *LinearPartition* ]]; then
    continue
  fi
  (
    cd "$te_type" && sbatch "LinearPartition_${te_type}.sh"
  )
done

# Make unified base pair probability files

# merge_bpp_files
# For every directory D (name without a ".") in the current directory, append
# the rows of each file in D whose name contains ".bpp" to ./D.bpp.
# Each output row holds the first three input fields followed by:
#   - the 2nd "_"-separated field of the file name (length),
#   - the 3rd "_"-separated field of the file name (ID), where underscores
#     that belong to an occurrence of D inside the name do not count as
#     separators,
#   - D itself.
# Only directories are visited and no cd is performed, so a stray dot-less
# file cannot shift the working directory. Output is appended, so running this
# twice duplicates rows.
merge_bpp_files() {
  local dir te_type masked path fname length id

  for dir in */; do
    te_type=${dir%/}
    [[ -d "$te_type" ]] || continue          # unmatched glob
    if [[ "$te_type" == *.* ]]; then
      continue
    fi

    # Directory name with "_" turned into "." so it survives splitting on "_".
    masked=${te_type//_/.}

    for path in "$te_type"/*.bpp*; do
      [[ -e "$path" ]] || continue           # no .bpp files in this directory
      fname=${path##*/}

      # Second "_"-separated field of the file name.
      IFS=_ read -r _ length _ <<< "$fname"

      # Third "_"-separated field, with the directory name masked while
      # splitting and restored afterwards.
      id=$(printf '%s\n' "$fname" \
        | sed "s,${te_type},${masked},g" \
        | awk -F "_" '{print $3}' \
        | sed "s,${masked},${te_type},g")

      #UTRlen=$(grep $ID five_prime_UTR.gff3 | head -n 1 | awk '{print $5 - $4}')
      awk -v myvar="$length" -v myvar2="$id" -v myvar3="$te_type" \
        '{print $1, $2, $3, myvar, myvar2, myvar3}' "$path" >> "${te_type}.bpp"
    done
  done
}

merge_bpp_files



# Step back up to the parent of the current directory
cd ..

