# sam_to_wig_script_Tn_seq
# Looped conversion of SAM files to wig files for TRANSIT.
# You need a SAM file and a seqkit table of the TA sites in the genome.
# You also need to know how long the sequence you are working with is, e.g. the Tn-seq fragment cut with MmeI = 13 bp; all reads must start with the TA insertion site, so sort them accordingly beforehand.
# SAM file columns 2 (direction of the read) and 4 (location on the genome) are needed for the mapped reads.

# EGDe.fasta is the FASTA file of the host genome; motifs.fasta is a FASTA file containing the integration site of the transposon, in our case "TA".

# First, build a table listing every TA site together with its position in the
# genome. The motif search ignores case, allows degenerate bases in the pattern
# and only reports the positive strand; `column -t` aligns the result columns.
seqkit locate \
  --ignore-case \
  --degenerate \
  --only-positive-strand \
  --pattern-file motifs.fasta \
  EGDe.fasta \
  | column -t \
  > TA_sites_for_wig.txt


# Convert every *.sam file in the current directory into <name>.final.wig:
# one "position count" line per TA site, sorted by position.
#
# Tunables (override through the environment):
#   READ_LENGTH     length in bp of the trimmed reads (default 13). A
#                   reverse-strand read is shifted by READ_LENGTH - 2 so that
#                   it points at the TA site it started from (13 -> 11).
#   TA_SITES_TABLE  seqkit locate table of all TA sites
#                   (default TA_sites_for_wig.txt).
: "${READ_LENGTH:=13}"
: "${TA_SITES_TABLE:=TA_sites_for_wig.txt}"

#######################################
# Count transposon insertions per TA site for one SAM file.
#
# Only alignments whose FLAG (column 2) is exactly 0 (forward) or 16 (reverse)
# are counted; every other FLAG value and all header lines are ignored.
# Forward reads are counted at POS (column 4), reverse reads at
# POS + read_length - 2.
# The output holds exactly one line per distinct position found in column 5 of
# the TA table, with count 0 for sites that were never hit. Reads landing on a
# position that is not in the TA table are left out and reported on stderr.
# NOTE(review): the reference name (SAM column 3) is not used, so positions
# from different reference sequences are pooled - presumably a
# single-chromosome genome is intended; confirm.
#
# Arguments:
#   $1 - SAM file to read
#   $2 - TA site table (whitespace separated, start position in column 5)
#   $3 - wig file to write
#   $4 - read length in bp (optional, default 13)
# Outputs:
#   Writes the wig file; diagnostics go to stderr.
# Returns:
#   0 on success, 1 on invalid arguments or if a step failed.
#######################################
sam_to_wig() {
  local sam=$1 ta_table=$2 wig=$3 read_length=${4:-13}
  local input unsorted status=0

  case $read_length in
    '' | *[!0-9]*)
      printf 'sam_to_wig: read length must be a whole number, got "%s"\n' \
        "$read_length" >&2
      return 1
      ;;
  esac
  if [ "$read_length" -lt 2 ]; then
    printf 'sam_to_wig: read length must be at least 2, got %s\n' \
      "$read_length" >&2
    return 1
  fi

  for input in "$sam" "$ta_table"; do
    if [ ! -r "$input" ]; then
      printf 'sam_to_wig: cannot read %s\n' "$input" >&2
      return 1
    fi
  done

  # awk treats operands of the form name=value as variable assignments, so
  # make sure relative file names cannot be mistaken for one.
  case $sam in /*) ;; *) sam=./$sam ;; esac
  case $ta_table in /*) ;; *) ta_table=./$ta_table ;; esac

  unsorted=$(mktemp) || return 1

  # Pass 1 tallies the insertions found in the SAM file, pass 2 walks the TA
  # table and prints each site once with its tally (0 when it was never hit).
  if awk -v read_length="$read_length" -v sam_name="$sam" '
      BEGIN { offset = read_length - 2 }

      pass == 1 {
        if ($0 ~ /^@/ || NF < 4) next                  # header or malformed line
        if ($2 == "0")       hits[$4 + 0]++            # forward strand
        else if ($2 == "16") hits[$4 + offset]++       # reverse strand
        next
      }

      # The table header ("start") and any other non-numeric value is skipped.
      pass == 2 && $5 ~ /^[0-9]+$/ && !($5 in is_ta) {
        is_ta[$5] = 1
        print $5, (($5 in hits) ? hits[$5] : 0)
      }

      END {
        for (pos in hits) {
          if (!(pos in is_ta)) {
            stray_reads += hits[pos]
            stray_sites++
          }
        }
        if (stray_sites) {
          printf "sam_to_wig: %s: ignored %d read(s) at %d position(s) that are not TA sites\n", \
            sam_name, stray_reads, stray_sites > "/dev/stderr"
        }
      }
    ' pass=1 "$sam" pass=2 "$ta_table" > "$unsorted" &&
    sort -n -- "$unsorted" > "$wig"; then
    status=0
  else
    printf 'sam_to_wig: failed to write %s\n' "$wig" >&2
    rm -f -- "$wig"
    status=1
  fi

  rm -f -- "$unsorted"
  return "$status"
}

for sam_file in *.sam; do
  # Without any match the pattern stays literal; skip it instead of failing.
  [ -e "$sam_file" ] || continue
  sam_to_wig "$sam_file" "$TA_SITES_TABLE" "${sam_file%.sam}.final.wig" \
    "$READ_LENGTH" ||
    printf 'sam_to_wig: could not convert %s\n' "$sam_file" >&2
done