# Small RNAs:


####
#### Mapping
####

# my command for mapping all samples to the small RNA library
# consisting of TE sequences, miRNAs, mRNAs, rRNAs, tRNAs
source ~/.zshrc
# Output directory for the sorted BAM files and the novoalign index to map against.
out="/Volumes/Temp/Robert/eecold/small-RNA/2018-batch-gen62-79/map-piRNA"
ref="/Volumes/Temp/Robert/eecold/refg/piRNA/Dsim-mRNA/mrna-te-mi-misc-tr.nvi"

# Map every FASTQ file in raw-nucs15-35/ against $ref and write one sorted
# BAM file per sample to $out/<sample>.sort.bam.
for i in raw-nucs15-35/*.fastq
do
    # In bash an unmatched glob stays literal; skip it rather than handing
    # a non-existent file name to novoalign.
    [ -e "$i" ] || continue

    # Sample name = file name without the directory and the .fastq extension.
    n=${i##*/}
    n=${n%.fastq}

    # novoalign emits SAM, samtools view converts it to BAM, samtools sort
    # writes the sorted result.
    # NOTE(review): `samtools sort -f <in> <out>` looks like the legacy
    # (samtools 0.1.x) syntax - confirm the installed samtools version.
    novoalign -d "$ref" -f "$i" -F STDFQ -o SAM -o FullNW -r RANDOM \
        | samtools view -Sb - \
        | samtools sort -f -m4g - "$out/$n.sort.bam"
done


####
#### Summary statistics
####


# My command to compute the summary statistic for all samples
# Note: I used 2 mismatches
for i in *sort.bam; do
    # Skip the literal pattern when no BAM file matches the glob.
    [ -e "$i" ] || continue

    # Sample id = BAM file name without the .sort.bam suffix.
    n=${i%.sort.bam}

    # Stream the alignments as SAM text into the overview script (read from
    # stdin via `--sam -`); one .overview.stat file per sample in stat-mm2/.
    samtools view "$i" \
        | python2 /Volumes/Temp2/Robert/te-tools/piRNA/stat-piRNA-overview.py \
            --sam - --min-mq 0 --max-mm 2 --sample-id "$n" \
        > "stat-mm2/${n}.overview.stat"
done


# Finally, I computed the publication-ready statistics with:
# the script compute-summary is in your te-tools folder now
# Run compute-summary.py once per .stat file in the current directory.
for i in *.stat; do
    # Skip the literal pattern when no .stat file matches the glob.
    [ -e "$i" ] || continue
    python compute-summary.py "$i"
done



####
#### Distribution of piRNAs/siRNAs along chromosome
####


# piRNAs
for i in *sort.bam; do
    # Skip the literal pattern when no BAM file matches the glob.
    [ -e "$i" ] || continue

    # Sample id = BAM file name without the .sort.bam suffix.
    n=${i%.sort.bam}

    # Pipe the alignments as SAM text into the distribution script; one
    # .tedistri file per sample is written to ../02-Pele-distribution/.
    samtools view "$i" \
        | python /Users/rokofler/dev/te-tools/piRNA/graph-piRNA-distribution-onTE.py \
            --sam - --min-mq 0 --max-mm 2 --sample-id "$n" \
        > "../02-Pele-distribution/${n}.tedistri"
done



# siRNAs
 for i in *sort.bam; do
     # Skip the literal pattern when no BAM file matches the glob.
     [ -e "$i" ] || continue

     # Sample id = BAM file name without the .sort.bam suffix.
     n=${i%.sort.bam}

     # Same distribution script as for the piRNAs, but called with
     # --pi-min 20 --pi-max 22; output goes to the siRNAs/ subfolder.
     samtools view "$i" \
         | python /Users/rokofler/dev/te-tools/piRNA/graph-piRNA-distribution-onTE.py \
             --sam - --min-mq 0 --max-mm 2 --pi-min 20 --pi-max 22 --sample-id "$n" \
         > "../02-Pele-distribution/siRNAs/${n}.sirna20-22.tedistri"
 done
 
 


####
#### Read length distribution
####

 for i in *sort.bam; do
     # Skip the literal pattern when no BAM file matches the glob.
     [ -e "$i" ] || continue

     # Sample id = BAM file name without the .sort.bam suffix.
     n=${i%.sort.bam}

     # Pipe the alignments as SAM text into the length-distribution script;
     # one .lendistr file per sample is written to ../03-length-distribution/.
     samtools view "$i" \
         | python /Users/rokofler/dev/te-tools/piRNA/tmp/graph-length-distribution.py \
             --sam - --min-mq 0 --max-mm 2 --sample-id "$n" \
         > "../03-length-distribution/${n}.lendistr"
 done


