#!/bin/bash

# Print one report row on stdout: "<label><TAB><value>".
#   $1  row label
#   $2  row value (may be empty)
function _gettabledata_row {
	printf '%s\t%s\n' "$1" "$2"
}

# Evaluate one arithmetic step of the report with awk and print the result in
# awk's default number format.
#   $1  operation: "div" (A / B), "sub" (A - B) or "pct" (100 * A / B)
#   $2  operand A
#   $3  operand B
# Prints nothing when an operand is empty or a divisor is zero, so a value that
# could not be parsed from a log shows up as a blank cell instead of an awk
# syntax error or a bogus number. Operands are handed over with -v rather than
# being pasted into the awk program text.
function _gettabledata_calc {
	awk -v op="$1" -v a="$2" -v b="$3" 'BEGIN {
		if (a == "" || b == "") exit
		if (op == "sub") { print a - b; exit }
		if (b + 0 == 0) exit
		if (op == "div") print a / b
		if (op == "pct") print 100 * a / b
	}'
}

# Print the text found between the first ':' and the next '(' on every line
# matching a pattern, with unwanted characters deleted.
#   $1      characters to delete from the result (e.g. ' ' or ' ,')
#   $2      extended regular expression selecting the line(s)
#   $3...   file(s) to search
# grep -h keeps file names out of the output, so the ':' split still yields the
# value when a glob expands to more than one file.
function _gettabledata_count {
	local strip=$1 pattern=$2
	shift 2
	grep -h -E -- "${pattern}" "$@" | cut -d":" -f2 | cut -d"(" -f1 | tr -d "${strip}"
}

# Print the processing/alignment statistics of one sample as 21 rows of
# "<label><TAB><value>" on stdout.
#
# Takes no arguments. Must be called from inside a sample directory: it reads
# ./err and the log files under ./logfiles/ (*.stats, *tagdust*,
# select_*_tagdusted_logfile.txt, adapter_trimming_nob_logfile.txt,
# cutadapter_nod_logfile.txt, *_STAR_Log.final.out, scrubbing_*_logfile.txt).
# A value that cannot be found is printed as an empty field; the row itself is
# always printed, so the number and order of rows never change.
function gettabledata {
	local trim_line reads size_bp input_half dedup_reads dedup_pairs accepted_pairs

	# ./err: 4th and 7th space-separated fields and the first parenthesised text
	# of the line starting with "Input Read Pairs".
	trim_line=$(grep -E "^Input Read Pairs" err)
	reads=$(printf '%s\n' "${trim_line}" | cut -d" " -f4)
	_gettabledata_row "Input reads" "${reads}"
	_gettabledata_row "After trim" "$(printf '%s\n' "${trim_line}" | cut -d" " -f7)"
	_gettabledata_row "Percentage" "$(printf '%s\n' "${trim_line}" | cut -d"(" -f2 | cut -d")" -f1)"

	# Denominator of the final yield percentage.
	# NOTE(review): the "Input Read Pairs" figure is halved before being used as
	# the reference for the yield - confirm this matches how the input is counted.
	input_half=$(_gettabledata_calc div "${reads}" 2)

	# Raw sample size, reported in units of 1,000,000 bp.
	size_bp=$(grep -h -E "^Raw read sample size" logfiles/*.stats \
		| cut -d":" -f2 | sed -e "s/ bp//; s/ //g")
	_gettabledata_row "Total size (Mb)" "$(_gettabledata_calc div "${size_bp}" 1000000)"

	# tagdust log lines are tab-separated; the value is the second field.
	_gettabledata_row "After tagdust" \
		"$(grep -h -E "successfully extracted" logfiles/*tagdust* | cut -f2)"
	_gettabledata_row "Percentage" \
		"$(grep -h -E $'\textracted' logfiles/*tagdust* | cut -f2)"

	# Only the first "Reads accepted" line of the select log is reported.
	_gettabledata_row "After select" \
		"$(_gettabledata_count ' ' "^Reads accepted" logfiles/select_*_tagdusted_logfile.txt | head -n 1)"
	_gettabledata_row "Percentage" \
		"$(grep -h -E "^Reads accepted" logfiles/select_*_tagdusted_logfile.txt \
			| head -n 1 | cut -d":" -f2 | cut -d"(" -f2 | cut -d")" -f1 | sed -e "s/ //g")"

	# Counts in these two logs may carry thousands separators; strip the commas.
	_gettabledata_row "Ok reads (unt)" \
		"$(_gettabledata_count ' ,' "^Reads untrimmed" logfiles/adapter_trimming_nob_logfile.txt)"
	_gettabledata_row "Accepted" \
		"$(_gettabledata_count ' ,' "^Pairs written" logfiles/cutadapter_nod_logfile.txt)"

	_gettabledata_row "Final count" \
		"$(grep -h -E "^Number of cleaned read pairs" logfiles/*.stats | cut -d":" -f2 | tr -d ' ')"

	# Cleaned sample size, reported in units of 1,000,000 bp.
	size_bp=$(grep -h -E "^Cleaned sample size" logfiles/*.stats \
		| cut -d":" -f2 | sed -e "s/ bp//; s/ //g")
	_gettabledata_row "Total size (Mb)" "$(_gettabledata_calc div "${size_bp}" 1000000)"

	# STAR summary: the value follows the '|' separator and a tab.
	_gettabledata_row "Uniquely mapped reads" \
		"$(grep -h -E "Uniquely mapped reads number" logfiles/*_STAR_Log.final.out \
			| cut -d"|" -f2 | tr -d '\t')"
	_gettabledata_row "Reads mapping to multiple loci" \
		"$(grep -h -E "Number of reads mapped to multiple loci" logfiles/*_STAR_Log.final.out \
			| cut -d"|" -f2 | tr -d '\t')"

	# The scrubbing log counts single reads here; two reads make one pair.
	dedup_reads=$(grep -h -E "^Total reads checked" logfiles/scrubbing_*_logfile.txt \
		| cut -d":" -f2 | tr -d ' ')
	dedup_pairs=$(_gettabledata_calc div "${dedup_reads}" 2)
	_gettabledata_row "Read pairs after removal of duplicates" "${dedup_pairs}"

	# Rejected = pairs checked minus pairs accepted.
	accepted_pairs=$(_gettabledata_count ' ' "^Accepted read pairs" logfiles/scrubbing_*_logfile.txt)
	_gettabledata_row "Rejected read pairs" \
		"$(_gettabledata_calc sub "${dedup_pairs}" "${accepted_pairs}")"
	_gettabledata_row "  Read pairs too soft" \
		"$(_gettabledata_count ' ' "^  Read pairs too soft" logfiles/scrubbing_*_logfile.txt)"
	_gettabledata_row "  Read pairs with long gaps" \
		"$(_gettabledata_count ' ' "^  Read pairs with long within-read gaps" logfiles/scrubbing_*_logfile.txt)"
	_gettabledata_row "  Read pairs with long TLEN" \
		"$(_gettabledata_count ' ' "^  Read pairs with long TLEN" logfiles/scrubbing_*_logfile.txt)"

	_gettabledata_row "Accepted read pairs" "${accepted_pairs}"

	# Overall yield: accepted pairs relative to the halved input figure.
	_gettabledata_row "Percentage" \
		"$(_gettabledata_calc pct "${accepted_pairs}" "${input_half}")%"
}




# Experiment identifier; it prefixes every sample directory name
# ("<EXPERIMENT>_s_<sample>").
EXPERIMENT=GSF2268

# Without a readable sample sheet there is nothing to summarise; stop here
# instead of going on to write a table that contains row labels only.
if [[ ! -r samples.tsv ]]; then
	echo "Error: cannot read samples.tsv in $(pwd)" >&2
	exit 1
fi

# Sample names: first tab-separated column of samples.tsv, header line skipped.
# Kept as a whitespace-separated string.
SAMPLES=$(cut -f1 samples.tsv | tail -n +2)
if [[ -z "${SAMPLES}" ]]; then
	echo "Error: samples.tsv does not list any samples" >&2
	exit 1
fi

echo ""
echo "Samples are set as:"
echo ""
# Deliberately unquoted: word splitting flattens the list onto a single line.
echo $SAMPLES
echo ""
echo ""


# Create ProcessingAndAlignmentResults.tsv with its first column: a heading
# line followed by one label per reported statistic. The per-sample columns are
# pasted next to these lines row by row, so the labels must stay in the same
# order as the rows printed by gettabledata.
{
	# Heading of the label column.
	printf '%s\n' "Processing step        /      Sample:"

	# Read clean-up statistics.
	printf '%s\n' \
		"Initial number of reads" \
		"After quality control" \
		"Percent retained" \
		"Total size (Mb)" \
		"After rRNA removal" \
		"Percent retained" \
		"After removal of artifacts" \
		"Percent retained" \
		"Number of accepted reads" \
		"Number of recovered reads" \
		"Final count" \
		"Total size (Mb)"

	# Alignment and duplicate-removal statistics.
	printf '%s\n' \
		"Uniquely mapped reads" \
		"Reads mapping to multiple loci" \
		"Read pairs after removal of duplicates" \
		"Rejected read pairs" \
		"  Read pairs too soft" \
		"  Read pairs with long gaps" \
		"  Read pairs with long TLEN" \
		"Accepted read pairs" \
		"Yield relative to input"
} > ProcessingAndAlignmentResults.tsv


# Append one column per sample to ProcessingAndAlignmentResults.tsv: the sample
# name on the first line, then the value field of every row that gettabledata
# prints for that sample's directory.
#
# SAMPLES is a whitespace-separated list, so the expansion below is left
# unquoted on purpose to iterate over the individual names.
# shellcheck disable=SC2068
for sample in ${SAMPLES[@]}
do
	sampledir="${EXPERIMENT}_s_${sample}"

	# Probe the directory in a throw-away subshell. A sample whose directory
	# cannot be entered is skipped with a warning; otherwise gettabledata would
	# read logs from the wrong place and the table would be built from garbage.
	if ! ( cd "${sampledir}" ) 2>/dev/null; then
		echo "Warning: cannot enter directory ${sampledir}; skipping sample ${sample}" >&2
		continue
	fi
	echo "Now in directory ${sampledir} ..."

	# gettabledata runs in a subshell, so the working directory of this script
	# never changes and the table files are always addressed from the same place.
	{
		printf '%s\n' "${sample}"
		( cd "${sampledir}" && gettabledata | cut -f2 )
	} > tmpfa

	# Glue the new column onto the table via a scratch file.
	# "command" keeps interactive aliases (e.g. mv -i) out of the way.
	paste ProcessingAndAlignmentResults.tsv tmpfa > tmptable
	command mv -- tmptable ProcessingAndAlignmentResults.tsv
	command rm -- tmpfa
	echo "... done"
done

# Drop a trailing tab left behind on rows whose last value is empty.
sed -i -e $'s/\t$//' ProcessingAndAlignmentResults.tsv
echo ""
echo ""
echo "... all done!"
echo ""
echo "Read processing and alignment results are summarized in the tab-delimited"
echo "file ProcessingAndAlignmentResults.tsv."
echo ""
