#!/bin/sh
# Settings shared by the pipeline stages below.
#
# Positional arguments, as they are used further down:
#   $1  sample prefix; every input/output file name is derived from it
#   $2  name used to select the miRBase hairpin index and UCSC genome files
#   $3  suffix of the base_count.* file read by the noise-removal stage

# Mismatch budgets handed to `bowtie -v`.
mismatch0=0   # no active command below references this one
mismatch1=1   # alignments of adapter-clipped reads
mismatch2=2   # alignments of non-clipped reads and the final genome pass
mismatch3=3   # no active command below references this one

# Coverage / plotting parameters.
plot_window_size=50   # passed to run_converage and embedded in output names
extend_window=0       # only mentioned by the commented-out genecentric step
cutoff=10             # passed to run_converage and embedded in output names

##################
###  QC plots  ###
##################
#$HOME/CORE/NGS/ngs_single_end_pre_mapping_qc $1.fq
#mv $1.fastq.uq $1.uq
#
#total_read=`grep '^>' $1.uq | cut -d':' -f2 | $HOME/CORE/bin/sum`
#echo "$1	total_read	$total_read" >> summary
#
#######################
###  remove linker  ###
#######################
#$HOME/TOOLS/bin/fastx_clipper -Q33 -a CGTCGTATGCCG -M 9 -n -l 20 -c -i $1.uq > $1.uq.clipped
#$HOME/TOOLS/bin/fastx_clipper -Q33 -a CGTCGTATGCCG -M 9 -n -l 20 -C -i $1.uq > $1.uq.noclipped
#cat $1.uq.noclipped | sed 's/CGTCGTAT$/X/' | sed 's/CGTCGTA$/X/' | sed 's/CGTCGT$/X/' > z0.$1
#
#$HOME/TOOLS/bin/fastx_clipper -Q33 -a CGTCAGATCGGA -M 9 -n -l 20 -c -i $1.uq > $1.uq.clipped
#$HOME/TOOLS/bin/fastx_clipper -Q33 -a CGTCAGATCGGA -M 9 -n -l 20 -C -i $1.uq > $1.uq.noclipped
#cat $1.uq.noclipped | sed 's/CGTCAGAT$/X/' | sed 's/CGTCAGA$/X/' | sed 's/CGTCAG$/X/' > z0.$1
#
#$HOME/TOOLS/bin/fastx_clipper -Q33 -a TCGTATGCCGTC -M 9 -n -l 20 -c -i $1.uq > $1.uq.clipped
#$HOME/TOOLS/bin/fastx_clipper -Q33 -a TCGTATGCCGTC -M 9 -n -l 20 -C -i $1.uq > $1.uq.noclipped
#cat $1.uq.noclipped | sed 's/TCGTATGC$/X/' | sed 's/TCGTATG$/X/' | sed 's/TCGTAT$/X/' > z0.$1
#
#$HOME/CORE/bin/exgrep    -b '>' 'X$' z0.$1 | sed 's/X$//' | sed 's/NN*$//' >> $1.uq.clipped
#$HOME/CORE/bin/exgrep -v -b '>' 'X$' z0.$1 | \
#grep -v '[AN][AN][AN][AN][AN][AN][AN][AN][AN][AN][AN][AN][AN]*' | \
#grep -v '[CN][CN][CN][CN][CN][CN][CN][CN][CN][CN][CN][CN][CN]*' | \
#grep -v '[GN][GN][GN][GN][GN][GN][GN][GN][GN][GN][GN][GN][GN]*' | \
#grep -v '[TN][TN][TN][TN][TN][TN][TN][TN][TN][TN][TN][TN][TN]*' | \
#$HOME/CORE/bin/remove_poly_n > $1.uq.noclipped
#
#grep    '^>' $1.uq.clipped | cut -d':' -f2 > z0.$1
#grep -v '^>' $1.uq.clipped | paste - z0.$1 | sort +0 -1 +1 -2n | $HOME/CORE/bin/group -g 0 -a 1 -c -d '+' | sed 's/+$//' > z1.$1
#cut -f2 z1.$1 | bc -l | paste z1.$1 - z1.$1 | cut -f1,3,4 | sed 's/^/>/' | sed 's/	/:/' | tr '\t' '\n' | \
#grep -v '[AN][AN][AN][AN][AN][AN][AN][AN][AN][AN][AN][AN][AN]*' | \
#grep -v '[CN][CN][CN][CN][CN][CN][CN][CN][CN][CN][CN][CN][CN]*' | \
#grep -v '[GN][GN][GN][GN][GN][GN][GN][GN][GN][GN][GN][GN][GN]*' | \
#grep -v '[TN][TN][TN][TN][TN][TN][TN][TN][TN][TN][TN][TN][TN]*' | \
#$HOME/CORE/bin/remove_poly_n > $1.uq.clipped
#
#sed 's/:[0-9][0-9]*/:1/' $1.uq.clipped > $1-merged.uq.clipped
#sed 's/:[0-9][0-9]*/:1/' $1.uq.noclipped > $1-merged.uq.noclipped
#
#read_clipped=`grep '^>' $1.uq.clipped | cut -d':' -f2 | $HOME/CORE/bin/sum`
#echo "$1	read_clipped	$read_clipped" >> summary
#read_noclipped=`grep '^>' $1.uq.noclipped | cut -d':' -f2 | $HOME/CORE/bin/sum`
#echo "$1	read_noclipped	$read_noclipped" >> summary
#
#read_merged_clipped=`grep '^>' $1-merged.uq.clipped | cut -d':' -f2 | $HOME/CORE/bin/sum`
#echo "$1	read_merged_clipped	$read_merged_clipped" >> summary
#read_merged_noclipped=`grep '^>' $1-merged.uq.noclipped | cut -d':' -f2 | $HOME/CORE/bin/sum`
#echo "$1	read_merged_noclipped	$read_merged_noclipped" >> summary
#exit
#
#######################
###  align to virus ###
#######################
# Align both read sets to the fly virus bowtie index. Hits are collected in
# $1.virus.sam; reads whose SAM record matches the "unmapped" pattern are
# turned back into FASTA for the next stage (for ids shaped SEQ:COUNT the
# paste/cut/tr chain yields ">SEQ:COUNT" followed by "SEQ").
bowtie_bin=$HOME/TOOLS/bin/bowtie
virus_idx=$HOME/NCBI_GENOME/virus/fly_virus
# Tab-delimited: flag 4 followed by the empty placeholder columns.
unmapped_rec='	4	\*	0	0	\*	\*	0	0	'

# Pass 1: adapter-clipped reads.
$bowtie_bin -f -v $mismatch1 -S -k 100000 -m 100000 --strata --best -p 12 \
  $virus_idx $1.uq.clipped z0.$1
grep -v "$unmapped_rec" z0.$1 > $1.virus.sam
grep "$unmapped_rec" z0.$1 \
  | cut -f1 > z1.$1
paste z1.$1 z1.$1 \
  | cut -d':' -f1,2 \
  | sed 's/^/>/' \
  | tr '\t' '\n' > $1.clipped.novirus

# Pass 2: non-clipped reads; the SAM header is dropped before appending.
$bowtie_bin -f -v $mismatch2 -S -k 100000 -m 100000 --strata --best -p 12 \
  $virus_idx $1.uq.noclipped z0.$1
grep -v "$unmapped_rec" z0.$1 \
  | grep -v '^@' >> $1.virus.sam
grep "$unmapped_rec" z0.$1 \
  | cut -f1 > z1.$1
paste z1.$1 z1.$1 \
  | cut -d':' -f1,2 \
  | sed 's/^/>/' \
  | tr '\t' '\n' > $1.noclipped.novirus

# Sum the second ':'-field of each distinct read id that hit a virus.
# uniq only folds adjacent duplicates -- presumably all records of one read
# are consecutive in bowtie's output; TODO confirm.
virus=$(grep -v '^@' $1.virus.sam \
  | cut -f1 \
  | uniq \
  | cut -d':' -f2 \
  | $HOME/CORE/bin/sum)
echo "$1	virus	$virus" >> summary
rm z0.$1 z1.$1

############################
##  align to struc/miRNA ###
############################
# Align the virus-free reads to the miRBase hairpin index selected by $2.
# Hits go to $1.struc_mirna.sam; records matching the "unmapped" pattern are
# rebuilt as FASTA in $1.clipped.nostruc / $1.noclipped.nostruc.
bowtie_bin=$HOME/TOOLS/bin/bowtie
hairpin_idx=$HOME/MIRBASE/hairpin_$2
# Tab-delimited: flag 4 followed by the empty placeholder columns.
unmapped_rec='	4	\*	0	0	\*	\*	0	0	'

# Pass 1: adapter-clipped reads.
$bowtie_bin -f -v $mismatch1 -S -k 100000 -m 100000 --strata --best -p 12 \
  $hairpin_idx $1.clipped.novirus z0.$1
grep -v "$unmapped_rec" z0.$1 > $1.struc_mirna.sam
grep "$unmapped_rec" z0.$1 \
  | cut -f1 > z1.$1
paste z1.$1 z1.$1 \
  | cut -d':' -f1,2 \
  | sed 's/^/>/' \
  | tr '\t' '\n' > $1.clipped.nostruc

# Pass 2: non-clipped reads; the SAM header is dropped before appending.
$bowtie_bin -f -v $mismatch2 -S -k 100000 -m 100000 --strata --best -p 12 \
  $hairpin_idx $1.noclipped.novirus z0.$1
grep -v "$unmapped_rec" z0.$1 \
  | grep -v '^@' >> $1.struc_mirna.sam
grep "$unmapped_rec" z0.$1 \
  | cut -f1 > z1.$1
paste z1.$1 z1.$1 \
  | cut -d':' -f1,2 \
  | sed 's/^/>/' \
  | tr '\t' '\n' > $1.noclipped.nostruc
rm z0.$1 z1.$1

# Only records containing "<TAB>14|miRNA:" contribute to the miRNA total; the
# second ':'-field of each distinct read id is summed.
mirna=$(grep -v '^@' $1.struc_mirna.sam \
  | grep '	14|miRNA:' \
  | cut -f1 \
  | uniq \
  | cut -d':' -f2 \
  | $HOME/CORE/bin/sum)
echo "$1	mirna	$mirna" >> summary

#######################
##  align to genome ###
#######################
# First genome pass: align the reads that survived the virus and hairpin
# filters to the UCSC genome index selected by $2. Hits go to $1.genome.sam,
# records matching the "unmapped" pattern are rebuilt as FASTA in
# $1.clipped.nogenome / $1.noclipped.nogenome.
bowtie_bin=$HOME/TOOLS/bin/bowtie
genome_idx=$HOME/UCSC_GENOME/$2/ucsc_${2}_genome
# Tab-delimited: flag 4 followed by the empty placeholder columns.
unmapped_rec='	4	\*	0	0	\*	\*	0	0	'

# Pass 1: adapter-clipped reads.
$bowtie_bin -f -v $mismatch1 -S -k 100000 -m 100000 --strata --best -p 12 \
  $genome_idx $1.clipped.nostruc z0.$1
grep -v "$unmapped_rec" z0.$1 > $1.genome.sam
grep "$unmapped_rec" z0.$1 \
  | cut -f1 > z1.$1
paste z1.$1 z1.$1 \
  | cut -d':' -f1,2 \
  | sed 's/^/>/' \
  | tr '\t' '\n' > $1.clipped.nogenome

# Pass 2: non-clipped reads; the SAM header is dropped before appending.
$bowtie_bin -f -v $mismatch2 -S -k 100000 -m 100000 --strata --best -p 12 \
  $genome_idx $1.noclipped.nostruc z0.$1
grep -v "$unmapped_rec" z0.$1 \
  | grep -v '^@' >> $1.genome.sam
grep "$unmapped_rec" z0.$1 \
  | cut -f1 > z1.$1
paste z1.$1 z1.$1 \
  | cut -d':' -f1,2 \
  | sed 's/^/>/' \
  | tr '\t' '\n' > $1.noclipped.nogenome

# NOTE(review): z1.$1 was overwritten by pass 2, so this total only covers the
# non-clipped reads that failed to map -- confirm that is what is wanted.
nogenome=$(cut -d':' -f2 z1.$1 | $HOME/CORE/bin/sum)
echo "$1	nogenome	$nogenome" >> summary
rm z0.$1 z1.$1

####################################################
#############   Convert to BED file   ##############
####################################################
# Turn the first-pass genome hits into BED. samtools writes a BAM scratch
# file (z0.$1) that bamToBed reads; the scratch file is removed right after.
tools_bin=$HOME/TOOLS/bin
$tools_bin/samtools view -bS $1.genome.sam > z0.$1
$tools_bin/bamToBed -i z0.$1 > $1.genome.bed
rm z0.$1

#############################
###  remove structure RNA ###
#############################
# Drop reads whose genome hit overlaps an annotated structural RNA.
#   z0.$1       all read ids present in $1.genome.sam (sorted, unique)
#   $1.struc-2  read ids (BED column 4) overlapping the structural-RNA BED
#   z1.$1       ids only in z0 (comm -23), written back out as FASTA in $1.nono
struct_bed=/nlmusr/gchirn/linux/UCSC_GENOME/$2/$2_structurerna.bed
sum_tool=$HOME/CORE/bin/sum

grep -v '^@' $1.genome.sam \
  | cut -f1 \
  | sort -u > z0.$1
$HOME/TOOLS/bin/intersectBed -a $1.genome.bed -b $struct_bed \
  | cut -f4 \
  | sort -u > $1.struc-2
comm -23 z0.$1 $1.struc-2 > z1.$1
paste z1.$1 z1.$1 \
  | cut -d':' -f1,2 \
  | sed 's/^/>/' \
  | tr '\t' '\n' > $1.nono

# struc: distinct ids from the hairpin alignments plus the overlaps above.
struc=$(cat $1.struc_mirna.sam $1.struc-2 \
  | grep -v '^@' \
  | cut -f1 \
  | sort -u \
  | cut -d':' -f2 \
  | $sum_tool)
echo "$1	struc	$struc" >> summary

# nono: reads that remain after the structural-RNA filter.
nono=$(cut -d':' -f2 z1.$1 | $sum_tool)
echo "$1	nono	$nono" >> summary

rm z0.$1 z1.$1 $1.genome.bed

#######################
##  align to genome ###
#######################
# Second genome pass: realign the filtered reads ($1.nono) with $mismatch2
# mismatches and replace $1.genome.sam with the records that do not match the
# "unmapped" pattern (the SAM header is kept).
genome_idx=$HOME/UCSC_GENOME/$2/ucsc_${2}_genome
# Tab-delimited: flag 4 followed by the empty placeholder columns.
unmapped_rec='	4	\*	0	0	\*	\*	0	0	'

$HOME/TOOLS/bin/bowtie -f -v $mismatch2 -S -k 100000 -m 100000 --strata --best -p 12 \
  $genome_idx $1.nono z0.$1
grep -v "$unmapped_rec" z0.$1 > $1.genome.sam
rm z0.$1

# Sum the second ':'-field over the distinct (adjacent) read ids.
genome_mapped=$(grep -v '^@' $1.genome.sam \
  | cut -f1 \
  | uniq \
  | cut -d':' -f2 \
  | $HOME/CORE/bin/sum)
echo "$1	genome_mapped	$genome_mapped" >> summary

########################################
####### Append # of mapping to id ######
########################################
# Tag every read id in $1.genome.sam with its number of alignment records,
# producing $1.genome-v2.sam whose first column reads "<id>:<n>".
#
# Both join inputs are ordered by read id with a three-level bucket pass over
# the leading bases (A C G N T), so each sort call only sees the reads that
# share a 3-base prefix.
#
# Sort keys are written with POSIX -k (the "+POS -POS" form is obsolete), and
# the scratch cleanup uses the same z*.<sample> glob as the other stages.

# SAM header lines are set aside and re-attached at the end.
grep '^@' "$1.genome.sam" > "z0.$1"

# z1: "<n><TAB><id>". Records that look unmapped get n=0; the rest are tallied
# by the CORE count helper (presumably emitting "<n><TAB><id>" -- the join
# below reads the id from column 2).
grep -v '^@' "$1.genome.sam" | grep    ':[0-9][0-9]*	4	\*' | cut -f1 | sed 's/^/0	/' > "z1.$1"
grep -v '^@' "$1.genome.sam" | grep -v ':[0-9][0-9]*	4	\*' | cut -f1 | "$HOME/CORE/bin/count" >> "z1.$1"

# Bucket-sort z1 on its id column.
rm -f "z22.$1"
for W in A C G N T; do
  grep "	$W" "z1.$1" > "z1.$W.$1"
  for X in A C G N T; do
    grep "	$W$X" "z1.$W.$1" > "z1.$W$X.$1"
    for Y in A C G N T; do
      grep "	$W$X$Y" "z1.$W$X.$1" | sort -k2,2 >> "z22.$1"
    done
  done
done
mv "z22.$1" "z1.$1"

# Bucket-sort the alignment records on their id column (header lines start
# with '@' and therefore never match a base prefix).
rm -f "z2.$1"
for W in A C G N T; do
  grep "^$W" "$1.genome.sam" > "z2.$W.$1"
  for X in A C G N T; do
    grep "^$W$X" "z2.$W.$1" > "z2.$W$X.$1"
    for Y in A C G N T; do
      grep "^$W$X$Y" "z2.$W$X.$1" | sort -k1,1 >> "z2.$1"
    done
  done
done

# join emits "<id><TAB><n><TAB>rest"; turning the first tab into ':' glues the
# count onto the id.
join -t '	' -1 2 -2 1 "z1.$1" "z2.$1" | sed 's/	/:/' > "z3.$1"
cat "z0.$1" "z3.$1" > "$1.genome-v2.sam"
rm z*."$1"
rm "$1.genome.sam"

################################################
#####   Keep <50 location reads   ##############
################################################
# Build $1.genome-v2-50.sam: the SAM header, then records whose id ends in a
# one-digit ":N", then those ending in ":10" .. ":49". Records matching the
# "unmapped" pattern are left out.
full_sam=$1.genome-v2.sam
# Tab-delimited: flag 4 followed by the empty placeholder columns.
unmapped_rec='	4	\*	0	0	\*	\*	0	0	'
{
  grep '^@' $full_sam
  grep ':[0-9][0-9]*:[0-9]	' $full_sam \
    | grep -v "$unmapped_rec"
  grep ':[0-9][0-9]*:[1-4][0-9]	' $full_sam \
    | grep -v "$unmapped_rec"
} > $1.genome-v2-50.sam

################################################################
#########   Convert to BED file (+:red, -:blue)   ##############
################################################################
# Produce strand-coloured BED tracks for $1.genome-v2.sam and
# $1.genome-v2-50.sam. The two conversions are identical, so they share one
# helper. Sort keys use POSIX -k instead of the obsolete "+POS -POS" form.
sample=$1

# sam_to_strand_bed SAM BED
#   SAM  input alignments (with header)
#   BED  output track file
# Splits SAM into flag-0 and flag-16 records, converts each half through
# samtools/bamToBed, repeats the start/end columns, appends an RGB column
# (255,0,0 for flag 0, 0,0,255 for flag 16) and writes a track header
# followed by the rows sorted by column 1, then columns 2 and 3 numerically.
# Scratch files z5..z9.<sample> are removed on the way out.
sam_to_strand_bed() {
  grep    '^@' "$1" > "z5.$sample"
  grep -v '^@' "$1" | grep '[ACTGN][ACTGN]*:[0-9][0-9]*:[0-9][0-9]*	0	' >> "z5.$sample"
  grep    '^@' "$1" > "z6.$sample"
  grep -v '^@' "$1" | grep '[ACTGN][ACTGN]*:[0-9][0-9]*:[0-9][0-9]*	16	' >> "z6.$sample"

  "$HOME/TOOLS/bin/samtools" view -bS "z5.$sample" > "z7.$sample"
  "$HOME/TOOLS/bin/bamToBed" -i "z7.$sample" > "z8.$sample"
  cut -f2,3 "z8.$sample" | paste "z8.$sample" - | sed 's/$/	255,0,0/' > "z9.$sample"

  "$HOME/TOOLS/bin/samtools" view -bS "z6.$sample" > "z7.$sample"
  "$HOME/TOOLS/bin/bamToBed" -i "z7.$sample" > "z8.$sample"
  cut -f2,3 "z8.$sample" | paste "z8.$sample" - | sed 's/$/	0,0,255/' >> "z9.$sample"

  echo "track name=\"$sample\" description=\"$sample\" visibility=2 itemRgb=\"On\"" > "$2"
  sort -T ./ -k1,1 -k2,2n -k3,3n "z9.$sample" >> "$2"
  rm z*."$sample"
}

sam_to_strand_bed "$1.genome-v2.sam" "$1.genome-v2.bed"

################################################################
#########   Convert to BED file (+:red, -:blue)   ##############
################################################################
sam_to_strand_bed "$1.genome-v2-50.sam" "$1.genome-v2-50.bed"

##################################
######  make genecentric table  ##
##################################
###nomorized_count=$nono
###$HOME/CORE/bin/ngs_genecentric -r $HOME/UCSC_GENOME/$2/$2\_refseq.bed -e $extend_window -v $1.genome-v2-50-coverage-w$plot_window_size-$cutoff -m $1.genome-v2-50.bed -o genecentric_$1-$plot_window_size-$cutoff.xls -n $nomorized_count -l
###exit
###
###grep    '^Gene	' genecentric_$1-$plot_window_size-$cutoff.xls > z0.$1
###grep -v '^Gene	' genecentric_$1-$plot_window_size-$cutoff.xls | sort +23 -24rn >> z0.$1
###mv z0.$1 genecentric_$1-$plot_window_size-$cutoff.xls
###
#############################
##  align to repeat/virus ###
#############################
# Align the original clipped / non-clipped reads to the fly repeat index,
# tabulate per-reference counts, and record read-count / coverage / average
# length figures for reads mapped to genome or repeats in `summary`.
# Sort keys use POSIX -k instead of the obsolete "+POS -POS" form.
"$HOME/TOOLS/bin/bowtie" -f -v "$mismatch1" -S -k 100000 -m 100000 --strata --best -p 12 "$HOME/REPBASE/repeat_fly" "$1.uq.clipped" "z0.$1"
grep -v '	4	\*	0	0	\*	\*	0	0	' "z0.$1" > "$1.rep_vir.sam"
"$HOME/TOOLS/bin/bowtie" -f -v "$mismatch2" -S -k 100000 -m 100000 --strata --best -p 12 "$HOME/REPBASE/repeat_fly" "$1.uq.noclipped" "z0.$1"
grep -v '	4	\*	0	0	\*	\*	0	0	' "z0.$1" | grep -v '^@' >> "$1.rep_vir.sam"

# Per-reference totals: distinct (read id, reference) pairs are reduced to
# "<count><TAB><reference>", ordered by reference then count, folded by
# `group` into a '+'-joined expression and evaluated by bc.
# The old key "+0 -12n" is written as -k1,1n: a numeric comparison only reads
# the leading number of the key, so the ordering is unchanged.
grep -v '^@' "$1.rep_vir.sam" | cut -f1,3 | sort -u | cut -d':' -f2- | sort -k2,2 -k1,1n | group -g 1 -a 0 -c -d '+' | sed 's/+$//' > "z0.$1"
cut -f2 "z0.$1" | bc -l | paste "z0.$1" - | cut -f1,3 | sed 's/^/>/' > "rep_vir_count.$1"
lt_create_idx "rep_vir_count.$1" -q


# Distinct read ids (first two ':'-fields) mapped to genome or repeats, fed
# to seq_stat as FASTA; lines containing ": " are discarded.
cat "$1.genome-v2.sam" "$1.rep_vir.sam" | grep -v '^@' | cut -f1 | cut -d':' -f1-2 | uniq | sort -u > "z0.$1"
paste "z0.$1" "z0.$1" | cut -d':' -f1,2 | sed 's/^/>/' | tr '\t' '\n' | "$HOME/CORE/bin/seq_stat" | grep -v ': ' > "z1.$1"

# NOTE: z0.$1 and z1.$1 are intentionally left in place, as before.
# The "raed" spelling is part of the summary key read back by the
# noise-removal stage, so it is kept.
coverage_mapped_to_genome_rep_vir=$(cut -d':' -f2- "z1.$1" | sed 's/:[0-9][0-9]*//' | sed 's/^/(/' | sed 's/	/*/' | sed 's/$/)/' | "$HOME/CORE/bin/sum")
raed_count_mapped_to_genome_rep_vir=$(cut -f1 "z1.$1" | cut -d':' -f2 | "$HOME/CORE/bin/sum")
avg_read_length_mapped_to_genome_rep_vir=$(echo "$coverage_mapped_to_genome_rep_vir/$raed_count_mapped_to_genome_rep_vir" | bc -l)
echo "$1	raed_count_mapped_to_genome_rep_vir	$raed_count_mapped_to_genome_rep_vir" >> summary
echo "$1	coverage_mapped_to_genome_rep_vir	$coverage_mapped_to_genome_rep_vir" >> summary
echo "$1	avg_read_length_mapped_to_genome_rep_vir	$avg_read_length_mapped_to_genome_rep_vir" >> summary

#####################################################
##   Noise removal   ################################
#####################################################
# Run ngs_remove_clip_noise_by_mrna using figures read back from `summary`
# and split its output into $1-noise.gene.bed (lines not starting with '>')
# and $1-noise (lines starting with '>').
#
# Summary lookups are anchored on "^<sample><TAB><key><TAB>" so that a sample
# whose name is a suffix of another sample's name cannot pick up that other
# sample's row, and the echoed command line now shows the -y argument that is
# actually passed.

# Normalisation count = clipped + non-clipped reads of the un-merged sample.
nn=$(echo "$1" | sed 's/-merged//')
read_clipped=$(grep "^$nn	read_clipped	" summary | tail -1 | cut -f3)
read_noclipped=$(grep "^$nn	read_noclipped	" summary | tail -1 | cut -f3)
nomorized_count=$(echo "$read_clipped + $read_noclipped" | bc -l)

# g-l-m.$1: exon_rep_vir_length with one extra column looked up through
# ltr_gwc in base_count.$3; ltr_gwc lines containing "GWC:" become 0.
cut -f1 exon_rep_vir_length | cut -d'>' -f2 | ltr_gwc "base_count.$3" | cut -f2 | sed 's/^.*GWC:.*/0/' | paste exon_rep_vir_length - > "g-l-m.$1"
total_clip_count=$(grep "^$1	raed_count_mapped_to_genome_rep_vir	" summary | tail -1 | cut -f3)

# Sum of natural logs of column 3; negative results are clamped to 0.
total_mrna_count=$(cut -f3 "g-l-m.$1" | sed 's/^/l(/' | sed 's/$/)/' | bc -l | sed 's/^-.*/0/' | "$HOME/CORE/bin/sum")

avg_clip_length=$(grep "^$1	avg_read_length_mapped_to_genome_rep_vir	" summary | tail -1 | cut -f3)

# Log the exact command before running it.
echo "$HOME/CORE/bin/ngs_remove_clip_noise_by_mrna -i g-l-m.$1 -l $avg_clip_length -t $total_clip_count -m $total_mrna_count -r $HOME/UCSC_GENOME/fly/fly_refseq-gene.bed -R $1-noise.gene.bed -y $nomorized_count > z-$1-$3.noise"

"$HOME/CORE/bin/ngs_remove_clip_noise_by_mrna" -i "g-l-m.$1" -l "$avg_clip_length" -t "$total_clip_count" -m "$total_mrna_count" -r "$HOME/UCSC_GENOME/fly/fly_refseq-gene.bed" -R "$1-noise.gene.bed" -y "$nomorized_count" > "z-$1-$3.noise"

grep -v '^>' "z-$1-$3.noise" > "$1-noise.gene.bed"
grep    '^>' "z-$1-$3.noise" > "$1-noise"
lt_create_idx "$1-noise" -q

###########################################
####  Genegate coverage plot files       ##
####    summation count within a window  ##
###########################################
# Compute windowed coverage with run_converage and export four fixedStep
# wiggle tracks (normalised / unique-mapped x positive / negative strand).
#
# The four track writers were copies of each other and now share one helper.
# The per-chromosome filter uses "^<chrom>:" -- the former "\:" is an
# undefined escape in POSIX basic regular expressions and triggers warnings
# in recent GNU grep.
sample=$1

# Normalisation count = clipped + non-clipped reads of the un-merged sample.
nn=$(echo "$1" | sed 's/-merged//')
read_clipped=$(grep "^$nn	read_clipped	" summary | tail -1 | cut -f3)
read_noclipped=$(grep "^$nn	read_noclipped	" summary | tail -1 | cut -f3)
nomorized_count=$(echo "$read_clipped + $read_noclipped" | bc -l)
#########  nomorized_count=`grep '^>' $1.uq | cut -d':' -f2 | $HOME/CORE/bin/sum`
#####cat exon_map.$1 $1.rep_vir.sam | grep -v '^@' | cut -f1 | cut -d':' -f1-2 | uniq | sort -u > z0.$1
#####paste z0.$1 z0.$1 | cut -d':' -f1,2 | sed 's/^/>/' | tr '\t' '\n' | $HOME/CORE/bin/seq_stat | grep -v ': ' > z1.$1
#####nomorized_count=`cut -f1 z1.$1 | cut -d':' -f2 | $HOME/CORE/bin/sum`
###echo "$1	nomorized_count	$nomorized_count" >> summary
########### Lack of memory (>96GB), use run_converage  ###
########### $HOME/CORE/bin/ngs_coverage-nelson -i $1.genome-v2-50.sam -o $1.genome-v2-50-coverage-w$plot_window_size-$cutoff -w $plot_window_size -m 4 -a 2 -r $nomorized_count -c $cutoff
coverage_file=$1.genome-v2-50-coverage-w${plot_window_size}-${cutoff}-noise
"$HOME/CORE/NGS/run_converage" "$1.genome-v2-50.sam" 4 "$plot_window_size" 2 "$coverage_file" "$nomorized_count" "$cutoff" "$1-noise.gene.bed" "$HOME/UCSC_GENOME/fly/fly_refseq-gene.bed"

# Strip all-zero decimal tails (".0", ".00", ...) at the end of every column.
sed 's/\.00*	/	/g' "$coverage_file" | sed 's/\.00*$//' > "z0.$1"
mv "z0.$1" "$coverage_file"
grep -v '^#' "$coverage_file" > "w1.$1"

# Window step: the part after '-' in column 1 of the first non-header row.
cut -f1,10 "w1.$1" > "z0.$1"
step=$(head -2 "z0.$1" | grep -v stop | cut -f1 | cut -d'-' -f2)

# write_wig COLUMN FILE_TAG TRACK_TAG TRACK_DESC COLOR
#   COLUMN      column of w1.<sample> holding the values for this track
#   FILE_TAG    middle part of the output file name (e.g. nor-ngtv)
#   TRACK_TAG   suffix of the track name (e.g. Nrm-(-))
#   TRACK_DESC  text placed after the sample name in the description
#   COLOR       r,g,b triple for the track line
# Writes <sample>-FILE_TAG-<window>-<cutoff>-noise.wig with one fixedStep
# section per chromosome, in the order chromosomes appear in w1.<sample>.
# Uses z0.<sample> and z1.<sample> as scratch files.
write_wig() {
  wig_file=$sample-$2-${plot_window_size}-${cutoff}-noise.wig
  cut -f1,"$1" "w1.$sample" > "z0.$sample"
  cut -d':' -f1 "z0.$sample" | grep -v '^id' | uniq > "z1.$sample"
  echo "track type=wiggle_0 name='$sample-noise-$3' description='Read coverage ($sample-noise, $4)' color=$5" > "$wig_file"
  while IFS= read -r wig_chrom; do
    # Blank entries were skipped by the old word-splitting loop; keep that.
    [ -n "$wig_chrom" ] || continue
    echo "fixedStep chrom=$wig_chrom start=1 step=$step" >> "$wig_file"
    grep "^$wig_chrom:" "z0.$sample" | cut -f2 >> "$wig_file"
  done < "z1.$sample"
}

write_wig 9 nor-ngtv 'Nrm-(-)' 'Normalized, negative strand'    0,0,255
write_wig 8 nor-pstv 'Nrm-(+)' 'Normalized, positive strand'    255,0,0
write_wig 6 unq-ngtv 'Unq-(-)' 'Unique-mapped, negative strand' 0,128,128
write_wig 5 unq-pstv 'Unq-(+)' 'Unique-mapped, positive strand' 128,128,0

rm z*."$1"
rm w*."$1"


##########################################
####  mapped reads length distribution  ##
##########################################
# Tally the distinct (read id, column 6) pairs of $1.genome-v2-50.sam by
# column-6 value with the trailing 'M' removed, then index the table.
grep -v '^@' $1.genome-v2-50.sam \
  | cut -f1,6 \
  | sort -u \
  | cut -f2 \
  | sed 's/M$//' \
  | sort -n \
  | ~/CORE/bin/count > z0.$1

# Pasting the file beside itself and keeping columns 2-3 swaps the two
# columns of the count output (assuming it has exactly two); each row is then
# prefixed with '>'.
paste z0.$1 z0.$1 \
  | cut -f2,3 \
  | sed 's/^/>/' > $1.lenth_distribution
lt_create_idx $1.lenth_distribution -q

############################################
####  make genecentric table in easy way  ##
############################################
# Build per-gene and per-exon read tables from $1.genome-v2-50.bed.
# For each feature type (gene, exon) six indexed tables are written:
#   <feature>_count+rpm / -rpm   sum of (COUNT/NMAP), scaled to 1e6/nomorized_count
#   <feature>_count+n   / -n     sum of (COUNT/NMAP)
#   <feature>_count+    / -      sum of COUNT
# '+' tables use reads on the same strand as the feature, '-' tables reads on
# the opposite strand. Read ids are expected to look like SEQ:COUNT:NMAP.
#
# The twelve copy-pasted pipelines share one helper, and sort keys use
# POSIX -k instead of the obsolete "+POS -POS" form.
sample=$1

# Normalisation count = clipped + non-clipped reads of the un-merged sample.
nn=$(echo "$1" | sed 's/-merged//')
read_clipped=$(grep "^$nn	read_clipped	" summary | tail -1 | cut -f3)
read_noclipped=$(grep "^$nn	read_noclipped	" summary | tail -1 | cut -f3)
nomorized_count=$(echo "$read_clipped + $read_noclipped" | bc -l)

echo "$1	nomorized_count	$nomorized_count" >> summary

# Map rows are "id <TAB> read strand <TAB> feature <TAB> feature strand".
same_strand='	\+	.*\+$|	-	.*-$'
opposite_strand='	-	.*\+$|	\+	.*-$'

# tally_features MAP STRAND_ERE MODE OUT
#   MAP         gene_map.<sample> or exon_map.<sample>
#   STRAND_ERE  extended regex selecting the strand combination
#   MODE        rpm | n | raw (see table list above)
#   OUT         output table; indexed with lt_create_idx afterwards
# Uses z1.<sample> as scratch (left in place, as before).
tally_features() {
  if [ "$3" = raw ]; then
    # Drop the trailing :NMAP, then the leading SEQ: -> "COUNT<TAB>feature".
    grep -E "$2" "$1" | cut -f1,3 | sort -u -k2,2 -k1,1 \
      | sed 's/:[0-9][0-9]*	/	/' | cut -d':' -f2- \
      | group -g 1 -a 0 -d '+' -c | sed 's/+$//' > "z1.$sample"
  else
    # Drop the leading SEQ: and rewrite COUNT:NMAP as "(COUNT/NMAP)".
    grep -E "$2" "$1" | cut -f1,3 | sort -u -k2,2 -k1,1 \
      | cut -d':' -f2- | sed 's/^/(/' | sed 's/	/)	/' | sed 's#:#/#' \
      | group -g 1 -a 0 -d '+' -c | sed 's/+$//' > "z1.$sample"
  fi

  if [ "$3" = rpm ]; then
    cut -f2 "z1.$sample" | bc -l \
      | sed 's/^/1000000*/' | sed 's#$#XXX#' | sed "s#XXX#/$nomorized_count#" | bc -l \
      | paste "z1.$sample" - | cut -f1,3 | sed 's/^/>/' > "$4"
  else
    cut -f2 "z1.$sample" | bc -l \
      | paste "z1.$sample" - | cut -f1,3 | sed 's/^/>/' > "$4"
  fi
  lt_create_idx "$4" -q
}

for feature in gene exon; do
  map_file=${feature}_map.$sample
  "$HOME/TOOLS/bin/intersectBed" -wa -wb -a "$sample.genome-v2-50.bed" \
      -b "/nlmusr/gchirn/linux/UCSC_GENOME/$2/${2}_refseq-${feature}.bed" \
    | cut -f4,6,13,15 | sort -u > "$map_file"

  tally_features "$map_file" "$same_strand"     rpm "${feature}_count+rpm.$sample"
  tally_features "$map_file" "$opposite_strand" rpm "${feature}_count-rpm.$sample"
  tally_features "$map_file" "$same_strand"     n   "${feature}_count+n.$sample"
  tally_features "$map_file" "$opposite_strand" n   "${feature}_count-n.$sample"
  tally_features "$map_file" "$same_strand"     raw "${feature}_count+.$sample"
  tally_features "$map_file" "$opposite_strand" raw "${feature}_count-.$sample"
done
exit

######################
##  get exon count  ##
######################
# NOTE(review): the unconditional `exit` that closes the genecentric section
# just above means nothing from here on is executed.
#exon_read=`grep -E '	-	.*	-$|	\+	.*\+$' exon_map.$1 | cut -f1 | sort -u | cut -d':' -f2 | $HOME/CORE/bin/sum`
#echo "$1	exon_read	$exon_read" >> summary

# Sum the second ':'-field over the distinct read ids in $1.rep_vir.sam.
te_vir=$(grep -v '^@' $1.rep_vir.sam \
  | cut -f1 \
  | sort -u \
  | cut -d':' -f2 \
  | $HOME/CORE/bin/sum)
echo "$1	te_virus	$te_vir" >> summary

