#!/usr/bin/perl
#
# spa5.pl
# duff 8/2/2009

# "Simple Paired Alignment (SPA) --- 5"

# Runs Bowtie for READ1 and READ2 against reference database 
# (typically, a concatenation of Genome and Junction DBs); 
# outputs paired-end alignments, harvests uniquely-aligning single reads from non-aligning paired-ends,
# and provides summarizing output file. 

# ****THIS SCRIPT IS RAM-INTENSIVE (~9 GB for -v 10 -m 10;  ~13.3 GB for -v 100 -m 100)*******


# From bowtie alignment output files for READ1 and READ2
# create hash mapping from READ1ID to list of chromo,start,strand, and
# create hash mapping from READ2ID to list of chromo,start,strand
# (list typically includes alignments to BOTH genome AND jctns).
#
# Then consider each READID
#    -does mate exist?
#    -for each possible pairing over cross-product of chromo,start,strand lists,
#        -same chromo?   
#        -correct strand orientation?
#        -compute distance
#
#        
#
# -Output paired-alignment if unique mapping satisfies 
#  chromo,strand, and max-distance criteria
#  (no min-distance resolution of multiple candidate pairs case).
# 
# -Funnel other cases to another output file.
#
# -Bowtie parameter -B 1 (output reference sequence starts at 1)
# -Expanded list of command-line args,
#  including Bowtie trim parameter values, reference database, overhang parameter, and SPA output directory
#  (whose subdirectories are created automatically). 
# -If paired-end alignment candidates that satisfy chromo/strand/dist criteria are not unique, 
#  then output to a separate output file. For these multi-alignment cases, do min-dist resolution and write 
#  to separate output file.
#  SINGLE-READ HARVESTING:
# -If one end of paired-end alignment has no mate, and aligns uniquely, then output to separate output file
# -If no candidate pairs satisfy the chromo/strand/dist criteria, then if read1 or read2 map uniquely,
#  output to a separate output file.


#CAVEATS
#-make sure that the OVERHANG parameter matches that used by jctn_db_util.pl in generating $REF_DB
#-make sure to use $REF_DB whose headers reflect 1-based starts 

use strict;
use FileHandle;

#----SCRIPT PARAMETERS----------

my $BOWTIE_PATH = "/path_to_bowtie/bowtie-0.10.1";   #directory that contains the bowtie executable

#BOWTIE RUN PARAMETERS (passed straight through to the bowtie command line):
my $VPAR    = 2;           # -v      mismatches allowed per alignment
my $KPAR    = 10;          # -k      report at most this many alignments per read
my $MPAR    = 10;          # -m      suppress all alignments for a read that has more than this many
my $PPAR    = 8;           # -p      number of parallel bowtie threads
my $YPAR    = "-y";        # -y      "try hard" to find valid alignments
my $BESTPAR = "--best";    # --best  report best quality alignments

#SPA SCRIPT PARAMETERS
my $MAX_DIST = 200000;     # largest insert length accepted for a feasible read pair


#----COMMAND LINE PARAMETERS-----

# Positional command-line arguments (all eight are required):
#   READS_DIR     pathname to reads directory
#   FLOWCELL      Flowcell ID
#   LANE          lane identifier (used in input and output file names)
#   TRIM_5_PRIME  number of bases to trim from 5' end of read
#   TRIM_3_PRIME  number of bases to trim from 3' end
#   REF_DB        full pathname of bowtie reference database
#   OVERHANG      minimal overhang required for junction hit
#   SPA_RUN_ROOT  root directory for spa output files and subdirectories
#
# Fail early with a usage message instead of running with undefined paths.
if (@ARGV < 8) {
  die("usage: $0 READS_DIR FLOWCELL LANE TRIM_5_PRIME TRIM_3_PRIME REF_DB OVERHANG SPA_RUN_ROOT\n");
}

my ($READS_DIR, $FLOWCELL, $LANE, $TRIM_5_PRIME, $TRIM_3_PRIME, $REF_DB, $OVERHANG, $SPA_RUN_ROOT) = @ARGV;

# These three values are used in coordinate arithmetic further down; a non-numeric
# value would silently be treated as 0, so reject it here.
foreach my $numeric_arg (["TRIM_5_PRIME", $TRIM_5_PRIME], ["TRIM_3_PRIME", $TRIM_3_PRIME], ["OVERHANG", $OVERHANG]) {
  my ($arg_name, $arg_value) = @{$numeric_arg};
  if ($arg_value !~ /^\d+$/) {
    die("$arg_name must be a non-negative integer, got '$arg_value'\n");
  }
}


#-----------------------PRELIMINARIES-----------------------------------

# Scratch variables shared by the sections below.
my ($flowcell_string, $line, $dum);
my (@f, @g, @h);                       #field arrays filled by split

my ($READ_LENGTH_RAW,                  #length of raw read from sequence.txt file
    $READ_LENGTH,                      #TRIMMED read length
    $N_READS,                          #number of reads in the sequence.txt files
    $N_PAIRED_END_ALIGN);

my ($wc_string,                        #word-count (wc) string returned from backtick call
    $new_dir);                         #directory currently being created


#from command-line args, parse to construct pathnames to sequence files and append-string for output etc


# Run set-up: create the run directory, open the run summary file, derive the raw and
# trimmed read lengths from the read1 file, count the reads in both read files, and
# echo the run configuration to STDOUT and to the summary file.
#
# Messages are written with print rather than printf: the strings contain interpolated
# paths and IDs, and a literal "%" in any of them would be taken as a format directive.

$new_dir = $SPA_RUN_ROOT;   #create spa run directory
if (! (-e $new_dir)){                          #(if it doesn't exist already)
  mkdir($new_dir) or die("can't create directory $new_dir: $!");
}

my $filename = "spa_run_summary_".$FLOWCELL."_lane".$LANE.".txt";

my $SPA_RUN_SUMMARY = FileHandle->new("$SPA_RUN_ROOT/$filename", "w") or die("can't open $SPA_RUN_ROOT/$filename: $!");

print("Running SPA5 on flowcell $FLOWCELL lane $LANE\n");
$SPA_RUN_SUMMARY->print("Running SPA5 on flowcell $FLOWCELL lane $LANE $BESTPAR\n");

#determine READ_LENGTH_RAW from the first sequence line (second line) of the read1 file
$filename = $FLOWCELL."_lane".$LANE."_read1.txt";
open(my $probe_fh, '<', "$READS_DIR/$filename") or die("can't open $READS_DIR/$filename: $!");
my $header_line = <$probe_fh>;   #eat line
$line = <$probe_fh>;             #read read sequence line
close($probe_fh);
defined($line) or die("$READS_DIR/$filename has no read sequence line");
chomp($line);

$READ_LENGTH_RAW = length($line);
$READ_LENGTH = $READ_LENGTH_RAW - $TRIM_5_PRIME - $TRIM_3_PRIME;

foreach my $length_msg ("  Raw read length is $READ_LENGTH_RAW \n",
                        "  Trim parameters are: 5' trim = $TRIM_5_PRIME \n",
                        "                       3' trim = $TRIM_3_PRIME \n",
                        "  Trimmed read-length is $READ_LENGTH \n \n ") {
  print($length_msg);
  $SPA_RUN_SUMMARY->print($length_msg);
}

#count reads in each file as the number of lines containing "@HW".
#grep is run through a list-form pipe so the path never passes through a shell, and the
#count is pulled out with a regex so padded output cannot yield an empty value.
#$N_READS ends up holding the READ2 count, which is what the summary reports.
foreach my $mate (1, 2) {
  $filename = $FLOWCELL."_lane".$LANE."_read".$mate.".txt";
  $dum = $READS_DIR."/".$filename;
  open(my $grep_fh, '-|', 'grep', '-c', '-e', '@HW', '--', $dum) or die("can't run grep on $dum: $!");
  $wc_string = <$grep_fh>;
  close($grep_fh);               #grep exits non-zero when nothing matches; the count is still valid
  $N_READS = (defined($wc_string) && $wc_string =~ /(\d+)/) ? $1 : 0;
  print("There are $N_READS paired-end READ$mate reads in the lane. \n");
}

print("These reads will be aligned to the reference database: $REF_DB \n");
$SPA_RUN_SUMMARY->print("There are $N_READS paired-end reads in the lane. \n");
$SPA_RUN_SUMMARY->print("These reads will be aligned to the reference database: $REF_DB \n ");

print("Bowtie parameters are -v $VPAR -k $KPAR -m $MPAR -p $PPAR $YPAR \n \n");
$SPA_RUN_SUMMARY->print("Bowtie parameters are -v $VPAR -k $KPAR -m $MPAR -p $PPAR $YPAR \n \n");

#create directories for alignments, bowtie misses (--unfq output) and anomaly files
#(if they don't exist already). BOWTIE_MISS must exist before bowtie runs because
#bowtie is told to write its unaligned reads there.
foreach my $subdir ("BOWTIE_ALIGN", "BOWTIE_ALIGN/BOWTIE_MISS", "ANOMALY") {
  $new_dir = $SPA_RUN_ROOT."/".$subdir;
  if (! (-e $new_dir)){
    mkdir($new_dir) or die("can't create directory $new_dir: $!");
  }
}


#
#------------------------------BOWTIE ALIGNMENT INVOCATIONS--------------------------------  

#Align read1, then read2, to $REF_DB.
#(NOTE -B 1 PARAMETER -- number the first base of reference 1 instead of zero)
#The command is passed to system() as a list so that paths are never re-parsed by a
#shell, and the exit status is checked so that a failed alignment stops the run
#instead of letting the pairing stage read missing or stale hit files.
foreach my $mate (1, 2) {

  print("Bowtie read$mate to $REF_DB\n");

  my $mate_tag = $FLOWCELL."_lane".$LANE."_read".$mate;

  my @bowtie_cmd = (
    "$BOWTIE_PATH/bowtie",
    "-q",
    "-v", $VPAR,
    "-k", $KPAR,
    "-m", $MPAR,
    "-p", $PPAR,
    "-B", 1,
    split(' ', "$YPAR $BESTPAR"),        #optional flags; empty settings contribute no argument
    "--unfq", "$SPA_RUN_ROOT/BOWTIE_ALIGN/BOWTIE_MISS/bowtie_miss_${mate_tag}_sequence.txt",
    "--solexa-quals",
    $REF_DB,
    "$READS_DIR/${mate_tag}.txt",
    "$SPA_RUN_ROOT/BOWTIE_ALIGN/bowtie_hit_${mate_tag}.out",
  );

  if (system(@bowtie_cmd) != 0) {
    die($? == -1 ? "can't run $bowtie_cmd[0]: $!"
                 : "bowtie alignment of read$mate failed (wait status $?)");
  }
}


#-------------------------------PAIRED END ALIGNMENT--------------------------------

#open the bowtie alignment files for read1 and read2
#open the bowtie alignment files for read1 and read2
#(three-argument open: the path is never interpreted as an open mode)
my $bowtie_hit_prefix = "$SPA_RUN_ROOT/BOWTIE_ALIGN/bowtie_hit_".$FLOWCELL."_lane".$LANE;
open(READ1, '<', $bowtie_hit_prefix."_read1.out") or die("can't open ${bowtie_hit_prefix}_read1.out: $!");
open(READ2, '<', $bowtie_hit_prefix."_read2.out") or die("can't open ${bowtie_hit_prefix}_read2.out: $!");

#Opens $path for writing (truncating it) and returns the FileHandle;
#dies with the system error message if the file cannot be opened.
my $open_output = sub {
  my ($path) = @_;
  my $handle = FileHandle->new($path, "w") or die("can't open $path: $!");
  return $handle;
};

my $anomaly_prefix   = $SPA_RUN_ROOT."/ANOMALY/".$FLOWCELL."_lane".$LANE;   #anomaly files live under ANOMALY/
my $alignment_prefix = $SPA_RUN_ROOT."/".$FLOWCELL."_lane".$LANE;           #alignment files live in the run root


#ANOMALY FILES
#read1's with no valid bowtie alignment for mate read2
my $ANOM_NO_MATE1 = $open_output->($anomaly_prefix."_no_mate_for_read1.out");

#read2's with no valid bowtie alignment for mate read1
my $ANOM_NO_MATE2 = $open_output->($anomaly_prefix."_no_mate_for_read2.out");

#reads that have no paired-end alignment that satisfy the chromo/strand/max-dist criteria
my $ANOM_CHROMO_DIST_STRAND = $open_output->($anomaly_prefix."_chromo_dist_strand.out");

#reads that have more than one paired-end alignment that satisfy the chromo/strand/max-dist criteria
my $MULTI_ALIGN_SPA_OUT = $open_output->($anomaly_prefix."_multi_align_spa.out");


#ALIGNMENT FILES
#paired-end alignment file --- these are paired-end reads that have exactly 1 alignment satisfying the chrom/strand/max-dist criteria
my $SPA_OUT = $open_output->($alignment_prefix."_spa.out");

#paired-end alignment file --- minimum distance resolved multi_align_spa_out cases
my $MULTI_ALIGN_MIN_DIST = $open_output->($alignment_prefix."_multi_align_min_dist_spa.out");

#single_read alignment file --- harvested from read1's that map uniquely but have no read2 mate alignments
my $SINGLE_READ_HARVEST_NO_MATE1 = $open_output->($alignment_prefix."_single_read_harvest_no_mate1_spa.out");

#single_read alignment file --- harvested from read2's that map uniquely but have no read1 mate alignments
my $SINGLE_READ_HARVEST_NO_MATE2 = $open_output->($alignment_prefix."_single_read_harvest_no_mate2_spa.out");

#single_read alignment file --- harvested from read1's that align uniquely but together with read2 candidates fail paired-end criteria
my $SINGLE_READ_HARVEST_FAIL_CRITERIA1 = $open_output->($alignment_prefix."_single_read_harvest_fail_criteria1_spa.out");

#single_read alignment file --- harvested from read2's that align uniquely but together with read1 candidates fail paired-end criteria
my $SINGLE_READ_HARVEST_FAIL_CRITERIA2 = $open_output->($alignment_prefix."_single_read_harvest_fail_criteria2_spa.out");



# Working variables for the hashing and pairing stages.
my (@g, @h);                           #split fields: read name parts, junction header parts
my ($tile, $x, $y);                    #read name components used to build the read key

my %read1;   #read1{read_id}[field_index]->array of items of type field_index    field_index=0 => chromo 1=>start  2=>strand   
             #                                                             ---this is a compressed structure to minimize RAM
my %read2;   #same layout as %read1, for read2 alignments

my ($read_id, $start, $strand, $chromo);             #fields of the alignment line being hashed

my ($dist, $min_dist);                               #insert length of the current candidate pair

my ($ok_chromo, $ok_dist, $ok_strand);               #per-criterion pass flags for the current pair

my ($chromo1, $chromo2);                             #candidate pair being evaluated:
my ($start1,  $start2);                              #  one alignment of read1 (suffix 1)
my ($end1,    $end2);                                #  against one alignment of read2 (suffix 2)
my ($strand1, $strand2);

my $feasible_pair;                                   #number of candidate pairs passing all criteria

my $offset;                                          #start offset within a junction sequence

my ($closest_pair_dist, $closest_pair_i, $closest_pair_j);   #smallest-insert feasible pair so far



#READ1 ALIGNMENTS
printf("hashing read1 alignments...\n");

# Load every read1 bowtie hit into %read1, keyed by tile_x_y taken from the read name.
# For each key, slot 0 collects reference names, slot 1 starts and slot 2 strands,
# in the order the hits appear in the file.
while (defined($line = <READ1>)) {
  chomp($line);

  #hit line fields: read name, strand, reference name, start (STARTS ARE 1-BASED COORDINATES!)
  @f = split /\s+/,$line;
  ($read_id, $strand, $chromo, $start) = @f[0 .. 3];

  #rebuild the key from colon-separated fields 2..4 of the read name, minus the /1 mate suffix
  @g = split /:/,$read_id;
  ($tile, $x, $y) = @g[2 .. 4];
  $y =~ s/\/1//;
  $read_id = join("_", $tile, $x, $y);

  #an underscore in the reference name marks a junction: keep only the header fields
  #chromo_start1_end1_start2_end2_jtype (jtype is junction db type), where starts & ends
  #are the INCLUSIVE coordinates of the two concatenated intervals
  if ($chromo =~ /_/) {
    @h = split /_/,$chromo;
    $chromo = join("_", @h[1, 2, 3, 6, 7, 9]);
  }

  my $hit_lists = ($read1{$read_id} ||= []);
  push(@{$hit_lists->[0]}, $chromo);
  push(@{$hit_lists->[1]}, $start);
  push(@{$hit_lists->[2]}, $strand);
}

close(READ1);



#READ2 ALIGNMENTS
printf("hashing read2 genome alignments...\n");

# Load every read2 bowtie hit into %read2, keyed by tile_x_y taken from the read name.
# For each key, slot 0 collects reference names, slot 1 starts and slot 2 strands,
# in the order the hits appear in the file.
while (defined($line = <READ2>)) {
  chomp($line);

  #hit line fields: read name, strand, reference name, start (STARTS ARE 1-BASED COORDINATES)
  @f = split /\s+/,$line;
  ($read_id, $strand, $chromo, $start) = @f[0 .. 3];

  #rebuild the key from colon-separated fields 2..4 of the read name, minus the /2 mate suffix
  @g = split /:/,$read_id;
  ($tile, $x, $y) = @g[2 .. 4];
  $y =~ s/\/2//;
  $read_id = join("_", $tile, $x, $y);

  #an underscore in the reference name marks a junction: keep only the header fields
  #chromo_start1_end1_start2_end2_jtype, where starts & ends are the INCLUSIVE
  #coordinates of the two concatenated intervals
  if ($chromo =~ /_/) {
    @h = split /_/,$chromo;
    $chromo = join("_", @h[1, 2, 3, 6, 7, 9]);
  }

  my $hit_lists = ($read2{$read_id} ||= []);
  push(@{$hit_lists->[0]}, $chromo);
  push(@{$hit_lists->[1]}, $start);
  push(@{$hit_lists->[2]}, $strand);
}

close(READ2);
  

#-----------------------MAIN PROCESSING OF READ PAIRS------------------------------------

printf("Processing read pairs....\n");

# Maps one stored alignment back to the span of the original raw (untrimmed) read.
#   arguments: stored reference name (a plain chromosome, or for a junction
#              chromo_start1_end1_start2_end2_jtype), bowtie start (1-based; for a
#              junction this is the offset into the junction sequence), strand
#   returns:   (chromosome, start, end) with inclusive coordinates
my $raw_read_span = sub {
  my ($ref_name, $aln_start, $aln_strand) = @_;

  #for a "+" hit the 5' trim sits before the aligned start and the 3' trim after the
  #aligned end; for any other strand the two are swapped
  my ($low_trim, $high_trim) = ($aln_strand eq "+") ? ($TRIM_5_PRIME, $TRIM_3_PRIME)
                                                     : ($TRIM_3_PRIME, $TRIM_5_PRIME);

  if (index($ref_name,'_') > 0) {   #special parse for junctions (check later for one off on starts, etc)
    #fields: [0]chr [1]interval1_start [2]interval1_end [3]interval2_start [4]interval2_end [5]jtype
    my @jct = split /_/, $ref_name;
    return ($jct[0],
            $jct[1] + ($aln_start-1) - $low_trim,
            $jct[3] + ($OVERHANG-1) + ($aln_start-1) + $high_trim);
  }

  #not a junction read
  my $raw_start = $aln_start - $low_trim;
  return ($ref_name, $raw_start, $raw_start + $READ_LENGTH_RAW - 1);
};

my $linecount=0;

# For every read1 key: report reads without a read2 mate, otherwise test the full
# cross-product of read1 x read2 alignments against the chromo/strand/max-dist criteria
# and route the read to the output file that matches the number of feasible pairs.
# Alignment counts use $#{ $arrayref }; the older $#{@{...}} spelling is a compile-time
# error on perl 5.22 and later.
foreach $read_id (keys %read1){

  $linecount++;
  if ($linecount % 1000000 == 0){
    print("$linecount read1 lines read \n");
  }

  my $hits1 = $read1{$read_id};   #[0] chromos, [1] starts, [2] strands

  if (!exists $read2{$read_id}){               #READ1's with no READ2 mate
    $ANOM_NO_MATE1->print("$read_id\n");
    if($#{$hits1->[0]} == 0){         #if read is unique then write its alignment to single-read harvest file
      $SINGLE_READ_HARVEST_NO_MATE1->print("$read_id   $hits1->[0][0]  $hits1->[1][0]  $hits1->[2][0] \n");
    }
    next;
  }

  #both read1 and read2 align
  my $hits2 = $read2{$read_id};

  $closest_pair_dist = 9999999;
  $feasible_pair = 0;
  my @ok_string;               #binary string: ok_chromo.ok_strand.ok_dist

  my @i_star;      #arrays of cross-product indices that satisfy the criteria
  my @j_star;
  my @dist_star;   #array of insert distances associated with above

  for my $i (0 .. $#{$hits1->[0]}){       #loop over cross-product of alignments

    #read1's raw span does not depend on $j, so compute it once per $i
    $strand1 = $hits1->[2][$i];
    ($chromo1, $start1, $end1) = $raw_read_span->($hits1->[0][$i], $hits1->[1][$i], $strand1);

    for my $j (0 .. $#{$hits2->[0]}){

      $strand2 = $hits2->[2][$j];
      ($chromo2, $start2, $end2) = $raw_read_span->($hits2->[0][$j], $hits2->[1][$j], $strand2);

      #At this point $start1 and $end1 are the (inclusive) coordinates of the original raw untrimmed read1, similarly for read2

      #CRITERIA CHECKING

      $ok_chromo = ($chromo1 eq $chromo2) ? 1 : 0;
      $ok_strand = 0;
      $ok_dist   = 0;
      $dist      = 9999999;    #stays at this sentinel when the chromosomes differ

      #strand (and distance calc)

      if ($ok_chromo == 1){

        if ( ($strand1 eq '+') && ($strand2 eq '-') && ($end1 <= $end2) ){         #last clause allows reads to overlap but not read "past" each other
          $ok_strand = 1;
          $dist = $end2 - $start1;        #dist is "genomic insert length" --- distance between starts of paired-end sequencing
        }
        elsif ( ($strand2 eq '+') && ($strand1 eq '-') && ($end2 <= $end1) ){
          $ok_strand = 1;
          $dist = $end1 - $start2;
        }
        else{                                   #strand is not ok
          #use the starts of (strand-specific) sequencing: the high end for a '-' read
          my $seq_start1 = ($strand1 eq '-') ? $end1 : $start1;
          my $seq_start2 = ($strand2 eq '-') ? $end2 : $start2;
          $dist = abs($seq_start1 - $seq_start2);
        }
      }

      if (($dist < $MAX_DIST) && ($ok_chromo == 1)){
        $ok_dist = 1;

        if ($ok_strand == 1){
          $feasible_pair++;
          push(@i_star,$i);
          push(@j_star,$j);
          push(@dist_star,$dist);

          if($dist < $closest_pair_dist){      #min-distance pair to resolve multiple-criteria-alignment case
            $closest_pair_dist = $dist;
            $closest_pair_i = $i;
            $closest_pair_j = $j;
          }
        }
      }

      $ok_string[$i][$j] = $ok_chromo.$ok_strand.$ok_dist;

    }  #next cross-product pair
  }


  if ($feasible_pair == 1){          #exactly one feasible pair: the paired-end alignment

    my ($i, $j) = ($i_star[0], $j_star[0]);
    $SPA_OUT->print("$read_id   $hits1->[0][$i]  $hits1->[1][$i]  $hits1->[2][$i]    $hits2->[0][$j]  $hits2->[1][$j]  $hits2->[2][$j]     $ok_string[$i][$j] $dist_star[0]\n");

  }
  elsif ($feasible_pair > 1){        #several feasible pairs: list them all, then resolve by minimum insert length

    $MULTI_ALIGN_SPA_OUT->print("Multple feasible alignments for read $read_id:\n");

    for my $k (0 .. $#i_star){
      my ($i, $j) = ($i_star[$k], $j_star[$k]);
      $MULTI_ALIGN_SPA_OUT->print("$read_id   $hits1->[0][$i]  $hits1->[1][$i]  $hits1->[2][$i]    $hits2->[0][$j]  $hits2->[1][$j]  $hits2->[2][$j]     $ok_string[$i][$j] $dist_star[$k]\n");
    }

    my ($i, $j) = ($closest_pair_i, $closest_pair_j);
    $MULTI_ALIGN_MIN_DIST->print("$read_id   $hits1->[0][$i]  $hits1->[1][$i]  $hits1->[2][$i]    $hits2->[0][$j]  $hits2->[1][$j]  $hits2->[2][$j]     $ok_string[$i][$j] $closest_pair_dist\n");

  }
  else{                              #no feasible pair: log every candidate, then harvest unique single reads

    $ANOM_CHROMO_DIST_STRAND->print("$read_id has no satisfying paired-end alignments from the following candidates: \n");
    for my $i (0 .. $#{$hits1->[0]}){
      for my $j (0 .. $#{$hits2->[0]}){
        $ANOM_CHROMO_DIST_STRAND->print("$hits1->[0][$i]  $hits1->[1][$i]  $hits1->[2][$i]    $hits2->[0][$j]  $hits2->[1][$j]  $hits2->[2][$j]     $ok_string[$i][$j]\n");
      }
    }

    if($#{$hits1->[0]} == 0){         #if read is unique then write its alignment to single-read harvest file
      $SINGLE_READ_HARVEST_FAIL_CRITERIA1->print("$read_id   $hits1->[0][0]  $hits1->[1][0]  $hits1->[2][0] \n");
    }

    if($#{$hits2->[0]} == 0){         #if read is unique then write its alignment to single-read harvest file
      $SINGLE_READ_HARVEST_FAIL_CRITERIA2->print("$read_id   $hits2->[0][0]  $hits2->[1][0]  $hits2->[2][0] \n");
    }

  }

}   #next READ1



#Finally (loose end), READ2's with no READ1 mate

# Report every read2 key that has no read1 alignment; when that read2 has exactly one
# alignment, also write it to the single-read harvest file.
# The alignment count uses $#{ $arrayref }; the older $#{@{...}} spelling is a
# compile-time error on perl 5.22 and later.
foreach $read_id (keys %read2){
  next if exists $read1{$read_id};

  $ANOM_NO_MATE2->print("$read_id\n");
  if($#{$read2{$read_id}[0]} == 0){         #if read is unique then write its alignment to single-read harvest file
    $SINGLE_READ_HARVEST_NO_MATE2->print("$read_id   $read2{$read_id}[0][0]  $read2{$read_id}[1][0]  $read2{$read_id}[2][0] \n");
  }
}


#print summary of counts etc to SPA_RUN_SUMMARY  

# Close every alignment/anomaly output file BEFORE counting its lines: until a handle
# is closed, its last buffered lines are not on disk and would be left out of the counts.
foreach my $out_handle ($SPA_OUT,
                        $ANOM_NO_MATE1,
                        $ANOM_NO_MATE2,
                        $ANOM_CHROMO_DIST_STRAND,
                        $MULTI_ALIGN_SPA_OUT,
                        $MULTI_ALIGN_MIN_DIST,
                        $SINGLE_READ_HARVEST_NO_MATE1,
                        $SINGLE_READ_HARVEST_NO_MATE2,
                        $SINGLE_READ_HARVEST_FAIL_CRITERIA1,
                        $SINGLE_READ_HARVEST_FAIL_CRITERIA2) {
  $out_handle->close or warn("error closing an output file: $!");
}

#Returns the number of lines in the file at $path. Counting is done in perl so the
#result does not depend on the output layout of wc and the path never reaches a shell.
my $count_lines = sub {
  my ($path) = @_;
  open(my $count_fh, '<', $path) or die("can't open $path: $!");
  my $n_lines = 0;
  while (defined(my $counted_line = <$count_fh>)) {
    $n_lines++;
  }
  close($count_fh);
  return $n_lines;
};

my $lane_prefix    = $SPA_RUN_ROOT."/".$FLOWCELL."_lane".$LANE;
my $anomaly_prefix_for_counts = $SPA_RUN_ROOT."/ANOMALY/".$FLOWCELL."_lane".$LANE;


#print summary of counts etc to SPA_RUN_SUMMARY  

#paired-end alignments
my $N_PAIRED_END_ALIGNMENTS = $count_lines->($lane_prefix."_spa.out");
$SPA_RUN_SUMMARY->print("From the $N_READS original total paired-end reads in the lane,\n");
$SPA_RUN_SUMMARY->print("SPA found $N_PAIRED_END_ALIGNMENTS paired-end alignments that uniquely satisfied the chromo/strand/dist criteria, \n");

#paired-end alignments resolved via min-dist
my $N_MULTI_ALIGN_MIN_DIST = $count_lines->($lane_prefix."_multi_align_min_dist_spa.out");
$SPA_RUN_SUMMARY->print("along with $N_MULTI_ALIGN_MIN_DIST paired-end alignments that were resolved via minimizing genomic insert length\n");
$SPA_RUN_SUMMARY->print("over multiple paired-end alignment candidates satisfying the chromo/strand/dist criteria. \n");

#no_mate1
my $N_NO_MATE1 = $count_lines->($anomaly_prefix_for_counts."_no_mate_for_read1.out");
$SPA_RUN_SUMMARY->print("Out of the $N_READS original total read pairs,\n");
$SPA_RUN_SUMMARY->print("$N_NO_MATE1 read1's that aligned had no aligning read2 mate.\n");

#single_read_harvest_no_mate1
my $N_SINGLE_READ_HARVEST_NO_MATE1 = $count_lines->($lane_prefix."_single_read_harvest_no_mate1_spa.out");
$SPA_RUN_SUMMARY->print("Of these,\n");
$SPA_RUN_SUMMARY->print("$N_SINGLE_READ_HARVEST_NO_MATE1 read1's aligned uniquely.\n");

#no_mate2
my $N_NO_MATE2 = $count_lines->($anomaly_prefix_for_counts."_no_mate_for_read2.out");
$SPA_RUN_SUMMARY->print("Out of the $N_READS original total read pairs,\n");
$SPA_RUN_SUMMARY->print("$N_NO_MATE2 read2's that aligned had no aligning read1 mate.\n");

#single_read_harvest_no_mate2
my $N_SINGLE_READ_HARVEST_NO_MATE2 = $count_lines->($lane_prefix."_single_read_harvest_no_mate2_spa.out");
$SPA_RUN_SUMMARY->print("Of these,\n");
$SPA_RUN_SUMMARY->print("$N_SINGLE_READ_HARVEST_NO_MATE2 read2's aligned uniquely.\n");

#single_read_harvest_fail_criteria1
my $N_SINGLE_READ_HARVEST_FAIL_CRITERIA1 = $count_lines->($lane_prefix."_single_read_harvest_fail_criteria1_spa.out");
$SPA_RUN_SUMMARY->print("For cases where there were multiple paired-end alignment candidates\n");
$SPA_RUN_SUMMARY->print("but none of them satisfied the chromo/strand/dist criteria,\n");
$SPA_RUN_SUMMARY->print("$N_SINGLE_READ_HARVEST_FAIL_CRITERIA1 read1's aligned uniquely, and \n");

#single_read_harvest_fail_criteria2
my $N_SINGLE_READ_HARVEST_FAIL_CRITERIA2 = $count_lines->($lane_prefix."_single_read_harvest_fail_criteria2_spa.out");
$SPA_RUN_SUMMARY->print("$N_SINGLE_READ_HARVEST_FAIL_CRITERIA2 read2's aligned uniquely.\n");


$SPA_RUN_SUMMARY->close or warn("error closing the run summary file: $!");