#! /usr/bin/env perl

use strict;
use warnings;
use Getopt::Long;
use Errno;
use POSIX qw(floor);
use Cwd;

use FindBin qw($RealBin);
use lib $RealBin;
use MasurcaConf qw(fail);


my ($help);
my $prefix = ".";

my $usage = "USAGE: masurca-superreads [options]. See --help for details";
my $help_str = <<EOS;
masurca-superreads [options] reads

Create super reads from Illumina reads. The read files are specified as:
   [MEAN:[STDEV:]]FRAG1[:FRAG2]
  
If unmated, do not specify FRAG2. The MEAN and STDEV of the library,
if mated, are not required. The MEAN defaults to 180 bases and the
STDEV defaults to 11% of the MEAN. Example:

     frag1.fq.gz                  Unmated library
     frag1.fq.gz:frag2.fq.gz      Mated library
     250:frag1.fq:frag2.fq        Mated library with mean = 250

Options:
  --output-dir PATH
       Create the super reads in the directory given by PATH. Create
       the directory if it does not exist.
EOS

# Parse the options; whatever is left in @ARGV afterwards is the list of
# library specifications.
GetOptions("output-dir=s" => \$prefix,
           "help"         => \$help)
  or fail($usage);

if($help) {
  print($help_str);
  exit(0);
}
if(!@ARGV) {
  print(STDERR $usage, "\n");
  exit(1);
}

# Return the library specification $spec with its read file fields (the last
# field, or the last two fields when there are at least two) made absolute
# relative to the directory $dir.
#
# Only relative names that actually exist under $dir are rewritten; anything
# else (absolute paths, missing files, malformed specifications) is returned
# unchanged so that later validation reports it exactly as the user typed it.
# Nothing is rewritten when $dir contains a ':' because the result could not
# be split back into fields.
sub anchor_read_files {
  my ($spec, $dir) = @_;
  my @fields = split(/:/, $spec);
  return $spec if(!@fields || @fields > 4 || $dir =~ /:/);

  my $nb_files = @fields >= 2 ? 2 : 1;
  my $changed  = 0;
  # Elements of an array slice are aliased by foreach, so assigning to $file
  # updates @fields in place.
  for my $file (@fields[-$nb_files .. -1]) {
    next if($file eq "" || $file =~ m{^/} || !-e "$dir/$file");
    $file = "$dir/$file";
    $changed = 1;
  }
  return $changed ? join(":", @fields) : $spec;
}

# The script changes directory to the output directory below, which would
# silently invalidate read files given relative to the invocation directory.
# Anchor them now, while the current directory is still the starting one.
my $start_dir = getcwd();
@ARGV = map { anchor_read_files($_, $start_dir) } @ARGV
  if(defined($start_dir));

# An already existing output directory is not an error.
mkdir($prefix) || $!{EEXIST}
  or fail("Failed to create directory $prefix: $!");
chdir($prefix)
  or fail("Failed to change directory to $prefix: $!");

# From here on every relative path is relative to the output directory.
open(my $conf, ">", "super_reads.conf")
  or fail("Failed to open masurca configuration file $prefix/super_reads.conf: $!");

print($conf
      "# DO NOT EDIT\n",
      "# Configuration auto generated by masurca-superreads\n\n",
      "DATA\n");

# Map a zero-based index to a lowercase letter: 0 -> "a", 1 -> "b", ...
# The index is truncated to an integer first.
sub index_to_letter {
  my $offset = int($_[0]);
  my $base   = ord("a");
  return chr($base + $offset);
}
# Round a number to the nearest integer; exact halves go towards +infinity
# (2.5 -> 3, -2.5 -> -2).
sub round {
  my $shifted = $_[0] + 0.5;
  return floor($shifted);
}

# Write one "PE = NAME MEAN STDEV FRAG1 [FRAG2]" line per library given on the
# command line, then close the DATA section.
#
# Each argument is split on ':'. With 3 fields the first one is the MEAN, with
# 4 fields the first two are MEAN and STDEV; the last one or two fields are
# always the read files. Libraries are named "pa", "pb", ... in command line
# order.
# NOTE(review): index_to_letter() only yields a letter for the first 26
# libraries; more than that produces names with punctuation characters.
for my $i (0 .. $#ARGV) {
  my $spec = $ARGV[$i];
  my @F = split(/:/, $spec);
  my ($mean, $stdev) = (180, 20);
  @F > 0 && @F <= 4 or
    fail("Invalid pe option line '$spec': expected 1 to 4 colon (:) separated fields, got " . scalar(@F));
  if(@F >= 3) {
    # Anchored on both ends: the whole field must be a number, not merely
    # contain a digit somewhere.
    $F[0] =~ /\A\d+(?:\.\d+)?\z/ or
      fail("Invalid MEAN in pe option line '$spec': not a number");
    $mean = $F[0];
    if(@F == 4) {
      $F[1] =~ /\A\d+(?:\.\d+)?\z/ or
        fail("Invalid STDEV in pe option line '$spec': not a number");
      $stdev = $F[1];
    } else {
      # No explicit STDEV: default to 11% of the given MEAN.
      $stdev = round(0.11 * $mean);
    }
  }
  my $name = "p" . index_to_letter($i);
  # Mated libraries contribute their last two fields, unmated ones only the
  # last field.
  my @reads = @F >= 2 ? @F[-2, -1] : ($F[-1]);
  print($conf join(" ", "PE = $name $mean $stdev", @reads), "\n");
}
print($conf "END\n");

# Return the number of threads to use: the number of "processor" entries
# listed in /proc/cpuinfo.
#
# Falls back to 1 when /proc/cpuinfo cannot be opened or contains no matching
# entry, so the result is always a positive integer.
sub nb_threads {
  open(my $io, "<", "/proc/cpuinfo")
    or return 1;
  my $count = grep { /^processor\s*:/ } <$io>;
  close($io);
  return $count > 0 ? $count : 1;
}

# Write the PARAMETERS section and finish the configuration file.
print($conf
      "PARAMETERS\n",
      "STOP_AFTER_SUPERREADS=1\n",
      "NUM_THREADS=", nb_threads(), "\n",
      "GRAPH_KMER_SIZE=auto\n",
      "END\n");
# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close must be checked.
close($conf)
  or fail("Failed to write masurca configuration file $prefix/super_reads.conf: $!");

# Generate the assembly script from the configuration, then run it. system()
# returns non-zero on failure, hence the "and fail".
system($RealBin . "/masurca", "-o", "super_reads.sh", "super_reads.conf")
  and fail("Failed to generate the super_reads.sh script");
system("./super_reads.sh")
  and fail("Failed to create super reads");

# Expose the result at the top of the output directory. A link that already
# exists is accepted as is; any other failure is reported and the file is
# advertised at its real location instead.
my $reads = "superReadSequences.fasta";
if(!symlink("work1/$reads", $reads) && !$!{EEXIST}) {
  print(STDERR "Warning: failed to create symlink $reads: $!\n");
  $reads = "work1/$reads";
}
print("The super reads are available in file ", getcwd(), "/$reads\n");
