#!/usr/bin/env perl

use strict;
use warnings;
use Classes::Chrommap;
use Classes::Regions;
use Classes::Frequencies;

# written by Alexander Nater, December 2013

# Set your options here:
# Site-specific input files (edit before running):
my $chrommap_file="/proj/b2010010/repos/assembly/fAlb15/linkage/fAlb15.chrom.all.20140121.txt";
my $mafsfolder="/proj/nobackup/b2010010/alexn/mafs";
my $mafslist="/home/alexn/glob/genomescan_abba_baba/mafslist.txt";

# Population settings; the three per-population arrays are parallel to each other.
my $npopulations=10;
my @poplabels=("I", "H", "CZC", "OC", "E", "SP", "CZP", "OP", "spec", "semi");
my @nindividuals=(20, 20, 20, 19, 20, 20, 20, 19, 20, 20);
my @minind=(12, 12, 12, 12, 12, 12, 12, 12, 12, 12);
my @grouplabels=("A", "B", "B");

# Analysis settings handed to the Regions/Frequencies methods further down.
my $maxsize=5000000;	# passed to subsetLoci(); presumably the maximum size of one processing chunk - TODO confirm
my $MAFfilter=0.001;	# passed to readMAFs()
my $minpropcovered=0.3;	# passed to ABBA_BABA_perwindow()

# Positional command line arguments:
#   1. output folder
#   2. comma-separated list of ids (chromosome names if the first one contains 'Chr', scaffold names otherwise)
#   3. window size (a false value such as 0 disables windowing)
#   4. step size
#   5. population groups: groups separated by ':', members separated by ',';
#      members are used as indices into @poplabels
#   6. optional BED file with loci (regions are taken from the chromosome map if omitted)
# Fail early with a usage message instead of running into undef warnings and obscure errors later.
die "Usage: $0 <outfolder> <id,id,...> <windowsize> <stepsize> <idx,idx:idx,...> [locifile]\n" unless (@ARGV >= 5);
my $outfolder=shift @ARGV;
my @ids=split(',', shift @ARGV);
my $windowsize=shift @ARGV;
my $stepsize=shift @ARGV;
my @popgroups=map { [ split(',', $_) ] } split(':', shift @ARGV);
my $locifile=shift @ARGV;	# stays undef if not provided.

# An empty string argument would leave these lists empty, which cannot be processed.
die "No chromosome/scaffold ids specified!\n" unless (@ids);
die "No population groups specified!\n" unless (@popgroups);

#-----------------------------------------------------------------------------------------------------------------------------

# remove trailing slash from folder path (but keep a bare '/' intact, it would become an empty path otherwise).
$outfolder=~s{/\z}{} unless ($outfolder eq '/');

# create the output folder if it does not exist yet:
unless ( -d $outfolder or mkdir($outfolder) ){
	my $mkdir_error=$!;	# save the error before the next file test overwrites it.
	# another job may have created the folder in the meantime, only give up if it is still missing.
	die "Could not create output folder '$outfolder': $mkdir_error\n" unless (-d $outfolder);
	}

# report the population groups as given on the command line:
print STDERR "$_: ", join(',', @{ $popgroups[$_] }), "\n" foreach (0..$#popgroups);

# set output file names:
my $idstring=join('_', @ids);
my $outfile1="abba_baba_windows_" . $idstring . ".bed";
my $outfile2="abba_baba_windows_" . $idstring . "_translated.bed";

# prepare new arrays for population settings if subset of populations is selected:
# collect the labels of all selected populations, group by group.
my @subpoplabels;
push @subpoplabels, @poplabels[ @{$_} ] foreach (@popgroups);

# NOTE(review): Misc::setSubArrays is defined outside this file (presumably loaded by one of the Classes modules).
# It receives references to the settings arrays and presumably rewrites them in place for the selected subset - confirm.
my @subpopindices=Misc::setSubArrays(\@subpoplabels, \@poplabels, \@nindividuals, \@minind, \@popgroups, \@grouplabels);

# report the settings as they are after the call above:
print STDERR "Selected population indices: ", join(',', @subpopindices), "\n";
print STDERR "Number of individuals per population: ", join(',', @nindividuals), "\n";
print STDERR "Minimum number of individuals per population: ", join(',', @minind), "\n";
for my $groupindex (0..$#popgroups){
	print STDERR "$grouplabels[$groupindex]: ", join(',', @{ $popgroups[$groupindex] }), "\n";
	}

# prepare chromosome map:
my $chrommap=Chrommap->new();
$chrommap->readChromMap($chrommap_file, 5000, 0);

# get regions, either from the chrommap file or from a bed file if one was specified:
my $regions;
if (!defined $locifile){
	$regions=$chrommap->locifromChromMap();
	}
else {
	print "Reading loci from $locifile ...\n";
	$regions=Regions->new();
	$regions->readBED($locifile, 'bed', 0);
	}

# get list of scaffolds to process:
# ids are treated as chromosomes if the first one contains 'Chr', and are then
# replaced by the scaffolds the chromosome map returns for them; otherwise the
# ids are taken as scaffold names directly.
my @scaffolds;
if ($ids[0]=~/Chr/){
	@scaffolds=map { $chrommap->getScaffolds($_) } @ids;
	}
else {
	@scaffolds=@ids;
	}

# split regions into windows, unless windowing is disabled by a false window size:
my $windows;
if ($windowsize){
	$windows=$regions->windowingLoci(0, $windowsize, $stepsize, 0, @scaffolds);
	}
else {
	$windows=$regions;
	}
print join(', ', @scaffolds), "\n";


# Process the windows chunk by chunk until subsetLoci() returns an empty subset.
# NOTE(review): this only terminates if subsetLoci() hands out successive parts of $windows - confirm in Regions.
SUBSET: while (1){
	my $subset=$windows->subsetLoci($maxsize, @scaffolds);
	last SUBSET if ( !$subset->getSize() );	# nothing left to process.

	# read the allele frequencies for the concatenated regions of this chunk:
	my $concat=$subset->concatRegions();
	my $freqs=Frequencies->new();
	$freqs->setPops($mafsfolder, $mafslist, $npopulations, \@subpopindices);
	my $nselected=scalar(@subpopindices);
	$freqs->readMAFs(undef, undef, $nselected, $concat, $MAFfilter, 0, 100);

	# calculate the statistics per window:
	$subset->ABBA_BABA_perwindow($freqs, \@nindividuals, \@popgroups, $minpropcovered, \@minind);

	# write results in scaffold coordinates and translated to chromosomes:
	# the same two files are used in every iteration, so the print method presumably appends - TODO confirm.
	my $chrom_subset=$subset->translateScafftoChrom($chrommap);
	my $scaffoldout="$outfolder/$outfile1";
	my $chromout="$outfolder/$outfile2";
	$subset->printLociABBA_BABA($scaffoldout, 0);
	$chrom_subset->printLociABBA_BABA($chromout, 1);
	}


