#!/usr/bin/env perl

#
# ========================================================================
# Script to find presence or absence of regions
# ========================================================================
#

use strict;

use Getopt::Long;
use File::Basename;

# Name of the block file that is read and of the gap file that is
# written; both are filled in from the command line
my $blockfile;
my $gapfile;

# Largest gap (in nucleotides) still tolerated on the side that has to
# be directly connected
my $delta = 20;

# Smallest and largest gap length that is reported
my $mingap = 30;
my $maxgap = 1_000_000;

# Lower bound for a signed gap; kept as a non-positive number so that
# small overlaps (negative gaps) are still accepted
my $maxover = -10;

#
# ------------------------------------------------------------------------
# Usage
#
# Printed (followed by a successful exit) when the script is started
# without any command line argument.  The defaults shown in the text are
# interpolated from the configuration variables.
# ------------------------------------------------------------------------
#

if (!@ARGV) {
	my $name = basename($0);

	# NOTE(review): the sample lines at the very end of the help text are
	# kept verbatim; their column layout does not obviously match the line
	# format parsed by this script - confirm what they are meant to show.
	print <<EOS;
USAGE: $name -fb blockfile -fg gapfile [-d delta] [-i mingap] [-m maxgap] [-o maxover]

WHERE
    blockfile - name of the block file
    gapfile   - name of the generated gap bed file
    delta     - max amount of "unprecise" nucleotides for the direct
                connected strand
                (default $delta)
    mingap    - minimal length of gap
                (default $mingap)
    maxgap    - maximal length of gap
                (default $maxgap)
    maxover   - maximal overlap of gap
                (default $maxover)

DESCRIPTION
    Find presence or absence of regions in a two way alignment.  The
    calculation is based on lines in an "axt" file like:

      0 chr1 123 234 chr2 3222 3333 + 111
      1 chr1 235 412 chr2 3396 3597 + 201
      ...

    describing the alignment.  You see that the end of chr1 234 is
    continued on the next line with 235.  That means both fragments
    fit directly together.  On the other side there is a gap from 3333
    to 3396, meaning there is an insertion in chr2 or other way round
    a deletion in chr1.

    The parameter delta describes the number of nucleotides of
    unpreciseness between end and start.  So the first fragment would
    fit together with the condition 234 < 235 - delta.

    An example

      # $name -fb hg38.panTro5.net.axt -fg hg38.panTro5.gap.bed
      0 chr2 sp0 chr1 10918 11386 chrUn_NW_015974624v1 22008 22462 -
      1 chr1 11387 11448 chrUn_NW_015983188v1 1920 1981 +
      2 chr1 11449 14671 chrUn_NW_015974624v1 22463 25700 -
      3 chr1 14672 16562 chrUn_NW_015974624v1 25842 27742 -
EOS
	exit 0;
}

#
# ------------------------------------------------------------------------
# Get parameters and check them
#
# -fb / -fg are mandatory file names, -d / -i / -m / -o override the
# numeric defaults.  Every failed check ends the script with exit code 1.
# ------------------------------------------------------------------------
#

# GetOptions reports unknown or malformed options itself and then returns
# a false value, so nothing more has to be printed here.
GetOptions(
	'fb=s' => \$blockfile,
	'fg=s' => \$gapfile,
	'd=i'  => \$delta,
	'i=i'  => \$mingap,
	'm=i'  => \$maxgap,
	'o=i'  => \$maxover
) or exit 1;

# Both file names are required: the block file is read and the gap file
# is written.  Test for "missing or empty" rather than plain truth so a
# file that is literally named "0" is still accepted.
if (grep { !defined $_ || $_ eq '' } ($gapfile, $blockfile)) {
	print qq(Please enter a gap and block filename!\n);
	exit 1;
}

if ($mingap > $maxgap) {
	print qq(Minimum $mingap must be less or equal to maximum $maxgap!\n);
	exit 1;
}

# The overlap limit is kept as a non-positive number, so a positive value
# given on the command line is negated.
if ($maxover > 0) {
	$maxover = -$maxover;
}

#
# ------------------------------------------------------------------------
# Widen an interval in place until end - start is at least 3
#
# The arguments are changed through the aliasing of @_, therefore the
# caller has to pass variables for start and end.
#
# Params
# $_[0] - start (set to end - 3 when ontarget is true)
# $_[1] - end   (set to start + 3 when ontarget is false)
# $_[2] - ontarget, selects which of the two boundaries is moved
# ------------------------------------------------------------------------
#
sub _adjust {
	my $min_span = 3;

	if ($_[1] < $_[0] + $min_span) {
		if (!$_[2]) {
			# Keep the start and push the end outwards
			$_[1] = $_[0] + $min_span;
		}
		else {
			# Keep the end and pull the start back
			$_[0] = $_[1] - $min_span;
		}
	}
}

#
# ------------------------------------------------------------------------
# Widen the target and the query interval so that end - start >= 3
#
# Params:  start1, end1, start2, end2, ontarget
# Returns: the list (start1, end1, start2, end2) after the adjustment;
#          the variables of the caller are left untouched
# ------------------------------------------------------------------------
#
sub adjust {
	# Work on private copies; _adjust changes its arguments in place
	my @first    = @_[0, 1];
	my @second   = @_[2, 3];
	my $ontarget = $_[4];

	# The same ontarget flag is applied to both intervals
	_adjust($first[0],  $first[1],  $ontarget);
	_adjust($second[0], $second[1], $ontarget);

	return ($first[0], $first[1], $second[0], $second[1]);
}

#
# ------------------------------------------------------------------------
# Check gap sizes
#
# A pair of gap lengths is accepted when one of them lies within
# [$mingap, $maxgap] while the other one is not larger than $delta.
#
# Params:  the two gap lengths
# Returns: a true value when the pair is accepted, a false value otherwise
# ------------------------------------------------------------------------
#
sub checkGap {
	my ($first, $second) = @_;

	# First gap is the real gap, second one is (nearly) closed
	return 1
	  if $first >= $mingap && $first <= $maxgap && $second <= $delta;

	# Otherwise the roles have to be the other way round
	return ($second >= $mingap && $second <= $maxgap && $first <= $delta);
}

#
# ------------------------------------------------------------------------
# Read the block file and write the accepted gaps to the gap file
#
# Every line of the form
#
#   index chrT startT endT chrQ startQ endQ strand
#
# is compared with the previous line of that form.  When both lines share
# target name, query name and strand, the gap between the two blocks is
# computed on the target and on the query side.  The pair is written as
#
#   index chrT start end chrQ start end strand
#
# (index and names taken from the current line) when checkGap accepts the
# absolute gap lengths and both signed gaps are greater than $maxover.
# Lines that do not match the pattern are skipped.
#
# Dies when one of the files cannot be opened or the gap file cannot be
# closed.
# ------------------------------------------------------------------------
#
sub main {
	# Fields of the previous matching line; undefined until one was read
	my ($prev_tchr, $prev_tstart, $prev_tend);
	my ($prev_qchr, $prev_qstart, $prev_qend, $prev_strand);

	open(my $block_fh, "<", $blockfile)
	  or die qq(Cannot open block file "$blockfile" for reading: $!\n);
	open(my $gap_fh, ">", $gapfile)
	  or die qq(Cannot open gap file "$gapfile" for writing: $!\n);

	while (my $line = <$block_fh>) {
		next
		  unless $line =~ m/(\d+)\s+([^\s]+)\s+(\d+)\s+(\d+)\s+([^\s]+)\s+(\d+)\s+(\d+)\s+([\+\-])/;
		my ($index, $tchr, $tstart, $tend, $qchr, $qstart, $qend, $strand) =
		  ($1, $2, $3, $4, $5, $6, $7, $8);

		# "defined" instead of plain truth: a sequence named "0" must not be
		# mistaken for "no previous line".
		if (   defined $prev_tchr
			&& $prev_tchr eq $tchr
			&& $prev_qchr eq $qchr
			&& $prev_strand eq $strand)
		{
			# On "+" the query gap runs from the end of the previous block to
			# the start of the current one, on "-" from the end of the current
			# block to the start of the previous one.
			my ($qleft, $qright) =
			  $strand eq "+"
			  ? ($prev_qend, $qstart)
			  : ($qend, $prev_qstart);

			my $gapt = $tstart - $prev_tend - 1;
			my $gapq = $qright - $qleft - 1;

			# Negative gaps are overlaps: checkGap looks at the absolute
			# lengths, $maxover limits how far the blocks may overlap.
			if (   checkGap(abs($gapt), abs($gapq))
				&& $gapt > $maxover
				&& $gapq > $maxover)
			{
				# True when the previous block is longer on the query than
				# the current one; adjust uses it to pick the boundary that
				# is moved when an interval has to be widened.
				my $ontarget = abs($prev_qend - $prev_qstart) > abs($qend - $qstart);
				my ($s1, $e1, $s2, $e2) = adjust($prev_tend, $tstart, $qleft, $qright, $ontarget);
				print {$gap_fh} "$index $tchr $s1 $e1 $qchr $s2 $e2 $strand\n";
			}
		}

		($prev_tchr, $prev_tstart, $prev_tend) = ($tchr, $tstart, $tend);
		($prev_qchr, $prev_qstart, $prev_qend) = ($qchr, $qstart, $qend);
		$prev_strand = $strand;
	}

	close($block_fh);

	# Buffered write errors only show up when the handle is closed
	close($gap_fh)
	  or die qq(Cannot close gap file "$gapfile": $!\n);
}

main();
