use strict;
use Cwd qw(abs_path getcwd);
use Getopt::Long;
use Data::Dumper;
use File::Basename qw(basename dirname);
use FindBin qw($Bin $Script);

# Script identity. Despite its name, $programe_dir holds the script's file
# name (basename of $0); $path holds the directory part of $0.
my $programe_dir = basename($0);
my $path         = dirname($0);

# Values interpolated into the usage text printed by help().
my $ver    = "1.0";
my $Writer = "Daih <daih\@biomarker.com.cn>";
my $Data   = "2015/11/6";

# Epoch seconds at start-up; handed to Runtime() when the script finishes.
my $BEGIN = time();
#######################################################################################

# ------------------------------------------------------------------
# GetOptions
# ------------------------------------------------------------------
# Destination variables for the command-line options. Every one of them is
# required: if any is missing or false after parsing, the usage text is
# printed and the script exits (see help()).
my ($blast_snp,$tc_gatk_snp,$bsb_gatk_snp,$match_file,$tc_gff,$bsb_gff,$od);

GetOptions(
	"h|?"       => \&help,
	"od:s"      => \$od,
	"b:s"       => \$blast_snp,
	"tc:s"      => \$tc_gatk_snp,
	"bsb:s"     => \$bsb_gatk_snp,
	"m:s"       => \$match_file,
	"tc_gff:s"  => \$tc_gff,
	"bsb_gff:s" => \$bsb_gff,
) or help();

# Any false value among the required options triggers the usage message.
help() if grep { !$_ } ($blast_snp, $tc_gatk_snp, $bsb_gatk_snp, $match_file, $tc_gff, $bsb_gff, $od);

# Print the usage text (with $Writer, $Data and $ver interpolated) to STDOUT
# and terminate the script. Used both as the -h/-? option callback and as the
# fallback when option parsing fails or a required option is missing.
# Arguments, if any, are ignored. Never returns.
sub help
{
	# The heredoc terminator is a tab followed by "Usage End."; the closing
	# line must match it exactly.
	print <<"	Usage End.";
    Description:
        Writer  : $Writer
        Data    : $Data
        Version : $ver
        function: ......
    Usage:
	perl XXX.pl -b <blast snp file> -tc <tc gatk snp> -bsb <bsb gatk snp> -m <match file> -tc_gff <tc gff file> -bsb_gff <bsb gff file> -od <outfile>
	Usage End.
	# Bare exit: the process ends with status 0 even when help was shown
	# because of bad or missing options.
	exit;
}
# ------------------------------------------------------------------
# Resolve option paths to absolute paths
# ------------------------------------------------------------------
# Replace every path given on the command line (output location first, then
# the six input files) with the value returned by Cwd::abs_path.
#mkdir $od if (!-d $od);
for my $path_ref (\$od, \$blast_snp, \$tc_gatk_snp, \$bsb_gatk_snp, \$match_file, \$tc_gff, \$bsb_gff) {
	${$path_ref} = abs_path(${$path_ref});
}
###############Time
# Report the start time.
my $Time_Start;
$Time_Start = sub_format_datetime(localtime(time()));
print "\nStart $programe_dir Time :[$Time_Start]\n\n";
################
# Load the lookup tables used by the SNP loops below.
# ReadGff returns two hash references: id => segment list, and
# sequence name => feature list. One pair is loaded per GFF file.
my ($tc_gff_gene, $tc_gff_pos) = ReadGff($tc_gff);
# Fixed: this call used the undeclared $bsb_gff_hash instead of $bsb_gff and
# re-declared $tc_gff_pos, leaving $bsb_gff_pos (needed by the BSB SNP loop)
# undeclared.
my ($bsb_gff_gene, $bsb_gff_pos) = ReadGff($bsb_gff);
# id => list of matched intervals between the two gene sets (see ReadMatch).
my %match_pos = ReadMatch($match_file);

# Read the BSB GATK SNP table and split its sites into two lookups, both keyed
# by column 1 then column 2 of the table:
#   %bsb_N_hash   - sites whose every checked column is "N"
#   %bsb_snp_hash - all other sites, storing columns 3-8
# Only sites that fall inside a gene present in %match_pos are kept.
# NOTE(review): columns 3-8 appear to be three (call, count) pairs, one per
# biological replicate, judging by how they are unpacked further down — confirm
# against the file format.
my %bsb_snp_hash;
my %bsb_N_hash;
# Three-argument open with an explicit failure check; the original two-argument
# form silently produced an empty table when the file could not be opened.
open my $SNP2, '<', $bsb_gatk_snp or die "Can not open $bsb_gatk_snp: $!\n";
while(<$SNP2>){
	chomp;
	next if(/^#/ || /^$/);
	my @item = split/\t+/;
	my $flag = 0;   # number of checked columns equal to "N"
	my $count = 0;  # number of checked columns
	# Inspect every second column starting at index 2, stopping before the
	# last column.
	for(my $i=2;$i<$#item;$i+=2){
		$count++;
		if($item[$i] eq "N"){
			$flag++;
		}
	}
	# Only the gene id is needed here; the within-gene position is ignored.
	my($geneid) = pos2gene($item[0], $item[1], $bsb_gff_gene, $bsb_gff_pos);
	next if(!defined $geneid || !exists $match_pos{$geneid});
	if($flag > 0 && $flag == $count){
		# Record sites where all biological replicates are N.
		$bsb_N_hash{$item[0]}{$item[1]} = 1;
	}else{
		# Everything left after excluding all-N sites; this still includes
		# sites where only some of the replicates are N.
		@{$bsb_snp_hash{$item[0]}{$item[1]}} = @item[2 .. 7];
	}
}
close $SNP2;

# Read the TC GATK SNP table. Every site is first filed into %tc_N_hash (all
# checked columns are "N") or %tc_snp_hash (columns 3-8 stored). Sites that lie
# in a gene listed in %match_pos are then projected onto the matching BSB gene
# and converted back to a BSB genome coordinate, which is looked up in the BSB
# tables built above.
# %tc_bsb_hash is declared here but not populated in this section.
my %tc_snp_hash;
my %tc_N_hash;
my %tc_bsb_hash;
# Three-argument open with an explicit failure check.
open my $SNP1, '<', $tc_gatk_snp or die "Can not open $tc_gatk_snp: $!\n";
while(<$SNP1>){
	chomp;
	next if(/^#/ || /^$/);
	my @item = split/\t+/;
	my $flag = 0;   # number of checked columns equal to "N"
	my $count = 0;  # number of checked columns
	# Inspect every second column starting at index 2, stopping before the
	# last column.
	for(my $i=2;$i<$#item;$i+=2){
		$count++;
		if($item[$i] eq "N"){
			$flag++;
		}
	}
	if($flag > 0 && $flag == $count){
		# Record sites where all biological replicates are N.
		$tc_N_hash{$item[0]}{$item[1]} = 1;
	}else{
		# Everything left after excluding all-N sites; this still includes
		# sites where only some of the replicates are N.
		@{$tc_snp_hash{$item[0]}{$item[1]}} = @item[2 .. 7];
	}
	my ($geneid, $genepos) = pos2gene($item[0], $item[1], $tc_gff_gene, $tc_gff_pos);
	# Skip sites that are not inside a gene, or whose gene has no match record.
	next if(!defined $geneid || !defined $genepos || !exists $match_pos{$geneid});

	# Each match record is [start, end, other id, other start, other end]
	# (see ReadMatch). Find the interval containing the within-gene position
	# and shift the position into the partner's coordinates. If several
	# intervals contain it, the last one wins.
	# Fixed: the original condition read "exists $genenpos >= $idx[0]" (a typo
	# plus an invalid exists) and indexed @idx instead of dereferencing $idx.
	my($newgeneid, $newpos);
	for my $idx(@{$match_pos{$geneid}}){
		if($genepos >= $idx->[0] && $genepos <= $idx->[1]){
			$newpos = $idx->[3] + $genepos - $idx->[0];
			$newgeneid = $idx->[2];
		}
	}
	# The position is outside every matched interval of this gene.
	next if(!defined $newgeneid);

	my($genome_chr, $genome_pos) = gene2pos($newgeneid, $newpos, $bsb_gff_gene);
	# gene2pos yields undef when the gene is unknown or the position lies
	# beyond its segments; do not use undef as a hash key.
	next if(!defined $genome_chr || !defined $genome_pos);
	if(exists $bsb_N_hash{$genome_chr}{$genome_pos}){
		next;
	}elsif(exists $bsb_snp_hash{$genome_chr}{$genome_pos}){
		my($sam1, $count1, $sam2, $count2, $sam3, $count3) = @{$bsb_snp_hash{$genome_chr}{$genome_pos}};
		# NOTE(review): the BSB values are unpacked but nothing is done with
		# them yet; the comparison logic appears to be unfinished.
	}
}
close $SNP1;



# Convert a 1-based position inside a gene/transcript into a genome coordinate.
#
# Arguments:
#   $geneid  - key into the segment table
#   $genepos - position within the concatenated segments
#   $hash    - hash ref: id => array ref of [seqname, start, end, orientation]
#              (the first structure returned by ReadGff)
# Returns ($genome_chr, $genome_pos). Both are undef when the id is unknown
# (an error line is printed to STDOUT); $genome_pos alone is undef when
# $genepos lies beyond the total segment length.
#
# Segments are walked in stored order while accumulating their lengths. The
# sequence name and orientation are taken from the first segment.
sub gene2pos{
	my($geneid, $genepos, $hash) = @_;
	my $genome_pos;
	my $genome_chr;
	# Work through the reference directly: the original copied the whole
	# table into a local hash on every call.
	if(defined $geneid && exists $hash->{$geneid}){
		my $segments = $hash->{$geneid};
		$genome_chr = $segments->[0][0];
		my $orien = $segments->[0][-1];
		my $accum = 0;
		for my $seg (@{$segments}){
			my $len = abs($seg->[1]-$seg->[2])+1;
			$accum += $len;
			if($accum >= $genepos){
				# Distance from the requested position to the end of the
				# accumulated length, i.e. to the far edge of this segment.
				my $dif = $accum - $genepos;
				if($orien eq "+"){
					$genome_pos = $seg->[2]-$dif;
				}else{
					$genome_pos = $seg->[1]+$dif;
				}
				last;
			}
		}
	}else{
		my $shown = defined $geneid ? $geneid : "";
		print "Error: can not find the gene $shown\n";
	}
	return($genome_chr, $genome_pos);
}

# Convert a genome coordinate into (id, 1-based position within that id).
#
# Arguments:
#   $chr        - sequence name
#   $genome_pos - coordinate on that sequence
#   $hash_gene  - hash ref: id => array ref of [seqname, start, end, orientation]
#   $hash_pos   - hash ref: seqname => array ref of [start, end, orientation, id]
#                 (the two structures returned by ReadGff)
# Returns ($geneid, $gene_pos):
#   - $geneid is "NA" when $chr is absent from $hash_pos, and undef when $chr
#     is known but no feature contains $genome_pos;
#   - $gene_pos is undef whenever no id was found or the position is not
#     inside any stored segment of that id.
# Diagnostic lines are printed to STDOUT for an unknown sequence and for an id
# that has no segment list.
#
# Fixed: the original assigned to undeclared %hash_gene/%hash_pos (a compile
# error under strict) and indexed @idx/@idy instead of dereferencing the loop
# variables, so no position could ever match. It also copied both tables on
# every call; the references are now used directly.
sub pos2gene{
	my($chr, $genome_pos, $hash_gene, $hash_pos) = @_;
	my ($geneid, $orien);
	if(exists $hash_pos->{$chr}){
		# No early exit: when several features contain the position, the
		# last one in the list wins.
		for my $idx(@{$hash_pos->{$chr}}){
			if($genome_pos >= $idx->[0] && $genome_pos <= $idx->[1]){
				$geneid = $idx->[-1];
				$orien = $idx->[-2];
			}
		}
	}else{
		$geneid = "NA";
		$orien = "NA";
		print "Warnning: can not find $chr in the gff file\n";
	}

	my $gene_pos;
	if(defined $geneid && exists $hash_gene->{$geneid}){
		for my $idy(@{$hash_gene->{$geneid}}){
			my ($start, $end) = ($idy->[1], $idy->[2]);
			my $inside = ($genome_pos >= $start && $genome_pos <= $end);
			if($orien eq "+"){
				if($inside){
					# Offset from the segment start, 1-based.
					$gene_pos += $genome_pos - $start + 1;
					last;
				}elsif($genome_pos > $end){
					# Segment lies entirely before the position: add its
					# full length.
					$gene_pos += $end - $start + 1;
				}
			}elsif($orien eq "-"){
				if($inside){
					# On the minus strand, count from the segment end.
					$gene_pos += $end - $genome_pos + 1;
					last;
				}elsif($genome_pos < $start){
					$gene_pos += $end - $start + 1;
				}
			}
		}
	}else{
		my $shown = defined $geneid ? $geneid : "";
		print "Error: Can not find the gene $shown in the gff file\n";
	}
	return($geneid, $gene_pos);
}

# Read the tab-separated match file and index every record in both directions.
#
# Each data line supplies six columns. For columns (c0..c5) two entries are
# stored:
#   $hash{c0} gets [c1, c2, c3, c4, c5]
#   $hash{c3} gets [c4, c5, c0, c1, c2]
# so a lookup by either id yields [own start, own end, other id, other start,
# other end].
# NOTE(review): the start/end/id reading of the columns is inferred from how
# the caller uses the entries — confirm against the match-file format.
#
# Blank lines and lines starting with '#' are skipped, matching the SNP table
# readers; previously they created an entry under an empty key.
# Dies if the file cannot be opened. Returns the hash as a flat list.
sub ReadMatch{
	my $file = shift;
	my %hash;
	open my $IN, '<', $file or die "Can not open $file: $!\n";
	while(<$IN>){
		chomp;
		next if(/^#/ || /^\s*$/);
		my @aa = split/\t+/;
		push @{$hash{$aa[0]}},[@aa[1 .. 5]];
		push @{$hash{$aa[3]}},[@aa[4, 5, 0, 1, 2]];
	}
	close $IN;
	return(%hash);
}

# Read a GFF file and build the two lookups used by pos2gene and gene2pos.
#
# Lines containing a tab-delimited "CDS" field are skipped. Every remaining
# line that carries a "Parent=<id>;" attribute contributes one record, using
# columns 1, 4, 5 and 7 (seqname, start, end, orientation):
#   first hash  : id      => [seqname, start, end, orientation], ...
#   second hash : seqname => [start, end, orientation, id], ...
# Start and end are swapped when start > end, so start <= end always holds.
#
# Blank lines, '#' comment lines and lines without a Parent attribute are
# skipped; previously they were stored under an undefined id.
# NOTE(review): the id pattern only accepts word characters directly followed
# by ';' — ids containing '.' or '-' or a Parent that ends the line do not
# match and are skipped. Confirm this suits the input files.
#
# Dies if the file cannot be opened. Returns (\%by_id, \%by_seqname).
sub ReadGff{
	my $file = shift;
	my %hash1;
	my %hash2;
	open my $IN, '<', $file or die "Can not open $file: $!\n";
	while(<$IN>){
		chomp;
		next if(/^#/ || /^\s*$/);
		next if(/\tCDS\t/);
		my ($id) = $_ =~ /Parent=(\w+?);/;
		next if(!defined $id);
		my @aa = split/\t+/;
		if($aa[3] > $aa[4]){
			($aa[3], $aa[4]) = ($aa[4], $aa[3]);
		}
		push @{$hash1{$id}},[$aa[0], $aa[3], $aa[4], $aa[6]];
		push @{$hash2{$aa[0]}},[$aa[3], $aa[4], $aa[6], $id];
	}
	close $IN;
	return(\%hash1, \%hash2);
}
###############Time
# Print the finishing timestamp, then the total elapsed time since $BEGIN.
my $Time_End = sub_format_datetime(localtime(time()));
print "\nEnd $programe_dir Time :[$Time_End]\n\n";
Runtime($BEGIN);
###############Subs
sub sub_format_datetime #Time calculation subroutine
{
	# Takes the list produced by localtime() and returns it formatted as
	# "YYYY-MM-DD hh:mm:ss". Only the first six fields are used; the year is
	# offset by 1900 and the month by 1 before formatting.
	my ($sec, $min, $hour, $mday, $mon, $year) = @_;
	my @date  = ($year + 1900, $mon + 1, $mday);
	my @clock = ($hour, $min, $sec);
	return sprintf("%4d-%02d-%02d %02d:%02d:%02d", @date, @clock);
}

sub Runtime # &Runtime($BEGIN);
{
	# Print the wall-clock time elapsed since the given epoch timestamp,
	# rendered by sub_time().
	my $started = shift;
	my $elapsed = time() - $started;
	print "Total $programe_dir elapsed time : [", &sub_time($elapsed), "]\n";
}
# Render a duration in seconds as text:
#   under a minute  -> "<s>s"
#   under an hour   -> "<m>m-<s>s"
#   an hour or more -> "<h>h-<m>m-<s>s"
sub sub_time
{
	my ($T) = @_;
	chomp $T;

	# Short durations are returned as given, with the unit appended.
	return ($T . "s") unless $T >= 60;

	my $prefix = "";
	my $rest   = $T;
	if ($T >= 3600) {
		$prefix = int($T / 3600) . "h-";
		$rest   = $T % 3600;
	}
	my $minutes = int($rest / 60);
	my $seconds = $rest % 60;
	return ($prefix . $minutes . "m-" . $seconds . "s");
}

sub ABSOLUTE_DIR #$pavfile=&ABSOLUTE_DIR($pavfile);
{
	# Return an absolute path for an existing file or directory by changing
	# into it (or into the file's directory) and asking `pwd`. The working
	# directory recorded on entry is restored before returning. For anything
	# that is neither a file nor a directory, a warning is issued and the
	# script exits.
	my $start_dir = `pwd`;
	chomp($start_dir);

	my ($in) = @_;
	my $resolved = "";

	if (-f $in) {
		my $leaf   = basename($in);
		my $parent = dirname($in);
		chdir $parent;
		$parent = `pwd`;
		chomp $parent;
		$resolved = "$parent/$leaf";
	}
	elsif (-d $in) {
		chdir $in;
		$resolved = `pwd`;
		chomp $resolved;
	}
	else {
		warn "Warning just for file and dir\n";
		exit;
	}

	chdir $start_dir;
	return $resolved;
}

