#!/usr/bin/perl

###############################
#
# Copyright Stanford University 2016
# Author: John Bell
#
# This script extracts chromosome-arm-wise t-test information 
# from a 2-sample output of the R scripts generated by 
# make_rscr_script.double.pl .  It is entirely dependent on 
# text of R script -- cf.  /c(\d+)._phased_het_total/
# 
# INPUT:  text file of output of R script (path given as first argument)
# OUTPUT: tab-separated table on STDOUT listing the proportion of each
# haplotype on each chrom arm with respect to the rest of the genome, i.e.
# whether it is increased or decreased relative to the haplotype overall;
# the two haplotypes are treated separately, as major > minor by definition
#
################################

use strict;

#--------------------------- I/O --------------------------------#
# Path of the R output to parse, taken from the first command-line argument.
# Tested with defined/length so that a file literally named "0" is accepted.
my $file = shift(@ARGV);
die "need file to read\n" unless defined $file && length $file;

# Three-argument open with an explicit read mode: the file name is never
# interpreted as a mode or a pipe, whatever characters it contains.
open(my $in_fh, '<', $file) or die "can't read $file: $!\n";

# Slurp every line (newlines kept); the main loop indexes into @LN to look
# ahead from each record marker.
my @LN = <$in_fh>;
close($in_fh) or die "can't close $file: $!\n";

#--------------------------- end of I/O -------------------------#

#--------------------------- subroutines ------------------------#

# subroutine
use subs qw(rd3);

# rd3 VALUE
# Round VALUE to three decimal places and return it as a number (so trailing
# zeros are not kept when it is printed).  Halves are rounded away from zero.
# The magnitude is rounded and the sign restored afterwards, because int()
# truncates toward zero and adding .5 directly would round negative values
# the wrong way.
sub rd3 {
   my ($value) = @_;
   my $scaled = $value * 1000;
   my $magnitude = int(abs($scaled) + .5);
   return ($scaled < 0 ? -$magnitude : $magnitude) / 1000;
}

#--------------------------- end of subroutines------------------#

#--------------------------- initializations --------------------#

# Text that opens each per-arm record in the R output.
my $record_marker = qr/#---------------- count -- chr ave/;

# R prints a scalar as "[1] <value>".  Counts are plain integers.  Ratios are
# usually decimals ("0.4871"), but R prints whole values without a decimal
# point ("1") and very small ones in scientific notation ("3e-04"), so all
# three spellings are accepted.
my $count_re = qr/\[1\] (\d+)/;
my $ratio_re = qr/\[1\] (\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)/;

# Line offsets of each field, counted from the marker line.
my $het_total_offset = 3;                 # value printed as het_tot
my $arm_name_offset  = 4;                 # line naming the arm (c<N>p_... / c<N>q_...)
my $arm_het_offset   = 5;                 # value printed as arm_het
my @ratio_offsets    = (7, 9, 11, 13);    # N_M, N_m, M_M, M_m, in that order

# Printed in place of any field that could not be parsed, so that a missing
# value is never mistaken for a genuine 0.
my $missing = "NA";

# _capture_at LINES, INDEX, PATTERN
# Return the first capture group of PATTERN matched against element INDEX of
# the array referenced by LINES, or undef when that element does not exist
# (record cut short at end of input) or does not match.
sub _capture_at {
   my ($lines, $index, $pattern) = @_;
   my $line = $lines->[$index];
   my $value;
   ($value) = $line =~ $pattern if defined $line;
   return $value;
}

#-------------------------- end of initializations --------------#

print "chr\tarm\tarm_het\tN_M\tN_m\tM_M\tM_m\thet_tot\n";

#-------------------------- loop --------------------------------#

for my $start (0 .. $#LN) {

   # only a marker line starts a record
   next unless $LN[$start] =~ $record_marker;

   my $arm_line = $LN[$start + $arm_name_offset];
   next unless defined $arm_line;

   # Decide which arm this record describes; p is tested before q.  Records
   # naming neither arm produce no output row.
   my $arm;
   if ($arm_line =~ /p_phased_het_total/) {
      $arm = "p";
   } elsif ($arm_line =~ /q_phased_het_total/) {
      $arm = "q";
   } else {
      next;
   }

   # extract numbers
   my ($chr) = $arm_line =~ /c(\d+)._phased_het_total/;
   my $het_total = _capture_at(\@LN, $start + $het_total_offset, $count_re);
   my $arm_het = _capture_at(\@LN, $start + $arm_het_offset, $count_re);

   # Ratios are rounded to three decimals; an unparsable ratio stays undef
   # here and becomes $missing below instead of being rounded to 0.
   my @ratios;
   for my $offset (@ratio_offsets) {
      my $raw = _capture_at(\@LN, $start + $offset, $ratio_re);
      push @ratios, (defined $raw ? rd3($raw) : undef);
   }

   # Column order matches the header line printed above.
   my @fields = ($chr, $arm, $arm_het, @ratios, $het_total);
   print join("\t", map { defined $_ ? $_ : $missing } @fields), "\n";
}

#-------------------------- end of loop -------------------------# 
