#!/usr/bin/perl

###############################
#
# Copyright Stanford University 2018
# Author: John Bell
#
# This script takes a double sample barcode blocks file and 
# creates R scripts to produce graphics images, runs t-tests,
# and finds regions showing a threshold difference from the normal.
#
# INPUT:  a barcode blocks file based on two samples, a chromosome 
# #, a minimum number of phased hets per block, and optionally build number.
# OUTPUT:  two R scripts (one for the p arm, one for the q arm).
#
# ---------------------- notes
#
# done this way to allow easy manual modification of R scripts 
# as needed for display, testing, etc.
#
# There are multiple hard-coded values based on experience with 
# results, starting with arb_height
#
# note that centromere definitions for build 38 are "by hand", based
# on visual inspection of the pattern & preponderance of Ns.
#
# ---------------------- end of notes
#
################################

use strict;
use warnings;
use Getopt::Long;

# Set to 1 further down once the chromosome is known to have a p-arm; the
# p-arm R code is only emitted when this flag is set.
my $there_is_a_p_arm = 0;
 
my $usage = "usage: $0  --build [38] <input file -- combined barcode blocks> <chrom> <# phased hets needed>\n";

# Genome build selecting the chromosome size / centromere tables (37 or 38).
my $build = 38;

# GetOptions returns false on an unknown or malformed option; stop with the
# usage text instead of silently carrying on with the default build.
GetOptions("build=i" => \$build) or die $usage;

# Positional arguments: input table, chromosome, phased-het threshold.
my $input_file = shift(@ARGV) or die $usage;
my $chrom = shift(@ARGV) or die $usage;
my $phets = shift(@ARGV) or die $usage;

# Anchored patterns: the whole argument has to be a chromosome number (or
# X / Y) and a plain integer count.  Unanchored patterns let values such as
# "chr1" or "10h" through, which then ended up inside the generated R code.
die $usage unless $chrom =~ /\A(?:\d+|X|Y)\z/;
die $usage unless $phets =~ /\A\d+\z/;
die $usage unless -f $input_file;
die $usage unless ($build == 37 || $build == 38);

# The sex chromosomes are addressed numerically from here on.
if ($chrom eq "X") { $chrom = 23; }
if ($chrom eq "Y") { $chrom = 24; }

# The per-chromosome tables used below (chr_size, p_end, q_beg, p-arm flags)
# only have entries for chromosomes 1..24.
die $usage unless ($chrom >= 1 && $chrom <= 24);

# Derive the patient label from the input file name: everything before the
# first underscore, or failing that everything before the first dot.  The
# label prefixes the output file names and appears in the plot titles.
my $patient;
if ($input_file =~ /\A([^_]+)_/) {
    $patient = $1;
}
elsif ($input_file =~ /\A([^\.]+)\./) {
    $patient = $1;
}
else {
    # Neither pattern produced a non-empty prefix.  Previously this branch
    # could never run and $patient was left undefined instead.
    die "can't find patient name\n";
}

# One R script per chromosome arm: OUT1 receives the p-arm script, OUT2 the
# q-arm script.
my $out1 = $patient . "_chr$chrom" . "p_barcode_blocks_figs.$phets" . "h.r";
my $out2 = $patient . "_chr$chrom" . "q_barcode_blocks_figs.$phets" . "h.r";

# Three-argument open keeps characters in the derived file name from being
# read as part of the open mode; $! reports why the open failed.  The
# bareword handles are kept because the rest of the script prints to them.
open(OUT1, '>', $out1) or die "can't write to $out1: $!\n";
open(OUT2, '>', $out2) or die "can't write to $out2: $!\n";

# R preamble shared by both arm scripts: library loads plus the hard-coded
# plotting constants (axis heights, density limit, CI multiplier).
{
    my $r_preamble = <<'END_R_PREAMBLE';
library(ggplot2)
library(reshape2)
library(grid)
library(gridExtra)

arb_height=15

arb_height_p=15

pretty_factor = .9

density_lim = 1

desired_ci_edge = .975
sd_multiplier = qnorm(desired_ci_edge)

END_R_PREAMBLE

    for my $fh (\*OUT1, \*OUT2) {
        print {$fh} $r_preamble;
    }
}

# R helper "multiplot", written identically to both arm scripts.  Only the
# title passed to grid.text depends on Perl variables ($patient, $chrom);
# "\$" keeps R's own "$" operator literal in the generated code.
{
    my $r_multiplot = <<"END_R_MULTIPLOT";
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
      # Make the panel
      # ncol: Number of columns of plots
      # nrow: Number of rows needed, calculated from # of cols
      layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
           ncol = cols, nrow = ceiling(numPlots/cols))
   }

   if (numPlots==1) {
      print(plots[[1]])
   } else {
   # Set up the page
      grid.newpage()
   # pushViewport(viewport(width=1,height=.5,layout = grid.layout(nrow(layout), ncol(layout))))
      pushViewport(viewport(width=1,height=.6,layout = grid.layout(2, ncol(layout),heights = unit(c(1,5), "null"))))
      grid.text("$patient chr $chrom barcode blocks by SNV density", gp=gpar(fontsize=16), vp = viewport(layout.pos.row = 1, layout.pos.col = 1:2))
   # Make each plot, in the correct location
      for (i in 1:numPlots) {
   # Get the i,j matrix positions of the regions that contain this subplot
         matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
 #        print(plots[[i]], vp = viewport(layout.pos.row = matchidx\$row,
         print(plots[[i]], vp = viewport(layout.pos.row = 2,
             layout.pos.col = matchidx\$col))
         }
   }
}
END_R_MULTIPLOT

    for my $fh (\*OUT1, \*OUT2) {
        print {$fh} $r_multiplot;
    }
}


# R code shared by both arm scripts: read the input table, record the column
# layout and one sample row as R comments, and define the colour palettes.
{
    my @r_lines = (
        qq{r_table <- read.table("$input_file",header=T)\n},
        "\n",
        "#chr	beg_pos	end_pos	dist	normal_PS	tumor_PS	all_SNVs	phased_het	barcode_blocks_match	norm_total	norm_unique	norm_hap1_total	norm_hap1_unique	norm_hap2_total	norm_hap2_unique	tum_total	tum_unique	tum_hap1_total	tum_hap1_unique	tum_hap2_total	tum_hap2_unique\n",
        "#1	1404016	1547566	143551	101404016	101381253	101386016	205	87	ambiguous	ambiguous	822	476	454	265	368	211	822	482	340	205	482	277	974	644	468	298	506	346\n",
        "\n",
        "#following palette from Winston Chang's cookbook-r.com, used under CC0 license\n",
        qq{cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")\n},
        "this_wid = 2\n",
        qq{medianPalette <- c("#2B93CE", "#DE7F00", "#663D54")\n},
        "\n",
    );
    my $r_code = join '', @r_lines;

    for my $fh (\*OUT1, \*OUT2) {
        print {$fh} $r_code;
    }
}

# Per-build R vectors with one entry per chromosome 1..24: chromosome sizes,
# end of the p-arm, start of the q-arm.  Written to both arm scripts.
{
    my %r_vectors_for_build = (
        37 => [
            'chr_size = c(249250621,243199373,198022430,191154276,180915260,171115067,159138663,146364022,141213431,135534747,135006516,133851895,115169878,107349540,102531392,90354753,81195210,78077248,59128983,63025520,48129895,51304566,155270560,59034049)',
            # modified chr21 p_end to 0 to avoid problems 12/1/15
            'p_end = c(121502139,90545103,90504858,49338939,46405642,58780170,58054335,43838889,47317680,39154938,51594203,34856698,0,0,0,35285802,22263008,15410901,24631783,26319573,0,0,58582018,0)',
            'q_beg = c(142535154,95326168,93504852,52660118,49405641,61880162,61054321,46838887,65467681,42354786,54694243,37856695,19019998,19000001,20000000,46369840,25263004,18510898,27731695,29419570,14338120,16050038,61682008,0)',
        ],
        38 => [
            'chr_size = c(248956422,242193529,198295559,190214555,181538259,170805979,159345973,145138636,138394717,133797422,135086622,133275309,114364328,107043718,101991189,90338345,83257441,80373285,58617616,64444167,46709983,50818468,156040895,57227415)',
            'p_end = c(121976459,90402511,90550102,49336924,46435900,58453888,60828234,43983744,45518558,38529907,50821348,34719407,0,0,0,33214595,21795850,15410899,24448980,26348365,0,0,58555579,0)',
            'q_beg = c(143184587,91402511,91553419,49712061,47309184,60229934,62506779,45927265,68220552,42066265,54000000,37460128,16282173,16404448,17000000,46380682,23195018,15460899,24908689,26608145,10000000,15000000,58605579,0)',
        ],
    );

    # "+ 0" makes the lookup numeric, matching the == comparisons used when
    # the build number is validated.
    my $vectors = $r_vectors_for_build{ $build + 0 };
    die "build is not recognized\n" unless defined $vectors;

    my $r_code = join '', map { $_ . "\n" } @{$vectors};
    for my $fh (\*OUT1, \*OUT2) {
        print {$fh} $r_code;
    }
}

# R: restrict the table to the requested chromosome, then to blocks with at
# least $phets phased hets.  Written to both arm scripts.
{
    my $raw = "pat_c${chrom}_hap_raw";
    my $r_subsets =
          "${raw} <- subset(r_table,r_table\$chr == ${chrom})\n"
        . "pat_c${chrom}_hap = subset(${raw},${raw}\$phased_het >= ${phets})\n";

    for my $fh (\*OUT1, \*OUT2) {
        print {$fh} $r_subsets;
    }
}

# Indexed by chromosome number (slot 0 is a placeholder): "Y" where p-arm
# output is produced.  The "n" slots are the same chromosomes whose p_end
# entry is 0 in the build tables.
my @p_arm_test = ("DUMMY", ("Y") x 12, ("n") x 3, ("Y") x 5, ("n") x 2, "Y", "n");


$there_is_a_p_arm = 1 if $p_arm_test[$chrom] eq "Y";

# get normalizer across whole genome
#
# The generated R takes, for every row of r_table, the larger and smaller of
# the two haplotype unique-barcode counts (normal and tumor separately), sums
# them, derives average barcodes per phased het, and finally the genome-wide
# normal/tumor ratio.  Written identically to both arm scripts.
{
    my $r_normalizer = <<'END_R_NORMALIZER';
# get normalizer across genome
uselen_all = length(r_table$beg_pos)
norm_hapmax_all = numeric(uselen_all)
norm_hapmin_all = numeric(uselen_all)
tum_hapmax_all = numeric(uselen_all)
tum_hapmin_all = numeric(uselen_all)
i = 1
while (i <= uselen_all) {
  norm_hapmax_all[i] = max(r_table$norm_hap1_unique[i],r_table$norm_hap2_unique[i])
  norm_hapmin_all[i] = min(r_table$norm_hap1_unique[i],r_table$norm_hap2_unique[i])
  tum_hapmax_all[i] = max(r_table$tum_hap1_unique[i],r_table$tum_hap2_unique[i])
  tum_hapmin_all[i] = min(r_table$tum_hap1_unique[i],r_table$tum_hap2_unique[i])
  i = i+1
}

norm_hapmax_all_sum = sum(norm_hapmax_all)
norm_hapmin_all_sum = sum(norm_hapmin_all)
tum_hapmax_all_sum = sum(tum_hapmax_all)
tum_hapmin_all_sum = sum(tum_hapmin_all)
norm_hap_all_comb_sum = norm_hapmax_all_sum + norm_hapmin_all_sum
tum_hap_all_comb_sum = tum_hapmax_all_sum + tum_hapmin_all_sum
phased_het_total = sum(r_table$phased_het)
norm_hapmax_mean_barcodes_per_snp = norm_hapmax_all_sum / phased_het_total
norm_hapmin_mean_barcodes_per_snp = norm_hapmin_all_sum / phased_het_total
tum_hapmax_mean_barcodes_per_snp = tum_hapmax_all_sum / phased_het_total
tum_hapmin_mean_barcodes_per_snp = tum_hapmin_all_sum / phased_het_total
END_R_NORMALIZER

    # Kept as a quoted string because the emitted line ends in a space.
    $r_normalizer .= "tum_vs_norm_cov_all  = norm_hap_all_comb_sum / tum_hap_all_comb_sum \n";

    for my $fh (\*OUT1, \*OUT2) {
        print {$fh} $r_normalizer;
    }
}



# Per-chromosome statistics, written identically to both arm scripts: major /
# minor haplotype barcode counts per block, their difference per phased het,
# the sums, the chromosome-level normal/tumor ratio and the per-block
# quotients (tumor quotients are scaled by that ratio).
{
    my $c   = "c${chrom}";          # chromosome suffix used in R names
    my $hap = "pat_c${chrom}_hap";  # R name of the filtered block table

    my $r_code = join '',
        "diff_ylim_max = .005\n",
        "\n",
        "\n",
        "\n",
        "uselen_${c} = length(${hap}\$beg_pos)\n",
        "norm_hapmax_${c} = numeric(uselen_${c})\n",
        "norm_hapmin_${c} = numeric(uselen_${c})\n",
        "tum_hapmax_${c} = numeric(uselen_${c})\n",
        "tum_hapmin_${c} = numeric(uselen_${c})\n",
        "\n",
        "j = 1\n",
        "while (j <= uselen_${c}) {\n",
        "  norm_hapmax_${c}" . "[j] = max(${hap}\$norm_hap1_unique[j],${hap}\$norm_hap2_unique[j])\n",
        "  norm_hapmin_${c}" . "[j] = min(${hap}\$norm_hap1_unique[j],${hap}\$norm_hap2_unique[j])\n",
        "  tum_hapmax_${c}" . "[j] = max(${hap}\$tum_hap1_unique[j],${hap}\$tum_hap2_unique[j])\n",
        "  tum_hapmin_${c}" . "[j] = min(${hap}\$tum_hap1_unique[j],${hap}\$tum_hap2_unique[j])\n",
        "  j = j+1\n",
        "}\n",
        "use_mean_all = mean(c(norm_hapmax_${c},tum_hapmax_${c})) + 1000\n",
        "\n",
        "norm_rat = (norm_hapmax_${c} - norm_hapmin_${c})/${hap}\$phased_het\n",
        "tum_rat = (tum_hapmax_${c} - tum_hapmin_${c})/${hap}\$phased_het\n",
        "gen_ylim = max(c(norm_rat,tum_rat)) * 1.1\n",
        "\n",
        "use_max = max(norm_hapmax_${c},tum_hapmax_${c}) + 1000\n",
        "use_max_rat = max(norm_rat,tum_rat)\n",
        "\n",
        "\n",
        "norm_hapmax_${c}_sum = sum(norm_hapmax_${c})\n",
        "norm_hapmin_${c}_sum = sum(norm_hapmin_${c})\n",
        "tum_hapmax_${c}_sum = sum(tum_hapmax_${c})\n",
        "tum_hapmin_${c}_sum = sum(tum_hapmin_${c})\n",
        "\n",
        "\n",
        "norm_hap_${c}_comb_sum = norm_hapmax_${c}_sum + norm_hapmin_${c}_sum\n",
        "tum_hap_${c}_comb_sum = tum_hapmax_${c}_sum + tum_hapmin_${c}_sum\n",
        "\n",
        "tum_vs_norm_cov  = norm_hap_${c}_comb_sum / tum_hap_${c}_comb_sum \n",
        "\n",
        "norm_hapmax_${c}_quot = norm_hapmax_${c} / ${hap}\$phased_het\n",
        "norm_hapmin_${c}_quot = norm_hapmin_${c} / ${hap}\$phased_het\n",
        "tum_hapmax_${c}_quot = tum_hapmax_${c} / ${hap}\$phased_het * tum_vs_norm_cov \n",
        "tum_hapmin_${c}_quot = tum_hapmin_${c} / ${hap}\$phased_het * tum_vs_norm_cov \n",
        "mean_quot_gen = mean(c(norm_hapmax_${c}_quot,tum_hapmax_${c}_quot))\n";

    for my $fh (\*OUT1, \*OUT2) {
        print {$fh} $r_code;
    }
}


# Chromosome-level maximum quotient, weighted means (barcode sum divided by
# total phased hets; tumor values scaled by tum_vs_norm_cov) and the group
# labels for the density plot.
#
# All four weighted means go to BOTH arm scripts.  Previously the tumor
# major-haplotype mean was written only to the p-arm script and the tumor
# minor-haplotype mean only to the q-arm script, so each script was missing
# one of the two definitions.
{
    my $c       = "c${chrom}";
    my $het_sum = "sum(pat_${c}_hap\$phased_het)";

    my $r_code = join '',
        "\n",
        "use_max_quot = max(norm_hapmax_${c}_quot,tum_hapmax_${c}_quot)\n",
        "\n",
        "norm_hapmax_${c}_wt_mean = norm_hapmax_${c}_sum / ${het_sum}\n",
        "norm_hapmin_${c}_wt_mean = norm_hapmin_${c}_sum / ${het_sum}\n",
        "tum_hapmax_${c}_wt_mean = tum_hapmax_${c}_sum / ${het_sum} * tum_vs_norm_cov \n",
        "tum_hapmin_${c}_wt_mean = tum_hapmin_${c}_sum / ${het_sum} * tum_vs_norm_cov \n",
        "\n",
        "\n",
        # density plot for quotients
        "dnames = rep(c(\"normal major hap.\", \"normal minor hap.\", \"tumor major hap.\", \"tumor minor hap.\"), each = uselen_${c})\n";

    for my $fh (\*OUT1, \*OUT2) {
        print {$fh} $r_code;
    }
}

# now break up by arms 

# P-arm analysis, written to the p-arm script (OUT1) only, and only for
# chromosomes flagged as having a p-arm.  The generated R:
#   1. subsets the chromosome table to blocks starting before p_end,
#   2. recomputes sums / quotients / per-SNP averages on that subset,
#   3. draws normal, tumor and ratio segment plots, each paired with a
#      flipped density plot, and saves them to one PNG.
if ($there_is_a_p_arm) {

    # Left edge of the x axis for the three position plots.  With the
    # p-arm flags as defined in this script chr21 never reaches this block,
    # so this evaluates to "0"; the chr21 value is kept for anyone who
    # re-enables that chromosome by hand.
    my $p_xlim_start = ($chrom == 21) ? "9411191" : "0";

    print OUT1 "#-------------------  P-arm   --------------------------------#\n\n";

    print OUT1 "#------------------- HAPLOTYPES P-arm QUOTIENT   --------------------------------#\n\n";

    # first calculate the regions to use
    # NOTE: the [1:uselen] slices below rely on the p-arm blocks being the
    # first rows of the chromosome table.

    print OUT1 "pat_c$chrom" . "p_hap = subset(pat_c$chrom" . "_hap,pat_c$chrom" . "_hap\$beg_pos < p_end[$chrom]) \n";
    print OUT1 "uselen_c$chrom" . "p = length(pat_c$chrom" . "p_hap\$beg_pos)\n";

    print OUT1 "norm_hapmax_c$chrom" . "p = norm_hapmax_c$chrom" . "[1:uselen_c$chrom" . "p]\n";
    print OUT1 "norm_hapmin_c$chrom" . "p = norm_hapmin_c$chrom" . "[1:uselen_c$chrom" . "p]\n";
    print OUT1 "tum_hapmax_c$chrom" . "p = tum_hapmax_c$chrom" . "[1:uselen_c$chrom" . "p]\n";
    print OUT1 "tum_hapmin_c$chrom" . "p = tum_hapmin_c$chrom" . "[1:uselen_c$chrom" . "p]\n";
    print OUT1 "use_mean_p = mean(c(norm_hapmax_c$chrom" . "p,tum_hapmax_c$chrom" . "p)) \n";
    print OUT1 "use_max_p = max(norm_hapmax_c$chrom" . "p,tum_hapmax_c$chrom" . "p) + 1000\n";

    print OUT1 "normp_rat = (norm_hapmax_c$chrom" . "p - norm_hapmin_c$chrom" . "p)/pat_c$chrom" . "p_hap\$phased_het\n";
    print OUT1 "tump_rat = (tum_hapmax_c$chrom" . "p - tum_hapmin_c$chrom" . "p)/pat_c$chrom" . "p_hap\$phased_het\n";
    print OUT1 "p_ylim = max(c(normp_rat,tump_rat)) * 1.1\n";
    print OUT1 "use_max_rat_p = max(normp_rat,tump_rat)\n";

    print OUT1 "\n";
    print OUT1 "norm_hapmax_c$chrom" . "p_sum = sum(norm_hapmax_c$chrom" . "p)\n";
    print OUT1 "norm_hapmin_c$chrom" . "p_sum = sum(norm_hapmin_c$chrom" . "p)\n";
    print OUT1 "tum_hapmax_c$chrom" . "p_sum = sum(tum_hapmax_c$chrom" . "p)\n";
    print OUT1 "tum_hapmin_c$chrom" . "p_sum = sum(tum_hapmin_c$chrom" . "p)\n";
    print OUT1 "\n";
    print OUT1 "norm_hap_c$chrom" . "p_comb_sum = norm_hapmax_c$chrom" . "p_sum + norm_hapmin_c$chrom" . "p_sum\n";
    print OUT1 "tum_hap_c$chrom" . "p_comb_sum = tum_hapmax_c$chrom" . "p_sum + tum_hapmin_c$chrom" . "p_sum\n";
    print OUT1 "\n";
    print OUT1 "tum_vs_norm_cov_p = norm_hap_c$chrom" . "p_comb_sum /tum_hap_c$chrom" . "p_comb_sum  \n";
    print OUT1 "\n";
    print OUT1 "norm_hapmax_c$chrom" . "p_quot =  norm_hapmax_c$chrom" . "p / pat_c$chrom" . "p_hap\$phased_het\n";
    print OUT1 "norm_hapmin_c$chrom" . "p_quot =  norm_hapmin_c$chrom" . "p / pat_c$chrom" . "p_hap\$phased_het \n";
    print OUT1 "tum_hapmax_c$chrom" . "p_quot =  tum_hapmax_c$chrom" . "p / pat_c$chrom" . "p_hap\$phased_het * tum_vs_norm_cov_p\n";
    print OUT1 "tum_hapmin_c$chrom" . "p_quot =  tum_hapmin_c$chrom" . "p / pat_c$chrom" . "p_hap\$phased_het * tum_vs_norm_cov_p\n";
    print OUT1 "mean_quot_p = mean(c(norm_hapmax_c$chrom" . "p_quot,tum_hapmax_c$chrom" . "p_quot))\n";
    print OUT1 "use_max_quot_p = max(norm_hapmax_c$chrom" . "p_quot,tum_hapmax_c$chrom" . "p_quot)\n";

    print OUT1 "\n";

    #-------------- average barcodes / snp on this chr

    print OUT1 "c$chrom" . "p_phased_het_total = sum(pat_c$chrom" . "p_hap\$phased_het)\n";
    print OUT1 "c$chrom" . "p_norm_hapmax_mean_barcodes_per_snp = norm_hapmax_c$chrom" . "p_sum / c$chrom" . "p_phased_het_total\n";
    print OUT1 "c$chrom" . "p_norm_hapmin_mean_barcodes_per_snp = norm_hapmin_c$chrom" . "p_sum / c$chrom" . "p_phased_het_total\n";
    print OUT1 "c$chrom" . "p_tum_hapmax_mean_barcodes_per_snp = tum_hapmax_c$chrom" . "p_sum / c$chrom" . "p_phased_het_total\n";
    print OUT1 "c$chrom" . "p_tum_hapmin_mean_barcodes_per_snp = tum_hapmin_c$chrom" . "p_sum / c$chrom" . "p_phased_het_total\n";

    #-------------- end of average barcodes / snp on this chr

    print OUT1 "#------------------------------ P-ARM PICTURES --------------#\n";

    # Normal: major and minor haplotype quotients as horizontal segments
    print OUT1 "#Normal\n";
    print OUT1 "dfp = data.frame(pat_c$chrom" ."p_hap\$beg_pos,norm_hapmax_c$chrom" ."p_quot,pat_c$chrom" ."p_hap\$end_pos,norm_hapmax_c$chrom" ."p_quot)\n";
    print OUT1 "g1p = ggplot(dfp, aes(`chr $chrom" ."p position`,`total barcodes by haplotype/snv density`), show.legend=NA) + geom_segment(aes(x=pat_c$chrom" ."p_hap.beg_pos,y=norm_hapmax_c$chrom" ."p_quot,xend=pat_c$chrom" ."p_hap.end_pos,yend=norm_hapmax_c$chrom" ."p_quot), size=1, colour = cbbPalette[3]) + xlim(c($p_xlim_start,p_end[$chrom])) + ylim(c(0,arb_height_p)) + theme_bw() + theme(panel.grid=element_blank())\n";
    print OUT1 "g2p = g1p + xlab(\"\") + ylab(\"\") + geom_segment(aes(x=pat_c$chrom" ."p_hap.beg_pos,y=norm_hapmin_c$chrom" ."p_quot,xend=pat_c$chrom" ."p_hap.end_pos,yend=norm_hapmin_c$chrom" ."p_quot), size=1, colour=cbbPalette[6])\n";
    print OUT1 "g3pn = g2p + theme(legend.position=\"none\") + theme_classic()\n\n";

    # Tumor: same layout.  The y limit uses arb_height_p (it was arb_height)
    # so this panel matches the normal panel and the flipped density panels,
    # all of which are scaled with arb_height_p.
    print OUT1 "#Tumor\n";
    print OUT1 "dfp = data.frame(pat_c$chrom" ."p_hap\$beg_pos,tum_hapmax_c$chrom" ."p_quot,pat_c$chrom" ."p_hap\$end_pos,tum_hapmax_c$chrom" ."p_quot)\n";
    print OUT1 "g1p = ggplot(dfp, show.legend=NA) + xlab(\"chr" . $chrom . " pos\") + ylab(\"\") + geom_segment(aes(x=pat_c$chrom" ."p_hap.beg_pos,y=tum_hapmax_c$chrom" ."p_quot,xend=pat_c$chrom" ."p_hap.end_pos,yend=tum_hapmax_c$chrom" ."p_quot), size=1, colour = cbbPalette[1]) + xlim(c($p_xlim_start,p_end[$chrom])) + ylim(c(0,arb_height_p)) + theme_bw() + theme(panel.grid=element_blank())\n";
    print OUT1 "g2p = g1p + xlab(\"\") + ylab(\"\") + geom_segment(aes(x=pat_c$chrom" ."p_hap.beg_pos,y=tum_hapmin_c$chrom" ."p_quot,xend=pat_c$chrom" ."p_hap.end_pos,yend=tum_hapmin_c$chrom" ."p_quot), size=1, colour=cbbPalette[8])\n";
    print OUT1 "g3pt = g2p  + theme(legend.position=\"none\") + theme_classic()\n\n";

    # density plot for quotients
    print OUT1 "#--------- Density Plots\n";

    print OUT1 "dnames = rep(c(\"Major hap.\", \"minor hap.\", \"Major hap.\", \"minor hap.\"), each = uselen_c$chrom" . "p)\n";

    # Normal haplotype quotients
    print OUT1 "# Normal\n";
    print OUT1 "haps_N <- data.frame(dens = c(norm_hapmax_c$chrom" . "p_quot,norm_hapmin_c$chrom" . "p_quot), Normal = dnames)\n";
    print OUT1 "haps_N_plot <- ggplot(haps_N, aes(x = dens, fill = Normal),colour=Normal) + xlab(\"\") + geom_density(alpha = .6) + scale_fill_manual(values=c(cbbPalette[3],cbbPalette[6])) + xlim(c(0,arb_height_p)) + ylim(c(0,density_lim)) + theme_bw() + theme(panel.grid=element_blank()) + ylab(\"\")\n";
    print OUT1 "hap2pn <- last_plot() + xlab(\"\") + ylab(\"\") + coord_flip() + theme_classic() + theme(legend.position=c(.7,.8), legend.text = element_text(size=10), legend.title = element_text(size=10, face=\"bold\"))\n";
    print OUT1 "\n";

    # Tumor haplotype quotients
    print OUT1 "# Tumor\n";
    print OUT1 "haps_T <- data.frame(dens = c(tum_hapmax_c$chrom" . "p_quot,tum_hapmin_c$chrom" . "p_quot), Malignant = dnames)\n";
    print OUT1 "haps_T_plot <- ggplot(haps_T, aes(x = dens, fill = Malignant),colour=Malignant) +  xlab(\"\") + geom_density(alpha = .6) + scale_fill_manual(values=c(cbbPalette[1],cbbPalette[8])) + xlim(c(0,arb_height_p)) + ylim(c(0,density_lim)) + theme_bw() + theme(panel.grid=element_blank())\n";
    print OUT1 "hap2pt <- last_plot() + coord_flip() + theme_classic() + theme(legend.position=c(.7,.8), legend.text = element_text(size=10), legend.title = element_text(size=10, face=\"bold\"))\n";
    print OUT1 "\n";

    print OUT1 "\n#------------------- END OF HAPLOTYPE P-arm QUOTIENTS --------------------------------\n\n";

    print OUT1 "#-------------------  P-arm HAPLOTYPES RATIO  --------------------------------#\n\n";

    # Ratio plot: (major - minor) / phased hets, normal and tumor overlaid
    print OUT1 "#P Ratios\n";
    print OUT1 "dfp = data.frame(pat_c$chrom" ."p_hap\$beg_pos,normp_rat,pat_c$chrom" ."p_hap\$end_pos,tump_rat)\n";
    print OUT1 "g1p = ggplot(dfp, aes(`chr $chrom" ."p position`,`total barcodes by haplotype/snv density`), show.legend=NA) + geom_segment(aes(x=pat_c$chrom" ."p_hap.beg_pos,y=normp_rat,xend=pat_c$chrom" ."p_hap.end_pos,yend=normp_rat), size=1, colour = medianPalette[1]) + xlim(c($p_xlim_start,p_end[$chrom])) + ylim(c(-.0001,use_max_rat)) + theme_bw() + theme(panel.grid=element_blank())\n";
    print OUT1 "g2p = g1p + geom_segment(aes(x=pat_c$chrom" ."p_hap.beg_pos,y=tump_rat,xend=pat_c$chrom" ."p_hap.end_pos,yend=tump_rat), size=1, colour=medianPalette[3])\n";
    print OUT1 "g3pr = g2p  + theme(legend.position=\"none\") + theme_classic()\n\n";

    #print OUT1 "paintCytobands($chrom" . ",pos=c(0,use_max_rat_p),units = c(\"bases\"), width = use_max_rat_p*.0625, bands=\"major\", orientation = c(\"h\"), legend=T)\n";

    print OUT1 "dnames = rep(c(\"1 norm\", \"2 malig\"), each = uselen_c$chrom" . "p)\n";
    print OUT1 "haps <- data.frame(dens = c(normp_rat, tump_rat), group = dnames)\n";
    print OUT1 "haps_plot <- ggplot(haps, aes(x = dens, fill = group),colour=dnames) +  geom_density(alpha = .6) + xlab(\"normalized barcode count difference\")  + scale_fill_manual(values=c(medianPalette[1],medianPalette[3])) + xlim(c(0,use_max_rat_p)) + theme_bw() + theme(panel.grid=element_blank())\n";
    print OUT1 "hap2pr <- last_plot() + coord_flip() + theme_classic() + theme(legend.position=c(.8,.8), legend.text = element_text(size=10), legend.title = element_text(size=10, face=\"bold\"))\n";
    print OUT1 "\n";

    # combined plot: three rows (normal, tumor, ratio), two columns
    print OUT1 "png(\"$patient" . "_chr$chrom" . "p_NT_barcode_blocks_quotients.$phets" . "h.png\")\n";
    print OUT1 "grid.arrange(g3pn, hap2pn, g3pt, hap2pt, g3pr, hap2pr, nrow=3,ncol=2, heights=c(0.3,0.3,0.3))\n";
    print OUT1 "dev.off()\n";
    print OUT1 "\n";

    print OUT1 "#------------------- END OF P-arm HAPLOTYPES RATIO  --------------------------------#\n\n";

    print OUT1 "#------------------- HAPLOTYPES END OF P-arm QUOTIENT   --------------------------------#\n\n";

} # end of if there is a p-arm

# ---- Q-arm haplotype bookkeeping -------------------------------------------
# Writes to the q-arm R script (OUT2) the statements that select the q-arm
# rows of the haplotype table and derive the ratios, sums and quotients used
# by the plots and tests emitted later.  Only $chrom is interpolated by Perl;
# "\$" keeps R's column accessor literal in the generated text.
{
    my $whole  = "c${chrom}";     # suffix of the whole-chromosome R objects
    my $arm    = "c${chrom}q";    # suffix of the q-arm R objects
    my $hets   = "pat_${arm}_hap\$phased_het";
    my @series = qw(norm_hapmax norm_hapmin tum_hapmax tum_hapmin);

    print OUT2 "#------------------- HAPLOTYPES Q-arm  --------------------------------#\n\n";

    # The generated R keeps rows whose beg_pos exceeds q_beg[chrom], then
    # slices elements beg_<arm>..uselen_<whole> out of each per-chromosome vector.
    print OUT2 $_ for (
        "pat_${arm}_hap = subset(pat_${whole}_hap,pat_${whole}_hap\$beg_pos > q_beg[${chrom}])\n",
        "uselen_${arm} = length(pat_${arm}_hap\$beg_pos)\n",
        "beg_${arm} = uselen_${whole} - uselen_${arm} + 1\n",
    );
    for my $name (@series) {
        print OUT2 "${name}_${arm} = ${name}_${whole}" . "[beg_${arm}:uselen_${whole}" . "]\n";
    }

    # Axis helpers, then (major - minor) / phased_het per block for each sample.
    print OUT2 $_ for (
        "use_mean_q = mean(c(norm_hapmax_${arm},tum_hapmax_${arm})) + 1000\n",
        "use_max_q = max(norm_hapmax_${arm},tum_hapmax_${arm}) + 1000\n",
        "\n",
        "\n",
        "normq_rat = (norm_hapmax_${arm} - norm_hapmin_${arm})/${hets}\n",
        "tumq_rat = (tum_hapmax_${arm} - tum_hapmin_${arm})/${hets}\n",
        "q_ylim = max(c(normq_rat,tumq_rat)) * 1.1\n",
        "use_max_rat_q = max(normq_rat,tumq_rat)\n",
        "\n",
    );

    # Per-series barcode totals, then the combined total per sample.
    for my $name (@series) {
        print OUT2 "${name}_${arm}_sum = sum(${name}_${arm})\n";
    }
    print OUT2 "\n";
    for my $sample (qw(norm tum)) {
        print OUT2 "${sample}_hap_${arm}_comb_sum = ${sample}_hapmax_${arm}_sum + ${sample}_hapmin_${arm}_sum\n";
    }
    print OUT2 "\n";

    # Scaling factor = normal total / tumor total; applied to tumor quotients only.
    print OUT2 "tum_vs_norm_cov_q  = norm_hap_${arm}_comb_sum / tum_hap_${arm}_comb_sum\n";
    print OUT2 "\n";

    print OUT2 $_ for (
        "norm_hapmax_${arm}_quot =  norm_hapmax_${arm} / ${hets}\n",
        "norm_hapmin_${arm}_quot =  norm_hapmin_${arm} / ${hets} \n",
        "tum_hapmax_${arm}_quot =  tum_hapmax_${arm} / ${hets} * tum_vs_norm_cov_q\n",
        "tum_hapmin_${arm}_quot =  tum_hapmin_${arm} / ${hets} * tum_vs_norm_cov_q\n",
        "use_max_quot_q = max(norm_hapmax_${arm}_quot,tum_hapmax_${arm}_quot)\n",
        "mean_quot_q = mean(c(norm_hapmax_${arm}_quot,tum_hapmax_${arm}_quot))\n",
        "\n",
    );
}

#-------------- average barcodes / snp on this chr
# Generated R: total phased hets on the q-arm, then each series' barcode sum
# divided by that total.
print OUT2 "c${chrom}q_phased_het_total = sum(pat_c${chrom}q_hap\$phased_het)\n";
foreach my $series (qw(norm_hapmax norm_hapmin tum_hapmax tum_hapmin)) {
    print OUT2 "c${chrom}q_${series}_mean_barcodes_per_snp = ${series}_c${chrom}q_sum / c${chrom}q_phased_het_total\n";
}
#-------------- end of average barcodes / snp on this chr



print OUT2 "#------------------- HAPLOTYPES Q-arm QUOTIENT   --------------------------------#\n\n";

print OUT2 "#--------------------------- Q-arm FIGURES ----------------------#\n";

# One segment plot per sample: the major-haplotype quotient is drawn first and
# the minor-haplotype quotient is layered on top.  Each row below gives
# (R comment, sample prefix, cbbPalette index for major, for minor, final plot name).
# NOTE(review): ylim here uses arb_height_p, whereas the q-arm density panels
# in this same generated script use arb_height -- confirm that is intended.
for my $spec (
    [ "Normal", "norm", 3, 6, "g3qn" ],
    [ "Tumor",  "tum",  1, 8, "g3qt" ],
) {
    my ($title, $sample, $major_idx, $minor_idx, $plot_name) = @{$spec};

    # data.frame() turns "pat_cN_hap$beg_pos" into the column "pat_cN_hap.beg_pos"
    my $beg_col = "pat_c${chrom}q_hap.beg_pos";
    my $end_col = "pat_c${chrom}q_hap.end_pos";
    my $major   = "${sample}_hapmax_c${chrom}q_quot";
    my $minor   = "${sample}_hapmin_c${chrom}q_quot";

    print OUT2 "#${title}\n";
    print OUT2 "dfq = data.frame(pat_c${chrom}q_hap\$beg_pos,${major},pat_c${chrom}q_hap\$end_pos,${major})\n";
    print OUT2 "g1q = ggplot(dfq, aes(`chr ${chrom}q position`,`total barcodes by haplotype/snv density`), show.legend=NA)"
        . " + geom_segment(aes(x=${beg_col},y=${major},xend=${end_col},yend=${major}), size=1, colour = cbbPalette[${major_idx}])"
        . " + xlim(c(q_beg[${chrom}],chr_size[${chrom}])) + ylim(c(0,arb_height_p)) + theme_bw() + theme(panel.grid=element_blank())\n";
    print OUT2 "g2q = g1q + xlab(\"\") + ylab(\"\")"
        . " + geom_segment(aes(x=${beg_col},y=${minor},xend=${end_col},yend=${minor}), size=1, colour=cbbPalette[${minor_idx}])\n";
    print OUT2 "${plot_name} = g2q + theme(legend.position=\"none\") + theme_classic()\n\n";
}


# density plot for quotients
# Emits, for the normal and then the tumor sample, an R density plot of the
# major/minor haplotype quotients on the q-arm, flipped so the quotient axis
# runs vertically.  The two variants share their axis limits and legend
# styling; those shared fragments are factored out below.
{
    my $arm         = "c${chrom}q";
    my $dnames_stmt = "dnames = rep(c(\"Major hap.\", \"minor hap.\"), each = uselen_${arm})\n";
    my $limits      = " + xlim(c(0,arb_height)) + theme_bw() + theme(panel.grid=element_blank()) + ylim(c(0,density_lim))\n";
    my $flipped     = "coord_flip() + theme_classic() + theme(legend.position=c(.8,.8), legend.text = element_text(size=10), legend.title = element_text(size=10, face=\"bold\"))\n";

    # Normal
    print OUT2 $_ for (
        "#Normal\n",
        $dnames_stmt,
        "haps <- data.frame(dens = c(norm_hapmax_${arm}_quot,norm_hapmin_${arm}_quot), Normal = dnames)\n",
        "haps_plot <- ggplot(haps, aes(x = dens, fill = Normal),colour=Normal) +  geom_density(alpha = .6) + xlab(\"\") + scale_fill_manual(values=c(cbbPalette[3],cbbPalette[6]))" . $limits,
        "hap2qn <- last_plot() + xlab(\"\") + ylab(\"\") + " . $flipped,
        "\n",
    );

    # Tumor
    print OUT2 $_ for (
        "#Tumor\n",
        $dnames_stmt,
        "haps_T <- data.frame(dens = c(tum_hapmax_${arm}_quot,tum_hapmin_${arm}_quot), Malignant = dnames)\n",
        "haps_T_plot <- ggplot(haps_T, aes(x = dens, fill = Malignant),colour=Malignant) + geom_density(alpha = .6) + xlab(\"\") + ylab(\"\") + scale_fill_manual(values=c(cbbPalette[1],cbbPalette[8]))" . $limits,
        "hap2qt <- last_plot() + xlab(\"\") + " . $flipped,
        "\n",
    );

    #--------- end of density plots
    print OUT2 "#----------- End of Density pieces -------------#\n";

    print OUT2 "\n#------------------- END OF Q-arm HAPLOTYPE QUOTIENTS --------------------------------\n\n";
}

print OUT2 "#------------------- Q-arm HAPLOTYPES RATIO  -------------------------------#\n\n";

# Q-arm ratio figures: a segment plot of normq_rat with tumq_rat layered on
# top, followed by a flipped density plot of the same two ratio vectors.
{
    my $hap    = "pat_c${chrom}q_hap";
    my $x_from = "${hap}.beg_pos";    # column names as data.frame() renders them
    my $x_to   = "${hap}.end_pos";

    print OUT2 "#Q Ratios\n";
    print OUT2 "dfq = data.frame(${hap}\$beg_pos,normq_rat,${hap}\$end_pos,tumq_rat)\n";
    print OUT2 "g1q = ggplot(dfq, aes(`chr ${chrom}q position`,`total barcodes by haplotype/snv density`), show.legend=NA)"
        . " + geom_segment(aes(x=${x_from},y=normq_rat,xend=${x_to},yend=normq_rat), size=1, colour = medianPalette[1])"
        . " + xlim(c(q_beg[${chrom}],chr_size[${chrom}])) + ylim(c(-.0001,use_max_rat_q)) + xlab(\"\") + theme_bw() + theme(panel.grid=element_blank())\n";
    print OUT2 "g2q = g1q + geom_segment(aes(x=${x_from},y=tumq_rat,xend=${x_to},yend=tumq_rat), size=1, colour=medianPalette[3])\n";
    print OUT2 "g3qr = g2q  + theme(legend.position=\"none\") + theme_classic()\n\n";

    print OUT2 $_ for (
        "dnames = rep(c(\"1 norm\", \"2 malig\"), each = uselen_c${chrom}q)\n",
        "haps <- data.frame(dens = c(normq_rat, tumq_rat), group = dnames)\n",
        "haps_plot <- ggplot(haps, aes(x = dens, fill = group),colour=dnames) +  geom_density(alpha = .6) + xlab(\"\") + scale_fill_manual(values=c(medianPalette[1],medianPalette[3])) + xlim(c(0,use_max_rat_q)) + theme_bw() + theme(panel.grid=element_blank())\n",
        "hap2qr <- last_plot() + coord_flip() + theme_classic() + theme(legend.position=c(.8,.8), legend.text = element_text(size=10), legend.title = element_text(size=10, face=\"bold\"))\n",
        "\n",
    );
}

# combined plot
# The generated R opens a PNG named after the patient, chromosome arm and
# phased-het threshold, lays the six q-arm panels out on a 3x2 grid, and
# closes the device.  The q-arm section end banners follow.
{
    my $png_name = "${patient}_chr${chrom}q_NT_barcode_blocks_quotients.${phets}h.png";

    print OUT2 $_ for (
        "png(\"${png_name}\")\n",
        "grid.arrange(g3qn, hap2qn, g3qt, hap2qt, g3qr, hap2qr, nrow=3,ncol=2, heights=c(0.3,0.3,0.3))\n",
        "dev.off()\n",
        "\n",
        "#------------------- END OF Q-arm HAPLOTYPES RATIO  --------------------------------#\n\n",
        "#--------------------------- END OF Q-arm FIGURES ----------------------#\n",
        "#--------------------------- END OF Q-arm  ----------------------#\n",
    );
}

# t-TEST section.  Both generated scripts receive the section banner; the
# one-sided t.test call (tumor ratios greater than normal ratios) is written
# to the p-arm script only when a p-arm exists, and always to the q-arm script.
for my $script ([ \*OUT1, "p", $there_is_a_p_arm ], [ \*OUT2, "q", 1 ]) {
    my ($fh, $arm, $has_arm) = @{$script};

    print {$fh} "\n#------------------------ t-TEST ---------------------------#\n\n";
    next unless $has_arm;

    print {$fh} "\n";
    print {$fh} "t.test(tum${arm}_rat,norm${arm}_rat,alternative=\"greater\")\n";
}


# ---- threshold_info: section banner (both scripts) and the p-arm body ------
# The banner is written to both generated R scripts.  The p-arm body is only
# written when a p-arm exists.  It makes the generated R:
#   * compute a cutoff = mean(normal ratios) + sd_multiplier * sd(normal ratios)
#   * flag (1/0) every block whose tumor ratio exceeds that cutoff
#   * write the flags with block coordinates to <patient>_chr<N>p_thresh_info_tum.txt
#   * print normal and scaled tumor mean quotients plus each tumor share of their sum
print OUT1 "\n#------------------------ threshold_info ---------------------------#\n\n";
print OUT2 "\n#------------------------ threshold_info ---------------------------#\n\n";

if ($there_is_a_p_arm) {
    print OUT1 "# thresholds\n";
    print OUT1 "normp_rat_test = mean(normp_rat) + sd_multiplier * sd(normp_rat)\n";
    print OUT1 "# loop through ratios, printing all and adding a marker for whether above threshold or not\n";
    # numeric(n) starts as all zeros, so only the above-threshold blocks are set below
    print OUT1 "tump_thresh_info = numeric(length(pat_c$chrom" . "p_hap\$beg_pos))\n";

    print OUT1 "k = 1\n";
    print OUT1 "p_threshold_loop_len = length(tump_rat)\n";
    print OUT1 "while (k <= p_threshold_loop_len) {\n";
    print OUT1 "  if (tump_rat[k] > normp_rat_test) {\n";
    print OUT1 "      tump_thresh_info[k] = 1\n";
    print OUT1 "  }\n";
    print OUT1 "  k = k + 1\n";
    print OUT1 "}\n\n";

    print OUT1 "# p-arm\n";
    print OUT1 "write.table(data.frame(pat_c$chrom" . "p_hap\$chr,pat_c$chrom" . "p_hap\$beg_pos,pat_c$chrom" . "p_hap\$end_pos,tump_thresh_info),\"$patient" . "_chr$chrom" . "p_thresh_info_tum.txt\")\n";

    print OUT1 "# try normalizing by TOTAL of all -- not factors\n";
    print OUT1 "tump_hapmax_mean_norm = mean(tum_hapmax_c$chrom" . "p_quot) * tum_vs_norm_cov_all\n";
    print OUT1 "tump_hapmin_mean_norm = mean(tum_hapmin_c$chrom" . "p_quot) * tum_vs_norm_cov_all\n";
    print OUT1 "tump_mean_sum = tump_hapmax_mean_norm +  tump_hapmin_mean_norm\n\n";

    # Exactly four values follow (norm max, norm min, tum max, tum min).  The
    # label previously advertised six (tum1/tum2), which mislabelled the output;
    # it now matches the four printed values and the q-arm script's label.
    print OUT1 "print(\"these are the mean values for calculating barcode-based CNVs:  norm max - norm min - tum max - tum min\")\n";
    print OUT1 "print(mean(norm_hapmax_c$chrom" . "p_quot))\n";
    print OUT1 "print(mean(norm_hapmin_c$chrom" . "p_quot))\n";
    print OUT1 "print(tump_hapmax_mean_norm)\n";
    print OUT1 "print(tump_hapmin_mean_norm)\n\n";

    print OUT1 "print(\"these are the ratios of each non-normal to the total:\")\n";
    print OUT1 "print(tump_hapmax_mean_norm/tump_mean_sum)\n";
    print OUT1 "print(tump_hapmin_mean_norm/tump_mean_sum)\n";

}



# ---- q-arm threshold flags and summary statistics --------------------------
# Generated R: compute a cutoff of mean + sd_multiplier * sd over the normal
# ratios, flag each block whose tumor ratio exceeds it, write the flags with
# the block coordinates to a text file, then print the mean quotients and the
# share of each tumor haplotype in their sum.
{
    my $hap = "pat_c${chrom}q_hap";

    print OUT2 "# thresholds\n";
    print OUT2 "normq_rat_test = mean(normq_rat) + sd_multiplier * sd(normq_rat)\n";
    print OUT2 "# loop through ratios, printing all and adding a marker for whether above threshold or not\n";
    print OUT2 "tumq_thresh_info = numeric(length(${hap}\$beg_pos))\n";

    # Fixed R text with nothing to interpolate, hence the single-quoted heredoc.
    print OUT2 <<'END_OF_R_LOOP';
k = 1
q_threshold_loop_len = length(tumq_rat)
while (k <= q_threshold_loop_len) {
  if (tumq_rat[k] > normq_rat_test) {
      tumq_thresh_info[k] = 1
  }
  k = k + 1
}
END_OF_R_LOOP

    print OUT2 "# q-arm\n";
    print OUT2 "write.table(data.frame(${hap}\$chr,${hap}\$beg_pos,${hap}\$end_pos,tumq_thresh_info),\"${patient}_chr${chrom}q_thresh_info_tum.txt\")\n";

    print OUT2 "# try normalizing by TOTAL of all -- not factors\n";
    for my $kind (qw(hapmax hapmin)) {
        print OUT2 "tumq_${kind}_mean_norm = mean(tum_${kind}_c${chrom}q_quot) * tum_vs_norm_cov_all \n";
    }
    print OUT2 "tumq_mean_sum = tumq_hapmax_mean_norm +  tumq_hapmin_mean_norm\n\n";

    print OUT2 "print(\"these are the mean values for calculating barcode-based CNVs:  norm max - norm min - tum max - tum min\")\n";
    for my $kind (qw(hapmax hapmin)) {
        print OUT2 "print(mean(norm_${kind}_c${chrom}q_quot))\n";
    }
    print OUT2 "print(tumq_hapmax_mean_norm)\n";
    print OUT2 "print(tumq_hapmin_mean_norm)\n\n";

    print OUT2 "print(\"these are the ratios of each non-normal to the total:\")\n";
    for my $kind (qw(hapmax hapmin)) {
        print OUT2 "print(tumq_${kind}_mean_norm/tumq_mean_sum)\n";
    }
}


#-------------- barcode CNVs
# Both generated scripts get the opening and closing banners.  Between them,
# the R prints the overall and per-arm phased-het totals and, for each series,
# the per-arm mean barcodes per snp divided by the overall mean.  The p-arm
# script only receives that body when a p-arm exists.
for my $script ([ \*OUT1, "p", $there_is_a_p_arm ], [ \*OUT2, "q", 1 ]) {
    my ($fh, $arm, $has_arm) = @{$script};
    my $prefix = "c${chrom}${arm}";

    print {$fh} "#---------------- count -- chr ave vs toatal ave --------#\n\n";

    if ($has_arm) {
        print {$fh} "print(phased_het_total)\n";
        print {$fh} "print(${prefix}_phased_het_total)\n";
        for my $series (qw(norm_hapmax norm_hapmin tum_hapmax tum_hapmin)) {
            print {$fh} "print(${prefix}_${series}_mean_barcodes_per_snp / ${series}_mean_barcodes_per_snp)\n";
        }
        # only the q-arm script has a blank line after the last ratio
        print {$fh} "\n" if $arm eq "q";
    }

    print {$fh} "#---------------- END of count -- chr ave vs toatal ave --------#\n\n";
}

# Terminate both generated R scripts so R exits once it has run them.
print OUT1 "quit()\n";
print OUT2 "quit()\n";

# Output is buffered, so a write failure (full disk, I/O error) may only
# surface when the handle is closed.  Check close and stop with the system
# error rather than silently leaving a truncated R script behind.
close(OUT1) or die "can't finish writing $out1: $!\n";
close(OUT2) or die "can't finish writing $out2: $!\n";
