#!/usr/bin/env python3

###############################
#
# Copyright Stanford University 2018
# Author: John Bell
#
# This script converts a ttest file into a bed file for chrom arms
#  It assumes build 38 unless a build (37 or 38) is given as the second argument
# 
# INPUT: a ttest file (output of filter_t_test_vals.pl)
# OUTPUT:  a bed file 
# 
###############################

#------------------------------------- 	PROLEGOMENA --------------------------------#

# this script converts ttest list to bed file, where the interval is the whole arm
# 
# input:
# 1       p       2.999e-09
# 
# output (the end coordinate shown is the build 37 p-arm end of chr1):
# 1	0	121502139	2.999e-09
#
# but this means that the script is genome build-dependent

#------------------------------------- 	END OF PROLEGOMENA --------------------------------#

#------------------------------------- I/O & SETUP --------------------------------#

import sys,re

# The t-test file path is the first (required) command-line argument.
try:
   ttest_path = sys.argv[1]
except IndexError:
   sys.exit('need t-test file')

# Read the whole file up front and split it into lines; the context
# manager closes the handle as soon as the read is done.
with open(ttest_path, 'r') as ttestfile:
   ttlines = ttestfile.read().split("\n")

# The genome build is the optional second argument; it defaults to '38'.
try:
   buildval = sys.argv[2]
except IndexError:
   buildval = '38'


#-----------------------------END OF I/O --------------------------------#

#--------------------------- REFERENCE DEFINITIONS -----------------------#

# significance cutoff: rows whose t-test value is above this are not reported
max_ttest = 0.001

# chrom info

# coordinate tables exist only for builds 37 and 38; stop on anything else
if buildval not in ('37', '38'):
   sys.exit('problem with build:  must be either 37 or 38')

# Each tuple below holds 24 entries in the order chr1..chr22, X, Y:
#   chr_size - chromosome length (used as the end of the q arm)
#   p_end    - end coordinate of the p arm (the p arm starts at 0)
#   q_beg    - start coordinate of the q arm
if buildval == '38':
   chr_size = (248956422, 242193529, 198295559, 190214555, 181538259, 170805979, 159345973, 145138636, 138394717, 133797422, 135086622, 133275309, 114364328, 107043718, 101991189, 90338345, 83257441, 80373285, 58617616, 64444167, 46709983, 50818468, 156040895, 57227415)
   # below is built from gap.txt file
   p_end = (121976459, 90402511, 90550102, 49336924, 46435900, 58453888, 60828234, 43983744, 45518558, 38529907, 50821348, 34719407, 0, 0, 0, 33214595, 21795850, 15410899, 24448980, 26348365, 0, 0, 58555579, 0)
   q_beg = (143184587, 91402511, 91553419, 49712061, 47309184, 60229934, 62506779, 45927265, 68220552, 42066265, 54000000, 37460128, 16282173, 16404448, 17000000, 46380682, 23195018, 15460899, 24908689, 26608145, 10000000, 15000000, 58605579, 0)
else:
   # build 37
   chr_size = (249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663, 146364022, 141213431, 135534747, 135006516, 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, 78077248, 59128983, 63025520, 48129895, 51304566, 155270560, 59034049)
   p_end = (121502139, 90545103, 90504858, 49338939, 46405642, 58780170, 58054335, 43838889, 47317680, 39154938, 51594203, 34856698, 0, 0, 0, 35285802, 22263008, 15410901, 24631783, 26319573, 0, 0, 58582018, 0)
   q_beg = (142535154, 95326168, 93504852, 52660118, 49405641, 61880162, 61054321, 46838887, 65467681, 42354786, 54694243, 37856695, 19019998, 19000001, 20000000, 46369840, 25263004, 18510898, 27731695, 29419570, 14338120, 16050038, 61682008, 0)


#------------------------END OF REFERENCE DEFINITIONS -------------------#

#--------------------------- PATTERN DEFINITIONS -----------------------#

# matches the literal text 'chr' wherever it occurs; used to strip it
# from chromosome names such as 'chr1'
chr_nuke = re.compile(r"chr")

#----------------------- END OF PATTERN DEFINITIONS -----------------------#


#-------------------------- LOOP ----------------------------------------------#

def _bad_row(fields):
   """Report an unusable input row on stderr and exit with status 1."""
   # stderr keeps the diagnostic out of the BED lines written to stdout
   print('problem with ', fields, file=sys.stderr)
   sys.exit(1)


# For every row whose t-test value is at or below max_ttest, print one
# tab-separated line: chromosome (as given), arm start, arm end, value.
for line in ttlines:
   # skip lines containing 'arm' (presumably the header row) and blank
   # lines; strip() also catches whitespace-only lines, which would
   # otherwise split into zero fields
   if 'arm' in line or not line.strip():
      continue

   # fields used: chromosome, arm ('p' or 'q'), t-test value
   ttspl = line.split()
   try:
      ttchr = ttspl[0]
      ttarm = ttspl[1]
      ttval = float(ttspl[2])
   except (IndexError, ValueError):
      # fewer than three fields, or a non-numeric t-test value
      _bad_row(ttspl)

   if ttval > max_ttest:
      continue

   # clean up chrom name if needed (a no-op when 'chr' is absent)
   usechr = chr_nuke.sub('', ttchr)

   # index into the coordinate tuples: numbered chromosomes are stored
   # at (number - 1), X at 22 and Y at 23
   if usechr == 'X':
      chr_mark = 22
   elif usechr == 'Y':
      chr_mark = 23
   else:
      try:
         chr_mark = int(usechr) - 1
      except ValueError:
         # a name that is neither X, Y nor a number
         _bad_row(ttspl)

   # A negative index would silently wrap to the end of the tuple
   # (e.g. chromosome '0' would read the Y entry), and an index past
   # the end would raise; reject both explicitly.
   if not 0 <= chr_mark < len(chr_size):
      _bad_row(ttspl)

   if ttarm == 'p':
      # p arm: from the start of the chromosome to the end of the p arm
      chrbeg = 0
      chrend = p_end[chr_mark]

   elif ttarm == 'q':
      # q arm: from the start of the q arm to the end of the chromosome
      chrbeg = q_beg[chr_mark]
      chrend = chr_size[chr_mark]

   else:
      _bad_row(ttspl)

   print(ttchr, chrbeg, chrend, ttval, sep='\t')

#-------------------------- END OF LOOP ---------------------------------------#

