#!/usr/bin/env python
import pandas as pd
import numpy as np
import argparse

# Command-line interface: every option below is mandatory.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawTextHelpFormatter,
    description="""

Take the midpoint of each interval in a bed file (--bed) and extend
that midpoint upstream and downstream by 1/2 the desired interval_length
(--interval_length). Genome sizes (--genome_sizes) needed to properly
deal with border cases in which an newly extended interval goes beyond
a chromosome's endpoints. 

""",
)

##################################################
# required args:

# Input intervals (tab-separated, no header row).
parser.add_argument(
    "-i",
    "--bed",
    required=True,
    help="""required, file paths to bed file, of format:
chrom	start	end	...	...	...	etc.
""",
)
# Target width of the rewritten intervals, parsed as an integer.
parser.add_argument(
    "-l",
    "--interval_length",
    required=True,
    type=int,
    help="required, desired length for bed intervals",
)
# Chromosome-length table used to clamp intervals at chromosome ends.
parser.add_argument(
    "-g",
    "--genome_sizes",
    required=True,
    help="required, path to UCSC-style 'genome sizes' file",
)
# Destination for the rewritten bed file.
parser.add_argument(
    "-o",
    "--out",
    required=True,
    help="required, file paths to output bed file",
)

##################################################

# Parse sys.argv; argparse exits with a usage message if an option is missing.
args = parser.parse_args()

##################################################

def _resize_intervals(bed, interval_length, genome_sizes):
    """Return a copy of *bed* with each interval re-centred to a fixed width.

    For every row the midpoint of (start, end) is computed and the interval is
    rewritten as [midpoint - half, midpoint + half], where ``half`` is
    ``interval_length // 2``.  The new start is clamped to 0 and the new end
    is clamped to the chromosome length.  Rows whose chromosome is absent
    from *genome_sizes* are dropped.

    Parameters
    ----------
    bed : pandas.DataFrame
        Header-less bed table; column 0 is the chromosome, column 1 the
        start and column 2 the end.  Any further columns are carried through
        unchanged.
    interval_length : int
        Desired interval width.  An odd value yields an interval one base
        shorter, because half the width is floored on each side.
    genome_sizes : dict
        Mapping of chromosome name to chromosome length.

    Returns
    -------
    pandas.DataFrame
        The surviving rows, in their original order and with the original
        columns, with integer start/end coordinates.
    """
    # Look up each row's chromosome length; unknown chromosomes map to NaN.
    chrom_lengths = bed[0].map(genome_sizes)
    known = chrom_lengths.notna()

    # Drop rows on chromosomes missing from the sizes table (a mask is used
    # rather than a sentinel coordinate so no real value can collide with it).
    resized = bed[known].copy()
    chrom_lengths = chrom_lengths[known].astype('int64')

    # Floor division keeps coordinates integral on Python 3; true division
    # would turn them into floats and write values such as "150.0".
    center = (resized[1] + resized[2]) // 2
    half = interval_length // 2

    # Clamp to the chromosome: start no lower than 0, end no higher than
    # the chromosome length.
    resized[1] = (center - half).clip(lower=0)
    resized[2] = np.minimum(center + half, chrom_lengths)
    return resized


bed = pd.read_csv(args.bed, sep='\t', header=None)

# Only the first two columns (name, length) are used, so sizes files that
# carry extra columns (e.g. samtools .fai indexes) are read correctly too.
sizes_table = pd.read_csv(args.genome_sizes, sep='\t', header=None,
                          usecols=[0, 1], names=['chrom', 'length'])
genome_sizes = dict(zip(sizes_table['chrom'], sizes_table['length']))

bed = _resize_intervals(bed, args.interval_length, genome_sizes)
bed.to_csv(args.out, sep='\t', index=False, header=False)
