#!/usr/bin/env python

#Requires genome library from MAGOT: https://github.com/biorover/MAGOT

import genome
import sys
import numpy as np
import subprocess

USAGE = ('usage: %s <repeatmasker.gff> <genome.fasta> <window_size> '
         '<percentile_threshold> [windows_file]')

# Scratch file that receives the per-window scores when no precomputed
# windows file is given on the command line.
TEMP_WINDOWS_FILE = 'temp_swrc.temp'

# Every Nth window (counted across the whole windows file) is written to the
# subsampled table.
SUBSAMPLE_INTERVAL = 500


def load_window_values(windows_path):
    """Return the window scores stored in *windows_path* as a float array.

    Each non-blank line is expected to hold whitespace-separated fields with
    the score in the second column.  The array is sized from the lines that
    are actually read, so it never overflows and never carries zero padding
    that would skew a percentile computed from it.
    """
    with open(windows_path) as handle:
        return np.fromiter(
            (float(line.split()[1]) for line in handle if line.strip()),
            dtype=float)


def island_spans_by_seqid(regions):
    """Group the (start, end) coordinates of *regions* by their seqid.

    *regions* is any iterable of objects exposing ``seqid`` and ``coords``
    (only ``coords[0]`` and ``coords[1]`` are read).
    """
    spans = {}
    for robj in regions:
        spans.setdefault(robj.seqid, []).append(
            (robj.coords[0], robj.coords[1]))
    return spans


def write_subsampled_table(windows_path, island_spans, out_path,
                           step=SUBSAMPLE_INTERVAL):
    """Write every *step*-th window of *windows_path* to *out_path*.

    Rows are tab-separated: seqid, coordinate, then the score in one of two
    columns.  Windows falling inside an island span get the score in the
    second value column (first left empty); all others get it in the first
    value column (second left empty), so the two classes can be plotted as
    separate series.

    The coordinate is the 1-based position of the window within its seqid's
    run of lines; the subsampling counter runs across the whole file.
    """
    running_seqid = ""
    coord = 0
    count = 0
    with open(windows_path) as windows_in:
        with open(out_path, 'w') as table:
            for line in windows_in:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                seqid = fields[0]
                if seqid != running_seqid:
                    running_seqid = seqid
                    coord = 0
                coord = coord + 1
                if count % step == 0:
                    in_te_island = any(
                        start <= coord <= end
                        for start, end in island_spans.get(seqid, ()))
                    if in_te_island:
                        table.write(seqid + '\t' + str(coord) + '\t\t'
                                    + fields[1] + '\n')
                    else:
                        table.write(seqid + '\t' + str(coord) + '\t'
                                    + fields[1] + '\t\n')
                count = count + 1


def main(argv=None):
    """Find TE islands from sliding-window repeat scores and write reports.

    Arguments (from *argv*, defaulting to ``sys.argv``):
      1. RepeatMasker GFF file
      2. genome file
      3. window size (integer)
      4. percentile threshold (0-100, may be fractional)
      5. optional precomputed windows file; when omitted the scores are
         computed and written to ``temp_swrc.temp``

    Outputs ``swrc_out.gff`` (one GFF line per detected region) and
    ``swrc_out.subsampled.tab`` (every 500th window, for plotting).
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 5:
        sys.exit(USAGE % argv[0])

    repeats = genome.read_gff(argv[1], presets='RepeatMasker')
    my_genome = genome.Genome(argv[2])
    window_size = int(argv[3])
    # float() rather than int() so fractional percentiles (e.g. 99.5) work;
    # integer inputs give the same threshold as before.
    percentile_threshold = float(argv[4])

    repeat_pd = genome.position_dic(my_genome.genome_sequence)
    repeat_pd.fill_from_annotations(repeats, 'similarity')

    if len(argv) > 5:
        windows_file = argv[5]
    else:
        windows_file = TEMP_WINDOWS_FILE
        with open(windows_file, 'w') as out:
            repeat_pd.sliding_window_calculate(window_size, 1, 'average', out)

    windows = load_window_values(windows_file)
    if windows.size == 0:
        sys.exit('no windows found in %s; is window_size larger than every '
                 'sequence?' % windows_file)

    threshold = np.percentile(windows, percentile_threshold)
    te_islands = repeat_pd.sliding_window_calculate(
        window_size, 1, 'average', "annotation_set", threshold)
    island_regions = [te_islands.region[key] for key in te_islands.region]

    with open('swrc_out.gff', 'w') as gff_out:
        for robj in island_regions:
            gff_out.write(robj.get_gff() + '\n')

    write_subsampled_table(windows_file,
                           island_spans_by_seqid(island_regions),
                           'swrc_out.subsampled.tab')


if __name__ == '__main__':
    main()

#if len(sys.argv) < 6:
#    subprocess.call('rm temp_swrc.temp',shell = True)
