#!/usr/bin/env python

from Bio import AlignIO
from Bio.Align import MultipleSeqAlignment
import pandas as pd
import sys

# Positional command-line arguments:
#   1: path of the input alignment (parsed as FASTA further down)
#   2: gap-proportion threshold, converted to float; cut_a_gap removes every
#      column whose gap proportion is strictly greater than this value
#   3: path the trimmed alignment is written to (FASTA)
#   4: path of the CSV file receiving the per-site gap proportions
ali_input_filename=sys.argv[1]
prop_gap_max=float(sys.argv[2])
ali_output_filename=sys.argv[3]
out_prop_gap=sys.argv[4]

def cut_a_gap(align, max_gap=None):
    """Drop the alignment columns whose proportion of gaps is too high.

    A column is removed when the fraction of '-' characters it contains is
    strictly greater than the threshold; columns at or below it are kept.

    Parameters:
        align: alignment object (e.g. Bio.Align.MultipleSeqAlignment). It
            must support len(), row indexing, ``align[:, i]`` (column as a
            string), ``align[:, a:b]`` (column slice) and ``+``.
        max_gap: maximum tolerated gap proportion. When omitted, the
            module-level ``prop_gap_max`` is used.

    Returns:
        A tuple ``(trimmed_alignment, nb_gap)``. ``nb_gap`` maps the index
        of each kept column *in the trimmed alignment* (0, 1, 2, ...) to
        its gap proportion. When no column is removed, the input object
        itself is returned.
    """
    if max_gap is None:
        max_gap = prop_gap_max

    # float() keeps the division below a true division on Python 2 as well.
    n_seq = float(len(align))
    if not n_seq:
        # No sequences: there is no column to inspect and nothing to trim.
        return align, {}
    n_site = len(align[0])

    nb_gap = {}
    runs = []  # [start, stop) ranges of consecutive columns that are kept
    for col in range(n_site):
        p_gap_site = align[:, col].count('-') / n_seq
        if p_gap_site > max_gap:
            continue
        # Keys are positions in the output alignment, not in the input.
        nb_gap[len(nb_gap)] = p_gap_site
        if runs and runs[-1][1] == col:
            runs[-1][1] = col + 1  # extend the current run
        else:
            runs.append([col, col + 1])

    if len(nb_gap) == n_site:
        # Nothing was removed: hand back the original object untouched.
        return align, nb_gap
    if not runs:
        # Every column was removed: same rows, zero columns.
        return align[:, :0], nb_gap

    # Rebuild the alignment once, one slice per run of kept columns, instead
    # of re-slicing and re-concatenating it for every removed column.
    trimmed = align[:, runs[0][0]:runs[0][1]]
    for start, stop in runs[1:]:
        trimmed = trimmed + align[:, start:stop]
    return trimmed, nb_gap


# Read the input alignment (FASTA). The context manager closes the file
# handle as soon as parsing is done instead of leaving it open.
with open(ali_input_filename) as ali_input_handle:
    ali_input = AlignIO.read(ali_input_handle, "fasta")

# Remove the gap-rich columns and collect the gap proportion of kept sites.
ali_output, prop_gap = cut_a_gap(ali_input)

# Write the trimmed alignment (FASTA).
AlignIO.write(ali_output, ali_output_filename, "fasta")

# Write one CSV row per kept site: its index and its gap proportion.
df_prop_gap = pd.DataFrame(list(prop_gap.items()), columns=["site", "prop_gap"])
df_prop_gap.to_csv(out_prop_gap, index=False)
