"""
Module containing functions for parsing primer .bed files.
"""

from parse_field import parse_field
from default_primer_name_parser import default_primer_name_parser


# acceptable strand identifiers
_PLUS_STRAND_IDENTIFIERS = ('F', 'FOR', '+')
_MINUS_STRAND_IDENTIFIERS = ('R', 'REV', '-')

# column names that are consumed by the parser itself rather than being copied
# verbatim into the primer dicts
_SPECIAL_COLUMN_NAMES = ('strand', 'region')


def _special_column_indices(column_names, strand_index, region_index):
    """
    Return ``(strand_index, region_index)``, each replaced by the position of
    the 'strand' / 'region' entry in ``column_names`` when such an entry exists
    and left unchanged otherwise.
    """
    if 'strand' in column_names:
        strand_index = column_names.index('strand')
    if 'region' in column_names:
        region_index = column_names.index('region')
    return strand_index, region_index


def _strand_from_token(token):
    """
    Translate a strand identifier into '+' or '-'; return ``None`` when the
    token is not one of the accepted identifiers.
    """
    if token in _PLUS_STRAND_IDENTIFIERS:
        return '+'
    if token in _MINUS_STRAND_IDENTIFIERS:
        return '-'
    return None


def generate_primermap(bedfile, name_parser=default_primer_name_parser, strand_index=5,
                  region_index=None, column_names=None):
    """
    Parse a primer bedfile into a primermap.

    Parameters
    ----------
    bedfile : str
        String reference to a primer bedfile to use to generate the primermap.
        Lines starting with '#' are treated as comments (the first one with
        more than four tab-separated fields is used as a header if
        ``column_names`` was not passed); blank lines are skipped.
    name_parser : Optional[Callable[[str], Dict[str, Any]]]
        Function that takes in the primer name column of the bedfile (the fourth
        column) and returns a dict containing key-value pairs to be added to the
        dict that represents that primer. Unless the region is supplied via
        ``region_index`` or a 'region' column, this dict must have at least
        the following structure::

            {
                'region': string
            }

        If the dict includes any keys that are already typically included in the
        primer dict, the values returned by this function will overwrite the
        usual values (except for 'region' when a region column is in use, see
        ``region_index``).
    strand_index : Optional[int]
        If an int is passed, the column with that index will be used to
        determine strand information for the primer. If ``None`` is passed, or
        if a line is too short to contain that column, the algorithm will try
        to guess which column contains this information. If this fails, strand
        information will not be included in the primer dict. Acceptable strings
        to indicate primer strand are 'F'/'R', 'FOR'/'REV', and '+'/'-'.
    region_index : Optional[int]
        If an int is passed, the column with that index will be used to
        determine the region the primer is in. This makes returning a 'region'
        key from ``name_parser`` optional and overrides the region it returns.
    column_names : Optional[List[str]]
        Pass a list of strings equal to the number of columns in the bedfile,
        describing the columns. The first four elements will be ignored. Special
        values include 'strand', which will set ``strand_index``, and 'region',
        which will override ``region_index``. All other values will end up as
        keys in the primer dicts. If this is not passed, this function will look
        for a header line in the primerfile, and if one is not found, no extra
        columns are added to the primer dicts.

    Returns
    -------
    Dict[str, List[Dict[str, Any]]]
        The keys of the outer dict are region names. The values are lists, where
        the :math:`i` th entry represents the :math:`i` th primer in that
        region (primers are sorted by their 'start' value). Primers are
        represented as dicts with the following structure::

            {
                'region': str
                'chrom' : str,
                'start' : int,
                'end'   : int,
                'name'  : str,
                'strand': '+' or '-'
            }

        though strand may not be present, and additional keys may be present if
        returned by ``name_parser``, passed in ``column_names``, or if a header
        line is present.

    Raises
    ------
    ValueError
        If no region can be determined for a primer because neither
        ``name_parser`` nor any region column is available, or if the start or
        end column of a line is not an integer.

    Notes
    -----
    A primermap is a mapping from primers (specified by a region name and primer
    index) to the genomic range covered by those primers.
    """
    # dict to store the primermap
    primermap = {}

    # explicit column names may relocate the strand and region columns
    if column_names is not None:
        strand_index, region_index = _special_column_indices(
            column_names, strand_index, region_index)

    with open(bedfile, 'r') as handle:
        for line in handle:
            # comment lines are skipped, but the first one that looks like a
            # header (more than four fields) supplies the column names when the
            # caller did not
            if line.startswith('#'):
                if column_names is None:
                    pieces = line.strip().strip('#').split('\t')
                    if len(pieces) > 4:
                        column_names = pieces
                        strand_index, region_index = _special_column_indices(
                            column_names, strand_index, region_index)
                continue

            # blank lines (e.g. a trailing newline at the end of the file)
            # carry no feature and would otherwise fail on the column lookups
            stripped = line.strip()
            if not stripped:
                continue

            # split bedfile line
            feature_columns = stripped.split('\t')
            n_columns = len(feature_columns)

            # parse bed feature information
            chrom = feature_columns[0]
            start = int(feature_columns[1])
            end = int(feature_columns[2])
            name = feature_columns[3]

            # parse name
            name_fields = None
            if name_parser is not None:
                name_fields = name_parser(name)

            # determine the region: an explicit region column takes precedence
            # over the name parser; if the parser is available and this line is
            # too short to contain the region column, fall back to the parser
            if region_index is not None and (name_fields is None or
                                             region_index < n_columns):
                region = feature_columns[region_index]
            elif name_fields is not None:
                region = name_fields['region']
            else:
                raise ValueError(
                    'region could not be identfied (must pass one of '
                    'name_parser, region_index, or column_names with "region" '
                    'column)')

            # parse strand: use the designated column when the line has it,
            # otherwise guess by scanning every column (last match wins)
            strand = None
            if strand_index is not None and strand_index < n_columns:
                strand = _strand_from_token(feature_columns[strand_index])
            else:
                for token in feature_columns:
                    guess = _strand_from_token(token)
                    if guess is not None:
                        strand = guess

            # assemble the dict describing this primer
            # always-present, required fields
            primer_dict = {'chrom': chrom,
                           'start': start,
                           'end'  : end,
                           'name' : name}

            # potentially present fields
            if strand:
                primer_dict['strand'] = strand

            # additional fields from name_parser
            if name_fields is not None:
                primer_dict.update(name_fields)

            # arbitrary fields from column_names or parsed from header; zip
            # stops at the shorter sequence so short lines do not fail, and
            # the special columns are skipped so that their raw text does not
            # clobber the normalized strand / resolved region
            if column_names is not None:
                for column_name, value in zip(column_names[4:],
                                              feature_columns[4:]):
                    if column_name in _SPECIAL_COLUMN_NAMES:
                        continue
                    primer_dict[column_name] = parse_field(value)

            # the region recorded on the primer always matches the key it is
            # filed under in the primermap
            primer_dict['region'] = region

            # add this primer to its region, creating the list if needed
            primermap.setdefault(region, []).append(primer_dict)

    # sort primers within each region
    for primers in primermap.values():
        primers.sort(key=lambda x: x['start'])

    return primermap

