package org.broadinstitute.sting.gatk.refdata.features.refseq;

import org.broad.tribble.Feature;
import org.broadinstitute.sting.gatk.refdata.Transcript;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;

import java.util.ArrayList;
import java.util.List;

/**
 * A single RefSeq transcript record, exposed both as a {@link Transcript} and as a Tribble {@link Feature}.
 *
 * <p>The contig/start/stop triple is fixed at construction time; every other attribute (transcript id, strand,
 * coding interval, exons, gene name, exon frames, name) is filled in afterwards through the setters. Until the
 * corresponding setter has been called the attribute is {@code null} (or 0 for the strand), and the accessors
 * and predicates that dereference it will fail with a {@link NullPointerException}.
 */
public class RefSeqFeature implements Transcript, Feature {

    private String transcript_id;
    private int strand;
    private GenomeLoc transcript_interval;
    private GenomeLoc transcript_coding_interval;
    private List<GenomeLoc> exons;
    private String gene_name;
    private List<Integer> exon_frames;
    // written by setName() but not read anywhere in this class
    private String name;

    // store the contig, start, and stop for this record
    private final String contig;
    private final int start;
    private final int stop;

    /**
     * Creates a record covering the given span; all other attributes must be supplied through the setters.
     *
     * @param contig name of the contig the record lies on
     * @param start  start coordinate, as reported by {@link #getStart()}
     * @param stop   stop coordinate, as reported by {@link #getEnd()}
     */
    public RefSeqFeature(String contig, int start, int stop) {
        this.contig = contig;
        this.start = start;
        this.stop = stop;
    }

    /** Returns id of the transcript (RefSeq NM_* id) */
    public String getTranscriptId() {
        return transcript_id;
    }

    /** Returns coding strand of the transcript, 1 or -1 for positive or negative strand, respectively */
    public int getStrand() {
        return strand;
    }

    /**
     * Returns transcript's full genomic interval (includes all exons with UTRs).
     *
     * <p>If no interval was supplied through {@link #setTranscript_interval(GenomeLoc)}, it is built on first use
     * from the contig/start/stop given to the constructor and then cached.
     */
    public GenomeLoc getLocation() {
        if (transcript_interval == null) {
            transcript_interval = GenomeLocParser.parseGenomeLoc(contig, start, stop);
        }
        return transcript_interval;
    }

    /**
     * Returns genomic interval of the coding sequence (does not include UTRs, but still includes introns, since
     * it's a single interval on the DNA). This is {@code null} until
     * {@link #setTranscript_coding_interval(GenomeLoc)} has been called.
     */
    public GenomeLoc getCodingLocation() {
        return transcript_coding_interval;
    }

    /** Name of the gene this transcript corresponds to (NOT gene id such as Entrez etc) */
    public String getGeneName() {
        return gene_name;
    }

    /** Number of exons in this transcript */
    public int getNumExons() {
        return exons.size();
    }

    /**
     * Genomic location of the n-th exon (0-based index into the exon list).
     *
     * @param n index of the requested exon
     * @return the exon's genomic interval
     * @throws StingException if n is negative or not smaller than the number of exons
     */
    public GenomeLoc getExonLocation(int n) {
        if (n < 0 || n >= exons.size()) {
            throw new StingException("Index out-of-bounds. Transcript has " + exons.size() +" exons; requested: "+n);
        }
        return exons.get(n);
    }

    /**
     * Returns the list of all exons in this transcript, as genomic intervals. The returned list is the
     * internal one (no copy is made), so modifications by the caller are visible to this object.
     */
    public List<GenomeLoc> getExons() {
        return exons;
    }

    /**
     * Returns all exons falling ::entirely:: inside an interval.
     *
     * @param interval the interval the exons are tested against via {@code interval.containsP(exon)}
     * @return a new list with the matching exons, in transcript order; empty if none match
     */
    public List<GenomeLoc> getExonsInInterval( GenomeLoc interval ) {
        List<GenomeLoc> contained = new ArrayList<GenomeLoc>(exons.size());
        for (GenomeLoc exon : getExons()) {
            if (interval.containsP(exon)) {
                contained.add(exon);
            }
        }
        return contained;
    }

    /**
     * Convenience method; returns the numbers of the exons that lie inside the interval.
     *
     * <p>The numbers are 0-based positions in the exon list (suitable for {@link #getExonLocation(int)}); note
     * that {@link #getOverlapString(GenomeLoc)} numbers exons starting from 1.
     *
     * @param interval the interval the exons are tested against via {@code interval.containsP(exon)}
     * @return a new list with the 0-based indices of the matching exons
     */
    public List<Integer> getExonNumbersInInterval( GenomeLoc interval ) {
        List<Integer> indices = new ArrayList<Integer>();
        int index = 0;
        for (GenomeLoc exon : getExons()) {
            if (interval.containsP(exon)) {
                indices.add(index);
            }
            index++;
        }
        return indices;
    }

    /** Returns the gene name followed by the transcript id in parentheses, i.e. {@code gene(transcript)}. */
    public String getTranscriptUniqueGeneName() {
        return String.format("%s(%s)",getGeneName(),getTranscriptId());
    }

    /**
     * Describes which part of this transcript the given position falls into.
     *
     * @param position the location to classify
     * @return {@code "exon_N"} (N counted from 1) for the first exon that contains the position; otherwise
     *         {@code "Intron"} if the position overlaps the coding interval, and {@code "UTR"} in every other
     *         case (no check is made that the position lies inside the transcript at all)
     */
    public String getOverlapString(GenomeLoc position) {
        int exonNo = 1;
        for (GenomeLoc exon : exons) {
            if (exon.containsP(position)) {
                return String.format("exon_%d", exonNo);
            }
            exonNo++;
        }
        return overlapsCodingP(position) ? "Intron" : "UTR";
    }

    /** Returns true if the specified interval 'that' overlaps with the full genomic interval of this transcript */
    public boolean overlapsP (GenomeLoc that) {
        return getLocation().overlapsP(that);
    }

    /** Returns true if the specified interval 'that' overlaps with the coding genomic interval of this transcript.
     * NOTE: since "coding interval" is still a single genomic interval, it will not contain UTRs of the outermost exons,
     * but it will still contain introns and/or exons internal to this genomic locus that are not spliced into this transcript.
     * @see #overlapsExonP
     */
    public boolean overlapsCodingP (GenomeLoc that) {
        return transcript_coding_interval.overlapsP(that);
    }

    /** Returns true if the specified interval 'that' overlaps with any of the exons actually spliced into this transcript */
    public boolean overlapsExonP (GenomeLoc that) {
        for (GenomeLoc exon : exons) {
            if (exon.overlapsP(that)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Renders this record as one tab-separated line. Start coordinates are written as {@code start - 1} and
     * stop coordinates unchanged (presumably to match the 0-based starts of the RefSeq table the record was
     * read from -- confirm against the codec). The numbered comments below give the column index of each field.
     */
    @Override
    public String toString() {
        final GenomeLoc location = getLocation();

        // first field is unused but required in the current format; just set to something
        StringBuilder b = new StringBuilder("000\t");
        b.append(transcript_id).append('\t');                              // #1
        b.append(location.getContig()).append('\t');                       // #2
        b.append(strand == 1 ? '+' : '-').append('\t');                    // #3
        b.append(location.getStart() - 1).append('\t');                    // #4
        b.append(location.getStop()).append('\t');                         // #5
        b.append(transcript_coding_interval.getStart() - 1).append('\t');  // #6
        b.append(transcript_coding_interval.getStop()).append('\t');       // #7
        b.append(exons.size()).append('\t');                               // #8
        for (GenomeLoc exon : exons) {                                     // #9: exon starts, each followed by ','
            b.append(exon.getStart() - 1).append(',');
        }
        b.append('\t');
        for (GenomeLoc exon : exons) {                                     // #10: exon stops, each followed by ','
            b.append(exon.getStop()).append(',');
        }
        b.append("\t0\t");                                                 // #11 - unused?
        b.append(gene_name);                                               // #12
        b.append("\tcmpl\tcmpl\t");                                        // #13, #14 - unused?
        for (Integer frame : exon_frames) {                                // #15: exon frames, each followed by ','
            b.append(frame).append(',');
        }

        return b.toString();
    }

    /** Unwraps the RefSeq record carried by a generic feature; fails with ClassCastException if it holds another type. */
    private static RefSeqFeature asRefSeq(GATKFeature feature) {
        return (RefSeqFeature) feature.getUnderlyingObject();
    }

    /** Convenience method, which is packaged here for a lack of better place; it is indeed closely related to
     * rodRefSeq though: takes list of rods (transcripts) overlapping with a given position and determines whether
     * this position overlaps an exon of <i>any</i> of those transcripts. Passing null is safe (will return false).
     * NOTE: position can be still within a UTR, see {@link #isCoding(RODRecordList)}
     * @param l records overlapping the position of interest; the position is taken from {@code l.getLocation()}
     * @return true if it's an exon
     */
    public static boolean isExon(RODRecordList l) {
        if (l == null) {
            return false;
        }

        final GenomeLoc loc = l.getLocation();
        for (GATKFeature t : l) {
            if (asRefSeq(t).overlapsExonP(loc)) {
                return true;
            }
        }
        return false;
    }

    /** Convenience method, which is packaged here for a lack of better place; it is indeed closely related to
     * rodRefSeq though: takes list of rods (transcripts) overlapping with a given position and determines whether
     * this position overlaps the coding region of <i>any</i> of those transcripts.
     * Passing null is safe (will return false).
     * NOTE: "coding" interval is defined as a single genomic interval, so it
     * does not include the UTRs of the outermost exons, but it includes introns between exons spliced into a
     * transcript, or internal exons that are not spliced into a given transcript. To check that a position is
     * indeed within an exon but not in UTR, use {@link #isCodingExon(RODRecordList)}.
     * @param l records overlapping the position of interest; the position is taken from {@code l.getLocation()}
     * @return true if the position overlaps the coding interval of at least one transcript in the list
     */
    public static boolean isCoding(RODRecordList l) {
        if (l == null) {
            return false;
        }

        final GenomeLoc loc = l.getLocation();
        for (GATKFeature t : l) {
            if (asRefSeq(t).overlapsCodingP(loc)) {
                return true;
            }
        }
        return false;
    }

    /** Convenience method, which is packaged here for a lack of better place; it is indeed closely related to
     * rodRefSeq though: takes list of rods (transcripts) overlapping with a given position and determines whether
     * this position is within a coding exon portion (i.e. true coding sequence) of <i>any</i> of those transcripts.
     * Passing null is safe (will return false). In other words, this method returns true if the list contains a transcript,
     * for which the current position is within an exon <i>and</i> within a coding interval simultaneously.
     * @param l records overlapping the position of interest; the position is taken from {@code l.getLocation()}
     * @return true if some single transcript in the list has the position in both its coding interval and one of its exons
     */
    public static boolean isCodingExon(RODRecordList l) {
        if (l == null) {
            return false;
        }

        final GenomeLoc loc = l.getLocation();
        for (GATKFeature t : l) {
            // unwrap once per record; both conditions must hold for the same transcript
            final RefSeqFeature transcript = asRefSeq(t);
            if (transcript.overlapsCodingP(loc) && transcript.overlapsExonP(loc)) {
                return true;
            }
        }
        return false;
    }

    /** Sets the transcript id returned by {@link #getTranscriptId()}. */
    public void setTranscript_id(String transcript_id) {
        this.transcript_id = transcript_id;
    }

    /** Sets the coding strand returned by {@link #getStrand()}; {@link #toString()} prints '+' for 1 and '-' for any other value. */
    public void setStrand(int strand) {
        this.strand = strand;
    }

    /** Sets the full transcript interval, replacing the one {@link #getLocation()} would otherwise derive from contig/start/stop. */
    public void setTranscript_interval(GenomeLoc transcript_interval) {
        this.transcript_interval = transcript_interval;
    }

    /** Sets the coding interval returned by {@link #getCodingLocation()} and used by {@link #overlapsCodingP(GenomeLoc)}. */
    public void setTranscript_coding_interval(GenomeLoc transcript_coding_interval) {
        this.transcript_coding_interval = transcript_coding_interval;
    }

    /** Sets the exon list; the list is stored as-is (not copied). */
    public void setExons(List<GenomeLoc> exons) {
        this.exons = exons;
    }

    /** Sets the gene name returned by {@link #getGeneName()}. */
    public void setGene_name(String gene_name) {
        this.gene_name = gene_name;
    }

    /** Sets the per-exon frame values written as the last column of {@link #toString()}; the list is stored as-is. */
    public void setExon_frames(List<Integer> exon_frames) {
        this.exon_frames = exon_frames;
    }

    /** Stores a name for this record; the value is not read back anywhere in this class. */
    public void setName(String name) {
        this.name = name;
    }

    /** Returns the contig name given to the constructor. */
    @Override
    public String getChr() {
        return contig;
    }

    /** Returns the start coordinate given to the constructor. */
    @Override
    public int getStart() {
        return start;
    }

    /** Returns the stop coordinate given to the constructor. */
    @Override
    public int getEnd() {
        return stop;
    }
}
