/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.gene.model.exons;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import org.rhwlab.gene.model.Annotation;
import org.rhwlab.gene.model.AnnotationBoundary;
import org.rhwlab.gene.model.AnnotationGFF;
import org.rhwlab.gene.model.AnnotationModel;
import org.rhwlab.gene.model.AnnotationStart;
import org.rhwlab.gene.model.Exon;
import org.rhwlab.gene.model.ModelFromGFF;
import org.rhwlab.gene.model.ModelFromGTF;

/**
 *
 * @author gevirl
 */
// A GFF model derived from a standard GFF/GTF model such that all the exons within a gene are non-overlapping.
// Each exon appears only once in the file.
// Exons from genes on different strands can still overlap in the UTRs.
public class NonOverlapping extends AnnotationModel {

    public NonOverlapping() {
        super();
    }

    public NonOverlapping(File file) throws Exception {
        BufferedReader reader = new BufferedReader(new FileReader(file));
        String line = reader.readLine();
        while (line != null) {
            if (line.charAt(0) != '#') {
                String[] tokens = line.split("\t");
                Annotation annot = new Exon(tokens);
                String gene = (String) annot.getAttributeValue("Parent");
                this.addAnnotation(annot, gene, null);
            }
            line = reader.readLine();
        }
        reader.close();
    }

    static public NonOverlapping fromGFF(File gff) throws Exception {
        ModelFromGFF model = new ModelFromGFF(gff);
        NonOverlapping ret = new NonOverlapping();

        List<ExonID> exons = buildExonIds(model);
        for (ExonID exon : exons) {
            Annotation geneAnnot = exon.geneAnnot;
            AnnotationGFF annot = new AnnotationGFF();
            annot.setChromo(geneAnnot.getChromosome());
            annot.setSource(geneAnnot.getSource());
            annot.setType("exon");
            annot.setStart(exon.start);
            annot.setEnd(exon.end);
            annot.setStrand(geneAnnot.getStrand());
            annot.setPhase(geneAnnot.getPhase());
            annot.addAttribute("Parent", exon.geneAnnot.getGeneID());
            annot.addAttribute("Name", exon.id);
            String seqName = (String) geneAnnot.getAttributeValue("sequence_name");
            String locus = (String) geneAnnot.getAttributeValue("locus");
            if (seqName != null) {
                annot.addAttribute("sequence_name", seqName);
            }
            if (locus != null) {
                annot.addAttribute("locus", locus);
            }
            ret.addAnnotation(annot, exon.geneAnnot.getGeneID(), null);
        }

        return ret;
    }

    static public NonOverlapping fromEnsemblGTF(File gtf) throws Exception {
        ModelFromGTF model = new ModelFromGTF(gtf);
        NonOverlapping ret = new NonOverlapping();

        List<ExonID> exons = buildExonIds(model);
        for (ExonID exon : exons) {
            Annotation geneAnnot = exon.geneAnnot;
            AnnotationGFF annot = new AnnotationGFF();
            annot.setChromo(geneAnnot.getChromosome());
            annot.setSource(geneAnnot.getSource());
            annot.setType("exon");
            annot.setStart(exon.start);
            annot.setEnd(exon.end);
            annot.setStrand(geneAnnot.getStrand());
            annot.setPhase(geneAnnot.getPhase());
            annot.addAttribute("Parent", (String) geneAnnot.getAttributeValue("gene_id"));
            annot.addAttribute("Name", exon.id);
            annot.addAttribute("gene_name", (String) geneAnnot.getAttributeValue("gene_name"));
//            annot.addAttribute("sequence_name", (String)geneAnnot.getAttributeValue("sequence_name"));
            ret.addAnnotation(annot, exon.geneAnnot.getGeneID(), null);
        }
        return ret;
    }

//    static public List<ExonID> buildExonIds(ModelFromGFF model){
    static public List<ExonID> buildExonIds(AnnotationModel model) {
        ArrayList<ExonID> ids = new ArrayList<>();
        List<Annotation> geneAnnots = model.getGenesByBiotype("protein_coding");
        for (Annotation proteinGene : geneAnnots) {
            ArrayList<ExonID> geneExons = new ArrayList<>();

            String geneID = proteinGene.getGeneID();

            List<AnnotationBoundary> bounds = model.getExonBoundaries(geneID);
            if (!bounds.isEmpty()) {
                AnnotationBoundary[] boundsArray = bounds.toArray(new AnnotationBoundary[0]);

                AnnotationBoundary left = boundsArray[0];
                int p = -1;
                for (int i = 1; i < boundsArray.length; ++i) {

                    AnnotationBoundary right = boundsArray[i];

                    if (p < 0) {
                        if (left.getLocation() < right.getLocation()) {
                            ExonID exonID = new ExonID();
                            exonID.geneAnnot = proteinGene;
                            exonID.start = left.getLocation();
                            exonID.end = right.getLocation();
                            geneExons.add(exonID);
                        }
                    }
                    if (right instanceof AnnotationStart) {
                        --p;
                    } else {
                        ++p;
                    }
                    left = right;
                }
                ExonID exon = geneExons.get(0);
                for (int i = 1; i < geneExons.size(); ++i) {
                    ExonID next = geneExons.get(i);
                    if (exon.end == next.start) {
                        --exon.end;
                    }
                    exon = next;
                }

                // name the exons (they are nonoverlapping in the gene) in this gene
                ExonID.nameExons(geneExons);

                ids.addAll(geneExons);
            }
        }

        return ids;
    }

    public void toBedFormat(File bedFile) throws Exception {
        PrintStream stream = new PrintStream(bedFile);
        for (Annotation exon : this.getAll()) {
            String geneName = null;
            Object locus = exon.getAttributeValue("locus");
            if (locus != null) {
                geneName = (String) locus;
            } else {
                Object seqName = exon.getAttributeValue("sequence_name");
                geneName = (String) seqName;
            }

            if (geneName == null) {
                geneName = (String) exon.getAttributeValue("gene_name");
            }
            String exonName = String.format("%s_%s", geneName, (String) exon.getAttributeValue("Name"));
            stream.printf("%s\t%d\t%d\t%s\t1000\t%s\n",
                    exon.getChromosome(), exon.getStart(), exon.getEnd(), exonName, exon.getStrand());

        }
    }

    // form a gff of nonOverlapping exons for the protein coding genes
    static public void main(String[] args) throws Exception {

//        NonOverlappingGFF non = new NonOverlappingGFF(new File("/net/waterston/vol9/References/WS235/NonOverlappingExons.gff3"));
//        non.toBedFormat(new File("/net/waterston/vol9/References/WS235/NonOverlappingExons.bed"));        
/*        
        // open the nonoverlapping exon file and form a bed
        NonOverlappingGFF non = new NonOverlappingGFF(new File("/net/waterston/vol9/References/WS245/NonOverlappingExons.gff3"));
        non.toBedFormat(new File("/net/waterston/vol9/References/WS245/NonOverlappingExons.bed"));
        
        // build the nonoverlapping exon file
        File gff = new File("/net/waterston/vol9/References/WS245/AllWormBase.withTransposon.gff3");
        NonOverlappingGFF non = NonOverlappingGFF.fromGFF(gff);
        PrintStream stream = new PrintStream("/net/waterston/vol9/References/WS245/NonOverlappingExons.gff3");
        non.save(stream);
      
        File gtf = new File("/net/waterston/vol9/References/Ensembl/Caenorhabditis_elegans.WBcel235.92.gtf");
        NonOverlappingGFF non = NonOverlappingGFF.fromEnsemblGTF(gtf);
        PrintStream stream = new PrintStream("/net/waterston/vol9/References/Ensembl/NonOverlappingExons.gff3");
        non.save(stream);
         */
        File gtf = new File("/net/waterston/vol9/References/WS260/c_elegans.PRJNA13758.WS260.annotations.WormBase.gff3");
        NonOverlapping non = NonOverlapping.fromGFF(gtf);
        PrintStream stream = new PrintStream("/net/waterston/vol9/References/WS260/NonOverlappingExons.gff3");
        non.save(stream);
        int iuuinfgfg = 0;

    }

}
