/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.gene.model;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * An {@link AnnotationModel} populated from a GFF3 annotation file.
 *
 * <p>The file is read twice. The first pass registers every annotation that has
 * no {@code Parent} attribute (genes and transposons, per the original author's
 * note) and records the ID-to-Parent relation of the remaining annotations.
 * The second pass attaches every annotation that has a {@code Parent} to its
 * gene and transcript(s), using the relation built in the first pass.
 *
 * @author gevirl
 */
public class ModelFromGFF extends AnnotationModel {

    /** Placeholder written to output files when a name cannot be resolved. */
    private static final String MISSING = "NULL";

    /** GFF3 file loaded by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_GFF
            = "/net/waterston/vol9/References/WS260/c_elegans.PRJNA13758.WS260.annotations.WormBase.gff3";

    /** Child annotations without an ID whose Parent prefix is not Transcript, Pseudogene or CDS. */
    ArrayList<Annotation> othersNoID = new ArrayList<>();
    /** Child annotations whose ID prefix is not Transcript, CDS or Pseudogene. */
    ArrayList<Annotation> othersByID = new ArrayList<>();
    /** Maps the ID of a non-CDS-typed child annotation to its (single) Parent attribute value. */
    TreeMap<String, String> parentMap = new TreeMap<>();
    /**
     * Maps the {@code locus} and {@code sequence_name} attribute values of each
     * top-level annotation to that annotation's {@code Name}
     * (original note: maps common name and sequence name to WBGeneID).
     */
    TreeMap<String, String> wbGenes = new TreeMap<>();

    /**
     * Builds the model from a GFF3 file.
     *
     * @param gff the GFF3 file to read; lines starting with {@code #} and blank lines are skipped
     * @throws Exception if the file cannot be read or a line cannot be turned into an annotation
     */
    public ModelFromGFF(File gff) throws Exception {
        indexGenesAndParents(gff);
        attachChildAnnotations(gff);
    }

    /** First pass: registers parent-less annotations and fills {@link #parentMap} and {@link #wbGenes}. */
    private void indexGenesAndParents(File gff) throws Exception {
        try (BufferedReader reader = new BufferedReader(new FileReader(gff))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!isDataLine(line)) {
                    continue;
                }
                String[] tokens = line.split("\t");
                Annotation annot = new AnnotationGFF(tokens);

                Object p = annot.getAttributeValue("Parent");
                if (p == null) {
                    // top-level annotation: register it under its Name
                    String name = (String) annot.getAttributeValue("Name");
                    this.addAnnotation(annot, name, null);

                    Object locus = annot.getAttributeValue("locus");
                    if (locus != null) {
                        wbGenes.put((String) locus, name);
                    }
                    Object seqName = annot.getAttributeValue("sequence_name");
                    if (seqName != null) {
                        wbGenes.put((String) seqName, name);
                    }
                } else if (p instanceof String && !annot.getType().equals("CDS")) {
                    // remember which parent this ID hangs off so the second pass can find the gene
                    String id = (String) annot.getAttributeValue("ID");
                    if (id != null) {
                        parentMap.put(id, (String) p);
                    }
                }
            }
        }
    }

    /** Second pass: attaches every annotation that has a Parent attribute to its gene/transcript(s). */
    private void attachChildAnnotations(File gff) throws Exception {
        try (BufferedReader reader = new BufferedReader(new FileReader(gff))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!isDataLine(line)) {
                    continue;
                }
                String[] tokens = line.split("\t");
                Annotation annot = new AnnotationGFF(tokens);
                if (annot.getType().equals("exon")) {
                    annot = new Exon(tokens);
                }

                Object p = annot.getAttributeValue("Parent");
                if (p == null) {
                    continue;  // handled in the first pass
                }
                if (p instanceof String) {
                    attachToSingleParent(annot, tokens, (String) p);
                } else {
                    // anything that is not a String is taken to be a list of parent IDs,
                    // exactly as the original unchecked cast assumed
                    @SuppressWarnings("unchecked")
                    List<String> parents = (List<String>) p;
                    String gene = idAfterColon(parentMap.get(parents.get(0)));
                    List<String> transcripts = new ArrayList<>();
                    for (String parent : parents) {
                        transcripts.add(idAfterColon(parent));
                    }
                    this.addAnnotation(annot, gene, transcripts);
                }
            }
        }
    }

    /** Attaches an annotation that has exactly one parent, dispatching on its ID / Parent prefix. */
    private void attachToSingleParent(Annotation annot, String[] tokens, String parent) {
        String id = (String) annot.getAttributeValue("ID");
        String name = (String) annot.getAttributeValue("Name");

        if (id != null) {
            if (id.startsWith("Transcript") || id.startsWith("Pseudogene")) {
                this.addAnnotation(annot, idAfterColon(parent), name);
            } else if (id.startsWith("CDS")) {
                if (tokens[2].equals("mRNA")) {
                    this.addAnnotation(annot, idAfterColon(parent), name);  // this adds transposon mRNA
                } else {
                    String transcript = idAfterColon(parent);
                    String gene = idAfterColon(parentMap.get(parent));
                    this.addAnnotation(annot, gene, transcript);
                }
            } else {
                othersByID.add(annot);
            }
        } else if (parent.startsWith("Transcript")) {
            try {
                String gene = idAfterColon(parentMap.get(parent));
                this.addAnnotation(annot, gene, idAfterColon(parent));
            } catch (Exception ignored) {
                // Best effort, as in the original code: a transcript child whose parent
                // cannot be resolved (or cannot be added) is silently left out of the model.
            }
        } else if (parent.startsWith("Pseudogene") || parent.startsWith("CDS")) {
            String transcript = idAfterColon(parent);
            String gene = idAfterColon(parentMap.get(parent));
            this.addAnnotation(annot, gene, transcript);
        } else {
            this.othersNoID.add(annot);
        }
    }

    /** Returns true for lines that carry an annotation, i.e. neither blank nor a '#' comment. */
    private static boolean isDataLine(String line) {
        return !line.isEmpty() && line.charAt(0) != '#';
    }

    /** Returns the second ':'-separated field of a prefixed ID such as {@code Prefix:value}. */
    private static String idAfterColon(String prefixedId) {
        return prefixedId.split(":")[1];
    }

    /** Returns {@code value}, or the {@link #MISSING} placeholder when it is null. */
    private static String orMissing(String value) {
        return value != null ? value : MISSING;
    }

    /**
     * Not finished: collects the exon annotations of every gene but writes nothing yet.
     *
     * @param outGFF intended output file; currently unused
     */
    public void formExonGFF(File outGFF) {
        Set<String> genes = this.getAllGenes();
        for (String gene : genes) {
            List<Annotation> annots = this.getGeneAnnotations(gene);
            if (annots == null) {
                continue;
            }
            Set<Annotation> exonSet = new TreeSet<>();
            List<Annotation> exons = new ArrayList<>();
            for (Annotation annot : annots) {
                if (annot.getType().equals("exon")) {
                    exons.add(annot);
                    exonSet.add(annot);
                }
            }
        }
    }

    /**
     * Not finished: prints every annotation of every gene whose ID starts with
     * {@code WBGene} via {@code Annotation.printAsGTF}.
     *
     * @param stream destination of the GTF output
     */
    public void formGTF(PrintStream stream) {
        for (String gene : this.getAllGenes()) {
            if (!gene.startsWith("WBGene")) {
                continue;
            }
            List<Annotation> annots = this.getGeneAnnotations(gene);
            if (annots == null) {
                continue;
            }
            for (Annotation annot : annots) {
                annot.printAsGTF(stream);
            }
        }
    }

    /**
     * Returns the annotations of a gene whose type is {@code mRNA} or
     * {@code pseudogenic_transcript}, sorted by their natural ordering.
     *
     * @param gene the gene identifier used as key in the model
     * @return the sorted transcript annotations; empty when the gene has no annotations
     */
    public List<Annotation> getTranscriptsForGene(String gene) {
        ArrayList<Annotation> ret = new ArrayList<>();
        List<Annotation> geneAnnots = this.getGeneAnnotations(gene);
        if (geneAnnots != null) {
            for (Annotation annot : geneAnnots) {
                String type = annot.getType();
                if (type.equals("mRNA") || type.equals("pseudogenic_transcript")) {
                    ret.add(annot);
                }
            }
        }
        ret.sort(null);
        return ret;
    }

    /**
     * Looks up the gene Name recorded for a locus or sequence name.
     *
     * @param seqName a {@code locus} or {@code sequence_name} value
     * @return the mapped gene Name, or null when unknown or when {@code seqName} is null
     */
    public String getWBGene(String seqName) {
        if (seqName == null) {
            return null;
        }
        return this.wbGenes.get(seqName);
    }

    /**
     * Returns the gene annotation for a gene identifier, locus or sequence name.
     * Names that do not start with {@code WBGene} are first translated through
     * {@link #wbGenes}.
     *
     * @param name a WBGene identifier, locus or sequence name
     * @return the gene annotation, or null when the name is null or cannot be resolved
     */
    @Override
    public Annotation getGeneAnnotation(String name) {
        if (name == null) {
            return null;
        }
        String wbGene = name.startsWith("WBGene") ? name : wbGenes.get(name);
        if (wbGene == null) {
            return null;
        }
        return super.getGeneAnnotation(wbGene);
    }

    /**
     * Copies a delimited file to {@code stream}, inserting the gene's sequence name
     * directly after column {@code wb}. The first line of the input is skipped and
     * not copied. Input fields may be separated by tabs or commas; output is
     * comma-separated. Genes that cannot be resolved get the placeholder
     * {@code NULL} and are listed on standard output afterwards.
     *
     * @param file the input file
     * @param wb zero-based index of the column holding the gene name
     * @param stream destination of the expanded rows; not closed by this method
     * @throws Exception if the input cannot be read or a row has too few columns
     */
    public void addSequenceNameToFile(File file, int wb, PrintStream stream) throws Exception {
        TreeSet<String> unresolved = new TreeSet<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            reader.readLine();  // skip the header
            String line;
            while ((line = reader.readLine()) != null) {
                String[] tokens = line.split("\t|,");
                String wbGene = tokens[wb];
                Annotation geneAnnot = this.getGeneAnnotation(wbGene);
                String seqName = MISSING;
                if (geneAnnot != null) {
                    // a gene without a sequence_name gets the same placeholder as an unknown gene
                    seqName = orMissing((String) geneAnnot.getAttributeValue("sequence_name"));
                } else {
                    unresolved.add(wbGene);
                }
                stream.print(tokens[0]);
                for (int i = 1; i <= wb; ++i) {
                    stream.print("," + tokens[i]);
                }
                stream.print("," + seqName);
                for (int i = wb + 1; i < tokens.length; ++i) {
                    stream.print("," + tokens[i]);
                }
                stream.println();
            }
        }
        for (String wbGene : unresolved) {
            System.out.println(wbGene);
        }
    }

    /**
     * Finds the three gene names (cosmid/sequence, common, and WBGene) given any
     * gene name or transcript name.
     *
     * @param anyname a WBGene identifier, locus, sequence name or transcript name
     * @return {sequence name, common name, WBGene}, or null when the name is null
     *         or cannot be resolved
     */
    public String[] geneNameTriplet(String anyname) {
        if (anyname == null) {
            return null;
        }
        // wbGenes maps cosmid and common names to WBGene names
        String wbGene = anyname.startsWith("WBGene") ? anyname : wbGenes.get(anyname);
        if (wbGene != null) {
            if (this.getGeneAnnotation(wbGene) == null) {
                return null;
            }
            return nameTripletFromWBGene(wbGene);
        }

        // not a known gene name: see if it is a transcript name
        String[] ret = null;
        List<Annotation> annots = this.transcriptMap.get(anyname);
        if (annots != null) {
            for (Annotation annot : annots) {
                String geneId = annot.getGeneID();
                if (geneId != null) {
                    ret = nameTripletFromWBGene(geneId);  // the last annotation with a gene ID wins
                }
            }
        }
        return ret;
    }

    /**
     * Builds the name triplet for a gene identifier.
     *
     * @param wbGene the gene identifier
     * @return a three-element array {sequence name, locus (or sequence name when the
     *         gene has no locus), wbGene}; all elements are null when the gene
     *         annotation is not found
     */
    public String[] nameTripletFromWBGene(String wbGene) {
        String[] ret = new String[3];
        Annotation geneAnnot = this.getGeneAnnotation(wbGene);
        if (geneAnnot != null) {
            ret[0] = (String) geneAnnot.getAttributeValue("sequence_name");
            Object locus = geneAnnot.getAttributeValue("locus");
            ret[1] = locus != null ? (String) locus : ret[0];
            ret[2] = wbGene;
        }
        return ret;
    }

    /**
     * Copies a comma-separated file to {@code outFile}, inserting three columns
     * (Sequence, Common, WBGene) directly after column {@code position}. Names
     * that cannot be resolved are written as {@code NULL}.
     *
     * @param file the comma-separated input file
     * @param position zero-based index of the column holding a gene or transcript name
     * @param header true when the first input line is a header row
     * @param outFile the file to write; created or overwritten
     * @throws Exception if a file cannot be opened or read, or a row has too few columns
     */
    public void expandNames(File file, int position, boolean header, File outFile) throws Exception {
        try (PrintStream stream = new PrintStream(outFile);
                BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line = reader.readLine();
            if (header && line != null) {
                String[] heads = line.split(",");
                for (int i = 0; i < heads.length; ++i) {
                    if (i > 0) {
                        stream.print(",");
                    }
                    stream.print(heads[i]);
                    if (i == position) {
                        stream.print(",Sequence,Common,WBGene");
                    }
                }
                stream.println();
                line = reader.readLine();
            }
            while (line != null) {
                String[] tokens = line.split(",");
                String[] names = this.geneNameTriplet(tokens[position]);
                if (names == null) {
                    // unknown name: keep the row and fill the new columns with placeholders
                    names = new String[3];
                }
                for (int i = 0; i < tokens.length; ++i) {
                    if (i > 0) {
                        stream.print(",");
                    }
                    stream.print(tokens[i]);
                    if (i == position) {
                        stream.printf(",%s,%s,%s",
                                orMissing(names[0]), orMissing(names[1]), orMissing(names[2]));
                    }
                }
                stream.println();
                line = reader.readLine();
            }
        }
    }

    /**
     * Loads a GFF3 file into a model. Typical follow-up calls on the loaded model are
     * {@code expandNames(inFile, 0, true, outFile)},
     * {@code addSequenceNameToFile(csv, 1, stream)} and {@code formGTF(stream)}.
     *
     * @param args optional: {@code args[0]} is the path of the GFF3 file to load;
     *             when absent the built-in default path is used
     * @throws Exception if the file cannot be loaded
     */
    static public void main(String[] args) throws Exception {
        String path = args.length > 0 ? args[0] : DEFAULT_GFF;
        ModelFromGFF gff3 = new ModelFromGFF(new File(path));
    }

}
