/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.genemodel;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintStream;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * Writes transcript sequences in FASTA format.
 * <p>
 * Transcripts are taken from an {@link AnnotationModel}; the bases for each
 * transcript are obtained by passing its exon annotations to
 * {@link GenomeSequence#getExonSequence}. Each record is written as a
 * {@code >label} header line followed by the sequence wrapped at
 * {@value #FASTA_LINE_WIDTH} characters per line.
 *
 * @author gevirl
 */
public class Transcriptome {
    /** Maximum number of sequence characters written on one FASTA line. */
    private static final int FASTA_LINE_WIDTH = 60;

    AnnotationModel model;
    GenomeSequence genome;
    
    /**
     * @param model  source of the annotations that define the transcripts
     * @param genome source of the sequence for the exons of each transcript
     */
    public Transcriptome(AnnotationModel model,GenomeSequence genome){
        this.genome = genome;
        this.model = model;
    }

    /**
     * Calls {@link #reportParentType} once for each entry of {@code parentTypes},
     * in array order, writing all records to the same stream.
     *
     * @param stream      destination of the FASTA records
     * @param parentTypes parent type prefixes to report
     * @param subtype     annotation subtype passed through to {@link #reportParentType}
     */
    public void reportParentTypes(PrintStream stream,String[] parentTypes,String subtype){
        for (String parentType : parentTypes){
            reportParentType(stream,parentType,subtype);
        }
    }

    /**
     * Writes the transcriptome fasta for annotations of parentTypePrefix that
     * contain any annotations of subtype.
     * <p>
     * One record is written per key of the map returned by
     * {@code model.getAnnotationsByParentPrefix(parentTypePrefix, subtype)};
     * the key is used as the record label.
     *
     * @param stream           destination of the FASTA records
     * @param parentTypePrefix prefix handed to the model to select parents
     * @param subtype          annotation subtype handed to the model
     */
    public void reportParentType(PrintStream stream,String parentTypePrefix,String subtype){
        TreeMap<String,List<Annotation>> annotMap = model.getAnnotationsByParentPrefix(parentTypePrefix, subtype);
        for (Entry<String,List<Annotation>> entry : annotMap.entrySet()){
            List<Annotation> exons = model.getExons(entry.getValue());
            String sequence = genome.getExonSequence(exons);
            reportSequence(stream,entry.getKey(),sequence);
        }        
    }
    

    /**
     * Writes the transcriptome fasta for annotations with the given parent
     * attribute name and parent types.
     * <p>
     * The transcripts come from {@code model.formTranscripts(attrName)}. A
     * transcript is written only when the model holds, under the transcript's
     * parent id, at least one annotation whose type is in {@code parentTypes}.
     * Parents for which the model returns nothing usable (null, or something
     * that is neither an {@code Annotation} nor a list) are skipped.
     *
     * @param stream      destination of the FASTA records
     * @param attrName    attribute name used by the model to group annotations into transcripts
     * @param parentTypes annotation types a parent must have for its transcript to be written
     */
    public void reportByParentType(PrintStream stream,String attrName,Set<String> parentTypes){
        TreeMap<String,List<Annotation>> parentMap = model.formTranscripts(attrName);
        
        for (Entry<String,List<Annotation>> entry : parentMap.entrySet()){
            String parent = entry.getKey();

            // does the model have an annotation matching the types provided?
            if (hasMatchingType(model.getAnnotations(parent),parentTypes)){
                String sequence = genome.getExonSequence(entry.getValue());
                reportSequence(stream,parent,sequence);
            }
        }
    }

    /**
     * Tests whether a lookup result from the model contains an annotation of
     * one of the requested types.
     *
     * @param annotsByID  lookup result: a single {@code Annotation}, a list of
     *                    them, or anything else (treated as "no match")
     * @param parentTypes the accepted annotation types
     * @return true if any annotation in the lookup result has an accepted type
     */
    private static boolean hasMatchingType(Object annotsByID,Set<String> parentTypes){
        if (annotsByID instanceof Annotation){
            return parentTypes.contains(((Annotation)annotsByID).getType());
        }
        if (annotsByID instanceof List){
            for (Object candidate : (List<?>)annotsByID){
                if (candidate instanceof Annotation
                        && parentTypes.contains(((Annotation)candidate).getType())){
                    return true;
                }
            }
        }
        // null or an unexpected type: nothing to match against
        return false;
    }

    /**
     * Writes a single FASTA record and flushes the stream.
     * <p>
     * If the label contains ':' and has a second colon-delimited field, only
     * that field is used as the header; otherwise the label is written as given.
     * An empty sequence produces a header line with no sequence lines.
     *
     * @param stream destination of the record
     * @param label  record label, optionally of the form {@code prefix:name}
     * @param seq    sequence to write, wrapped at {@value #FASTA_LINE_WIDTH} characters
     */
    private void reportSequence(PrintStream stream,String label,String seq){
        if (label.contains(":")){
            // split() drops trailing empty fields, so "abc:" yields a single
            // element; guard the index instead of assuming a second field exists
            String[] fields = label.split(":");
            if (fields.length > 1){
                label = fields[1];
            }
        }
        int len = seq.length();
        stream.printf(">%s\n",label );
        for (int index = 0 ; index < len ; index += FASTA_LINE_WIDTH){
            stream.printf("%s\n", seq.substring(index, Math.min(len, index+FASTA_LINE_WIDTH)));
        }
        stream.flush();         
    }    
    
    
    /**
     * Builds the bitSeq transcriptome for the WS245 reference (the original
     * note records 32899 transcripts).
     * <p>
     * NOTE(review): the original note says the output includes mRNA,
     * pseudogenes and transposons, but only the types {@code mRNA} and
     * {@code pseudogenic_transcript} are requested below - confirm whether
     * transposons are meant to be covered.
     * <p>
     * An earlier variant of this driver, previously kept here as commented-out
     * code, built a GTF-based {@code AnnotationModel} for dmel-all-r6.15 and
     * called {@code reportParentType(stream, "transcript_id", "exon")}.
     *
     * @param args not used; all paths are hard-coded
     * @throws Exception if the annotation, genome or output file cannot be processed
     */
    static public void main(String[] args)throws Exception {
      
        File gff = new File("/net/waterston/vol9/References/WS245/AllWormBase.withTransposon.gff3");

        // annotation chromosome names are mapped to the "chr"-prefixed names
        Annotation.chromoRenameMap = new TreeMap<>();
        Annotation.chromoRenameMap.put("I","chrI");
        Annotation.chromoRenameMap.put("II","chrII");
        Annotation.chromoRenameMap.put("III","chrIII");
        Annotation.chromoRenameMap.put("IV","chrIV");
        Annotation.chromoRenameMap.put("V","chrV");
        Annotation.chromoRenameMap.put("X","chrX");
        Annotation.chromoRenameMap.put("MtDNA","chrM");
        
        
        File genome = new File("/net/waterston/vol9/References/WS245/c_elegans.PRJNA13758.WS245.genomic.renamed.fa");
        AnnotationModelGFF model = new AnnotationModelGFF(gff.getPath());       
        GenomeSequence genomeSeq = new GenomeSequence(genome.getPath());
        Transcriptome xome = new Transcriptome(model,genomeSeq);
        TreeSet<String> types = new TreeSet<>();
        types.add("mRNA");
        types.add("pseudogenic_transcript");

        // close the output file even if report generation fails part-way
        try (PrintStream stream = new PrintStream("/net/waterston/vol9/References/WS245/mRNA.pseudo.xome.fa")){
            xome.reportByParentType(stream,"Parent",types);
        }
    }
}
