/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.LMS.RNASeq;

import org.rhwlab.LMS.RNASeq.md5.MD5sum;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FilenameFilter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import javax.json.JsonObject;
import org.rhwlab.LMS.RNASeq.merged.BAMs;
import org.rhwlab.LMS.RNASeq.merged.MergedID;
import org.rhwlab.db.MySql;
import org.rhwlab.formats.GFF3;
import org.rhwlab.formats.GFF3.Entry;

/**
 *
 * @author gevirl
 */
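// Records that an RNASeq job has finished.
// args[0] - the RunID of the job that has finished
// args[1] - the program that has finished: STAR, Analysis, Cufflinks, Monocle, Training, Normalize,
//           Archive, Seldup, RemoveDup, HTSeq-count or MergeBAMs; any name containing "BitSeq"
//           (e.g. "Analysis(BitSeq)") is handled as BitSeq - see run()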
public class Finisher implements Runnable {
    /**
     * Creates a finisher for one completed job.
     * <p>
     * The working directory is initialised from the {@code user.dir} system property; it can be
     * replaced with {@link #setDir(String)}.
     *
     * @param runID   identifier of the run that has finished
     * @param program name of the program that has finished; selects the handler used by {@link #run()}
     * @throws Exception kept in the signature for backward compatibility with existing callers
     */
    public Finisher(String runID,String program)throws Exception {
        this.runID = runID;
        this.program = program;
        // The previous unused System.getProperties() lookup was dropped; only user.dir is needed.
        this.dir = System.getProperty("user.dir");
    }

    /**
     * Dispatches to the handler that matches {@code program}, passing the current time and the run ID.
     * <p>
     * Any program name containing "BitSeq" goes to the BitSeq handler; every other name must match
     * exactly. An unrecognised name does nothing. Exceptions raised by a handler are printed to
     * standard error and not rethrown.
     */
    @Override
    public void run() {
        java.sql.Timestamp now = new java.sql.Timestamp(System.currentTimeMillis());
        try {
            // Substring match first: none of the exact names below contains "BitSeq",
            // so checking it ahead of the switch selects the same handler as before.
            if (program.contains("BitSeq")) {
                this.BitSeq(now, runID);
                return;
            }
            switch (program) {
                case "STAR":
                    this.STAR(now, runID);
                    break;
                case "Analysis":
                    this.Analysis(now, runID);
                    break;
                case "Cufflinks":
                    this.Cufflinks(now, runID);
                    break;
                case "Monocle":
                    this.Monocle(now, runID);
                    break;
                case "Training":
                    this.Unification(now, runID);
                    break;
                case "Normalize":
                    this.Normalization(now, runID);
                    break;
                case "Archive":
                    this.Archive(now, runID);
                    break;
                case "Seldup":
                    this.Seldup(now, runID);
                    break;
                case "RemoveDup":
                    this.RemoveDup(now, runID);
                    break;
                case "HTSeq-count":
                    this.HTSeq(now, runID);
                    break;
                case "MergeBAMs":
                    this.MergedBAMs(now, runID);
                    break;
                default:
                    break;
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }    
    /**
     * Replaces the working directory used by handlers that do not look up their own directory.
     *
     * @param dir path of the directory to work in
     */
    public void setDir(String dir) {
        this.dir = dir;
    }
    /**
     * Loads the WS245 reference data used by the report methods: the gene GFF3, the transcript GFF3
     * and the steep gene set (one entry per line, added to {@code steepGenes}).
     *
     * @throws Exception if a reference file cannot be opened or read
     */
    public void init()throws Exception {
        geneGFF = new GFF3("/net/waterston/vol9/References/WS245/WormBaseGene.gff3");
        transcriptGFF = new GFF3("/net/waterston/vol9/References/WS245/wormbase.gff3");
        // try-with-resources so the reader is closed even when a read fails part-way through
        try (BufferedReader reader = new BufferedReader(new FileReader("/net/waterston/vol9/References/WS245/SteepGeneSet"))) {
            for (String gene = reader.readLine(); gene != null; gene = reader.readLine()) {
                this.steepGenes.add(gene);
            }
        }
    }
    /**
     * Finishes a BAM-merge job: switches {@code dir} to the merged run's directory, stores that
     * directory's MD5 sums in {@code RNASeqMerged.MD5s}, marks the BAM date "MergeCompleted" and
     * records the latest grid error files.
     *
     * @param sqlTime completion time (not used by this handler)
     * @param runID   the MergedID of the finished run
     * @throws Exception if the directory lookup, checksum calculation or a database update fails
     */
    public void MergedBAMs(java.sql.Timestamp sqlTime,String runID)throws Exception {
        dir = MergedID.getDirectory(runID);

        // Same statement text as before ("Update RNASeqMerged set MD5s = ? where MergedID = ?"),
        // now built by the shared helper.
        updateMD5sums(dir, "RNASeqMerged", "MergedID", runID);

        // BAMs.updateReadCounts(runID) is intentionally left disabled, as in the previous version.
        MergedID.updateBAMDate(runID, "MergeCompleted");
        updateGridError(dir, "RNASeqMerged", "MergedID", runID);
    }    
    /**
     * Finishes a seldup job: reads {@code seldup.out.log} in the merged run's directory and stores
     * the PCR duplicate rate in {@code RNASeqMerged.PCRDupRate}, then records the latest grid
     * error files.
     * <p>
     * The rate is taken from the first whitespace-delimited token of the first line containing
     * "estimated PCR duplicate rate"; later matching lines are ignored.
     *
     * @param sqlTime completion time (not used by this handler)
     * @param runID   the MergedID of the finished run
     * @throws Exception if the log cannot be read, the rate is not a number or a database update fails
     */
    public void Seldup(java.sql.Timestamp sqlTime,String runID)throws Exception {
        System.out.printf("Finisher: %s\n", runID);
        dir = MergedID.getDirectory(runID);

        // try-with-resources: the log is closed even if parsing or the update throws
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(dir,"seldup.out.log")))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (!line.contains("estimated PCR duplicate rate")) {
                    continue;
                }
                String[] tokens = line.trim().split(" ");
                float rate = Float.parseFloat(tokens[0]);
                PreparedStatement state = MySql.getMySql().getStatement("Update RNASeqMerged set PCRDupRate = ? where MergedID = ?");
                state.setFloat(1, rate);
                state.setString(2, runID);
                state.execute();
                break;
            }
        }
        updateGridError(dir,"RNASeqMerged","MergedID",runID);
    }    
    /**
     * Finishes a duplicate-removal job for a merged run.
     * <p>
     * For the genome and the transcriptome BAM in turn: when the de-duplicated file exists, the
     * current aligned BAM is moved to its {@code *.save.bam} name and the de-duplicated file takes
     * its place. The saved originals are kept. Afterwards the directory's MD5 sums and the
     * {@code DupsRemoved} flag are written to {@code RNASeqMerged}, the BAM date is marked
     * "DupRemovedCompleted" and the latest grid error files are recorded.
     *
     * @param sqlTime completion time (not used by this handler)
     * @param runID   the MergedID of the finished run
     * @throws Exception if a file move, the checksum calculation or a database update fails
     */
    public void RemoveDup(java.sql.Timestamp sqlTime,String runID)throws Exception {
        System.out.printf("Finisher: %s\n", runID);
        dir = MergedID.getDirectory(runID);

        swapInDedupedBam("genome.bam", "Aligned.toGenome.out.bam", "genome.save.bam");
        swapInDedupedBam("xome.bam", "Aligned.toTranscriptome.out.bam", "xome.save.bam");

        JsonObject checksums = MD5sum.calculate(dir,exts);
        PreparedStatement update = MySql.getMySql().getStatement("Update RNASeqMerged set MD5s = ? , DupsRemoved = ? where MergedID = ?");
        update.setString(1, checksums.toString());
        update.setBoolean(2, true);
        update.setString(3, runID);
        update.execute();

        MergedID.updateBAMDate(runID,"DupRemovedCompleted");
        updateGridError(dir,"RNASeqMerged","MergedID",runID);
    }    

    // Moves the aligned BAM to its backup name and the de-duplicated BAM into its place; no-op when
    // the de-duplicated file is absent. All names are relative to the current dir.
    private void swapInDedupedBam(String dedupedName, String alignedName, String backupName) throws Exception {
        File deduped = new File(dir, dedupedName);
        if (!deduped.exists()) {
            return;
        }
        File aligned = new File(dir, alignedName);
        File backup = new File(dir, backupName);
        Files.move(aligned.toPath(), backup.toPath(), StandardCopyOption.REPLACE_EXISTING);
        Files.move(deduped.toPath(), aligned.toPath(), StandardCopyOption.REPLACE_EXISTING);
    }
    /**
     * Finishes an archive job: stores the MD5 sums of the current {@code dir} in
     * {@code RNASequencing.MD5s} for the row whose {@code SequencingID} equals {@code runID}.
     *
     * @param sqlTime completion time (not used by this handler)
     * @param runID   the SequencingID of the finished run
     * @throws Exception if the checksum calculation or the database update fails
     */
    public void Archive(java.sql.Timestamp sqlTime,String runID)throws Exception {
        // Builds the same statement text as before: "Update RNASequencing set MD5s = ? where SequencingID = ?"
        updateMD5sums(dir, "RNASequencing", "SequencingID", runID);
    }
    /**
     * Stamps {@code RNASeqUnification.UnificationCompleted} with {@code sqlTime} for the row whose
     * {@code UnificationID} equals {@code runID}.
     *
     * @param sqlTime completion time to store
     * @param runID   the UnificationID of the finished run
     * @throws Exception if the database update fails
     */
    public void Unification(java.sql.Timestamp sqlTime,String runID)throws Exception {
        final String sql = "Update RNASeqUnification set UnificationCompleted = ? where UnificationID = ?";
        PreparedStatement update = MySql.getMySql().getStatement(sql);
        update.setTimestamp(1, sqlTime);
        update.setString(2, runID);
        update.execute();
    }
    /**
     * Stamps {@code RNASeqUnification.DeconvolutionCompleted} with {@code sqlTime} for the row whose
     * {@code UnificationID} equals {@code runID}.
     *
     * @param sqlTime completion time to store
     * @param runID   the UnificationID of the finished run
     * @throws Exception if the database update fails
     */
    public void Normalization(java.sql.Timestamp sqlTime,String runID)throws Exception {
        final String sql = "Update RNASeqUnification set DeconvolutionCompleted = ? where UnificationID = ?";
        PreparedStatement update = MySql.getMySql().getStatement(sql);
        update.setTimestamp(1, sqlTime);
        update.setString(2, runID);
        update.execute();
    }   

    /**
     * Finishes an HTSeq-count job: marks the expression run completed, converts {@code HTSeq.out}
     * and {@code HTSeq.all.out} in the run's directory to CSV reports, stores the directory's MD5
     * sums in {@code RNASeqMergedExpression.MD5s} and records the latest grid error files.
     *
     * @param sqlTime completion time written to the expression row
     * @param runID   the ExpressionID of the finished run
     * @throws Exception if a file cannot be processed or a database update fails
     */
    public void HTSeq(java.sql.Timestamp sqlTime,String runID)throws Exception {
        expressionComplete(sqlTime,runID);
        System.out.printf("Finisher: %s\n", runID);
        dir = org.rhwlab.LMS.RNASeq.merged.ExpressionID.getDirectory(runID);

        // Both count files go through the same conversion, in this order.
        for (String countFile : new String[]{"HTSeq.out", "HTSeq.all.out"}) {
            HTSeqFileProcess(new File(dir, countFile));
        }

        // Same statement text as before: "Update RNASeqMergedExpression set MD5s = ? where ExpressionID = ?"
        updateMD5sums(dir, "RNASeqMergedExpression", "ExpressionID", runID);
        updateGridError(dir, "RNASeqMergedExpression", "ExpressionID", runID);
    }
    /**
     * Converts one tab-separated HTSeq count file into three CSV reports written next to it:
     * {@code <inFile>.csv} (every gene found in the gene GFF), {@code <inFile>.mRNA.csv} (genes
     * whose biotype is protein_coding) and {@code <inFile>.steep.csv} (genes listed in the
     * SteepWBGeneSet reference file).
     * <p>
     * Each output row is {@code sequence_name,locus,count}; the sequence name stands in for a
     * missing locus. Input rows whose first column (quotes and spaces removed) is not a gene
     * "Name" in the GFF, or that have no count column, are skipped.
     *
     * @param inFile the HTSeq count file to convert
     * @throws Exception if a reference, input or output file cannot be opened, read or written
     */
    private void  HTSeqFileProcess(File inFile)throws Exception {
        // make a set of mRNA genes
        TreeSet<String> mRNAGenes = new TreeSet<>();
        for (String id : geneGFF.getIDs()){
            Entry entry = geneGFF.getEntry(id);
            // constant-first equals: a gene without a biotype attribute is simply not protein coding
            if ("protein_coding".equals(entry.attributes.get("biotype"))){
                mRNAGenes.add(id);  // id is wbgene
            }
        }

        // make the set of steep genes
        TreeSet<String> steepwbGenes = new TreeSet<>();
        try (BufferedReader steepReader = new BufferedReader(new FileReader("/net/waterston/vol9/References/WS245/SteepWBGeneSet"))) {
            for (String gene = steepReader.readLine(); gene != null; gene = steepReader.readLine()) {
                steepwbGenes.add(gene);  // each line is a wbgene
            }
        }

        HashMap<String,Entry> geneIndex = geneGFF.buildAtributeIndex("Name");
        String basePath = inFile.getPath();

        // All four files are closed here, including the mRNA and steep streams that used to be left open.
        try (BufferedReader reader = new BufferedReader(new FileReader(inFile));
             PrintStream stream = new PrintStream(new File(basePath + ".csv"));
             PrintStream mRNAStream = new PrintStream(new File(basePath + ".mRNA.csv"));
             PrintStream steepStream = new PrintStream(new File(basePath + ".steep.csv"))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                String[] tokens = line.split("\t");
                if (tokens.length < 2){
                    continue;  // no count column on this line
                }
                Entry entry = geneIndex.get(tokens[0].replaceAll("\"", "").replaceAll(" ", ""));
                if (entry == null){
                    continue;
                }
                String seqName = entry.attributes.get("sequence_name");
                String locus = entry.attributes.get("locus");
                String wbGene = entry.attributes.get("Name");
                if (locus == null){
                    locus = seqName;
                }
                stream.printf("%s,%s,%s\n",seqName,locus,tokens[1]);
                if (mRNAGenes.contains(wbGene)){
                    mRNAStream.printf("%s,%s,%s\n",seqName,locus,tokens[1]);
                }
                if (steepwbGenes.contains(wbGene)){
                    steepStream.printf("%s,%s,%s\n",seqName,locus,tokens[1]);
                }
            }
        }
    }

    /**
     * Finishes a STAR alignment job for {@code runID}.
     * <p>
     * In order, this:
     * <ol>
     *   <li>scans {@code GenomeLog.out} and {@code XomeLog.out} for lines containing "error"
     *       (case-insensitive) and stores them in {@code RNASeqIndividualAlign.Notes}, or sets
     *       Notes to null when none are found;</li>
     *   <li>stores the text of {@code GenomeLog.final.out} and {@code XomeLog.final.out} in the
     *       {@code GenomicLogFile} and {@code XomicLogFile} columns;</li>
     *   <li>counts ribosomal reads in {@code Aligned.toTranscriptome.out.bam} and stores the count
     *       in {@code rRNAReads};</li>
     *   <li>prepends a track line to every {@code .wig} file that does not start with one;</li>
     *   <li>stores the directory's MD5 sums, updates the alignment dates and records the latest
     *       grid error files.</li>
     * </ol>
     * Errors from both logs are now kept together; previously the Genome errors were discarded
     * before the Xome log was scanned.
     *
     * @param sqlTime completion time (not used by this handler)
     * @param runID   the AlignmentID of the finished run
     * @throws Exception if a required file cannot be read or written, or a database update fails
     */
    public void STAR(java.sql.Timestamp sqlTime,String runID)throws Exception {
        System.out.printf("Finisher: %s\n", runID);
        dir = AlignmentID.getDirectory(runID);

        // look for ERROR lines in the genome log, then in the transcriptome log
        StringBuilder errors = new StringBuilder();
        boolean genomeError = appendLogErrors(new File(dir,"GenomeLog.out"), "Genome: ", errors);
        boolean xomeError = appendLogErrors(new File(dir,"XomeLog.out"), "Xome: ", errors);

        if (genomeError || xomeError){
            PreparedStatement errorState = MySql.getMySql().getStatement("Update RNASeqIndividualAlign set Notes = ? where AlignmentID = ?");
            errorState.setString(1, errors.toString());
            errorState.setString(2, runID);
            errorState.execute();
        } else {
            System.out.printf("Finisher: no error detected in Log file\n");
            PreparedStatement errorState = MySql.getMySql().getStatement("Update RNASeqIndividualAlign set Notes = null where AlignmentID = ?");
            errorState.setString(1, runID);
            errorState.execute();
        }

        // save the genomic log file in the db
        String genomicLog = readLogText(new File(dir,"GenomeLog.final.out"));
        PreparedStatement logState = MySql.getMySql().getStatement("Update RNASeqIndividualAlign set GenomicLogFile = ? where AlignmentID = ?");
        logState.setString(1, genomicLog);
        logState.setString(2, runID);
        logState.execute();

        // save the xomic log file in the db
        String xomicLog = readLogText(new File(dir,"XomeLog.final.out"));
        logState = MySql.getMySql().getStatement("Update RNASeqIndividualAlign set XomicLogFile = ? where AlignmentID = ?");
        logState.setString(1, xomicLog);
        logState.setString(2, runID);
        logState.execute();

        System.out.printf("Finisher: updating read counts\n");

        File bamFile = new File(dir,"Aligned.toTranscriptome.out.bam");
        org.rhwlab.alignment.BAM bam = new org.rhwlab.alignment.BAM(bamFile);
        String species = AlignmentID.getSpecies(runID);
        bam.countRibosomalReads(species);
        int count = bam.getRibosomalReadCount();
        PreparedStatement countState = MySql.getMySql().getStatement("Update RNASeqIndividualAlign set rRNAReads = ? where AlignmentID = ?");
        countState.setInt(1, count);
        countState.setString(2, runID);
        countState.execute();

        // put a track description on any wig file that does not have it already
        File[] wigs = new File(dir).listFiles(new FilenameFilter(){
            @Override
            public boolean accept(File parent, String name) {
                return name.endsWith(".wig");
            }
        });
        if (wigs == null){
            System.out.printf("wigs is null, dir: %s\n",dir);
        } else {
            for (File wig : wigs){
                System.out.printf("Finisher: fixing wig %s\n", wig);
                ensureWigTrackLine(wig, runID);
            }
        }

        // update all the md5sums
        JsonObject md5Object = MD5sum.calculate(dir,exts);
        PreparedStatement md5State = MySql.getMySql().getStatement("Update RNASeqIndividualAlign set MD5s = ? where AlignmentID = ?");
        md5State.setString(1, md5Object.toString());
        md5State.setString(2, runID);
        md5State.execute();
        AlignmentID.updateAlignDates(dir, runID);
        updateGridError(dir,"RNASeqIndividualAlign","AlignmentID",runID);
    }

    // Appends prefix + line to sink for every line of log containing "error" (any case); returns true if any matched.
    private static boolean appendLogErrors(File log, String prefix, StringBuilder sink) throws Exception {
        boolean found = false;
        try (BufferedReader reader = new BufferedReader(new FileReader(log))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (line.toLowerCase().contains("error")){
                    sink.append(prefix);
                    sink.append(line);
                    found = true;
                }
            }
        }
        return found;
    }

    // Returns the whole content of log with every line terminated by "\n".
    private static String readLogText(File log) throws Exception {
        StringBuilder text = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(log))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                text.append(line);
                text.append("\n");
            }
        }
        return text.toString();
    }

    // Rewrites wig with a leading track line unless it is empty or already starts with "track".
    private static void ensureWigTrackLine(File wig, String runID) throws Exception {
        File scratch = null;
        boolean replaced = false;
        try {
            try (BufferedReader reader = new BufferedReader(new FileReader(wig))) {
                String line = reader.readLine();
                if (line == null || line.startsWith("track")){
                    return;  // an empty file used to cause a NullPointerException here
                }
                // The description is runID + file name without ".out.wig"; fall back to stripping
                // ".wig" so a name without ".out" no longer makes substring() throw.
                String wigName = wig.getName();
                int cut = wigName.lastIndexOf(".out.wig");
                if (cut < 0){
                    cut = wigName.lastIndexOf(".wig");
                }
                String desc = String.format("%s%s",runID,wigName.substring(0,cut));

                // Unique scratch file beside the wig instead of a shared "temp.out" in the JVM's
                // working directory, so concurrent finishers cannot overwrite each other's output.
                scratch = File.createTempFile("wig", ".tmp", wig.getAbsoluteFile().getParentFile());
                try (PrintStream wigOut = new PrintStream(scratch)) {
                    wigOut.printf("track type=wiggle_0 name=\"%s\" description=\"%s\" visibility=2 color=50,50,100 yLineMark=0.0 yLineOnOff=on autoScale=off alwaysZero=on maxHeightPixels=128:32:11 viewLimits=0:35 priority=1\n",desc,desc);
                    while (line != null){
                        wigOut.println(line);
                        line = reader.readLine();
                    }
                    // PrintStream hides write failures; never replace the wig with a short copy
                    if (wigOut.checkError()){
                        throw new java.io.IOException("Failed writing track header copy of " + wig);
                    }
                }
            }
            // both files are closed before the original is replaced
            Files.move(scratch.toPath(),wig.toPath(),StandardCopyOption.REPLACE_EXISTING);
            replaced = true;
        } finally {
            if (scratch != null && !replaced){
                scratch.delete();
            }
        }
    }
    /**
     * Calculates the MD5 sums for {@code directory} and stores their JSON text in the
     * {@code MD5s} column of the row of {@code table} whose {@code field} equals {@code id}.
     * <p>
     * {@code table} and {@code field} are inserted into the SQL text as-is, so callers must pass
     * trusted identifiers only; {@code id} is bound as a statement parameter.
     *
     * @param directory directory whose files are checksummed
     * @param table     table to update
     * @param field     key column used in the where clause
     * @param id        key value identifying the row
     * @throws Exception if the checksum calculation or the database update fails
     */
    static public void updateMD5sums(String directory,String table,String field,String id)throws Exception {
        JsonObject checksums = MD5sum.calculate(directory, Finisher.exts);
        String sql = String.format("Update %s set MD5s = ? where %s = ?", table, field);
        PreparedStatement update = MySql.getMySql().getStatement(sql);
        update.setString(1, checksums.toString());
        update.setString(2, id);
        update.execute();
    }
    /**
     * Stores the JSON text returned by {@code GridSubmitOld.latestErrorFiles} for {@code directory}
     * in the {@code GridErrorFiles} column of the row of {@code table} whose {@code field} equals
     * {@code id}.
     * <p>
     * {@code table} and {@code field} are inserted into the SQL text as-is, so callers must pass
     * trusted identifiers only; {@code id} is bound as a statement parameter.
     *
     * @param directory directory inspected for grid error files
     * @param table     table to update
     * @param field     key column used in the where clause
     * @param id        key value identifying the row
     * @throws Exception if the error-file lookup or the database update fails
     */
    static public void updateGridError(String directory,String table,String field,String id)throws Exception {
        JsonObject errorFiles = GridSubmitOld.latestErrorFiles(new File(directory));
        String sql = String.format("Update %s set GridErrorFiles = ? where %s = ?", table, field);
        PreparedStatement update = MySql.getMySql().getStatement(sql);
        update.setString(1, errorFiles.toString());
        update.setString(2, id);
        update.execute();
    }
    /**
     * Counts the alignments in the first {@code .bam} file returned when listing {@code dir} and
     * stores the totals in {@code RNASeqAlignment} for {@code runID}.
     * <p>
     * Per record, the reference name is classified as GFP, mCherry, mitochondrial ("chrM",
     * "MtDNA" or an entry of {@code mitoTranscripts}) and/or rRNA (an entry of {@code rRNAs} or
     * {@code rRNAfly}). When the directory holds no {@code .bam} file nothing is written.
     * Records that raise an exception while being read or classified are skipped; the number
     * skipped is reported on standard error instead of being dropped silently.
     *
     * @param runID the AlignmentID whose row is updated
     * @throws java.io.IOException if {@code dir} cannot be listed
     * @throws Exception if the BAM cannot be opened or the database update fails
     */
    public void alignmentCounts(String runID)throws Exception {
        File alignDir = new File(dir);
        File[] files = alignDir.listFiles();
        if (files == null){
            // listFiles() returns null for a missing or unreadable directory
            throw new java.io.IOException("Cannot list alignment directory: " + alignDir);
        }

        // only the first .bam in listing order is counted
        File bamFile = null;
        for (File file : files){
            if (file.getName().endsWith(".bam")){
                bamFile = file;
                break;
            }
        }
        if (bamFile == null){
            return;
        }

        int totalCount = 0;
        int rRNACount = 0;
        int gfpCount = 0;
        int cherryCount = 0;
        int mitoCount = 0;
        int skipped = 0;

        // try-with-resources so the reader is released even if iteration fails
        try (SamReader samReader = SamReaderFactory.makeDefault().open(bamFile)) {
            SAMRecordIterator iter = samReader.iterator();
            while (iter.hasNext()){
                try {
                    SAMRecord record = iter.next();
                    ++totalCount;
                    String ref = record.getReferenceName();
                    if (ref.equals("GFP")){
                        ++gfpCount;
                    }
                    if (ref.equals("mCherry")){
                        ++cherryCount;
                    }
                    if (ref.equals("chrM")||ref.equals("MtDNA")){
                        ++mitoCount;
                    }
                    else {
                        for (String mito : mitoTranscripts){
                            if (mito.equals(ref)){
                                ++mitoCount;
                                break;
                            }
                        }
                    }
                    for (String rRNA : rRNAs){
                        if (rRNA.equals(ref)){
                            ++rRNACount;
                            break;
                        }
                    }
                    for (String rRNA : rRNAfly){
                        if (rRNA.equals(ref)){
                            ++rRNACount;
                            break;
                        }
                    }
                } catch (Exception exc){
                    // best effort: keep counting, but remember that a record was not fully processed
                    ++skipped;
                }
            }
        }
        if (skipped > 0){
            System.err.printf("Finisher: %d record(s) in %s could not be processed\n", skipped, bamFile);
        }

        PreparedStatement state = MySql.getMySql().getStatement(
                "Update RNASeqAlignment set TotalAlignments = ? , rRNAAlignments= ? , gfpAlignments = ? , cherryAlignments = ? , mitoAlignments = ? where AlignmentID = ?");
        state.setInt(1, totalCount);
        state.setInt(2, rRNACount);
        state.setInt(3, gfpCount);
        state.setInt(4, cherryCount);
        state.setInt(5, mitoCount);
        state.setString(6, runID);
        state.execute();
    }
    /**
     * Stamps {@code RNASeqMergedExpression.ExpressionCompleted} with {@code sqlTime} for the row
     * whose {@code ExpressionID} equals {@code runID}.
     */
    private void expressionComplete(java.sql.Timestamp sqlTime,String runID)throws Exception {
        final String sql = "Update RNASeqMergedExpression set ExpressionCompleted = ? where ExpressionID = ?";
        PreparedStatement update = MySql.getMySql().getStatement(sql);
        update.setTimestamp(1, sqlTime);
        update.setString(2, runID);
        update.execute();
    }
    /**
     * Finishes a Cufflinks job; the only action is marking the expression run completed at
     * {@code sqlTime}.
     *
     * @param sqlTime completion time to store
     * @param runID   the ExpressionID of the finished run
     * @throws Exception if the database update fails
     */
    public void Cufflinks(java.sql.Timestamp sqlTime, String runID) throws Exception {
        this.expressionComplete(sqlTime, runID);
    }

    /**
     * Constructs a map of the transcripts with identical sequence for the reference used in the
     * given expression run.
     * <p>
     * The data comes from the tab-separated file {@code IdenticalSequenceTranscripts} located in
     * the same directory as the reference's sequence file. On each line the first column is the
     * key and the remaining columns are its list of values.
     *
     * @param runID the expression run whose reference is looked up
     * @return map, sorted by key, from first column to the list of remaining columns
     * @throws Exception if the reference lookup fails or the file cannot be read
     */
    public Map<String,List<String>> IdenticalTranscripts(String runID)throws Exception {
        String reference = ExpressionID.getReference(runID);
        File seqFile = new File(AlignmentReference.getSequenceFile(reference));
        File identFile = new File(seqFile.getParent(),"IdenticalSequenceTranscripts");
        TreeMap<String,List<String>> ret = new TreeMap<>();
        // try-with-resources so the file is closed even if reading fails part-way through
        try (BufferedReader reader = new BufferedReader(new FileReader(identFile))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                String[] tokens = line.split("\t");
                List<String> list = new ArrayList<>();
                for (int i=1 ; i<tokens.length ; ++i){
                    list.add(tokens[i]);
                }
                ret.put(tokens[0], list);
            }
        }
        return ret;
    }
     // convert relative abundance to rpkm - returns the output filename
    /**
     * Writes an rpkm file from a relative-abundance file.
     * <p>
     * Every input line that does not start with '#' is read as {@code index theta} separated by a
     * space, where {@code index} is 1-based into {@code transcripts} and {@code len}. The value
     * written is {@code theta * 1e9 / len[index-1]}. Each output line is
     * {@code transcript cgc type value}; when the transcript is not in the transcript GFF its own
     * name is written in all three name columns. Lines starting with '#' and empty lines produce
     * no output.
     *
     * @param file        input file; its name must contain "thetaMeans", which is replaced by
     *                    "transcript.rpkm" to form the output name
     * @param transcripts transcript names, indexed by (index - 1)
     * @param len         transcript lengths, parallel to {@code transcripts}
     * @return the output file, created in the same directory as {@code file}
     * @throws IllegalArgumentException if the output name would equal the input name
     * @throws Exception if a file cannot be read or written, or a line cannot be parsed
     */
    private File toRPKM(File file,String[] transcripts,double[] len)throws Exception {
        final double scale = 1.0e9;
        File ret = new File(file.getParent(),file.getName().replace("thetaMeans", "transcript.rpkm"));
        if (ret.getName().equals(file.getName())){
            // opening the output would truncate the input before it is read
            throw new IllegalArgumentException("Input name does not contain \"thetaMeans\": " + file);
        }
        // try-with-resources so both files are closed even when a line fails to parse
        try (BufferedReader reader = new BufferedReader(new FileReader(file));
             PrintStream stream = new PrintStream(ret)) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (line.isEmpty() || line.charAt(0) == '#'){
                    continue;  // empty lines used to make charAt(0) throw
                }
                String[] tokens = line.split(" ");
                int i = Integer.parseInt(tokens[0]) - 1;
                double theta = Double.parseDouble(tokens[1]);
                double v = (theta*scale)/(len[i]);  // rpkm
                String tr = transcripts[i];
                Entry transcriptEntry = this.transcriptGFF.getEntry(tr);
                if (transcriptEntry == null){
                    stream.printf("%s %s %s %f\n", tr,tr,tr,v);
                } else {
                    String type = this.type(transcriptEntry);
                    String cgc = cgc(transcriptEntry);
                    stream.printf("%s %s %s %f\n", tr,cgc,type,v);
                }
            }
        }
        return ret;
    }
/*    
    // convert rpkm to tpm - returns the output filename
    private File toTPM(File rpkmFile)throws Exception {
        double f = Math.pow(10.0, 6.0);
        File ret = new File(rpkmFile.getParent(),rpkmFile.getName().replace("rpkm", "tpm"));
        
        // compute the conversion denominator
        BufferedReader reader = new BufferedReader(new FileReader(rpkmFile));
        String line = reader.readLine();
        double d = 0.0;
        while (line != null){
            if (line.charAt(0)!='#'){
                String []tokens = line.split(" ");
                double rpkm = Double.valueOf(tokens[3]);
                d = d + rpkm;
            }
            line = reader.readLine();
        }
        reader.close();
        
        // do the conversion to tpm
        PrintStream stream = new PrintStream(ret);
        reader = new BufferedReader(new FileReader(rpkmFile));
        line = reader.readLine();
        while (line != null){
            if (line.charAt(0)!='#'){
                String []tokens = line.split(" ");
                double rpkm = Double.valueOf(tokens[3]);
                double v = (rpkm*f)/(d);  // tpm
                stream.printf("%s %s %s %f\n",tokens[0],tokens[1],tokens[2],v);
            }
            line = reader.readLine();
        }
        reader.close();
        stream.close();
        return ret;
    }
    */
    /**
     * Produces the report files derived from an all-transcripts file: the mRNA-only transcript
     * file, its CSV form, the per-gene file and the steep-gene file, in that order.
     */
    private void toReportFiles(File allTranscripts)throws Exception {
        File mRNAOnly = filter_mRNA(allTranscripts);
        toCSV(mRNAOnly);
        // the gene-level file feeds straight into the steep-gene report
        toSteepGenes(toGenes(mRNAOnly));
    }
    /**
     * Copies a transcript file, keeping header lines (starting with '#') and the lines whose
     * transcript has type "mRNA" in the transcript GFF.
     * <p>
     * The transcript name is the first space-separated token of each line. A transcript missing
     * from the GFF is reported on standard output and left out of the result; previously the
     * missing entry was still handed to the type lookup. Empty lines are skipped.
     *
     * @param file input file; the output path is the input path with every "transcript" replaced
     *             by "mRNA.transcript"
     * @return the filtered output file
     * @throws IllegalArgumentException if the output path would equal the input path
     * @throws Exception if a file cannot be read or written
     */
    private File filter_mRNA(File file)throws Exception {
        File outFile = new File(file.getPath().replace("transcript", "mRNA.transcript"));
        if (outFile.equals(file)){
            // opening the output would truncate the input before it is read
            throw new IllegalArgumentException("Input path does not contain \"transcript\": " + file);
        }
        // try-with-resources so both files are closed even when a lookup throws
        try (BufferedReader reader = new BufferedReader(new FileReader(file));
             PrintStream stream = new PrintStream(outFile)) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (line.isEmpty()){
                    continue;  // empty lines used to make charAt(0) throw
                }
                if (line.charAt(0) == '#'){
                    stream.println(line);
                    continue;
                }
                String[] tokens = line.split(" ");
                Entry transcriptEntry = this.transcriptGFF.getEntry(tokens[0]);
                if (transcriptEntry == null){
                    System.out.printf("not found: %s\n",line);
                    continue;  // no entry means no type to test
                }
                if ("mRNA".equals(this.type(transcriptEntry))){
                    stream.println(line);
                }
            }
        }
        return outFile;
    }
    /**
     * Collapses a space-delimited transcript file to gene level by summing the
     * value in the fourth column over all transcripts that share a parent gene.
     * Header lines (starting with '#') are copied unchanged; blank lines are ignored.
     * Each output row is "sequence_name locus type sum"; when a gene has no
     * "locus" attribute its sequence name is used for both name columns. The
     * type written for a gene is the type of the last of its transcripts read.
     * Genes are written in sorted order of their gene id.
     *
     * A gene id that is missing from the gene GFF is reported on standard output
     * and written using the gene id itself for both name columns.
     *
     * @param file input transcript file
     * @return the output file; its path is the input path with "transcript"
     *         replaced by "gene"
     * @throws Exception if the input cannot be read or parsed, or the output
     *         cannot be written
     */
    private File toGenes(File file)throws Exception {
        TreeMap<String,String> geneTypes = new TreeMap<>();
        TreeMap<String,Double> geneSums = new TreeMap<>();
        File outFile = new File(file.getPath().replace("transcript", "gene"));
        // try-with-resources closes both files even when a row fails to parse
        try (BufferedReader reader = new BufferedReader(new FileReader(file));
             PrintStream stream = new PrintStream(outFile)) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()){
                if (line.isEmpty()){
                    continue;  // charAt(0) would throw on a blank line
                }
                if (line.charAt(0) == '#'){
                    stream.println(line);
                    continue;
                }
                String[] tokens = line.split(" ");
                double v = Double.parseDouble(tokens[3]);
                Entry transcriptEntry = this.transcriptGFF.getEntry(tokens[0]);
                String wb = this.wbGene(transcriptEntry);
                geneTypes.put(wb,this.type(transcriptEntry));
                Double sums = geneSums.get(wb);
                geneSums.put(wb, sums == null ? v : sums + v);
            }

            for (Map.Entry<String,Double> geneSum : geneSums.entrySet()){
                String wb = geneSum.getKey();
                Entry geneEntry = this.geneGFF.getEntry(wb);
                String seq;
                String cgc;
                if (geneEntry == null){
                    // no gene record: keep the row, identified by its gene id
                    System.out.printf("not found: %s\n",wb);
                    seq = wb;
                    cgc = wb;
                } else {
                    seq = geneEntry.attributes.get("sequence_name");
                    cgc = geneEntry.attributes.get("locus");
                    if (cgc == null){
                        cgc = seq;
                    }
                }
                stream.printf("%s %s %s %f\n",seq,cgc,geneTypes.get(wb),geneSum.getValue());
            }
        }
        return outFile;
    }
    /**
     * Copies a space-delimited gene file, keeping only the rows whose first
     * token is contained in {@code steepGenes}. Header lines (starting with '#')
     * are copied unchanged; blank lines are dropped.
     *
     * @param file input gene file
     * @return the output file; its path is the input path with "mRNA.gene"
     *         replaced by "steep.mRNA.gene"
     * @throws Exception if the input cannot be read or the output cannot be written
     */
    private File toSteepGenes(File file)throws Exception {
        File outFile = new File(file.getPath().replace("mRNA.gene", "steep.mRNA.gene"));
        // try-with-resources closes both files even if reading fails part way
        try (BufferedReader reader = new BufferedReader(new FileReader(file));
             PrintStream stream = new PrintStream(outFile)) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()){
                if (line.isEmpty()){
                    continue;  // charAt(0) would throw on a blank line
                }
                boolean header = line.charAt(0) == '#';
                if (header || this.steepGenes.contains(line.split(" ")[0])){
                    stream.println(line);
                }
            }
        }
        return outFile;
    }
    /**
     * Writes a comma-separated copy of a space-delimited file. Every space in
     * a data line becomes a comma; header lines (starting with '#') and blank
     * lines are copied unchanged.
     *
     * @param file input file
     * @return the output file, named as the input path with ".csv" appended
     * @throws Exception if the input cannot be read or the output cannot be written
     */
    private File toCSV(File file)throws Exception {
        File outFile = new File(file.getPath()+".csv");
        // try-with-resources closes both files even if reading fails part way
        try (BufferedReader reader = new BufferedReader(new FileReader(file));
             PrintStream stream = new PrintStream(outFile)) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()){
                // startsWith is safe on an empty line, unlike charAt(0)
                if (line.startsWith("#")){
                    stream.println(line);
                } else {
                    // plain character replacement; no regular expression is needed
                    stream.println(line.replace(' ', ','));
                }
            }
        }
        return outFile;
    }
    /**
     * Post-processes a finished BitSeq run in the working directory.
     * <ol>
     * <li>Reads transcript names and lengths from "data.tr".</li>
     * <li>Reads the histone and steep transcript name lists from the WS245
     *     reference directory, and builds the set of transcripts typed "mRNA"
     *     by the transcript GFF.</li>
     * <li>Deletes existing files in the working directory whose names end in "tpm.csv".</li>
     * <li>Writes TPM and gene-level files for the all, mRNA, mRNA-without-histone,
     *     histone and steep transcript subsets.</li>
     * <li>Stores the MD5 sums in RNASeqMergedExpression, then calls
     *     {@code expressionComplete} and {@code updateGridError}.</li>
     * </ol>
     * Transcripts from "data.tr" that are missing from the transcript GFF are
     * reported on standard output and are not added to the mRNA set.
     *
     * @param sqlTime completion time passed on to {@code expressionComplete}
     * @param runID the expression id; also the base name of the ".thetaMeans" file
     * @throws Exception if an input file is missing or malformed, or a database
     *         update fails
     */
    public void BitSeq(java.sql.Timestamp sqlTime,String runID)throws Exception {
        File thetaFile = new File(dir,runID+".thetaMeans");

        // read in the transcript data file
        File trFile = new File(dir,"data.tr");
        String[] transcripts;  // transcript ids (sequence names)
        double[] len;          // length of transcript in bases
        try (BufferedReader reader = new BufferedReader(new FileReader(trFile))) {
            // process the header; its third token is the number of transcript rows
            String line = reader.readLine();
            if (line == null){
                throw new IllegalStateException("empty transcript file: "+trFile);
            }
            String[] tokens = line.split(" ");
            int n = Integer.parseInt(tokens[2]);
            transcripts = new String[n];
            len = new double[n];

            // make an array of transcript names and lengths
            for (int i=0 ; i<n ; ++i){
                line = reader.readLine();
                if (line == null){
                    throw new IllegalStateException("expected "+n+" transcripts in "+trFile+" but found "+i);
                }
                tokens = line.split(" ");
                transcripts[i] = tokens[1];
                if (transcripts[i].contains(":")){
                    // keep only the part after the first ':'
                    transcripts[i] = transcripts[i].split(":")[1];
                }
                // the length is the fourth token of a four-token row, otherwise the third
                if (tokens.length == 4){
                    len[i] = Double.parseDouble(tokens[3]);
                } else {
                    len[i] = Double.parseDouble(tokens[2]);
                }
            }
        }

        // read the histone names
        TreeSet<String> histoneTranscripts = readTranscriptNameSet("/net/waterston/vol9/References/WS245/HistoneTranscripts");

        // make a list of mRNA transcripts
        TreeSet<String> mRNATranscripts = new TreeSet<>();
        for (String transcript : transcripts){
            Entry transcriptEntry = this.transcriptGFF.getEntry(transcript);
            if (transcriptEntry == null){
                // report the transcript that is missing and leave it out of the mRNA set
                System.out.printf("not found: %s\n",transcript);
                continue;
            }
            if (this.type(transcriptEntry).equals("mRNA")){
                mRNATranscripts.add(transcript);
            }
        }

        // make a list of steep transcripts
        TreeSet<String> steepTranscripts = readTranscriptNameSet("/net/waterston/vol9/References/WS245/SteepTranscriptSet");

        // remove tpm files left in the working directory; listFiles() returns null
        // when the directory cannot be listed
        File[] existing = new File(dir).listFiles();
        if (existing != null){
            for (File file : existing){
                if (file.getName().endsWith("tpm.csv")){
                    file.delete();
                }
            }
        }

        File allFile = toTPM(thetaFile,transcripts,len,null,null,"all.transcript");
        toGenes(allFile);
        File  mRNAFile = toTPM(thetaFile,transcripts,len,mRNATranscripts,null,"mRNA.transcript");
        toGenes(mRNAFile);
        File noHistFile = toTPM(thetaFile,transcripts,len,mRNATranscripts,histoneTranscripts,"mRNA.nohistone.transcript");
        toGenes(noHistFile);
        File histFile = toTPM(thetaFile,transcripts,len,histoneTranscripts,null,"histone.transcript");
        toGenes(histFile);
        File steepFile = toTPM(thetaFile,transcripts,len,steepTranscripts,null,"steep.transcript");
        toGenes(steepFile);
 /*       
        File riboZero = toRPKM(thetaFile,transcripts,len);
        WormPolyA polyA = new WormPolyA(riboZero);
        polyA.run();
        File polyAFile = polyA.getOutputFile();
        toReportFiles(riboZero);
        toReportFiles(polyAFile);
        
        File tpmRiboZero = toTPM(riboZero);
        File tpmPolyAFile = toTPM(polyAFile);
        toReportFiles(tpmRiboZero);
        toReportFiles(tpmPolyAFile);
*/

        // update all the md5sums
        JsonObject md5Object = MD5sum.calculate(dir,exts);
        PreparedStatement md5State = MySql.getMySql().getStatement("Update RNASeqMergedExpression set MD5s = ? where ExpressionID = ?");
        md5State.setString(1, md5Object.toString());
        md5State.setString(2, runID);
        md5State.execute();
        expressionComplete(sqlTime,runID);
        updateGridError(dir,"RNASeqMergedExpression","ExpressionID",runID);
    }

    // reads every line of a text file into a sorted set, one name per line
    private TreeSet<String> readTranscriptNameSet(String path)throws Exception {
        TreeSet<String> names = new TreeSet<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()){
                names.add(line);
            }
        }
        return names;
    }
    
    /**
     * Converts BitSeq theta means to TPM for a chosen subset of transcripts.
     * The theta file is read twice: the first pass sums theta/length over the
     * selected transcripts (the denominator), the second pass writes one row
     * "transcript cgc type tpm" per selected transcript, where
     * tpm = 10^6 * (theta/length) / denominator. The denominator is also written
     * on its own to a ".tpm.denom.csv" file next to the TPM file.
     * Lines starting with '#' and blank lines in the theta file are ignored.
     *
     * @param thetaFile BitSeq ".thetaMeans" file; the first token of each data
     *        line is a 1-based index into {@code thetaTranscripts}, the second is theta
     * @param thetaTranscripts transcript names, indexed as in the theta file
     * @param thetaLens transcript lengths, parallel to {@code thetaTranscripts}
     * @param includes if not null, only transcripts in this set are selected
     * @param excludes if not null, transcripts in this set are never selected
     * @param label replaces "thetaMeans" in the theta file name, followed by ".tpm.csv"
     * @return the TPM file that was written
     * @throws Exception if a file cannot be read, parsed or written, or a selected
     *         transcript is missing from the transcript GFF
     */
    private File toTPM(File thetaFile,String[] thetaTranscripts,double[] thetaLens,Set<String> includes,Set<String> excludes,String label) throws Exception{
        final double f = 1.0e6;  // the "per million" scale factor
        File tpmFile = new File(thetaFile.getParent(),thetaFile.getName().replace("thetaMeans",label+".tpm.csv"));
        File tpmDenomFile = new File(tpmFile.getParent(),tpmFile.getName().replace(".tpm.csv",".tpm.denom.csv"));

        // first pass: the denominator is the sum of theta/length over the selected transcripts
        double sum = 0.0;
        try (BufferedReader reader = new BufferedReader(new FileReader(thetaFile))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()){
                if (line.isEmpty() || line.charAt(0) == '#'){
                    continue;
                }
                String[] tokens = line.split(" ");
                int i = Integer.parseInt(tokens[0])-1;  // index into array of theta transcripts
                double theta = Double.parseDouble(tokens[1]);
                String name = thetaTranscripts[i];
                boolean selected = (excludes == null || !excludes.contains(name))
                        && (includes == null || includes.contains(name));
                if (selected){
                    sum = sum + theta/thetaLens[i];
                }
            }
        }

        try (PrintStream denomStream = new PrintStream(tpmDenomFile)) {
            denomStream.println(sum);
        }

        // second pass: write the scaled values; the output stream is closed on exit
        // so the file handle is released before callers read the result
        try (BufferedReader reader = new BufferedReader(new FileReader(thetaFile));
             PrintStream stream = new PrintStream(tpmFile)) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()){
                if (line.isEmpty() || line.charAt(0) == '#'){
                    continue;
                }
                String[] tokens = line.split(" ");
                int i = Integer.parseInt(tokens[0])-1;  // index into array of theta transcripts
                double theta = Double.parseDouble(tokens[1]);
                String name = thetaTranscripts[i];
                boolean selected = (excludes == null || !excludes.contains(name))
                        && (includes == null || includes.contains(name));
                if (!selected){
                    continue;
                }
                Entry transcriptEntry = this.transcriptGFF.getEntry(name);
                if (transcriptEntry == null){
                    // fail with the offending name rather than a bare NullPointerException
                    throw new IllegalStateException("transcript not found in transcript GFF: "+name);
                }
                String type = this.type(transcriptEntry);
                String cgc = cgc(transcriptEntry);
                stream.printf("%s %s %s %f\n", name,cgc,type,f*(theta/thetaLens[i])/sum);
            }
        }
        return tpmFile;
    }
   
    /**
     * Finds an expression calculation against the RiboZero reference that was
     * done on the same set of sequencing files as the given run.
     * Candidates are the RNAExpression rows with the run's library id and the
     * reference "&lt;version&gt;XomeRiboZero", where version is derived from the
     * run's own reference.
     *
     * @param runID expression id to find a RiboZero counterpart for
     * @return the first candidate ExpressionID whose sequencing ids equal those
     *         of {@code runID}, or null if there is none
     * @throws Exception if a database lookup fails
     */
    private String findRiboZeroExpression(String runID)throws Exception {
        Set<String> seqIDs = ExpressionID.getSequencingIDs(runID);
        String library = ExpressionID.getLibraryID(runID);
        String reference = ExpressionID.getReference(runID);
        String version = AlignmentReference.getVersion(reference);
        // NOTE(review): the values are spliced into the SQL text; this is only safe
        // while library ids and reference versions never contain a quote character
        String sql = String.format
        ("Select ExpressionID from RNAExpression where LibraryID = \'%s\' and Reference =\'%s\'",library,version+"XomeRiboZero");
        // close the result set on every exit path, including the early return
        try (ResultSet rs = MySql.getMySql().execute(sql)) {
            while (rs.next()){
                String ribo0ExpID = rs.getString("ExpressionID");
                Set<String> ribo0SeqIDs = ExpressionID.getSequencingIDs(ribo0ExpID);
                if (seqIDs.equals(ribo0SeqIDs)){
                    return ribo0ExpID;
                }
            }
        }
        return null;
    }
    /**
     * Reads the histone sequence names from the WS245 reference directory.
     *
     * @return every line of the HistoneSequenceNames file, in file order
     * @throws Exception if the file cannot be opened or read
     */
    static public String[] getHistoneNames()throws Exception {
        ArrayList<String> list = new ArrayList<>();
        // try-with-resources closes the file even if reading fails part way
        try (BufferedReader reader = new BufferedReader(new FileReader("/net/waterston/vol9/References/WS245/HistoneSequenceNames"))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()){
                list.add(line);
            }
        }
        return list.toArray(new String[0]);
    }
    /**
     * Records the completion time of an analysis by setting the Completed
     * column of its RNASeqAnalysis row.
     *
     * @param sqlTime completion time to store
     * @param runID AnalysisID of the row to update
     * @throws Exception if the statement cannot be prepared or executed
     */
    private void analysisComplete(java.sql.Timestamp sqlTime,String runID)throws Exception {
        final String updateSql = "Update RNASeqAnalysis set Completed = ? where AnalysisID = ?";
        PreparedStatement update = MySql.getMySql().getStatement(updateSql);
        update.setTimestamp(1, sqlTime);  // Completed
        update.setString(2, runID);       // AnalysisID
        update.execute();
    }
    /**
     * Finishes a Monocle analysis; the only action is to record its completion time.
     *
     * @param sqlTime completion time to store
     * @param runID AnalysisID of the finished analysis
     * @throws Exception if the database update fails
     */
    public void Monocle(java.sql.Timestamp sqlTime,String runID)throws Exception {
        this.analysisComplete(sqlTime, runID);
    }
    /**
     * Finishes an analysis by BitSeq: records the completion time, then writes
     * "transcript.pplr" in the working directory as an annotated copy of "data.pplr".
     * Transcript names are taken, in order, from the '>' header lines of the
     * reference sequence file. Each data line of "data.pplr" is paired with the
     * next transcript name and written as "transcript seq cgc type &lt;original line&gt;".
     * Header lines (starting with '#') are copied unchanged; blank lines are
     * skipped and do not consume a transcript name.
     *
     * @param sqlTime completion time to store
     * @param runID AnalysisID of the finished analysis
     * @throws Exception if a file cannot be read or written, the database update
     *         fails, or "data.pplr" has more data lines than the reference has transcripts
     */
    public void Analysis(java.sql.Timestamp sqlTime,String runID)throws Exception {
        analysisComplete(sqlTime,runID);

        String reference = AnalysisID.getReference(runID);
        String sequenceFile = AlignmentReference.getSequenceFile(reference);
        ArrayList<String> transcripts = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(sequenceFile))) {
            for (String header = reader.readLine(); header != null; header = reader.readLine()){
                // startsWith is safe on blank lines, unlike charAt(0)
                if (header.startsWith(">")){
                    transcripts.add(header.substring(1).trim());
                }
            }
        }

        // try-with-resources flushes and closes the output even if a row fails
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(dir,"data.pplr")));
             PrintStream stream = new PrintStream(new File(dir,"transcript.pplr"))) {
            int i=0;  // index of the transcript that belongs to the next data line
            for (String line = reader.readLine(); line != null; line = reader.readLine()){
                if (line.isEmpty()){
                    continue;
                }
                if (line.charAt(0)=='#'){
                    stream.println(line);
                    continue;
                }
                if (i >= transcripts.size()){
                    throw new IllegalStateException("data.pplr has more data lines than the "
                            +transcripts.size()+" transcripts in "+sequenceFile);
                }
                String trans = transcripts.get(i);
                Entry transcriptEntry = this.transcriptGFF.getEntry(trans);
                String cgc = this.cgc(transcriptEntry);
                String type = this.type(transcriptEntry);
                String seq = this.seqName(transcriptEntry);
                stream.printf("%s %s %s %s %s\n",trans,seq,cgc,type,line);
                ++i;
            }
        }
    }
    /**
     * Extracts the gene id from a transcript's "Parent" attribute.
     *
     * @param transcriptEntry transcript GFF entry
     * @return the second ':'-separated field of the "Parent" attribute
     */
    private String wbGene(Entry transcriptEntry){
        String parent = transcriptEntry.attributes.get("Parent");
        String[] fields = parent.split(":");
        return fields[1];
    }
    /**
     * Returns the reporting type of a transcript. Entries whose source
     * contains "transposon" are reported as "pseudogenic_transposon" when their
     * GFF type contains "pseudogenic" and as "transposon" otherwise; every other
     * entry is reported with its own GFF type.
     *
     * @param transcriptEntry transcript GFF entry
     * @return the type label for the entry
     */
    private String type(Entry transcriptEntry){
        String gffType = transcriptEntry.type;
        if (!transcriptEntry.source.contains("transposon")){
            return gffType;
        }
        return gffType.contains("pseudogenic") ? "pseudogenic_transposon" : "transposon";
    }
    /**
     * Returns the display name for the gene of a transcript. The gene is looked
     * up by the id in the transcript's "Parent" attribute. The result is the
     * gene's "locus" attribute, or its "sequence_name" when it has no locus;
     * if the gene is not in the gene GFF, the transcript's own "Name" is used.
     *
     * @param transcriptEntry transcript GFF entry
     * @return the name as described above (may be null if the attribute is absent)
     */
    private String cgc(Entry transcriptEntry){
        String parent = transcriptEntry.attributes.get("Parent");
        String geneId = parent.split(":")[1];
        Entry geneEntry = this.geneGFF.getEntry(geneId);

        if (geneEntry == null){
            return transcriptEntry.attributes.get("Name");
        }
        String locus = geneEntry.attributes.get("locus");
        return locus != null ? locus : geneEntry.attributes.get("sequence_name");
    }
    /**
     * Returns the sequence name for the gene of a transcript. The gene is
     * looked up by the id in the transcript's "Parent" attribute; if it is not
     * in the gene GFF, the transcript's own "Name" attribute is returned.
     *
     * @param transcriptEntry transcript GFF entry
     * @return the gene's "sequence_name", or the transcript's "Name" as fallback
     */
    private String seqName(Entry transcriptEntry){
        String parent = transcriptEntry.attributes.get("Parent");
        String geneId = parent.split(":")[1];
        Entry geneEntry = this.geneGFF.getEntry(geneId);

        return geneEntry != null
                ? geneEntry.attributes.get("sequence_name")
                : transcriptEntry.attributes.get("Name");
    }    
    /**
     * Command-line entry point: records that a job has finished.
     *
     * @param args args[0] is the RunID that has finished, args[1] is the
     *        program that has finished
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws Exception if initialisation fails
     */
    static public void main(String[] args)throws Exception {
        // fail with a usage message instead of an ArrayIndexOutOfBoundsException
        if (args == null || args.length < 2){
            throw new IllegalArgumentException("usage: Finisher <runID> <program>");
        }

        Finisher finisher = new Finisher(args[0],args[1]);
        finisher.init();
        finisher.run();
/*        
        HashMap<String,String> locusMap = new HashMap<>();
        HashMap<String,String> typeMap = new HashMap<>();
        HashMap<String,String> wbGeneMap = new HashMap<>();
        BufferedReader reader = new BufferedReader(new FileReader("/net/waterston/vol9/References/WS245/wormbase.gff3"));
        String line = reader.readLine();
        while (line != null){
            String[] gffTokens = line.split("\t");
            String[] attTokens = gffTokens[8].split(";");
            HashMap<String,String> valueMap = new HashMap<>();
            for (String att : attTokens){
                String[] values = att.split("=");
                valueMap.put(values[0],values[1]);
            }
            
            locusMap.put(valueMap.get("Name"),valueMap.get("locus"));
            wbGeneMap.put(valueMap.get("Name"),valueMap.get("Parent").split(":")[1]);
            if (gffTokens[1].equals("WormBase")){
                typeMap.put(valueMap.get("Name"), gffTokens[2]);
            }else if (gffTokens[2].equals("mRNA")){
                typeMap.put(valueMap.get("Name"), "transposon");
            }else {
                typeMap.put(valueMap.get("Name"), "pseudogenic_transposon");
            }
            line = reader.readLine();
        }
        reader.close();
 */       

//        System.out.println(sqlTime.toString());
    }
    // names of the steep genes; toSteepGenes keeps a row when its first token is in this set.
    // Starts empty here - presumably filled during init(); confirm against the rest of the file.
    Set<String> steepGenes = new TreeSet<>();
    // transcript annotation, looked up by transcript id via getEntry
    GFF3 transcriptGFF;
    // gene annotation, looked up by the gene id taken from a transcript's "Parent" attribute
    GFF3 geneGFF;
    
    // working directory of the job (the "user.dir" system property, set in the constructor)
    String dir;
    // name of the program that has finished (second constructor argument)
    String program;
    // id of the run that has finished (first constructor argument)
    String runID;
    // presumably the mitochondrial (MTCE.*) transcript names; not referenced in the code visible here
    static String[] mitoTranscripts = {"MTCE.3","MTCE.4","MTCE.11","MTCE.12","MTCE.16","MTCE.21","MTCE.23","MTCE.25","MTCE.26","MTCE.31","MTCE.34","MTCE.35"};
    
    // presumably the rRNA sequence names; not referenced in the code visible here
    static String[] rRNAs = {"F31C3.7","F31C3.8","F31C3.9","F31C3.11","MTCE.7","MTCE.33","T09B4.23","T27C5.18","Y102A5D.5","Y102A5D.6","Y102A5D.7",
        "Y102A5D.8","Y102A5D.9","Y102A5D.10","Y102A5D.11","Y102A5D.12","ZK218.12","ZK218.16","ZK218.17","ZK218.18","ZK218.19","ZK218.20"};
    
    // file extensions handed to MD5sum.calculate together with the working directory
    static String[] exts = {".gz",".bw",".bam",".csv"};
    // Hard-coded list of FBtr transcript ids - presumably the fly rRNA transcripts,
    // going by the field name; TODO confirm. Not referenced in the code visible here.
    static String[] rRNAfly = {
        "FBtr0086345",
"FBtr0086346",
"FBtr0086347",
"FBtr0086349",
"FBtr0086350",
"FBtr0086351",
"FBtr0086352",
"FBtr0086353",
"FBtr0086354",
"FBtr0086356",
"FBtr0086357",
"FBtr0086358",
"FBtr0086359",
"FBtr0086360",
"FBtr0086361",
"FBtr0086362",
"FBtr0086364",
"FBtr0086365",
"FBtr0086366",
"FBtr0086367",
"FBtr0086368",
"FBtr0086369",
"FBtr0086370",
"FBtr0086371",
"FBtr0086372",
"FBtr0086373",
"FBtr0086374",
"FBtr0086375",
"FBtr0086376",
"FBtr0086377",
"FBtr0086378",
"FBtr0086379",
"FBtr0086380",
"FBtr0086381",
"FBtr0086382",
"FBtr0086383",
"FBtr0086384",
"FBtr0086385",
"FBtr0086386",
"FBtr0086387",
"FBtr0086388",
"FBtr0086389",
"FBtr0086390",
"FBtr0086391",
"FBtr0086392",
"FBtr0086393",
"FBtr0086394",
"FBtr0086395",
"FBtr0086396",
"FBtr0086397",
"FBtr0086398",
"FBtr0086399",
"FBtr0086400",
"FBtr0086401",
"FBtr0086402",
"FBtr0086403",
"FBtr0086404",
"FBtr0086405",
"FBtr0086406",
"FBtr0086407",
"FBtr0086409",
"FBtr0086410",
"FBtr0086411",
"FBtr0086412",
"FBtr0086413",
"FBtr0086414",
"FBtr0086415",
"FBtr0086416",
"FBtr0086417",
"FBtr0086418",
"FBtr0086419",
"FBtr0086420",
"FBtr0086421",
"FBtr0086422",
"FBtr0086423",
"FBtr0086424",
"FBtr0086425",
"FBtr0086426",
"FBtr0086427",
"FBtr0086428",
"FBtr0086429",
"FBtr0086430",
"FBtr0086431",
"FBtr0086432",
"FBtr0086433",
"FBtr0086434",
"FBtr0086435",
"FBtr0086436",
"FBtr0086437",
"FBtr0086438",
"FBtr0086439",
"FBtr0086440",
"FBtr0086441",
"FBtr0086442",
"FBtr0086443",
"FBtr0086444",
"FBtr0114187",
"FBtr0114194",
"FBtr0114196",
"FBtr0114198",
"FBtr0114201",
"FBtr0114202",
"FBtr0114205",
"FBtr0114206",
"FBtr0114207",
"FBtr0114208",
"FBtr0114209",
"FBtr0114210",
"FBtr0114211",
"FBtr0114214",
"FBtr0114216",
"FBtr0114218",
"FBtr0114222",
"FBtr0114223",
"FBtr0114228",
"FBtr0114249",
"FBtr0114253",
"FBtr0346874",
"FBtr0114257",
"FBtr0114259",
"FBtr0114261",
"FBtr0114262",
"FBtr0114274",
"FBtr0114275",
"FBtr0114280",
"FBtr0114283",
"FBtr0114284",
"FBtr0114285",
"FBtr0114286",
"FBtr0346901",
"FBtr0100888",
"FBtr0100890",
"FBtr0343433",
"FBtr0346875",
"FBtr0346876",
"FBtr0346878",
"FBtr0346879",
"FBtr0346880",
"FBtr0346882",
"FBtr0346883",
"FBtr0346884",
"FBtr0346885",
"FBtr0346873",
"FBtr0346877",
"FBtr0346881",
"FBtr0346887",
"FBtr0346898"};
    
}