/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.LMS.RNASeq;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintStream;

/**
 * Converts RiboZero transcript/gene expression values to comparable PolyA values.
 *
 * <p>A single conversion factor {@code total / (total - histone)} is computed from
 * the read counts found in a {@code reads.out} file located in the same directory
 * as the input file. The last column of every data row of the input is multiplied
 * by that factor and the rows are written, space separated, to the output file.
 *
 * <p>Note: no histone filtering is performed here. An earlier name-based filter
 * (built from {@code Finisher.getHistoneNames()}) was disabled, so histone rows that
 * are present in the input are scaled and written like any other row.
 *
 * @author gevirl
 */
public class WormPolyA implements Runnable {
    File inputFile;
    File outFile;

    /**
     * @param rpkmFile the expression file to convert; {@code reads.out} is looked up
     *                 in this file's parent directory when {@link #run()} is called
     */
    public WormPolyA(File rpkmFile){
        inputFile = rpkmFile;
    }

    /**
     * Performs the conversion. Any failure is reported on standard error via
     * {@code printStackTrace()} and the run ends; no exception escapes this method.
     */
    @Override
    public void run() {
        try {
            // read the histone and total read counts
            File readsOut = new File(inputFile.getParent(),"reads.out");
            long[] counts = readCounts(readsOut);
            long totalReadCount = counts[0];
            long histoneReadCount = counts[1];

            // A non-positive denominator would silently write Infinity/NaN/negative values.
            if (totalReadCount <= histoneReadCount){
                throw new IllegalStateException(
                        "Cannot compute conversion factor from " + readsOut
                        + ": total reads=" + totalReadCount
                        + ", histone reads=" + histoneReadCount);
            }
            double f = (double)totalReadCount/(double)(totalReadCount-histoneReadCount);

            outFile = polyAFileFor(inputFile);

            // The reader is opened first so a missing input never leaves an empty output behind;
            // try-with-resources closes both streams even when a row fails to parse.
            try (BufferedReader reader = new BufferedReader(new FileReader(inputFile));
                 PrintStream stream = new PrintStream(outFile)) {
                stream.printf("# TotalReads=%d HistoneReads=%d Conversion=%f\n", totalReadCount,histoneReadCount,f);

                String line;
                while ((line = reader.readLine()) != null){
                    // blank lines and '#' comment lines carry no value to scale; copy them through
                    if (line.trim().isEmpty() || line.startsWith("#")){
                        stream.println(line);
                        continue;
                    }
                    String[] tokens = line.split(",| |\t");
                    if (tokens.length == 0){
                        // line consisted of delimiters only
                        stream.println(line);
                        continue;
                    }
                    int last = tokens.length-1;
                    double value;
                    try {
                        value = Double.parseDouble(tokens[last]);
                    } catch (NumberFormatException nfe){
                        throw new IllegalArgumentException(
                                "Non-numeric value in " + inputFile + ": " + line, nfe);
                    }
                    tokens[last] = String.format("%.6f",f*value);
                    writeLine(tokens,stream);
                }
            }
        } catch (Exception exc){
            // Runnable.run() cannot throw checked exceptions; keep the original
            // report-and-return behaviour.
            exc.printStackTrace();
        }
    }

    /**
     * Reads the total and histone read counts from a {@code reads.out} file.
     * Each line is split on ':'; when the text before the first ':' contains
     * "Total" or "Histone", the text after it is parsed as the respective count.
     * Lines without a value after ':' are ignored.
     *
     * @return a two element array {@code {total, histone}}; the defaults are 1 and 0
     *         when the corresponding line is absent
     */
    private static long[] readCounts(File readsOut) throws java.io.IOException {
        long total = 1;
        long histone = 0;
        try (BufferedReader reader = new BufferedReader(new FileReader(readsOut))) {
            String line;
            while ((line = reader.readLine()) != null){
                String[] tokens = line.split(":");
                if (tokens.length < 2){
                    continue;
                }
                if (tokens[0].contains("Total")){
                    total = Long.parseLong(tokens[1].trim());
                }
                if (tokens[0].contains("Histone")){
                    histone = Long.parseLong(tokens[1].trim());
                }
            }
        }
        return new long[]{total, histone};
    }

    /**
     * Derives the output file for an input file. Only the file name is rewritten
     * (never a directory component): the last ".rpkm" becomes ".polyA.rpkm". When the
     * name contains no ".rpkm" the suffix is appended, so the result can never be
     * the input file itself.
     */
    private static File polyAFileFor(File input){
        String suffix = ".rpkm";
        String name = input.getName();
        int idx = name.lastIndexOf(suffix);
        String outName;
        if (idx >= 0){
            outName = name.substring(0, idx) + ".polyA.rpkm" + name.substring(idx + suffix.length());
        } else {
            outName = name + ".polyA.rpkm";
        }
        return new File(input.getParent(), outName);
    }

    /** Writes the tokens to the stream separated by single spaces, followed by a line break. */
    private void writeLine(String[] tokens,PrintStream stream){
        stream.print(tokens[0]);
        for (int i=1 ; i<tokens.length ; ++i){
            stream.printf(" %s",tokens[i]);
        }
        stream.println();
    }

    /**
     * @return the output file chosen by the last call to {@link #run()}, or
     *         {@code null} if {@code run()} has not got as far as choosing one
     */
    public File getOutputFile(){
        return outFile;
    }

    /**
     * Command line entry point.
     *
     * @param args optional; {@code args[0]} is the rpkm file to convert. Without
     *             arguments the original hard-coded example file is used.
     */
    static public void main(String[] args) throws Exception {
        File file;
        if (args.length > 0){
            file = new File(args[0]);
        } else {
            file = new File("/net/waterston/vol9/RNASeq/alhigdon/RB884_DS3/RB884_80_1_1/Merged_10/RiboZeroBitSeq1.16.0",
            "RiboZeroBitSeq1.16.0_RB884_80_1_1_Merged10.mRNA.gene.rpkm");
        }
        WormPolyA polyA = new WormPolyA(file);
        polyA.run();

    }
}
