/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.RNASeq;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.stat.StatUtils;
import org.jdom2.Element;
import org.rhwlab.db.MySql;

/**
 * Expression data for one experiment: a set of features, each with one value
 * per time point.
 *
 * <p>The data is either held in memory in {@link #dataMap} (when it was read
 * from one or more files) or fetched on demand through {@code MySql} (when
 * {@code dataMap} is {@code null}).
 *
 * @author gevirl
 */
public class ExperimentDataSource {

    /** Delimiters accepted by the simple data-file constructor: tab, comma or a single space. */
    private static final String DELIMITER_REGEX = "\t|,| ";

    // source of replacement values when a feature has zero mean or zero spread
    static NormalDistribution gaussian = new NormalDistribution(100.0,.1);
    String name;  // experiment name
    String timesStr;  // times in data source for this experiment in String format
    double[] times;
    HashMap<String,double[]> dataMap = null;   // null means the data is read from the database
//    double[][] stageFraction;
    Set<String> features;

    /**
     * Constructs the source from a simple delimited data file.
     *
     * <p>The first line is a header whose fields after the first are parsed as
     * the time points. Every following line holds a feature name followed by
     * numeric values. Fields may be separated by a tab, a comma or a space.
     * Blank lines are skipped.
     *
     * @param file the data file; its file name becomes the experiment name
     * @throws IllegalArgumentException if the file has no header line
     * @throws Exception if the file cannot be read or a value is not numeric
     */
    public ExperimentDataSource(File file)throws Exception {
        this.name = file.getName();
        this.dataMap = new HashMap<>();

        // try-with-resources so the reader is released even when parsing fails
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String headerLine = reader.readLine();
            if (headerLine == null) {
                throw new IllegalArgumentException("data file is empty: " + file);
            }
            this.times = parseValuesAfterFirst(headerLine.split(DELIMITER_REGEX));

            String line;
            while ((line = reader.readLine()) != null) {
                String[] tokens = line.split(DELIMITER_REGEX);
                // a blank or delimiter-only line carries no feature name
                if (line.isEmpty() || tokens.length == 0) {
                    continue;
                }
                this.dataMap.put(tokens[0], parseValuesAfterFirst(tokens));
            }
        }

        this.features = dataMap.keySet();
    }

    /**
     * Makes an experiment data source from an XML experiment element.
     *
     * <ul>
     * <li>The feature set can be null or empty, in which case all the features
     *     from the data file are included; otherwise only lines whose first
     *     field is in the set are included.</li>
     * <li>A data file can be provided through {@code fileGlobal}, but it is
     *     overridden by the element's {@code file} attribute.</li>
     * <li>If a file is provided, there must be a {@code Columns} element that
     *     identifies the columns to use from the data file (comma separated
     *     indexes, shifted by the optional {@code offset} attribute).</li>
     * <li>If no file is provided and no times are given, the {@code File}
     *     child elements are read, one per time point.</li>
     * <li>Otherwise no data is loaded here and the getters query the
     *     database; the feature set is not used for filtering in that case.</li>
     * </ul>
     *
     * @param experiment the experiment element (attributes {@code name},
     *                   {@code file}, {@code featureClass}; children
     *                   {@code Times}, {@code Columns}, {@code File})
     * @param features   features to keep, or null/empty for all
     * @param fileGlobal default data file path, may be null
     * @throws IllegalArgumentException if a file is used but the
     *                   {@code Columns} element is missing
     * @throws Exception on I/O, parsing or reflection failures
     */
    public ExperimentDataSource(Element experiment,Set<String> features,String fileGlobal)throws Exception {
        this.features = features;

        // the file attribute overrides the global file specification
        String fileAtt = experiment.getAttributeValue("file");
        String file = (fileAtt != null) ? fileAtt : fileGlobal;
        name = experiment.getAttributeValue("name");

        // get the times to use; a missing or blank Times element means "no times given"
        timesStr = experiment.getChildText("Times");
        times = parseTimes(timesStr);

        // optional helper that derives the feature name from a whole line of tokens
        Feature feature = null;
        String featureClassName = experiment.getAttributeValue("featureClass");
        if (featureClassName != null){
            Class<?> featureClass = Class.forName(featureClassName);
            feature = (Feature)featureClass.getDeclaredConstructor().newInstance();
        }

        if (file != null){ // read a data file instead of database
            int[] columns = parseColumns(experiment);
            dataMap = readDataFile(file, columns, feature, features);
            this.features = dataMap.keySet();
        } else if (times.length==0){ // multiple data files used to build the dataMap
            // one File element per time point
            List<Element> fileEleList = experiment.getChildren("File");
            ArrayList<TimePointFile> timePointFileList = new ArrayList<>();
            for (Element fileEle : fileEleList){
                timePointFileList.add(new TimePointFile(fileEle,features));
            }
            if (!timePointFileList.isEmpty()){
                // readData is handed the map to fill, so it must exist first
                // NOTE(review): assumes TimePointFile.readData populates the map it is given - confirm
                dataMap = new HashMap<>();
            }
            // read all the data from the individual files
            times = new double[timePointFileList.size()];
            for (int i=0 ; i<times.length ; ++i){
                TimePointFile timePointFile = timePointFileList.get(i);
                times[i] = timePointFile.getSampleTime();
                timePointFile.readData(dataMap, i, times.length);
            }
        }
    }

    /**
     * Gets gene expression data (older accessor; {@link #getFeatureData(String)}
     * is the preferred method).
     *
     * <p>In database mode only the times specified in the XML are queried and
     * at most {@code times.length} rows are read.
     *
     * @param transcript the gene/transcript name
     * @return one value per time point; in file mode the stored array, or
     *         null if the transcript is unknown
     * @throws Exception if the database query fails
     */
    public double[] getData(String transcript) throws Exception {
        if (dataMap != null){
            return dataMap.get(transcript);
        }
        // NOTE(review): the query is assembled by string formatting; name and transcript must be trusted values
        String sql = String.format("Select * from RnaSeq where Experiment='%s' and Gene='%s' and ExpTime in (%s) order by ExpTime",name, transcript,timesStr);

        try (ResultSet rs = MySql.getMySql().execute(sql)) {
            double[] data = new double[times.length];
            int i = 0;
            // bounded by the array so surplus rows cannot overrun it
            while (i < data.length && rs.next()){
                data[i] = rs.getDouble("Expression");
                ++i;
            }
            return data;
        }
    }

    /**
     * Gets the expression of a feature rescaled to the given mean and standard
     * deviation. This is the current preferred method to get experiment
     * feature expression.
     *
     * <p>If the raw values have zero mean or zero standard deviation they
     * cannot be standardized, so every value is drawn from {@link #gaussian}
     * instead. The result is always a new array; the stored raw data is left
     * untouched.
     *
     * @param feature the feature name
     * @param mean    target mean
     * @param sd      target standard deviation
     * @return a new array of normalized values, one per time point
     * @throws Exception if the raw data cannot be obtained
     */
    public double[] getNormalizedFeatureData(String feature,double mean,double sd)throws Exception {
        double[] raw = getFeatureData(feature);
        double m = StatUtils.mean(raw);
        double s = Math.sqrt(StatUtils.variance(raw, m));

        // write into a copy: in file mode 'raw' is the array held in dataMap
        double[] data = new double[raw.length];
        if (m==0.0 || s==0.0){
            for (int i=0 ; i<data.length;++i){
                data[i] = gaussian.sample();
            }
        } else {
            // normalize the expression data
            for (int i = 0 ; i<data.length ; ++i){
                data[i] = sd*((raw[i]-m)/s) + mean;
                if (Double.isNaN(data[i])){
                    System.out.println("exp data not a number");
                    System.exit(1);
                }
            }
        }
        return data;
    }

    /**
     * Gets the raw (unnormalized) data of a feature. This is the current
     * preferred method to get raw data.
     *
     * @param feature the feature name
     * @return in file mode the stored array for the feature, or null if it is
     *         unknown; in database mode a new array of {@code times.length}
     *         values, or null if the query returned no rows
     * @throws Exception if the database query fails
     */
    public double[] getFeatureData(String feature) throws Exception {
        if (dataMap != null){
            return dataMap.get(feature);
        }
        // NOTE(review): the query is assembled by string formatting; name and feature must be trusted values
        String sql = String.format("Select * from DCPM where Experiment='%s' and Feature='%s' and Time in (%s) order by Time",name, feature,timesStr);

        try (ResultSet rs = MySql.getMySql().execute(sql)) {
            double[] data = null;
            int i = 0;
            // bounded by times.length so surplus rows cannot overrun the array
            while (i < times.length && rs.next()){
                if (data == null) {
                    data = new double[times.length];
                }
                data[i] = rs.getDouble("DCPM");
                ++i;
            }
            return data;
        }
    }

    /** @return the experiment name */
    public String getName(){
        return name;
    }

    /** @return the time points of this experiment (the internal array, not a copy) */
    public double[] getTimes(){
        return times;
    }

    /** @return the in-memory data map, or null when the data is read from the database */
    public HashMap<String,double[]> getDataMap(){
        return dataMap;
    }

    /**
     * Gets all features for this experiment.
     *
     * @return the feature set; when a single data file was read this is the
     *         key set of the data map, otherwise the set passed to the constructor
     */
    public Set<String> getFeatures()throws Exception {
        return this.features;
    }

    /** Parses every token after the first as a double. */
    private static double[] parseValuesAfterFirst(String[] tokens) {
        double[] values = new double[Math.max(0, tokens.length - 1)];
        for (int i = 0; i < values.length; ++i) {
            values[i] = Double.parseDouble(tokens[i + 1]);
        }
        return values;
    }

    /** Parses a comma separated list of times; null or blank text yields an empty array. */
    private static double[] parseTimes(String text) {
        if (text == null || text.trim().isEmpty()) {
            return new double[0];
        }
        String[] tokens = text.split(",");
        double[] parsed = new double[tokens.length];
        for (int i = 0; i < tokens.length; ++i) {
            parsed[i] = Double.parseDouble(tokens[i].trim());
        }
        return parsed;
    }

    /** Reads the Columns element: comma separated column indexes, each shifted by the optional offset attribute. */
    private static int[] parseColumns(Element experiment) {
        Element colEle = experiment.getChild("Columns");
        if (colEle == null) {
            throw new IllegalArgumentException("a Columns element is required when the experiment reads a data file");
        }
        String offsetStr = colEle.getAttributeValue("offset");
        int offset = (offsetStr != null) ? Integer.parseInt(offsetStr) : 0;

        String[] colStr = experiment.getChildText("Columns").split(",");
        int[] columns = new int[colStr.length];
        for (int i = 0; i < columns.length; ++i) {
            columns[i] = offset + Integer.parseInt(colStr[i].trim());
        }
        return columns;
    }

    /**
     * Reads the selected columns of a delimited file into a new map.
     * The separator is a tab for ".tab" files, a comma for ".csv" files and a
     * space otherwise. Rows with "NA" in any selected column are dropped.
     */
    private static HashMap<String,double[]> readDataFile(String file, int[] columns, Feature feature, Set<String> features) throws Exception {
        String sep = " ";
        if (file.endsWith(".tab")) {
            sep = "\t";
        } else if (file.endsWith(".csv")) {
            sep = ",";
        }

        HashMap<String,double[]> map = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            // no header line is skipped: every line is treated as data
            String line;
            while ((line = reader.readLine()) != null) {
                String[] tokens = line.split(sep);
                // a blank or separator-only line carries no feature
                if (line.isEmpty() || tokens.length == 0) {
                    continue;
                }
                String featureName = (feature != null) ? feature.getName(tokens) : tokens[0];

                // the filter is applied to the first field of the line
                boolean wanted = features == null || features.isEmpty() || features.contains(tokens[0]);
                if (!wanted) {
                    continue;
                }

                double[] data = new double[columns.length];
                boolean skip = false;
                for (int i = 0; i < columns.length; ++i) {
                    String cell = tokens[columns[i]];
                    if (cell.equals("NA")) {
                        skip = true;
                        break;
                    }
                    data[i] = Double.parseDouble(cell);
                }
                if (!skip) {
                    map.put(featureName, data);
                }
            }
        }
        return map;
    }
}
