/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.LMS.dataframe.coreg;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.math3.ml.distance.DistanceMeasure;
import org.apache.commons.math3.ml.distance.EuclideanDistance;
import org.apache.commons.math3.ml.distance.ManhattanDistance;
import org.apache.commons.math3.stat.ranking.NaturalRanking;
import org.rhwlab.LMS.dataframe.FileDataFrame;
import org.rhwlab.gene.model.Annotation;
import org.rhwlab.gene.model.DeadGenes;
import org.rhwlab.gene.model.ModelFromGFF;

/**
 * Operon membership table: maps operon identifiers to the annotations of their
 * member genes, ordered along the direction of transcription.
 * <p>
 * Membership is read from the {@code OperonGenes.csv} classpath resource and each
 * gene is resolved to an {@link Annotation} through a GFF3 gene model. Only
 * operons for which at least two genes could be annotated are retained.
 *
 * @author gevirl
 */
public class Operons {

    /** Gene model used by the no-argument constructor. */
    private static final String DEFAULT_GENE_MODEL
            = "/net/waterston/vol9/References/WS245/AllWormBase.withTransposon.gff3";

    /** Classpath location of the operon membership table. */
    private static final String OPERON_RESOURCE = "/org/rhwlab/LMS/dataframe/coreg/OperonGenes.csv";

    /**
     * Orders genes by start coordinate: ascending when the first compared gene
     * is on the "+" strand, descending otherwise. Genes of one operon are
     * expected to share a strand, so this yields transcription order.
     */
    private static final Comparator<Annotation> TRANSCRIPTION_ORDER = new Comparator<Annotation>() {
        @Override
        public int compare(Annotation a1, Annotation a2) {
            if (a1.getStrand().equals("+")) {
                return Integer.compare(a1.getStart(), a2.getStart());
            }
            return Integer.compare(a2.getStart(), a1.getStart());
        }
    };

    TreeMap<String, List<Annotation>> operonMap = new TreeMap<>();  // operon,geneList
    // gene id -> operon id for every record read, including genes whose operon
    // was later dropped from operonMap for having fewer than two annotated genes
    TreeMap<String, String> wbGeneMap = new TreeMap<>();

    /**
     * Builds the operon table using the default gene model file.
     *
     * @throws Exception if the gene model or the operon resource cannot be read
     */
    public Operons() throws Exception {
        this(new File(DEFAULT_GENE_MODEL));
    }

    /**
     * Builds the operon table using the given GFF3 gene model.
     * <p>
     * Each non-blank line of the operon resource is split on commas; the first
     * field is taken as the gene id and the third as the operon id (the second
     * field is not used). Gene ids reported dead by {@link DeadGenes} are
     * replaced by the id they were merged into before lookup.
     *
     * @param geneModel GFF3 file from which gene annotations are resolved
     * @throws IllegalStateException if the operon resource is not on the classpath
     * @throws IllegalArgumentException if a line has fewer than three fields
     * @throws Exception if the gene model or the operon resource cannot be read
     */
    public Operons(File geneModel) throws Exception {
        DeadGenes dead = new DeadGenes();
        ModelFromGFF gff3 = new ModelFromGFF(geneModel);

        InputStream stream = this.getClass().getResourceAsStream(OPERON_RESOURCE);
        if (stream == null) {
            throw new IllegalStateException("Operon resource not found on classpath: " + OPERON_RESOURCE);
        }

        // try-with-resources so the reader is released even if a record fails to parse
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(stream, java.nio.charset.StandardCharsets.UTF_8))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] tokens = line.split(",");
                if (tokens.length < 3) {
                    throw new IllegalArgumentException(
                            "Malformed operon record (expected 3 comma-separated fields): " + line);
                }

                String wbGene = tokens[0];
                if (dead.isDead(wbGene)) {
                    wbGene = dead.getMergedTo(wbGene);
                }
                String operon = tokens[2];
                wbGeneMap.put(wbGene, operon);

                // the operon is registered even if this gene has no annotation;
                // under-populated operons are pruned after loading
                List<Annotation> genes = operonMap.computeIfAbsent(operon, k -> new ArrayList<>());
                Annotation annot = gff3.getGeneAnnotation(wbGene);
                if (annot != null) {
                    genes.add(annot);
                }
            }
        }

        // remove any operons with fewer than two annotated genes
        operonMap.values().removeIf(genes -> genes.size() < 2);

        // sort the genes in the operons by transcription order
        for (List<Annotation> genes : operonMap.values()) {
            genes.sort(TRANSCRIPTION_ORDER);
        }
    }

    /**
     * Returns the identifiers of all retained operons.
     *
     * @return the key set of the operon map (a live view, in sorted key order)
     */
    public Set<String> getOperonIDs() {
        return operonMap.keySet();
    }

    /**
     * Returns the genes of an operon in transcription order.
     *
     * @param operonID operon identifier
     * @return the operon's gene annotations, or {@code null} if the operon is
     *         not in the table (or {@code operonID} is {@code null})
     */
    public List<Annotation> getOperonGenes(String operonID) {
        if (operonID == null) {
            return null;
        }
        return operonMap.get(operonID);
    }

    /**
     * Returns each pair of consecutive genes in an operon.
     *
     * @param operon operon identifier
     * @return an array with one two-element row {@code {gene i, gene i+1}} per
     *         adjacent pair; empty if the operon is unknown or has fewer than
     *         two genes
     */
    public Annotation[][] getGenePairs(String operon) {
        List<Annotation> genes = getOperonGenes(operon);
        if (genes == null || genes.size() < 2) {
            return new Annotation[0][];
        }

        Annotation[][] ret = new Annotation[genes.size() - 1][];
        for (int i = 0; i < ret.length; ++i) {
            ret[i] = new Annotation[]{genes.get(i), genes.get(i + 1)};
        }
        return ret;
    }

    /**
     * Computes expression-profile distances between adjacent genes of every
     * operon and prints them to standard output.
     * <p>
     * One line is printed per gene pair for which both genes are found in the
     * indexed column of the data file:
     * {@code operon,pairNumber,gene0,gene1} followed, for each distance measure,
     * by {@code ,distance,rank/pairCount} (three decimals each). Pairs with a
     * gene missing from the data are skipped. When a gene appears in several
     * rows, the first row is used.
     *
     * @param expData    expression data file loaded as a {@link FileDataFrame}
     * @param columnName name of the column holding the gene names to index on
     * @param cols       indexes of the columns that make up the numeric profile
     * @param dist       distance measures to evaluate for each pair
     * @param labels     names of the distance measures; currently not used (no
     *                   header line is printed)
     * @throws Exception if the data file or the operon table cannot be loaded
     */
    static void operonDistanceStudy(File expData, String columnName, int[] cols, DistanceMeasure[] dist, String[] labels) throws Exception {
        PrintStream stream = System.out;
        FileDataFrame df = new FileDataFrame(expData);
        TreeMap<String, List<Integer>> indexMap = df.indexColumn(columnName);
        Operons operons = new Operons();

        // pairLabels.get(k) and distanceList.get(k) describe the same gene pair
        List<String> pairLabels = new ArrayList<>();
        List<double[]> distanceList = new ArrayList<>();
        for (String operon : operons.getOperonIDs()) {
            Annotation[][] genePairs = operons.getGenePairs(operon);
            for (int i = 0; i < genePairs.length; ++i) {
                String gene0 = genePairs[i][0].getGeneName();
                String gene1 = genePairs[i][1].getGeneName();
                Integer row0 = firstRow(indexMap, gene0);
                Integer row1 = firstRow(indexMap, gene1);
                if (row0 == null || row1 == null) {
                    continue;  // one of the genes has no expression data
                }

                double[] x0 = profile(df, row0, cols);
                double[] x1 = profile(df, row1, cols);

                double[] distances = new double[dist.length];
                for (int d = 0; d < dist.length; ++d) {
                    distances[d] = dist[d].compute(x0, x1);
                }
                pairLabels.add(String.format("%s,%d,%s,%s", operon, i + 1, gene0, gene1));
                distanceList.add(distances);
            }
        }

        // compute rankings for each distance
        int pairCount = distanceList.size();
        double[][] ranks = new double[dist.length][];
        for (int d = 0; d < dist.length; ++d) {
            double[] v = new double[pairCount];
            for (int j = 0; j < pairCount; ++j) {
                v[j] = distanceList.get(j)[d];
            }
            NaturalRanking ranking = new NaturalRanking();
            ranks[d] = ranking.rank(v);
        }

        for (int p = 0; p < pairCount; ++p) {
            stream.print(pairLabels.get(p));
            double[] distances = distanceList.get(p);
            for (int d = 0; d < distances.length; ++d) {
                // rank is reported as a fraction of the number of pairs
                stream.printf(",%.3f,%.3f", distances[d], ranks[d][p] / pairCount);
            }
            stream.println();
        }
    }

    /** Returns the first row index recorded for a gene, or null if the gene is absent from the index. */
    private static Integer firstRow(TreeMap<String, List<Integer>> indexMap, String gene) {
        if (gene == null) {
            return null;
        }
        List<Integer> rows = indexMap.get(gene);
        if (rows == null || rows.isEmpty()) {
            return null;
        }
        return rows.get(0);
    }

    /** Parses the selected columns of one data-frame row into a numeric vector. */
    private static double[] profile(FileDataFrame df, Integer row, int[] cols) {
        List<String> data = df.getRow(row);
        double[] x = new double[cols.length];
        for (int j = 0; j < cols.length; ++j) {
            x[j] = Double.parseDouble(data.get(cols[j]));
        }
        return x;
    }

    /**
     * Runs the operon distance study on a fixed FACS data file, using data
     * columns 2 through 36 and the Euclidean and Manhattan distances.
     *
     * @param args not used
     * @throws Exception if the study fails to load its inputs
     */
    static public void main(String[] args) throws Exception {
        File data = new File("/net/waterston/vol2/home/gevirl/FACS/FACS_AllGenes_RepAvg_Max1.csv");
        int[] cols = new int[35];
        for (int i = 0; i < cols.length; ++i) {
            cols[i] = i + 2;
        }
        DistanceMeasure[] measures = {new EuclideanDistance(), new ManhattanDistance()};
        String[] labels = {"Euclidean", "Manhattan"};

        operonDistanceStudy(data, "CommonName", cols, measures, labels);
    }
}
