
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.RNASeq;

 
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.SingularValueDecomposition;
import org.apache.commons.math3.stat.StatUtils;
 

 
/**
 * Principal component analysis of a data matrix by singular value decomposition.
 *
 * <p>The input matrix has samples in the rows; the columns are the
 * variables/dimensions.
 */
public class PCA {
    /**
     * Records the column means and standard deviations of {@code M} and computes
     * its singular value decomposition.
     *
     * <p>The matrix is decomposed exactly as given: {@link #normalize(RealMatrix)}
     * is not applied here. The stored means and standard deviations are only used
     * by {@link #normalize(RealMatrix)} and {@link #reNormalize(RealMatrix)}.
     *
     * @param M data matrix, samples in the rows and variables in the columns
     * @throws Exception declared for callers; the visible code only propagates
     *                   unchecked exceptions raised by the underlying library
     */
    public PCA(RealMatrix M) throws Exception {
        mu = columnMeans(M);
        sig = columnSD(M, mu);
        svd = new SingularValueDecomposition(M); // M = U x S x Vtranspose
    }

    /**
     * Returns the singular values of the decomposed matrix.
     *
     * <p>Despite the method name these are the values reported by
     * {@code SingularValueDecomposition.getSingularValues()}, not squared or
     * otherwise rescaled.
     *
     * @return the singular values of the data matrix
     */
    public double[] getEigenValues() {
        return svd.getSingularValues();
    }

    /**
     * Returns the matrix V of the decomposition.
     *
     * @return matrix whose columns are the eigenvectors
     */
    public RealMatrix getEigenVectors() {
        return svd.getV();  // columns of V are the eigenvectors
    }

    /**
     * Returns the data expressed in principal-component coordinates.
     *
     * <p>The columns of the returned matrix are the principal components; it has
     * one row per row (sample) of the input matrix.
     *
     * @return the transformed data matrix Z = M x V = U x S
     */
    public RealMatrix getTransformedDataMatrix() {
        RealMatrix U = svd.getU();
        RealMatrix S = svd.getS();
        return U.multiply(S);
    }

    /**
     * Computes the arithmetic mean of every column of a matrix.
     *
     * @param m the matrix to summarise
     * @return one mean per column, in column order
     */
    static public double[] columnMeans(RealMatrix m) {
        double[] means = new double[m.getColumnDimension()];
        for (int i = 0; i < means.length; ++i) {
            means[i] = StatUtils.mean(m.getColumn(i));
        }
        return means;
    }

    /**
     * Computes the standard deviation of every column of a matrix as the square
     * root of {@code StatUtils.variance(column, mean)}.
     *
     * <p>Only the first {@code means.length} columns are processed; any remaining
     * entries of the result stay at zero.
     *
     * @param m     the matrix to summarise
     * @param means precomputed column means, e.g. from {@link #columnMeans(RealMatrix)}
     * @return an array with one entry per column of {@code m}
     */
    static public double[] columnSD(RealMatrix m, double[] means) {
        double[] sd = new double[m.getColumnDimension()];
        for (int i = 0; i < means.length; ++i) {
            sd[i] = Math.sqrt(StatUtils.variance(m.getColumn(i), means[i]));
        }
        return sd;
    }

    /**
     * Centres and scales a matrix in place: each entry becomes
     * {@code (value - mu[c]) / sig[c]} for its column {@code c}.
     *
     * <p>The division is not guarded, so a column whose stored standard deviation
     * is zero ends up holding NaN or infinite values.
     *
     * @param m matrix to modify; column {@code c} uses the mean and standard
     *          deviation stored for column {@code c} at construction time
     */
    final public void normalize(RealMatrix m) {
        for (int c = 0; c < m.getColumnDimension(); ++c) {
            for (int r = 0; r < m.getRowDimension(); ++r) {
                m.setEntry(r, c, (m.getEntry(r, c) - mu[c]) / sig[c]);
            }
        }
    }

    /**
     * Inverse of {@link #normalize(RealMatrix)}: each entry becomes
     * {@code value * sig[c] + mu[c]} for its column {@code c}, in place.
     *
     * @param m matrix to modify; column {@code c} uses the mean and standard
     *          deviation stored for column {@code c} at construction time
     */
    final public void reNormalize(RealMatrix m) {
        for (int c = 0; c < m.getColumnDimension(); ++c) {
            for (int r = 0; r < m.getRowDimension(); ++r) {
                m.setEntry(r, c, (m.getEntry(r, c) * sig[c]) + mu[c]);
            }
        }
    }

    SingularValueDecomposition svd;
    double[] mu;   // column means of the matrix passed to the constructor
    double[] sig;  // column standard deviations of the matrix passed to the constructor

    /**
     * Applies the eigenvectors of a PCA to the principal components stored in a
     * kde file and writes the result as comma-separated text.
     *
     * <ul>
     *   <li>{@code args[0]} - data file - pca done on this matrix</li>
     *   <li>{@code args[1]} - kde file - results of deconvolution (Z array)</li>
     *   <li>{@code args[2]} - output file</li>
     * </ul>
     *
     * <p>Each line of the data file is {@code name,value,value,...}. Rows without
     * any strictly positive value are dropped. Each output line is the gene name
     * followed by one value per stage.
     *
     * @param args the three file names described above
     * @throws IllegalArgumentException if fewer than three arguments are given, if
     *                                  no usable row is found, or if the kept rows
     *                                  do not all have the same number of values
     * @throws Exception                on I/O failure or any error raised by the
     *                                  kde file or the matrix library
     */
    static public void main(String[] args) throws Exception {
        if (args.length < 3) {
            throw new IllegalArgumentException(
                    "usage: PCA <data file> <kde file> <output file>");
        }

        KdeFile kde = new KdeFile(args[1]);
        RealMatrix kdeMatrix = kde.asMatrix();  // PCs x stages

        List<String> geneList = new ArrayList<>();
        List<double[]> rows = readNonZeroRows(args[0], geneList);
        if (rows.isEmpty()) {
            throw new IllegalArgumentException(
                    "no row with a positive value found in " + args[0]);
        }
        // Every kept row was checked to have this many values.
        int sampleCount = rows.get(0).length;

        // build data matrix - samples x genes
        RealMatrix mat = new Array2DRowRealMatrix(sampleCount, rows.size());
        for (int gene = 0; gene < rows.size(); ++gene) {
            double[] data = rows.get(gene);
            for (int sample = 0; sample < sampleCount; ++sample) {
                mat.setEntry(sample, gene, data[sample]);
            }
        }
        PCA pca = new PCA(mat);

        RealMatrix eigen = pca.getEigenVectors();  // genes x PCs

        RealMatrix tZ = eigen.multiply(kdeMatrix);  // genes x stages
        // reNormalize works column-wise with per-gene mean/sd, so genes must be the columns.
        // NOTE(review): the constructor does not normalize the data before the SVD,
        // yet the projection is re-scaled here - confirm this is intended.
        tZ = tZ.transpose();
        pca.reNormalize(tZ);
        tZ = tZ.transpose();

        try (PrintStream stream = new PrintStream(args[2])) {
            for (int i = 0; i < geneList.size(); ++i) {
                stream.print(geneList.get(i));
                for (double value : tZ.getRow(i)) {
                    // Locale.ROOT keeps '.' as decimal separator so the commas stay unambiguous.
                    stream.printf(Locale.ROOT, ",%f", value);
                }
                stream.println();
            }
            // PrintStream never throws on write errors; surface them explicitly.
            if (stream.checkError()) {
                throw new IOException("failed writing " + args[2]);
            }
        }
    }

    /**
     * Reads the comma-separated data file and keeps the rows that contain at
     * least one strictly positive value.
     *
     * @param path      file to read; first field of a line is the row name, the
     *                  remaining fields are numbers
     * @param geneNames receives the name of every kept row, in file order
     * @return the numeric values of every kept row, parallel to {@code geneNames}
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a kept row has a different number of
     *                                  values than the first kept row
     */
    private static List<double[]> readNonZeroRows(String path, List<String> geneNames)
            throws IOException {
        List<double[]> rows = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            int lineNumber = 0;
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                ++lineNumber;
                String[] tokens = line.split(",");
                if (tokens.length < 2) {
                    continue;  // blank line or a name without values: nothing to keep
                }
                double[] values = new double[tokens.length - 1];
                boolean nonzero = false;
                for (int i = 0; i < values.length; ++i) {
                    values[i] = Double.parseDouble(tokens[i + 1]);
                    if (values[i] > 0.0) {
                        nonzero = true;
                    }
                }
                if (!nonzero) {
                    continue;
                }
                if (!rows.isEmpty() && rows.get(0).length != values.length) {
                    throw new IllegalArgumentException(
                            path + ", line " + lineNumber + ": expected "
                            + rows.get(0).length + " values but found " + values.length);
                }
                rows.add(values);
                geneNames.add(tokens[0]);
            }
        }
        return rows;
    }
}



