/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.encode.ChipSeq.peaks;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.rhwlab.chipseq.PeakCluster;
import umontreal.iro.lecuyer.probdist.BinomialDist;

/**
 * One row of a transcription-factor (TF) enrichment table: how often a TF is
 * seen in a selected set (e.g. the differentially expressed genes of one
 * tissue/time point) compared with how often it is seen in the background set,
 * together with the binomial p-value of that difference.
 *
 * <p>Rows order themselves by descending {@link #logP} and then by TF name, so
 * iterating a sorted collection lists the most enriched TFs first and the most
 * depleted TFs last.
 *
 * @author gevirl
 */
public class ClusterEnrichment implements Comparable<ClusterEnrichment> {
    /** Name of the transcription factor this row describes. */
    String tf;
    /** Number of items in the selected set in which the TF was observed. */
    int occupiedClusters;
    /** Size of the selected set (number of binomial trials). */
    int totalClusters;
    /** Observed fraction, {@code occupiedClusters / totalClusters}, as supplied by the caller. */
    double ratio;
    /** Number of items in the background set in which the TF was observed. */
    int tfClusters;
    /** Background fraction, {@code tfClusters / N}; used as the binomial success probability. */
    double genomeRatio;
    /** One-sided binomial tail probability. */
    double pVal;
    /** {@code "+"} when the TF is not depleted relative to background, {@code "-"} when it is. */
    String sign;
    /** Size of the background set. */
    int N;
    /**
     * Signed log10 p-value: {@code -log10(pVal)} for {@code "+"} rows and
     * {@code log10(pVal)} otherwise.
     */
    double logP;
    
    /**
     * Builds a row and derives {@link #logP} from {@code pVal} and {@code sign}.
     *
     * @param tf               transcription factor name
     * @param occupiedClusters count of the TF in the selected set
     * @param totalClusters    size of the selected set
     * @param ratio            observed fraction in the selected set
     * @param tfClusters       count of the TF in the background set
     * @param N                size of the background set
     * @param genomeRatio      background fraction
     * @param pVal             binomial tail probability
     * @param sign             {@code "+"} for enrichment, anything else for depletion
     */
    public ClusterEnrichment(String tf,int occupiedClusters,int totalClusters,double ratio,int tfClusters,int N,double genomeRatio,double pVal,String sign){
        this.tf = tf;
        this.occupiedClusters = occupiedClusters;
        this.totalClusters = totalClusters;
        this.ratio = ratio;
        this.tfClusters = tfClusters;
        this.N = N;
        this.genomeRatio = genomeRatio;
        this.pVal = pVal;
        this.sign = sign;

        // NOTE(review): a p-value that is not strictly positive (e.g. underflow to 0)
        // keeps a magnitude of 0 here, so such a row sorts next to p = 1 rather than
        // as the most significant one - confirm this is the intended behaviour.
        double signedLog = 0.0;
        if (pVal > 0.0){
            signedLog = Math.log10(pVal);
        }
        // Constant-first comparison so that a null sign is treated as "not +" instead of throwing.
        if ("+".equals(sign)){
            signedLog = -signedLog;
        }
        this.logP = signedLog;
    }

    /**
     * Reads every file in {@code directory} whose name contains {@code contains}
     * and parses each comma-separated line into a {@link ClusterEnrichment}.
     * Only the first nine columns are used, in the order written by
     * {@link #print(PrintStream)}; {@code logP} is recomputed by the constructor.
     *
     * @param directory directory to scan (not recursive)
     * @param contains  substring a file name must contain to be read
     * @return the parsed rows of each matching file, keyed by file name
     * @throws IllegalArgumentException if {@code directory} cannot be listed
     * @throws Exception if a file cannot be read or a line cannot be parsed
     */
    static public TreeMap<String,List<ClusterEnrichment>> readFiles(String directory,String contains)throws Exception {
        // listFiles() returns null (not an empty array) for a missing/unreadable directory.
        File[] candidates = new File(directory).listFiles();
        if (candidates == null){
            throw new IllegalArgumentException("Cannot list directory: " + directory);
        }
        TreeMap<String,List<ClusterEnrichment>> ret = new TreeMap<>();   
        for (File file : candidates){
            if (file.getName().contains(contains)){
                ret.put(file.getName(), readFile(file));
            }
        }
        return ret;
    }

    /** Parses one enrichment CSV file; the reader is closed even when a line fails to parse. */
    private static List<ClusterEnrichment> readFile(File file) throws Exception {
        List<ClusterEnrichment> rows = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))){
            String line;
            while ((line = reader.readLine()) != null){
                if (line.trim().isEmpty()){
                    continue;   // tolerate blank lines (e.g. a trailing newline added by an editor)
                }
                String[] tokens = line.split(",");
                rows.add(new ClusterEnrichment(
                        tokens[0],
                        Integer.parseInt(tokens[1]),
                        Integer.parseInt(tokens[2]),
                        Double.parseDouble(tokens[3]),
                        Integer.parseInt(tokens[4]),
                        Integer.parseInt(tokens[5]),
                        Double.parseDouble(tokens[6]),
                        Double.parseDouble(tokens[7]),
                        tokens[8]
                ));
            }
        }
        return rows;
    }

    /**
     * Writes this row as one comma-separated line:
     * tf, occupiedClusters, totalClusters, ratio, tfClusters, N, genomeRatio, pVal, sign, logP.
     *
     * @param stream destination stream; it is not closed by this method
     */
    public void print(PrintStream stream){
        stream.printf("%s,%d,%d,%f,%d,%d,%f,%e,%s,%f\n", tf,occupiedClusters,totalClusters,ratio,tfClusters,N,genomeRatio,pVal,sign,logP);
    }

    /**
     * Orders rows by descending {@link #logP}; ties are broken by TF name so that
     * distinct TFs with equal scores are both kept in a sorted set.
     */
    @Override
    public int compareTo(ClusterEnrichment other) {
        int ret = Double.compare(other.logP, logP);
        if (ret == 0){
            ret = tf.compareTo(other.tf);
        }
        return ret;
    }

    /**
     * Runs the one-sided binomial test for a single TF and wraps the result in a row.
     * The upper tail ({@code barF}) is used when the observed fraction exceeds the
     * background probability, otherwise the lower tail ({@code cdf}).
     *
     * @param tf              transcription factor name
     * @param observed        count of the TF in the selected set
     * @param trials          size of the selected set
     * @param background      count of the TF in the background set
     * @param backgroundTotal size of the background set
     * @param p               background probability of seeing the TF
     */
    private static ClusterEnrichment binomialTest(String tf,int observed,int trials,int background,int backgroundTotal,double p){
        BinomialDist dist = new BinomialDist(trials, p);
        double ratio = (double)observed/(double)trials;
        double pVal = (p < ratio) ? dist.barF(observed) : dist.cdf(observed);
        String sign = (p > ratio) ? "-" : "+";
        return new ClusterEnrichment(tf,observed,trials,ratio,background,backgroundTotal,p,pVal,sign);
    }
    
    /**
     * Computes, for every tissue/time combination, the enrichment of each TF among
     * the target genes that are differentially expressed (threshold 0.05) and writes
     * one sorted CSV per combination into the enrichment directory.
     * Input and output locations are hard-coded; {@code args} is ignored.
     */
    static public void main(String[] args) throws Exception {
        int maxClusterTFs = 50;
        File dir = new File("/net/waterston/vol2/home/gevirl/FACS/enrichment");
        ClusterReportFile file = new ClusterReportFile(new File("/net/waterston/vol2/home/gevirl/Embryonic_LarvalPeakClusters.tab"));
        // Currently unused; the call is kept in case the getter has side effects - TODO confirm and remove.
        Set<String> chippedTFs = file.getChipedTFs();
        
        DEGenes de = new DEGenes(new File("/net/waterston/vol2/home/gevirl/Downloads/gene_diffexpress.yes_exclude.180130.txt"));
        String[] times = {"T0", "T1", "T2", "T3", "T4"};
        String[] tissues = {"ceh32", "cnd1", "end1", "hlh1", "nhr25", "pha4", "tbx37"};
        // Disabled cluster-level variant of the analysis, retained for reference.
/*        
        // compute the probability a cluster will contain a TF for each TF based on all clusters
        TreeMap<String, Integer> allGeneEnrich = new TreeMap<>(); // number of clusters indexed by tf
        int allN = file.clusterEnrichment(new TreeSet<>(), allGeneEnrich,maxClusterTFs);
        TreeMap<String, Double> binomialP = new TreeMap<>(); // probability of cluster containing a tf indexed by tf (all clusters < size limit)
        for (String tf : allGeneEnrich.keySet()) {
            double p = (double) allGeneEnrich.get(tf) / (double) allN;
            binomialP.put(tf, p);
        }
        for (String tissue : tissues) {
            for (String time : times) {
                System.out.printf("%s    %s\n",tissue,time);
                File outFile = new File(dir,String.format("%s_%s_clusterEnrich_%d.csv",tissue,time,maxClusterTFs));
                PrintStream stream = new PrintStream(outFile);
                TreeMap<String, Integer> geneEnrich = new TreeMap<>(); // number of clusters with a tf indexed by tf for the DE genes
                Set<String> genes = de.getGenes(tissue,time, 0.05);
                int n = file.clusterEnrichment(genes, geneEnrich,maxClusterTFs);
                TreeSet<ClusterEnrichment> sort = new TreeSet<>();
                for (String tf : allGeneEnrich.keySet()){
                    BinomialDist dist = new BinomialDist(n, binomialP.get(tf));
                    double pVal;
                    Integer enrich = geneEnrich.get(tf);
                    if (enrich == null){
                        enrich = 0;
                    }                    
                    double ratio = (double)enrich/(double)n;
                    if (binomialP.get(tf) < ratio){
                        pVal = dist.barF(enrich);

                    } else {
                        pVal = dist.cdf(enrich);
                    }
                    
                    String sign = "+";;
                    if (binomialP.get(tf) > ratio){
                        sign = "-";
                    }
                    sort.add(new ClusterEnrichment(tf,enrich,n,ratio,allGeneEnrich.get(tf),allN,binomialP.get(tf),pVal,sign));
                }
                for (ClusterEnrichment en : sort){
                    en.print(stream);
                }
                stream.close();
            }
        }
 */       
        TreeMap<String,List<PeakCluster>> allClusterMap = file.allIndexedClusters(maxClusterTFs);  // lists of clusters indexed by gene target
        
        TreeMap<String, Integer> allTargetEnrich = new TreeMap<>(); // number of gene targets indexed by tf
        int geneN = file.geneEnrichment(allClusterMap.keySet(),allClusterMap, allTargetEnrich,maxClusterTFs);
        
        TreeMap<String, Double> binP = new TreeMap<>(); // probability of target gene having a cluster with a tf indexed by tf (all clusters < size limit)
        for (String tf : allTargetEnrich.keySet()) {
            double p = (double) allTargetEnrich.get(tf) / (double) geneN;
            binP.put(tf, p);
        }  
        
        for (String tissue : tissues) {
            for (String time : times) {
                System.out.printf("%s    %s\n",tissue,time);
                
                TreeMap<String, Integer> geneEnrich = new TreeMap<>(); // number of genes with a tf indexed by tf for the DE genes
                Set<String> genes = de.getGenes(tissue,time, 0.05);
                int n = file.geneEnrichment(genes,allClusterMap, geneEnrich,maxClusterTFs);
                
                // A TreeSet sorts the rows via compareTo: most enriched first.
                TreeSet<ClusterEnrichment> sort = new TreeSet<>();
                for (String tf : allTargetEnrich.keySet()){
                    Integer enrich = geneEnrich.get(tf);
                    int observed = (enrich == null) ? 0 : enrich;   // TF absent from the DE set
                    sort.add(binomialTest(tf,observed,n,allTargetEnrich.get(tf),geneN,binP.get(tf)));
                }
                
                File outFile = new File(dir,String.format("%s_%s_geneEnrich_%d.csv",tissue,time,maxClusterTFs));
                // try-with-resources: the file is flushed and closed even if printing fails.
                try (PrintStream stream = new PrintStream(outFile)){
                    for (ClusterEnrichment en : sort){
                        en.print(stream);
                    }
                }
            }
        }        
    }
}
