/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.encode.ChipSeq.peaks;

import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalTree;
import htsjdk.samtools.util.IntervalTreeMap;
import java.io.File;
import java.io.PrintStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.rhwlab.UCSC.AggregateBed;
import static org.rhwlab.UCSC.AggregateBed.readBedRecords;
import org.rhwlab.encode.ChipSeq.IndexedSelection;
import org.rhwlab.encode.ChipSeq.ReleasedWormSelection;
import org.rhwlab.encode.ChipSeq.ViewByTF;
import org.rhwlab.encode.objects.EncodeObject;
import org.rhwlab.gene.model.Annotation;
import org.rhwlab.gene.model.AnnotationModel;
import org.rhwlab.gene.model.ModelFromGFF;
import org.rhwlab.modern.DCCFile;
import org.rhwlab.modern.ExperimentalFile;
import org.rhwlab.modern.ReferenceGenome;
import org.rhwlab.chipseq.ChipSeqBedRecord;
import org.rhwlab.chipseq.Peak;
import org.rhwlab.chipseq.PeakCluster;

/**
 * Clusters of ChIP-seq peaks, held per chromosome.
 *
 * <p>Peaks from any number of {@link PeakFile}s are merged per chromosome into
 * a single sorted list and then split into {@link PeakCluster}s wherever the
 * distance between two consecutive peaks exceeds a caller supplied
 * {@code delta}. The peak files can be downloaded from the DCC, read from a
 * directory of previously downloaded {@code .gz} files, or supplied directly.
 *
 * @author gevirl
 */
public class PeakClusters {

    /** Base name prefix of the DCC peak files that are used for clustering. */
    private static final String PEAK_FILE_PREFIX = "spp.optimal";
    /** Assembly used by {@link #main(String[])}. */
    private static final String MAIN_ASSEMBLY = "ce11";
    /** Clustering distance used by {@link #main(String[])}. */
    private static final int MAIN_DELTA = 60;
    /** Output directory used by {@link #main(String[])}. */
    private static final String MAIN_OUTPUT_DIR = "/net/waterston/vol2/home/gevirl/";

    /** Directory the peak files were read from; only set by the directory constructor. */
    File dir;
    /** Total number of peak records; not maintained by the DCC download constructor. */
    int n;
    /** Lists of clusters indexed by chromosome. */
    Map<String, List<PeakCluster>> clusterMap = new TreeMap<>();
    /** Names of all transcription factors that contributed a peak file. */
    TreeSet<String> tfs = new TreeSet<>();

    /**
     * Forms the clusters by downloading the peak data from the DCC.
     *
     * <p>Only files that are released, belong to {@code assembly} and whose
     * submitted file name starts with {@code spp.optimal} and ends with
     * {@code .gz} are used.
     *
     * @param assembly  genome assembly the peak files must have been called on
     * @param expSelect the experiments to take the peak files from
     * @param stages    stages to include, or {@code null} to include every stage
     * @param delta     consecutive peaks further apart than this start a new cluster
     * @throws Exception if an experiment or a peak file cannot be read
     */
    public PeakClusters(String assembly, IndexedSelection expSelect, Set<String> stages, int delta) throws Exception {
        List<PeakFile> peakFiles = new ArrayList<>();
        for (String tf : expSelect.getAllTFs()) {
            TreeMap<String, String[]> accMap = expSelect.getAccessionsForTF(tf);
            for (Map.Entry<String, String[]> entry : accMap.entrySet()) {
                String stage = entry.getKey();
                if (stages != null && !stages.contains(stage)) {
                    continue;
                }
                for (String acc : entry.getValue()) {
                    EncodeObject obj = expSelect.getEncodeObject(acc);
                    org.rhwlab.modern.Experiment modExp = new org.rhwlab.modern.Experiment(obj.getJsonObject());
                    for (DCCFile dccFile : modExp.getFiles()) {
                        if (!isUsablePeakFile(dccFile, assembly)) {
                            continue;
                        }
                        ExperimentalFile expFile = new ExperimentalFile(dccFile, null);
                        URL url = new URL(expFile.getDownloadLink());

                        ArrayList<ChipSeqBedRecord> bedRecords = new ArrayList<>();
                        readBedRecords(tf, stage, url, bedRecords);
                        PeakFile peakFile = new PeakFile(bedRecords, tf, stage);
                        peakFiles.add(peakFile);
                        tfs.add(peakFile.getTF());
                    }
                }
            }
        }
        clusterThePeaks(peakFiles, delta);
    }

    /**
     * Builds the peak clusters from previously downloaded files in a directory.
     *
     * <p>Every file whose name ends with {@code .gz} is read. The transcription
     * factor name is the second {@code _}-separated field of the file name with
     * any {@code "spp.optimal."} removed, so the file names are expected to
     * contain at least one underscore.
     *
     * @param dir   directory holding the peak files
     * @param delta consecutive peaks further apart than this start a new cluster
     * @throws IllegalArgumentException if {@code dir} cannot be listed
     * @throws Exception                if a peak file cannot be read
     */
    public PeakClusters(File dir, int delta) throws Exception {
        this.dir = dir;
        // listFiles() returns null (rather than throwing) when dir is not a directory or cannot be read
        File[] files = dir.listFiles();
        if (files == null) {
            throw new IllegalArgumentException("Not a readable directory: " + dir);
        }

        List<PeakFile> peakFiles = new ArrayList<>();
        int nFiles = 0;
        for (File file : files) {
            if (!file.getName().endsWith(".gz")) {
                continue;
            }
            ++nFiles;
            String tf = file.getName().split("_")[1].replace("spp.optimal.", "");

            PeakFile peakFile = new PeakFile(file, tf);
            peakFiles.add(peakFile);
            n = n + peakFile.getRecordCount();
            tfs.add(tf);
        }
        System.out.printf("Total peaks: %s,  total files: %d\n", n, nFiles);
        clusterThePeaks(peakFiles, delta);
    }

    /**
     * Builds the peak clusters from peak files that are already loaded.
     *
     * @param peakFiles the peak files to cluster
     * @param delta     consecutive peaks further apart than this start a new cluster
     */
    public PeakClusters(List<PeakFile> peakFiles, int delta) {
        for (PeakFile peakFile : peakFiles) {
            tfs.add(peakFile.getTF());
            // accumulate into the field; a shadowing local used to discard this total
            n = n + peakFile.getRecordCount();
        }
        clusterThePeaks(peakFiles, delta);
    }

    /**
     * Decides whether a DCC file is a peak file that should be clustered:
     * released, on the requested assembly, and named {@code spp.optimal*.gz}.
     */
    private static boolean isUsablePeakFile(DCCFile dccFile, String assembly) {
        if (!"released".equals(dccFile.getStatus())) {
            return false;
        }
        if (dccFile.getAssembly() == null || !dccFile.getAssembly().equals(assembly)) {
            return false;
        }
        String fname = dccFile.getSubmittedFileName();
        return fname.startsWith(PEAK_FILE_PREFIX) && fname.endsWith(".gz");
    }

    /**
     * Clusters the peaks of every chromosome found in the peak files and stores
     * the result in {@link #clusterMap}, replacing any previous entry for the
     * same chromosome.
     *
     * @param peakFiles the peak files to cluster
     * @param delta     consecutive peaks further apart than this start a new cluster
     */
    final public void clusterThePeaks(List<PeakFile> peakFiles, int delta) {
        Set<String> chromoSet = new TreeSet<>();
        for (PeakFile peakFile : peakFiles) {
            chromoSet.addAll(peakFile.getChromosomes());
        }
        for (String chromo : chromoSet) {
            clusterMap.put(chromo, this.clusterChromo(chromo, peakFiles, delta));
        }
    }

    /**
     * Clusters the peaks of one chromosome.
     *
     * @return the clusters in the order of the sorted peaks; empty if the
     *         chromosome has no peaks
     */
    private List<PeakCluster> clusterChromo(String chromo, List<PeakFile> peakFiles, int delta) {
        List<Peak> chromPeaks = aggregatePeaks(chromo, peakFiles);  // list of sorted peaks (all tfs) for the chromosome
        System.out.printf("%s peaks: %s\n", chromo, chromPeaks.size());

        ArrayList<PeakCluster> ret = new ArrayList<>();
        if (chromPeaks.isEmpty()) {
            return ret;
        }

        PeakCluster cluster = new PeakCluster(chromo);
        Peak currentPeak = chromPeaks.get(0);
        cluster.addPeak(currentPeak);

        for (int i = 1; i < chromPeaks.size(); ++i) {
            Peak next = chromPeaks.get(i);
            if (currentPeak.distanceTo(next) > delta) {
                // gap is too large: close the current cluster and start a new one
                ret.add(cluster);
                cluster = new PeakCluster(chromo);
            }
            cluster.addPeak(next);
            currentPeak = next;
        }
        // the cluster that is still open when the peaks run out must be kept too
        ret.add(cluster);
        return ret;
    }

    /**
     * Aggregates the peaks of a chromosome from all the peak files.
     *
     * <p>The per-file peak arrays are merged by repeatedly taking the smallest
     * head element, so the result is sorted provided each array is sorted.
     * On ties the peak from the earlier file is taken first.
     */
    private List<Peak> aggregatePeaks(String chromo, List<PeakFile> peakFiles) {
        List<Peak> ret = new ArrayList<>();

        List<Peak[]> peakArraysList = new ArrayList<>(); // the sorted peaks for this chromosome from all the peakfiles
        for (PeakFile peakFile : peakFiles) {
            Peak[] peaks = peakFile.getChromosomePeaks(chromo);
            // files without peaks on this chromosome contribute nothing to the merge
            if (peaks != null && peaks.length > 0) {
                peakArraysList.add(peaks);
            }
        }

        // initialize: the head of every array is its first element
        Peak[] currentPeak = new Peak[peakArraysList.size()];
        int[] pos = new int[peakArraysList.size()];  // index of the head within each array
        for (int i = 0; i < currentPeak.length; ++i) {
            currentPeak[i] = peakArraysList.get(i)[0];
        }

        // merge the peaks from the sorted arrays
        int m = minPeak(currentPeak);
        while (m != -1) {
            ret.add(currentPeak[m]);
            int next = pos[m] + 1;
            Peak[] peaks = peakArraysList.get(m);
            if (next < peaks.length) {
                currentPeak[m] = peaks[next];
                pos[m] = next;
            } else {
                // array exhausted: a null head takes it out of the merge
                pos[m] = -1;
                currentPeak[m] = null;
            }
            m = minPeak(currentPeak);
        }
        return ret;
    }

    /**
     * Writes a tab separated report: a header followed by one report per
     * cluster, chromosome by chromosome.
     *
     * @param file the file to write
     * @throws Exception if the file cannot be opened for writing
     */
    public void report(File file) throws Exception {
        try (PrintStream stream = new PrintStream(file)) {
            PeakCluster.reportHeader(stream, tfs, "\t");
            for (List<PeakCluster> clusters : clusterMap.values()) {
                for (PeakCluster cluster : clusters) {
                    cluster.report(stream, tfs, "\t");
                }
            }
        }
    }

    /**
     * Finds the minimum position peak.
     *
     * @return the index of the smallest non-null peak, the lowest such index on
     *         ties, or {@code -1} if every element is {@code null}
     */
    private int minPeak(Peak[] peaks) {
        int ret = -1;
        for (int i = 0; i < peaks.length; ++i) {
            if (peaks[i] == null) {
                continue;
            }
            if (ret == -1 || peaks[ret].compareTo(peaks[i]) > 0) {
                ret = i;
            }
        }
        return ret;
    }

    /**
     * Locates every cluster relative to the protein coding genes of its
     * chromosome and, when the model is a {@link ModelFromGFF}, assigns a
     * transcript of the target gene to the cluster.
     *
     * @param annotModel the gene annotation to locate the clusters in
     */
    // raw htsjdk types are kept because the element type expected by
    // PeakCluster.locateInGenome is not visible from this class
    @SuppressWarnings({"rawtypes", "unchecked"})
    public void locateInGenome(AnnotationModel annotModel) {
        ModelFromGFF gff = (annotModel instanceof ModelFromGFF) ? (ModelFromGFF) annotModel : null;

        for (Map.Entry<String, List<PeakCluster>> entry : clusterMap.entrySet()) {
            String chromo = entry.getKey();
            IntervalTreeMap geneIntervalMap = annotModel.asTree("gene", chromo, "biotype=protein_coding");

            // the same gene annotations, indexed by start/end only
            IntervalTree geneIntervalTree = new IntervalTree();
            for (Object obj : geneIntervalMap.keySet()) {
                Interval geneInterval = (Interval) obj;
                Object geneAnnot = geneIntervalMap.get(geneInterval);
                geneIntervalTree.put(geneInterval.getStart(), geneInterval.getEnd(), geneAnnot);
            }

            for (PeakCluster cluster : entry.getValue()) {
                cluster.locateInGenome(geneIntervalMap, geneIntervalTree);
                if (gff != null) {
                    assignTranscript(cluster, gff);
                }
            }
        }
    }

    /**
     * Assigns a transcript of the cluster's target gene to the cluster.
     *
     * <p>On the {@code +} strand the transcripts are scanned first to last and
     * the first one that starts after the cluster's mean position is taken; on
     * any other strand they are scanned last to first and the first one that
     * ends before the mean position is taken. The strand is read from the first
     * transcript. If no transcript qualifies the cluster is left unchanged.
     * NOTE(review): this presumably relies on getTranscriptsForGene returning
     * the transcripts in position order - confirm.
     */
    private void assignTranscript(PeakCluster cluster, ModelFromGFF gff) {
        String targetGene = cluster.getTarget();
        String[] names = gff.geneNameTriplet(targetGene);
        List<Annotation> annots = gff.getTranscriptsForGene(names[2]);
        if (annots.isEmpty()) {
            return;
        }

        String targetStrand = annots.get(0).getStrand();
        if (targetStrand.equals("+")) {
            for (int i = 0; i < annots.size(); ++i) {
                // test the i-th transcript, not always the first one
                Annotation annot = annots.get(i);
                if (cluster.getMeanPosition() < annot.getStart()) {
                    cluster.setTranscript(annot.getTranscript());
                    break;
                }
            }
        } else {
            for (int i = annots.size() - 1; i >= 0; --i) {
                Annotation annot = annots.get(i);
                if (cluster.getMeanPosition() > annot.getEnd()) {
                    cluster.setTranscript(annot.getTranscript());
                    break;
                }
            }
        }
    }

    /**
     * Makes a bed file from the clusters, one cluster after the other,
     * chromosome by chromosome.
     *
     * @param bedFile path of the bed file to write
     * @throws Exception if the file cannot be opened for writing
     */
    public void toBedFormat(String bedFile) throws Exception {
        try (PrintStream stream = new PrintStream(bedFile)) {
//        stream.println("track name=\"PeakClusters\" description=\"ChipSeq PeakClusters\" visibility=2 itemRgb=\"On\"");
            for (List<PeakCluster> clusters : clusterMap.values()) {
                for (PeakCluster cluster : clusters) {
                    cluster.reportAsBedFormat(stream);
                }
            }
        }
    }

    /**
     * Builds the worm peak clusters for several sets of stages and writes, for
     * each set, the cluster report, the cluster bed file and the aggregated
     * peak bed file.
     *
     * <p>This was for testing on a local machine - not the cluster.
     *
     * @param args {@code args[0]} is the directory of the worm selection
     * @throws Exception if any input cannot be read or any output cannot be written
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException("usage: PeakClusters <worm selection directory>");
        }

        ModelFromGFF gff3 = new ModelFromGFF(new File("/net/waterston/vol9/References/WS245/AllWormBase.withTransposon.gff3"));

        // build the cluster bed from a set of spp.optimal files downloaded from DCC
//          clusters = new PeakClusters(new File("/net/waterston/vol2/home/gevirl/Downloads/worm/ce11"), 60);
//          bedFile = "/net/waterston/vol2/home/gevirl/Downloads/worm/ce11/PeakClusters.bed";
        // build the worm peak clusters based on a set of stages
        Set<String> embryonicStages = new TreeSet<>(Arrays.asList("earlyembryonic", "lateembryonic", "midembryonic", "mixedstage(embryonic)"));
        Set<String> earlyLarvalStages = new TreeSet<>(Arrays.asList("L1larva", "L2larva", "L3larva"));
        Set<String> embryonicLarvalStages = new TreeSet<>();
        embryonicLarvalStages.addAll(embryonicStages);
        embryonicLarvalStages.addAll(earlyLarvalStages);
        ReleasedWormSelection selection = new ReleasedWormSelection(new File(args[0]));

        ReferenceGenome genome = new ReferenceGenome();
        ViewByTF view = new ViewByTF();
        view.setGenomes(genome);
        view.setSpecies("worm");
        view.setSelection(selection);
        genome.setAssembly("worm", MAIN_ASSEMBLY);

        writeStageSet(view, selection, gff3, embryonicStages, "Embryonic");
        writeStageSet(view, selection, gff3, embryonicLarvalStages, "Embryonic_Larval");
        writeStageSet(view, selection, gff3, earlyLarvalStages, "EarlyLarval");
        // null selects every stage; the outputs carry no prefix
        writeStageSet(view, selection, gff3, null, "");
    }

    /**
     * Clusters the peaks of one set of stages and writes
     * {@code <prefix>PeakClusters.tab}, {@code <prefix>PeakClusters.bed} and
     * {@code <prefix>Peak.bed} into the output directory.
     */
    private static void writeStageSet(ViewByTF view, ReleasedWormSelection selection, ModelFromGFF gff3,
            Set<String> stages, String prefix) throws Exception {
        AggregateBed aggBed = new AggregateBed(view.getRoot(), PEAK_FILE_PREFIX, "gz", stages);
        PeakClusters clusters = new PeakClusters(MAIN_ASSEMBLY, selection, stages, MAIN_DELTA);

        String bedFile = MAIN_OUTPUT_DIR + prefix + "PeakClusters.bed";
        String aggBedFile = MAIN_OUTPUT_DIR + prefix + "Peak.bed";
        String reportFile = MAIN_OUTPUT_DIR + prefix + "PeakClusters.tab";

        // convert the clusters to a bed file
        clusters.locateInGenome(gff3);
        clusters.report(new File(reportFile));
        clusters.toBedFormat(bedFile);
        aggBed.writeTo(new File(aggBedFile));
    }
}
