/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.rhwlab.chipseq;

import htsjdk.samtools.util.IntervalTree;
import htsjdk.samtools.util.IntervalTreeMap;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;
import java.util.TreeSet;
import org.rhwlab.gene.model.ModelFromGFF;
import org.rhwlab.genemodel.GenomeSequence;
import org.rhwlab.singlecell.randomforest.TF_TableModel;
import org.rhwlab.singlecell.randomforest.TargetTableModel;

/**
 * In-memory collection of {@link ImportanceAnnotatedBedRecord}s read from a
 * ChIP-seq bed file, indexed both by transcription factor (TF) and by
 * importance target.
 *
 * <p>Records are kept in a {@link TreeSet}, so uniqueness and iteration order
 * are whatever the ordering of {@code ImportanceAnnotatedBedRecord} defines.
 *
 * @author gevirl
 */
public class AnnotatedChipSeqBedFile {

    // Hard-coded locations used by the static driver methods below.
    private static final String EXPRESSION_FILE = "/net/waterston/vol2/home/gevirl/SingleCell_L2_GenesByCellTypes_TPM_20.0.csv";
    private static final String GFF3_FILE = "/net/waterston/vol9/References/WS260/c_elegans.PRJNA13758.WS260.annotations.WormBase.gff3";
    // NOTE(review): the annotations are WS260 but the genome sequence is WS245 - confirm the coordinates are compatible.
    private static final String GENOME_FASTA = "/net/waterston/vol9/References/WS245/c_elegans.PRJNA13758.WS245.genomic.renamed.fa";
    private static final String ANNOTATED_PEAK_BED = "/net/waterston/vol2/home/gevirl/annotatedPeak.bed";
    private static final String IMPORTANCE_PEAK_BED = "/net/waterston/vol2/home/gevirl/importancePeak.bed";

    // Name of the fasta file written by saveAsFasta(tf, genome, dir, delta); it does not depend on the tf.
    private static final String FASTA_FILE_NAME = "ChipSeqPeaks.fa";

    File bedFile;
    IntervalTreeMap geneTreeMap;
    TreeMap<String, IntervalTree> geneTrees;
    TreeSet<ImportanceAnnotatedBedRecord> records = new TreeSet<>();
    TreeMap<String, List<ImportanceAnnotatedBedRecord>> targetIndex = new TreeMap<>();  // indexed by target
    TreeMap<String, List<ImportanceAnnotatedBedRecord>> tfIndex = new TreeMap<>();  // indexed by tf

    /**
     * Creates an empty collection; records are added with {@link #addRecord}.
     */
    public AnnotatedChipSeqBedFile() {

    }

    /**
     * Reads a previously importance-annotated bed file, or an unannotated one.
     * One record is constructed from each line of the file.
     *
     * @param bedFile the bed file to read
     * @throws Exception if the file cannot be read or a record cannot be constructed
     */
    public AnnotatedChipSeqBedFile(File bedFile) throws Exception {
        this.bedFile = bedFile;
        // try-with-resources: the reader is closed even when a line fails to parse
        try (BufferedReader reader = new BufferedReader(new FileReader(bedFile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                addRecord(new ImportanceAnnotatedBedRecord(line));
            }
        }
    }

    /**
     * Forms the importance-annotated collection from an unannotated bed file.
     * Each line is handed, together with the expression file and the gene
     * interval structures, to the {@link ImportanceAnnotatedBedRecord} constructor.
     *
     * @param bedFile     the unannotated bed file to read
     * @param expFile     path of the expression file passed to each record
     * @param geneTreeMap gene interval map passed to each record
     * @param geneTrees   gene interval trees passed to each record
     * @throws Exception if the file cannot be read or a record cannot be constructed
     */
    public AnnotatedChipSeqBedFile(File bedFile, String expFile, IntervalTreeMap geneTreeMap, TreeMap<String, IntervalTree> geneTrees) throws Exception {
        this.bedFile = bedFile;
        this.geneTreeMap = geneTreeMap;
        this.geneTrees = geneTrees;
        // try-with-resources: the reader is closed even when annotation of a line fails
        try (BufferedReader reader = new BufferedReader(new FileReader(bedFile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                addRecord(new ImportanceAnnotatedBedRecord(line, expFile, geneTreeMap, geneTrees));
            }
        }
    }

    /**
     * Adds a record to the collection and to both indexes. A record that is
     * already present (according to the record ordering used by the
     * {@link TreeSet}) is ignored.
     *
     * @param rec the record to add
     */
    public void addRecord(ImportanceAnnotatedBedRecord rec) {
        // TreeSet.add returns false when an equal record is already present
        if (!records.add(rec)) {
            return;
        }
        appendToIndex(targetIndex, rec.getImportanceTarget(), rec);
        appendToIndex(tfIndex, rec.getTF(), rec);
    }

    // Appends rec to the list stored under key, creating the list on first use.
    private static void appendToIndex(TreeMap<String, List<ImportanceAnnotatedBedRecord>> index, String key, ImportanceAnnotatedBedRecord rec) {
        List<ImportanceAnnotatedBedRecord> list = index.get(key);
        if (list == null) {
            list = new ArrayList<>();
            index.put(key, list);
        }
        list.add(rec);
    }

    /**
     * Returns the records whose importance target is the given target,
     * grouped by TF.
     *
     * @param target the importance target to look up
     * @return a new map indexed by tf; empty when the target has no records
     */
    public TreeMap<String, List<ImportanceAnnotatedBedRecord>> getRecordsForTarget(String target) {
        TreeMap<String, List<ImportanceAnnotatedBedRecord>> ret = new TreeMap<>();

        List<ImportanceAnnotatedBedRecord> list = targetIndex.get(target);
        if (list != null) {
            for (ImportanceAnnotatedBedRecord rec : list) {
                appendToIndex(ret, rec.getTF(), rec);
            }
        }
        return ret;
    }

    /**
     * Returns the records for the given TF.
     *
     * @param tf the transcription factor to look up
     * @return the internal (live) list of records for the tf, or {@code null}
     *         when the tf has no records
     */
    public List<ImportanceAnnotatedBedRecord> getRecordsForTF(String tf) {
        return this.tfIndex.get(tf);
    }

    /**
     * Saves the bed records as fasta for all the tfs, printing each tf name
     * to standard output as it is processed.
     *
     * <p>NOTE(review): every tf is written to the same file name in
     * {@code dir}, so each tf overwrites the output of the previous one -
     * confirm whether that is intended.
     *
     * @param genome source of the sequences and chromosome sizes
     * @param dir    directory that receives the fasta file
     * @param delta  number of bases taken on each side of the peak position
     * @throws Exception if the output file cannot be written
     */
    public void saveAsFasta(GenomeSequence genome, File dir, int delta) throws Exception {
        for (String tf : this.tfIndex.keySet()) {
            System.out.println(tf);
            saveAsFasta(tf, genome, dir, delta);
        }
    }

    /**
     * Saves all the bed records for a given tf into the fasta file
     * {@code ChipSeqPeaks.fa} in the given directory. Nothing is written when
     * the tf has no records.
     *
     * @param tf     the transcription factor whose records are saved
     * @param genome source of the sequences and chromosome sizes
     * @param dir    directory that receives the fasta file
     * @param delta  number of bases taken on each side of the peak position
     * @throws Exception if the output file cannot be opened
     */
    public void saveAsFasta(String tf, GenomeSequence genome, File dir, int delta) throws Exception {
        List<ImportanceAnnotatedBedRecord> recs = this.getRecordsForTF(tf);
        if (recs != null) {
            // try-with-resources: the stream is closed even when sequence retrieval fails
            try (PrintStream stream = new PrintStream(new File(dir, FASTA_FILE_NAME))) {
                saveAsFasta(recs, genome, stream, delta);
            }
        }
    }

    /**
     * Writes a set of bed records to a stream in fasta format. For each record
     * the window {@code [peak - delta, peak + delta]} around
     * {@code start + peakOffset} is clipped to {@code [0, chromosome size]};
     * a window that has already been written is skipped.
     *
     * @param recs   the records to write; {@code null} writes nothing
     * @param genome source of the sequences and chromosome sizes
     * @param stream destination stream (not closed by this method)
     * @param delta  number of bases taken on each side of the peak position
     */
    static public void saveAsFasta(List<ImportanceAnnotatedBedRecord> recs, GenomeSequence genome, PrintStream stream, int delta) {
        TreeMap<String, Integer> chrSizes = genome.getChromosomeSizes();
        TreeSet<String> idSet = new TreeSet<>();
        if (recs != null) {
            for (ImportanceAnnotatedBedRecord rec : recs) {

                int midPeak = rec.getStart() + rec.getPeakOffset();
                int s = Math.max(midPeak - delta, 0);
                // single lookup of the chromosome size (throws NullPointerException for an unknown chromosome)
                int chrSize = chrSizes.get(rec.getChromosome());
                int e = Math.min(midPeak + delta, chrSize);

                String loc = String.format("%s:%d-%d", rec.getChromosome(), s, e);
                // add returns false for a window already written; the sequence is only fetched when needed
                if (idSet.add(loc)) {
                    String seq = genome.getSequence(rec.getChromosome(), s, e);
                    stream.printf(">%s %s\n", loc, rec.getTarget());
                    stream.println(seq);
                }
            }
        }
    }

    /**
     * Writes every record, one per line using its {@code toString()} form,
     * to the given file.
     *
     * @param outFile the file to write
     * @throws Exception if the file cannot be opened
     */
    public void save(File outFile) throws Exception {
        try (PrintStream stream = new PrintStream(outFile)) {
            for (ImportanceAnnotatedBedRecord rec : this.records) {
                stream.println(rec.toString());
            }
        }
    }

    // Builds the gene interval structures from the GFF3 annotations and uses them to annotate the peak bed file.
    private static AnnotatedChipSeqBedFile annotatePeakBed() throws Exception {
        ModelFromGFF gff3 = new ModelFromGFF(new File(GFF3_FILE));
        IntervalTreeMap geneIntervalMap = gff3.asTree("gene", null, "biotype=protein_coding");
        TreeMap<String, IntervalTree> geneIntervalTree = gff3.intervalTreesFromMap(geneIntervalMap);
        return new AnnotatedChipSeqBedFile(new File(ANNOTATED_PEAK_BED), EXPRESSION_FILE, geneIntervalMap, geneIntervalTree);
    }

    /**
     * Makes fasta files for all the tfs using all the peaks.
     *
     * <p>NOTE(review): every tf is written to the same file in the same
     * directory, so only the output of the last tf with records remains -
     * confirm whether that is intended.
     *
     * @throws Exception if any input cannot be read or the output cannot be written
     */
    static public void allFasta() throws Exception {
        File meme = new File("/net/waterston/vol2/home/gevirl/MEME/all");

        AnnotatedChipSeqBedFile bed = annotatePeakBed();

        GenomeSequence genome = new GenomeSequence(GENOME_FASTA);
        List<?> tfs = TF_TableModel.getTFs(EXPRESSION_FILE);
        for (Object obj : tfs) {
            String tf = (String) obj;
            System.out.println(tf);
            TargetTableModel model = new TargetTableModel(tf, EXPRESSION_FILE);
            AnnotatedChipSeqBedFile filteredBed = model.filterBed(bed, -1);
            filteredBed.saveAsFasta(tf, genome, meme, 50);
        }
    }

    /**
     * For every tf that has records in the bed file, writes one fasta file per
     * importance exponent into {@code memeDir/<tf>/E<importance>}. The bed is
     * filtered with the threshold {@code 10^importance} before saving.
     *
     * @param importances exponents of the importance thresholds
     * @param bedFile     importance-annotated bed file to read
     * @param memeDir     root directory of the output tree
     * @throws Exception if any input cannot be read or an output cannot be written
     */
    static public void fastaByImportance(int[] importances, File bedFile, File memeDir) throws Exception {
        GenomeSequence genome = new GenomeSequence(GENOME_FASTA);

        AnnotatedChipSeqBedFile bed = new AnnotatedChipSeqBedFile(bedFile);
        List<?> tfs = TF_TableModel.getTFs(EXPRESSION_FILE);

        for (Object obj : tfs) {
            String tf = (String) obj;
            if (bed.getRecordsForTF(tf) != null) {
                System.out.println(tf);
                for (int importance : importances) {
                    File dir = new File(new File(memeDir, tf), String.format("E%d", importance));
                    dir.mkdirs();
                    TargetTableModel model = new TargetTableModel(tf, EXPRESSION_FILE);
                    AnnotatedChipSeqBedFile filteredBed = model.filterBed(bed, Math.pow(10.0, importance));

                    filteredBed.saveAsFasta(tf, genome, dir, 50);
                }
            }
        }
    }

    /**
     * Annotates the peak bed file by importance and saves the result.
     *
     * @throws Exception if any input cannot be read or the output cannot be written
     */
    static public void annotateBedByImportance() throws Exception {
        AnnotatedChipSeqBedFile bed = annotatePeakBed();
        bed.save(new File(IMPORTANCE_PEAK_BED));
    }

    /**
     * Writes the per-tf, per-importance fasta files from the importance-annotated bed file.
     *
     * @param args ignored
     * @throws Exception if any input cannot be read or an output cannot be written
     */
    public static void main(String[] args) throws Exception {
        File memeDir = new File("/net/waterston/vol2/home/gevirl/meme");
        int[] importances = {-1, 7, 8, 9, 10};
        fastaByImportance(importances, new File(IMPORTANCE_PEAK_BED), memeDir);
    }
}
