/*
 * Decompiled with CFR 0.152.
 */
package picard.illumina;

import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.StringUtil;
import java.io.File;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.programgroups.BaseCallingProgramGroup;
import picard.illumina.IlluminaBasecallingMetrics;
import picard.illumina.NewIlluminaBasecallsConverter;
import picard.illumina.parser.BaseIlluminaDataProvider;
import picard.illumina.parser.ClusterData;
import picard.illumina.parser.IlluminaDataProviderFactory;
import picard.illumina.parser.IlluminaDataType;
import picard.illumina.parser.IlluminaFileUtil;
import picard.illumina.parser.NewIlluminaDataProvider;
import picard.illumina.parser.ParameterizedFileUtil;
import picard.illumina.parser.ReadStructure;
import picard.illumina.parser.readers.AbstractIlluminaPositionFileReader;
import picard.illumina.parser.readers.BclQualityEvaluationStrategy;
import picard.illumina.parser.readers.LocsFileReader;
import picard.util.TabbedTextFileWithHeaderParser;

@CommandLineProgramProperties(summary="Collects Illumina Basecalling metrics for a sequencing run.  <p>This tool will produce per-barcode and per-lane basecall metrics for each sequencing run.  Mean values for each metric are determined using data from all of the tiles.  This tool requires the following data, LANE(#), BASECALLS_DIR, READ_STRUCTURE, and an input file listing the sample barcodes.  Program will provide metrics including: the total numbers of bases, reads, and clusters, as well as the fractions of each bases, reads, and clusters that passed Illumina quality filters (PF) both per barcode and per lane.  For additional information on Illumina's PF quality metric, please see the corresponding <a href='https://www.broadinstitute.org/gatk/guide/article?id=6329'>GATK Dictionary entry</a>.</p> <p>The input barcode_list.txt file is a file containing all of the sample and molecular barcodes and can be obtained from the <a href='http://broadinstitute.github.io/picard/command-line-overview.html#ExtractIlluminaBarcodes'>ExtractIlluminaBarcodes</a> tool.  </p>Note: Metrics labeled as percentages are actually expressed as fractions!  <h4>Usage example:</h4><pre>java -jar picard.jar CollectIlluminaBasecallingMetrics \\<br />      BASECALLS_DIR=/BaseCalls/ \\<br />      LANE=001 \\<br />      READ_STRUCTURE=25T8B25T \\<br />      INPUT=barcode_list.txt </pre><p>Please see the CollectIlluminaBasecallingMetrics <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#IlluminaBasecallingMetrics'>definitions</a> for a complete description of the metrics produced by this tool.  </p><hr />", oneLineSummary="Collects Illumina Basecalling metrics for a sequencing run.  ", programGroup=BaseCallingProgramGroup.class)
@DocumentedFeature
public class CollectIlluminaBasecallingMetrics
extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Collects Illumina Basecalling metrics for a sequencing run.  ";
    static final String USAGE_DETAILS = "<p>This tool will produce per-barcode and per-lane basecall metrics for each sequencing run.  Mean values for each metric are determined using data from all of the tiles.  This tool requires the following data, LANE(#), BASECALLS_DIR, READ_STRUCTURE, and an input file listing the sample barcodes.  Program will provide metrics including: the total numbers of bases, reads, and clusters, as well as the fractions of each bases, reads, and clusters that passed Illumina quality filters (PF) both per barcode and per lane.  For additional information on Illumina's PF quality metric, please see the corresponding <a href='https://www.broadinstitute.org/gatk/guide/article?id=6329'>GATK Dictionary entry</a>.</p> <p>The input barcode_list.txt file is a file containing all of the sample and molecular barcodes and can be obtained from the <a href='http://broadinstitute.github.io/picard/command-line-overview.html#ExtractIlluminaBarcodes'>ExtractIlluminaBarcodes</a> tool.  </p>Note: Metrics labeled as percentages are actually expressed as fractions!  <h4>Usage example:</h4><pre>java -jar picard.jar CollectIlluminaBasecallingMetrics \\<br />      BASECALLS_DIR=/BaseCalls/ \\<br />      LANE=001 \\<br />      READ_STRUCTURE=25T8B25T \\<br />      INPUT=barcode_list.txt </pre><p>Please see the CollectIlluminaBasecallingMetrics <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#IlluminaBasecallingMetrics'>definitions</a> for a complete description of the metrics produced by this tool.  </p><hr />";
    @Argument(doc="The Illumina basecalls output directory from which data are read", shortName="B")
    public File BASECALLS_DIR;
    @Argument(doc="The barcodes directory with _barcode.txt files (generated by ExtractIlluminaBarcodes). If not set, use BASECALLS_DIR. ", shortName="BCD", optional=true)
    public File BARCODES_DIR;
    @Argument(doc="The lane whose data will be read", shortName="L")
    public Integer LANE;
    @Argument(doc="The file containing barcodes to expect from the run - barcodeData.#", shortName="I", optional=true)
    public File INPUT;
    @Argument(doc="A description of the logical structure of clusters in an Illumina Run, i.e. a description of the structure IlluminaBasecallsToSam assumes the  data to be in. It should consist of integer/character pairs describing the number of cycles and the type of those cycles (B for Sample Barcode, M for molecular barcode, T for Template, and S for skip).  E.g. If the input data consists of 80 base clusters and we provide a read structure of \"28T8M8B8S28T\" then the sequence may be split up into four reads:\n* read one with 28 cycles (bases) of template\n* read two with 8 cycles (bases) of molecular barcode (ex. unique molecular barcode)\n* read three with 8 cycles (bases) of sample barcode\n* 8 cycles (bases) skipped.\n* read four with 28 cycles (bases) of template\nThe skipped cycles would NOT be included in an output SAM/BAM file or in read groups therein.", shortName="RS")
    public String READ_STRUCTURE;
    @Argument(doc="The file to which the collected metrics are written", shortName="O", optional=true)
    public File OUTPUT;
    // Number of 'N' characters used to build the unmatched-barcode key. Stays 0 unless doWork()
    // reads a barcode sequence from INPUT.
    private int barcodeLength = 0;
    // Map key under which clusters without a matched barcode are tallied ('N' repeated barcodeLength times).
    private String unmatchedBarcode;
    // Per-barcode cluster tallies keyed by barcode sequence; iterated in key order when metrics are written.
    private final SortedMap<String, IlluminaMetricCounts> barcodeToMetricCounts = new TreeMap<String, IlluminaMetricCounts>();
    // Header of the INPUT column that holds the barcode name.
    private static final String BARCODE_NAME_COLUMN = "barcode_name";
    // Prefix of the INPUT columns that hold barcode sequences; a 1-based index is appended to it.
    private static final String BARCODE_SEQUENCE_COLUMN_NAME_STUB = "barcode_sequence_";

    /**
     * Tallies every cluster of the configured lane under its matched barcode and writes the
     * resulting metrics to {@code OUTPUT}.
     *
     * <p>When {@code OUTPUT} is not set it defaults to {@code LANE<n>_basecalling_metrics} inside
     * {@code BASECALLS_DIR}. When {@code INPUT} is set, the expected barcodes are loaded from it
     * before any cluster is read, so that each listed barcode gets a metrics row.
     *
     * @return always 0; failures surface as exceptions
     */
    @Override
    protected int doWork() {
        IOUtil.assertDirectoryIsReadable(this.BASECALLS_DIR);
        if (this.OUTPUT == null) {
            this.OUTPUT = new File(this.BASECALLS_DIR, String.format("LANE%s_basecalling_metrics", this.LANE));
        }
        IOUtil.assertFileIsWritable(this.OUTPUT);

        final ReadStructure readStructure = new ReadStructure(this.READ_STRUCTURE);
        final BclQualityEvaluationStrategy bclQualityEvaluationStrategy = new BclQualityEvaluationStrategy(2);

        if (this.INPUT != null) {
            this.loadExpectedBarcodes(readStructure);
        }

        // Evaluated once: it drives both the choice of factory and the choice of reading path.
        final boolean hasCbcls = IlluminaFileUtil.hasCbcls(this.BASECALLS_DIR, this.LANE);

        final IlluminaDataProviderFactory factory;
        if (this.INPUT == null) {
            factory = new IlluminaDataProviderFactory(this.BASECALLS_DIR, this.LANE, readStructure, bclQualityEvaluationStrategy, IlluminaDataType.PF, IlluminaDataType.Position);
        } else if (hasCbcls) {
            factory = new IlluminaDataProviderFactory(this.BASECALLS_DIR, this.BARCODES_DIR, this.LANE, readStructure, bclQualityEvaluationStrategy);
        } else if (this.barcodeToMetricCounts.isEmpty()) {
            // INPUT listed no barcode sequences, so barcode data is not requested.
            factory = new IlluminaDataProviderFactory(this.BASECALLS_DIR, this.BARCODES_DIR, this.LANE, readStructure, bclQualityEvaluationStrategy, IlluminaDataType.PF, IlluminaDataType.Position);
        } else {
            factory = new IlluminaDataProviderFactory(this.BASECALLS_DIR, this.BARCODES_DIR, this.LANE, readStructure, bclQualityEvaluationStrategy, IlluminaDataType.PF, IlluminaDataType.Position, IlluminaDataType.Barcodes);
        }

        this.unmatchedBarcode = StringUtil.repeatCharNTimes('N', this.barcodeLength);

        if (hasCbcls) {
            this.setupNewDataProvider(factory);
        } else {
            final BaseIlluminaDataProvider provider = factory.makeDataProvider();
            while (provider.hasNext()) {
                this.addCluster((ClusterData)provider.next());
            }
        }
        this.onComplete();
        return 0;
    }

    /**
     * Reads the barcode list in {@code INPUT} and registers an empty counter for every row that
     * carries at least one barcode sequence. The parser is always closed, even if a row fails to parse.
     *
     * @param readStructure supplies the number of sample-barcode columns to read from each row
     */
    private void loadExpectedBarcodes(final ReadStructure readStructure) {
        IOUtil.assertFileIsReadable(this.INPUT);
        final TabbedTextFileWithHeaderParser barcodesParser = new TabbedTextFileWithHeaderParser(this.INPUT);
        try {
            final int sampleBarcodeCount = readStructure.sampleBarcodes.length();
            for (final TabbedTextFileWithHeaderParser.Row row : barcodesParser) {
                final String barcodeName = row.getField(BARCODE_NAME_COLUMN);
                final StringBuilder barcode = new StringBuilder();
                for (int i = 1; i <= sampleBarcodeCount; ++i) {
                    barcode.append(row.getField(BARCODE_SEQUENCE_COLUMN_NAME_STUB + i));
                    // NOTE(review): the length is captured as soon as it is non-zero, i.e. after the
                    // first sequence column only. With several sample barcodes the unmatched key is
                    // therefore shorter than a full concatenated barcode - confirm this is intended.
                    if (this.barcodeLength == 0) {
                        this.barcodeLength = barcode.length();
                    }
                }
                // Rows without any barcode sequence are not registered.
                if (barcode.length() > 0) {
                    final String sequence = barcode.toString();
                    this.barcodeToMetricCounts.put(sequence, new IlluminaMetricCounts(sequence, barcodeName, this.LANE));
                }
            }
        } finally {
            barcodesParser.close();
        }
    }

    /**
     * Reads all clusters of a CBCL-based run, tile by tile, and tallies each one via
     * {@link #addCluster(ClusterData)}.
     *
     * <p>Collects the lane's CBCL files from its cycle sub-directories, loads cluster positions from
     * {@code s.locs} in the parent of {@code BASECALLS_DIR}, locates the per-tile filter files and
     * (when present) per-tile barcode files, then iterates one data provider per available tile.
     * {@code BARCODES_DIR} falls back to {@code BASECALLS_DIR} when unset.
     *
     * @param factory factory that lists the available tiles and builds a provider for each of them
     * @throws PicardException if no CBCL file is found for the lane
     */
    private void setupNewDataProvider(IlluminaDataProviderFactory factory) {
        if (this.BARCODES_DIR == null) {
            this.BARCODES_DIR = this.BASECALLS_DIR;
        }

        final String laneName = IlluminaFileUtil.longLaneStr(this.LANE);
        final File laneDir = new File(this.BASECALLS_DIR, laneName);
        final File[] cycleDirs = IOUtil.getFilesMatchingRegexp(laneDir, IlluminaFileUtil.CYCLE_SUBDIRECTORY_PATTERN);

        // The dot before the extension is escaped so only real ".cbcl" files match.
        final String cbclRegex = "^" + laneName + "_(\\d{1,5})\\.cbcl$";
        final List<File> cbcls = new ArrayList<File>();
        // Listings are null-checked on the assumption that they can be null for a missing or
        // unreadable directory (as File.listFiles is); such cases then reach the explicit
        // "No CBCL files found." error below instead of a NullPointerException.
        if (cycleDirs != null) {
            for (final File cycleDir : cycleDirs) {
                final File[] cycleCbcls = IOUtil.getFilesMatchingRegexp(cycleDir, cbclRegex);
                if (cycleCbcls != null) {
                    cbcls.addAll(Arrays.asList(cycleCbcls));
                }
            }
        }
        if (cbcls.isEmpty()) {
            throw new PicardException("No CBCL files found.");
        }
        IOUtil.assertFilesAreReadable(cbcls);

        // Cluster positions are read once and shared by the providers of every tile.
        final List<AbstractIlluminaPositionFileReader.PositionInfo> locs = new ArrayList<AbstractIlluminaPositionFileReader.PositionInfo>();
        final File locsFile = new File(this.BASECALLS_DIR.getParentFile(), "s.locs");
        IOUtil.assertFileIsReadable(locsFile);
        try (LocsFileReader locsFileReader = new LocsFileReader(locsFile)) {
            while (locsFileReader.hasNext()) {
                locs.add(locsFileReader.next());
            }
        }

        final Pattern laneTileRegex = Pattern.compile(ParameterizedFileUtil.escapePeriods(ParameterizedFileUtil.makeLaneTileRegex(".filter", this.LANE)));
        final File[] filterFiles = NewIlluminaBasecallsConverter.getTiledFiles(laneDir, laneTileRegex);
        IOUtil.assertFilesAreReadable(Arrays.asList(filterFiles));

        // Index barcode files by the tile number captured from their file name.
        final Pattern barcodeRegex = Pattern.compile(ParameterizedFileUtil.escapePeriods(ParameterizedFileUtil.makeBarcodeRegex(this.LANE)));
        final HashMap<Integer, File> barcodesFiles = new HashMap<Integer, File>();
        for (final File barcodeFile : NewIlluminaBasecallsConverter.getTiledFiles(this.BARCODES_DIR, barcodeRegex)) {
            final Matcher tileMatcher = barcodeRegex.matcher(barcodeFile.getName());
            if (!tileMatcher.matches()) {
                continue;
            }
            IOUtil.assertFileIsReadable(barcodeFile);
            barcodesFiles.put(Integer.valueOf(tileMatcher.group(1)), barcodeFile);
        }

        factory.getAvailableTiles().forEach(tile -> {
            // May be null when the tile has no barcode file; it is passed through unchanged.
            final File barcodeFile = barcodesFiles.get(tile);
            final NewIlluminaDataProvider provider = factory.makeDataProvider(cbcls, locs, filterFiles, (int)tile, barcodeFile);
            while (provider.hasNext()) {
                this.addCluster((ClusterData)provider.next());
            }
        });
    }

    /**
     * Counts one cluster against its barcode.
     *
     * <p>Clusters without a matched barcode are counted under the unmatched-barcode key. A counter
     * with no barcode name is created on first sight of a barcode that has no counter yet.
     *
     * @param cluster the cluster to tally; its tile and PF flag are recorded
     */
    private void addCluster(ClusterData cluster) {
        final String matched = cluster.getMatchedBarcode();
        final String key = matched != null ? matched : this.unmatchedBarcode;
        final IlluminaMetricCounts tally = this.barcodeToMetricCounts.computeIfAbsent(
                key, sequence -> new IlluminaMetricCounts(sequence, null, this.LANE));
        tally.incrementClusterCount(cluster.getTile(), cluster.isPf());
    }

    /**
     * Writes one metrics row per barcode, in barcode order, followed by a lane-wide row that
     * aggregates all barcodes, and saves the file to {@code OUTPUT}.
     *
     * <p>The lane-wide row is omitted when a counter is registered under the empty-string barcode.
     *
     * @throws PicardException wrapping any exception raised while building or writing the file
     */
    private void onComplete() {
        try {
            final MetricsFile<IlluminaBasecallingMetrics, Comparable<?>> metricsFile = this.getMetricsFile();
            final IlluminaMetricCounts laneTotals = new IlluminaMetricCounts(null, null, this.LANE);
            for (final IlluminaMetricCounts perBarcode : this.barcodeToMetricCounts.values()) {
                perBarcode.addMetricsToFile(metricsFile);
                laneTotals.addIlluminaMetricCounts(perBarcode);
            }
            final boolean hasEmptyBarcodeRow = this.barcodeToMetricCounts.containsKey("");
            if (!hasEmptyBarcodeRow) {
                laneTotals.addMetricsToFile(metricsFile);
            }
            metricsFile.write(this.OUTPUT);
        }
        catch (Exception ex) {
            throw new PicardException("Error writing output file " + this.OUTPUT.getPath(), ex);
        }
    }

    /**
     * Command-line entry point: hands the arguments to a new instance of this tool via
     * {@code instanceMainWithExit}.
     *
     * @param argv command-line arguments
     */
    public static void main(String[] argv) {
        final CollectIlluminaBasecallingMetrics program = new CollectIlluminaBasecallingMetrics();
        program.instanceMainWithExit(argv);
    }

    /**
     * Accumulates per-tile cluster counts (all clusters and PF clusters) for one barcode, or for
     * the whole lane, and turns them into an {@link IlluminaBasecallingMetrics} row.
     */
    private class IlluminaMetricCounts {
        /** Cluster count per tile number. */
        private final Histogram<Integer> tileToClusterHistogram = new Histogram<>();
        /** PF cluster count per tile number. */
        private final Histogram<Integer> tileToPfClusterHistogram = new Histogram<>();
        /** Metrics row filled in by {@link #addMetricsToFile(MetricsFile)}. */
        final IlluminaBasecallingMetrics metrics = new IlluminaBasecallingMetrics();

        /**
         * @param barcode     barcode sequence reported in the row (may be null)
         * @param barcodeName barcode name reported in the row (may be null)
         * @param laneNumber  lane reported in the row; must not be null
         */
        public IlluminaMetricCounts(String barcode, String barcodeName, Integer laneNumber) {
            this.metrics.MOLECULAR_BARCODE_SEQUENCE_1 = barcode;
            this.metrics.MOLECULAR_BARCODE_NAME = barcodeName;
            this.metrics.LANE = Integer.toString(laneNumber);
        }

        /** Counts a single cluster on the given tile; it also counts as PF when {@code isPf} is true. */
        public void incrementClusterCount(int tileNumber, boolean isPf) {
            this.incrementClusterCount(tileNumber, 1.0, isPf);
        }

        /** Adds {@code incrementAmount} clusters to the tile, and one PF cluster when {@code isPf} is true. */
        public void incrementClusterCount(int tileNumber, double incrementAmount, boolean isPf) {
            this.incrementClusterCount((Integer)tileNumber, incrementAmount, isPf ? 1.0 : 0.0);
        }

        /** Adds the given amounts to the tile's total and PF cluster counts. */
        public void incrementClusterCount(Integer tileNumber, double incrementAmount, double pfIncrementAmount) {
            this.tileToClusterHistogram.increment(tileNumber, incrementAmount);
            this.tileToPfClusterHistogram.increment(tileNumber, pfIncrementAmount);
        }

        /** Derives every field of {@link #metrics} from the two per-tile histograms. */
        private void onComplete() {
            final double meanClustersPerTile = this.tileToClusterHistogram.getMeanBinSize();
            this.metrics.MEAN_CLUSTERS_PER_TILE = Math.round(meanClustersPerTile);
            this.metrics.SD_CLUSTERS_PER_TILE = Math.round(this.tileToClusterHistogram.getStandardDeviationBinSize(meanClustersPerTile));

            final double meanPfClustersPerTile = this.tileToPfClusterHistogram.getMeanBinSize();
            this.metrics.MEAN_PF_CLUSTERS_PER_TILE = Math.round(meanPfClustersPerTile);
            this.metrics.SD_PF_CLUSTERS_PER_TILE = Math.round(this.tileToPfClusterHistogram.getStandardDeviationBinSize(meanPfClustersPerTile));

            // Per-tile PF fraction (PF clusters / clusters), reported as a percentage with two decimals.
            final Histogram<Integer> tileToPctPfClusterHistogram = this.tileToPfClusterHistogram.divideByHistogram(this.tileToClusterHistogram);
            final double meanPctPfClustersPerTile = tileToPctPfClusterHistogram.getMeanBinSize();
            this.metrics.MEAN_PCT_PF_CLUSTERS_PER_TILE = this.toRoundedPercent(meanPctPfClustersPerTile);
            this.metrics.SD_PCT_PF_CLUSTERS_PER_TILE = this.toRoundedPercent(tileToPctPfClusterHistogram.getStandardDeviationBinSize(meanPctPfClustersPerTile));

            this.metrics.TOTAL_CLUSTERS = (long)this.tileToClusterHistogram.getSumOfValues();
            this.metrics.PF_CLUSTERS = (long)this.tileToPfClusterHistogram.getSumOfValues();

            // Reads and bases are derived from the template segments of the read structure.
            final ReadStructure readStructure = new ReadStructure(CollectIlluminaBasecallingMetrics.this.READ_STRUCTURE);
            final int templateCount = readStructure.templates.length();
            int templateBaseCountPerCluster = 0;
            for (int i = 0; i < templateCount; ++i) {
                templateBaseCountPerCluster += readStructure.templates.get(i).length;
            }
            this.metrics.TOTAL_READS = this.metrics.TOTAL_CLUSTERS * (long)templateCount;
            this.metrics.PF_READS = this.metrics.PF_CLUSTERS * (long)templateCount;
            this.metrics.TOTAL_BASES = this.metrics.TOTAL_CLUSTERS * (long)templateBaseCountPerCluster;
            this.metrics.PF_BASES = this.metrics.PF_CLUSTERS * (long)templateBaseCountPerCluster;
        }

        /**
         * Converts a fraction to a percentage rounded to at most two decimal places.
         *
         * <p>The formatter is pinned to {@link Locale#ROOT} symbols so the text it produces always
         * uses '.' as decimal separator and can be parsed back; with the default locale a
         * comma-decimal locale would make the parse fail with a NumberFormatException.
         *
         * @param fraction value to convert
         * @return the rounded percentage, or 0.0 when {@code fraction} is NaN
         */
        private double toRoundedPercent(final double fraction) {
            if (Double.isNaN(fraction)) {
                return 0.0;
            }
            final DecimalFormat decFormat = new DecimalFormat("#.##", DecimalFormatSymbols.getInstance(Locale.ROOT));
            return Double.parseDouble(decFormat.format(fraction * 100.0));
        }

        /** Computes the metrics from the current counts and appends the row to {@code file}. */
        public void addMetricsToFile(MetricsFile<IlluminaBasecallingMetrics, Comparable<?>> file) {
            this.onComplete();
            file.addMetric(this.metrics);
        }

        /** Adds another counter's per-tile counts into this one (used to build the lane-wide totals). */
        public void addIlluminaMetricCounts(IlluminaMetricCounts counts) {
            this.tileToClusterHistogram.addHistogram(counts.tileToClusterHistogram);
            this.tileToPfClusterHistogram.addHistogram(counts.tileToPfClusterHistogram);
        }
    }
}

