/*
 * Decompiled with CFR 0.152.
 */
package picard.analysis.artifacts;

import htsjdk.samtools.AlignmentBlock;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.filter.AggregateFilter;
import htsjdk.samtools.filter.AlignedFilter;
import htsjdk.samtools.filter.DuplicateReadFilter;
import htsjdk.samtools.filter.FailsVendorReadQualityFilter;
import htsjdk.samtools.filter.InsertSizeFilter;
import htsjdk.samtools.filter.MappingQualityFilter;
import htsjdk.samtools.filter.NotPrimaryAlignmentFilter;
import htsjdk.samtools.filter.SamRecordFilter;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.CodeUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.IntervalListReferenceSequenceMask;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.analysis.SinglePassSamProgram;
import picard.analysis.artifacts.ArtifactCounter;
import picard.analysis.artifacts.ErrorSummaryMetrics;
import picard.analysis.artifacts.SequencingArtifactMetrics;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.util.DbSnpBitSetUtil;
import picard.util.VariantType;

@CommandLineProgramProperties(summary="Collect metrics to quantify single-base sequencing artifacts.  <p>This tool examines two sources of sequencing errors associated with hybrid selection protocols.  These errors are divided into two broad categories, pre-adapter and bait-bias.  Pre-adapter errors can arise from laboratory manipulations of a nucleic acid sample e.g. shearing and occur prior to the ligation of adapters for PCR amplification (hence the name pre-adapter).  </p><p>Bait-bias artifacts occur during or after the target selection step, and correlate with substitution rates that are 'biased', or higher for sites having one base on the reference/positive strand relative to sites having the complementary base on that strand.  For example, during the target selection step, a (G>T) artifact might result in a higher substitution rate at sites with a G on the positive strand (and C on the negative), relative to sites with the flip (C positive)/(G negative).  This is known as the 'G-Ref' artifact. </p><p>For additional information on these types of artifacts, please see the corresponding GATK dictionary entries on <a href='https://www.broadinstitute.org/gatk/guide/article?id=6333'>bait-bias</a> and <a href='https://www.broadinstitute.org/gatk/guide/article?id=6332'>pre-adapter artifacts</a>.</p><p>This tool produces four files; summary and detail metrics files for both pre-adapter and bait-bias artifacts. The detailed metrics show the error rates for each type of base substitution within every possible triplet base configuration.  Error rates associated with these substitutions are Phred-scaled and provided as quality scores, the lower the value, the more likely it is that an alternate base call is due to an artifact. The summary metrics provide likelihood information on the 'worst-case' errors. 
</p><h4>Usage example:</h4><pre>java -jar picard.jar CollectSequencingArtifactMetrics \\<br />     I=input.bam \\<br />     O=artifact_metrics.txt \\<br />     R=reference_sequence.fasta</pre>Please see the metrics at the following links <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.PreAdapterDetailMetrics'>PreAdapterDetailMetrics</a>, <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.PreAdapterSummaryMetrics'>PreAdapterSummaryMetrics</a>, <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.BaitBiasDetailMetrics'>BaitBiasDetailMetrics</a>, and <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.BaitBiasSummaryMetrics'>BaitBiasSummaryMetrics</a> for complete descriptions of the output metrics produced by this tool. <hr />", oneLineSummary="Collect metrics to quantify single-base sequencing artifacts.  ", programGroup=DiagnosticsAndQCProgramGroup.class)
@DocumentedFeature
public class CollectSequencingArtifactMetrics
extends SinglePassSamProgram {
    static final String USAGE_SUMMARY = "Collect metrics to quantify single-base sequencing artifacts.  ";
    static final String USAGE_DETAILS = "<p>This tool examines two sources of sequencing errors associated with hybrid selection protocols.  These errors are divided into two broad categories, pre-adapter and bait-bias.  Pre-adapter errors can arise from laboratory manipulations of a nucleic acid sample e.g. shearing and occur prior to the ligation of adapters for PCR amplification (hence the name pre-adapter).  </p><p>Bait-bias artifacts occur during or after the target selection step, and correlate with substitution rates that are 'biased', or higher for sites having one base on the reference/positive strand relative to sites having the complementary base on that strand.  For example, during the target selection step, a (G>T) artifact might result in a higher substitution rate at sites with a G on the positive strand (and C on the negative), relative to sites with the flip (C positive)/(G negative).  This is known as the 'G-Ref' artifact. </p><p>For additional information on these types of artifacts, please see the corresponding GATK dictionary entries on <a href='https://www.broadinstitute.org/gatk/guide/article?id=6333'>bait-bias</a> and <a href='https://www.broadinstitute.org/gatk/guide/article?id=6332'>pre-adapter artifacts</a>.</p><p>This tool produces four files; summary and detail metrics files for both pre-adapter and bait-bias artifacts. The detailed metrics show the error rates for each type of base substitution within every possible triplet base configuration.  Error rates associated with these substitutions are Phred-scaled and provided as quality scores, the lower the value, the more likely it is that an alternate base call is due to an artifact. The summary metrics provide likelihood information on the 'worst-case' errors. 
</p><h4>Usage example:</h4><pre>java -jar picard.jar CollectSequencingArtifactMetrics \\<br />     I=input.bam \\<br />     O=artifact_metrics.txt \\<br />     R=reference_sequence.fasta</pre>Please see the metrics at the following links <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.PreAdapterDetailMetrics'>PreAdapterDetailMetrics</a>, <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.PreAdapterSummaryMetrics'>PreAdapterSummaryMetrics</a>, <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.BaitBiasDetailMetrics'>BaitBiasDetailMetrics</a>, and <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.BaitBiasSummaryMetrics'>BaitBiasSummaryMetrics</a> for complete descriptions of the output metrics produced by this tool. <hr />";
    @Argument(doc="An optional list of intervals to restrict analysis to.", optional=true)
    public File INTERVALS;
    @Argument(doc="VCF format dbSNP file, used to exclude regions around known polymorphisms from analysis.", optional=true)
    public File DB_SNP;
    @Argument(shortName="Q", doc="The minimum base quality score for a base to be included in analysis.")
    public int MINIMUM_QUALITY_SCORE = 20;
    @Argument(shortName="MQ", doc="The minimum mapping quality score for a base to be included in analysis.")
    public int MINIMUM_MAPPING_QUALITY = 30;
    @Argument(shortName="MIN_INS", doc="The minimum insert size for a read to be included in analysis.")
    public int MINIMUM_INSERT_SIZE = 60;
    @Argument(shortName="MAX_INS", doc="The maximum insert size for a read to be included in analysis. Set to 0 to have no maximum.")
    public int MAXIMUM_INSERT_SIZE = 600;
    @Argument(shortName="UNPAIRED", doc="Include unpaired reads. If set to true then all paired reads will be included as well - MINIMUM_INSERT_SIZE and MAXIMUM_INSERT_SIZE will be ignored.")
    public boolean INCLUDE_UNPAIRED = false;
    @Argument(shortName="DUPES", doc="Include duplicate reads. If set to true then all reads flagged as duplicates will be included as well.")
    public boolean INCLUDE_DUPLICATES = false;
    @Argument(shortName="NON_PF", doc="Whether or not to include non-PF reads.")
    public boolean INCLUDE_NON_PF_READS = false;
    @Argument(shortName="TANDEM", doc="Set to true if mate pairs are being sequenced from the same strand, i.e. they're expected to face the same direction.")
    public boolean TANDEM_READS = false;
    @Argument(doc="When available, use original quality scores for filtering.")
    public boolean USE_OQ = true;
    @Argument(doc="The number of context bases to include on each side of the assayed base.")
    public int CONTEXT_SIZE = 1;
    @Argument(doc="If specified, only print results for these contexts in the detail metrics output. However, the summary metrics output will still take all contexts into consideration.", optional=true)
    public Set<String> CONTEXTS_TO_PRINT = new HashSet<String>();
    @Argument(shortName="EXT", doc="Append the given file extension to all metric file names (ex. OUTPUT.pre_adapter_summary_metrics.EXT). None if null", optional=true)
    public String FILE_EXTENSION = null;
    /** Library name substituted when a read group (or a read) carries no library. */
    private static final String UNKNOWN_LIBRARY = "UnknownLibrary";
    /** Sample name substituted when a header read group carries no sample. */
    private static final String UNKNOWN_SAMPLE = "UnknownSample";
    // Output files; all five are derived from OUTPUT plus a fixed suffix in setup().
    private File preAdapterSummaryOut;
    private File preAdapterDetailsOut;
    private File baitBiasSummaryOut;
    private File baitBiasDetailsOut;
    private File errorSummaryFile;
    /** Mask built from INTERVALS in setup(); stays null when INTERVALS is not supplied. */
    private IntervalListReferenceSequenceMask intervalMask;
    /** Known-site lookup built from DB_SNP in setup(); stays null when DB_SNP is not supplied. */
    private DbSnpBitSetUtil dbSnpMask;
    /** Aggregate of the read-level filters assembled in setup(); consulted first in acceptRead(). */
    private SamRecordFilter recordFilter;
    // Cache used by getRefContext(): the upper-cased bases of the contig whose index is currentRefIndex.
    private String currentRefString = null;
    private int currentRefIndex = -1;
    /** Sample names collected from the header read groups in setup(). */
    private final Set<String> samples = new HashSet<String>();
    /** Library names collected from the header read groups in setup(); reads from any other library are rejected. */
    private final Set<String> libraries = new HashSet<String>();
    /** One counter per library name, created in setup() and drained in finish(). */
    private final Map<String, ArtifactCounter> artifactCounters = new HashMap<String, ArtifactCounter>();
    private static final Log log = Log.getInstance(CollectSequencingArtifactMetrics.class);

    /**
     * Always answers {@code true}: acceptRead() reads the reference bases around every
     * aligned position, so the analysis cannot run without a reference.
     * NOTE(review): presumably consulted by the SinglePassSamProgram base class to enforce
     * that a reference was supplied - confirm there.
     *
     * @return {@code true}, unconditionally
     */
    @Override
    protected boolean requiresReference() {
        return true;
    }

    /**
     * Checks the context and insert-size arguments for mutually inconsistent values.
     *
     * <p>Verifies that CONTEXT_SIZE is non-negative, that every entry of CONTEXTS_TO_PRINT has
     * exactly {@code 2 * CONTEXT_SIZE + 1} characters, that neither insert-size bound is
     * negative, and that a non-zero MAXIMUM_INSERT_SIZE is not below MINIMUM_INSERT_SIZE.
     *
     * @return one message per failed check, or {@code null} when every check passes
     */
    @Override
    protected String[] customCommandLineValidation() {
        final List<String> problems = new ArrayList<>();

        if (this.CONTEXT_SIZE < 0) {
            problems.add("CONTEXT_SIZE cannot be negative");
        }

        // A context is the assayed base plus CONTEXT_SIZE flanking bases on each side.
        final int expectedLength = 2 * this.CONTEXT_SIZE + 1;
        for (final String requested : this.CONTEXTS_TO_PRINT) {
            if (requested.length() != expectedLength) {
                problems.add("Context " + requested + " is not the length implied by CONTEXT_SIZE: " + expectedLength);
            }
        }

        if (this.MINIMUM_INSERT_SIZE < 0) {
            problems.add("MINIMUM_INSERT_SIZE cannot be negative");
        }
        if (this.MAXIMUM_INSERT_SIZE < 0) {
            problems.add("MAXIMUM_INSERT_SIZE cannot be negative");
        }
        // A maximum of 0 means "no maximum", so only a positive maximum is compared to the minimum.
        final boolean hasUpperBound = this.MAXIMUM_INSERT_SIZE > 0;
        if (hasUpperBound && this.MAXIMUM_INSERT_SIZE < this.MINIMUM_INSERT_SIZE) {
            problems.add("MAXIMUM_INSERT_SIZE cannot be less than MINIMUM_INSERT_SIZE unless set to 0");
        }

        if (problems.isEmpty()) {
            return null;
        }
        return problems.toArray(new String[0]);
    }

    /**
     * Prepares everything needed before reads are streamed: output files, sample and library
     * names, the optional interval and dbSNP masks, the read filter, and one counter per library.
     *
     * @param header  header of the input; supplies the read groups and the sequence dictionary
     * @param samFile the input file (not referenced by this implementation)
     */
    @Override
    protected void setup(SAMFileHeader header, File samFile) {
        // Output files: OUTPUT + fixed suffix + optional user extension.
        final String extension = this.FILE_EXTENSION == null ? "" : this.FILE_EXTENSION;
        this.preAdapterSummaryOut = new File(this.OUTPUT + ".pre_adapter_summary_metrics" + extension);
        this.preAdapterDetailsOut = new File(this.OUTPUT + ".pre_adapter_detail_metrics" + extension);
        this.baitBiasSummaryOut = new File(this.OUTPUT + ".bait_bias_summary_metrics" + extension);
        this.baitBiasDetailsOut = new File(this.OUTPUT + ".bait_bias_detail_metrics" + extension);
        this.errorSummaryFile = new File(this.OUTPUT + ".error_summary_metrics" + extension);
        IOUtil.assertFilesAreWritable(Arrays.asList(
                this.preAdapterSummaryOut,
                this.preAdapterDetailsOut,
                this.baitBiasSummaryOut,
                this.baitBiasDetailsOut,
                this.errorSummaryFile));

        // Collect sample and library names, falling back to placeholders for missing values.
        for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
            this.samples.add(CodeUtil.getOrElse(readGroup.getSample(), UNKNOWN_SAMPLE));
            this.libraries.add(CodeUtil.getOrElse(readGroup.getLibrary(), UNKNOWN_LIBRARY));
        }

        // The interval list, when given, both masks the analysis and is handed to the dbSNP lookup.
        IntervalList restrictTo = null;
        if (this.INTERVALS != null) {
            IOUtil.assertFileIsReadable(this.INTERVALS);
            restrictTo = IntervalList.fromFile(this.INTERVALS).uniqued();
            this.intervalMask = new IntervalListReferenceSequenceMask(restrictTo);
        }
        if (this.DB_SNP != null) {
            IOUtil.assertFileIsReadable(this.DB_SNP);
            this.dbSnpMask = new DbSnpBitSetUtil(
                    this.DB_SNP,
                    header.getSequenceDictionary(),
                    EnumSet.noneOf(VariantType.class),
                    restrictTo,
                    Optional.of(log));
        }

        // Read-level filters, in the same order as they are evaluated.
        final List<SamRecordFilter> readFilters = new ArrayList<>();
        if (!this.INCLUDE_NON_PF_READS) {
            readFilters.add(new FailsVendorReadQualityFilter());
        }
        readFilters.add(new NotPrimaryAlignmentFilter());
        if (!this.INCLUDE_DUPLICATES) {
            readFilters.add(new DuplicateReadFilter());
        }
        readFilters.add(new AlignedFilter(true));
        readFilters.add(new MappingQualityFilter(this.MINIMUM_MAPPING_QUALITY));
        if (!this.INCLUDE_UNPAIRED) {
            // MAXIMUM_INSERT_SIZE == 0 means "no maximum".
            final int upperBound;
            if (this.MAXIMUM_INSERT_SIZE == 0) {
                upperBound = Integer.MAX_VALUE;
            } else {
                upperBound = this.MAXIMUM_INSERT_SIZE;
            }
            readFilters.add(new InsertSizeFilter(this.MINIMUM_INSERT_SIZE, upperBound));
        }
        this.recordFilter = new AggregateFilter(readFilters);

        // Every library gets its own counter, labelled with the comma-joined sample names.
        final List<String> sampleNames = new ArrayList<>(this.samples);
        final String sampleAlias = StringUtil.join(",", sampleNames);
        this.libraries.forEach(libraryName -> this.artifactCounters.put(
                libraryName,
                new ArtifactCounter(sampleAlias, libraryName, this.CONTEXT_SIZE, this.TANDEM_READS)));
    }

    /**
     * Tallies every usable aligned base of one read into the counter of the read's library.
     *
     * <p>A read is dropped entirely when the record filter rejects it. A base is skipped when
     * its quality is below MINIMUM_QUALITY_SCORE, the read base is N, the position is outside
     * the interval mask, the position is a dbSNP site, the context window would run off either
     * end of the reference, or the reference context contains an N.
     *
     * @param rec the read to examine
     * @param ref the reference sequence the read is aligned to
     * @throws PicardException if the read's library was not declared in the header
     */
    @Override
    protected void acceptRead(SAMRecord rec, ReferenceSequence ref) {
        if (this.recordFilter.filterOut(rec)) {
            return;
        }

        // Resolve the library, using the placeholder when the read group or its library is absent.
        final SAMReadGroupRecord readGroup = rec.getReadGroup();
        final String library;
        if (readGroup == null) {
            library = UNKNOWN_LIBRARY;
        } else {
            library = CodeUtil.getOrElse(readGroup.getLibrary(), UNKNOWN_LIBRARY);
        }
        if (!this.libraries.contains(library)) {
            throw new PicardException("Record contains library that is missing from header: " + library);
        }

        final int contextFullLength = 2 * this.CONTEXT_SIZE + 1;
        final ArtifactCounter counter = this.artifactCounters.get(library);
        final byte[] readBases = rec.getReadBases();

        // Prefer original qualities when requested and present; otherwise use the current ones.
        byte[] readQuals = this.USE_OQ ? rec.getOriginalBaseQualities() : null;
        if (readQuals == null) {
            readQuals = rec.getBaseQualities();
        }

        for (final AlignmentBlock block : rec.getAlignmentBlocks()) {
            final int blockLength = block.getLength();
            for (int offset = 0; offset < blockLength; ++offset) {
                // Block starts are used as 1-based coordinates here, hence the "- 1" when indexing arrays.
                final int readPos = block.getReadStart() + offset;
                final int refPos = block.getReferenceStart() + offset;

                // NOTE(review): assumes the quality array covers every read position; a read stored
                // without qualities would fail this index - confirm what the upstream API returns.
                final byte qual = readQuals[readPos - 1];
                if (qual < this.MINIMUM_QUALITY_SCORE) {
                    continue;
                }

                final char readBase = Character.toUpperCase((char)readBases[readPos - 1]);
                if (readBase == 'N') {
                    continue;
                }

                // The guards stay in this order so the mask lookups happen exactly as before.
                if (this.intervalMask != null && !this.intervalMask.get(ref.getContigIndex(), refPos)) {
                    continue;
                }
                if (this.dbSnpMask != null && this.dbSnpMask.isDbSnpSite(ref.getName(), refPos)) {
                    continue;
                }

                // Skip positions whose context window would extend past either end of the reference.
                final int contextStartIndex = refPos - this.CONTEXT_SIZE - 1;
                if (contextStartIndex < 0) {
                    continue;
                }
                if (contextStartIndex + contextFullLength > ref.length()) {
                    continue;
                }

                final String context = this.getRefContext(ref, contextStartIndex, contextFullLength);
                if (context.contains("N")) {
                    continue;
                }

                counter.countRecord(context, readBase, rec);
            }
        }
    }

    /**
     * Returns the upper-cased reference bases in
     * {@code [contextStartIndex, contextStartIndex + contextFullLength)}.
     *
     * <p>The upper-cased string form of the current contig is cached and rebuilt only when the
     * contig index changes, so a string is not created from the whole contig for every base.
     *
     * @param ref               the reference sequence to read from
     * @param contextStartIndex 0-based index of the first base of the context
     * @param contextFullLength number of bases in the context
     * @return the requested slice of the upper-cased reference
     * @throws StringIndexOutOfBoundsException if the window is not inside the reference
     */
    private String getRefContext(ReferenceSequence ref, int contextStartIndex, int contextFullLength) {
        if (this.currentRefIndex != ref.getContigIndex()) {
            // Decode as US-ASCII (one char per byte, whatever the platform charset) and upper-case
            // with Locale.ROOT, so the string stays index-aligned with the byte array and the
            // result does not depend on the JVM's default charset or locale.
            this.currentRefString = new String(ref.getBases(), StandardCharsets.US_ASCII).toUpperCase(Locale.ROOT);
            this.currentRefIndex = ref.getContigIndex();
        }
        return this.currentRefString.substring(contextStartIndex, contextStartIndex + contextFullLength);
    }

    /**
     * Finalises every per-library counter, writes the four artifact metrics files, then derives
     * and writes the error summary file.
     *
     * <p>Summary metrics are always written in full. Detail metrics are restricted to
     * CONTEXTS_TO_PRINT when that set is non-empty. The error summary groups the written
     * pre-adapter detail rows by substitution and sums their reference and alternate base counts.
     */
    @Override
    protected void finish() {
        final MetricsFile<SequencingArtifactMetrics.PreAdapterSummaryMetrics, Integer> preAdapterSummaryMetricsFile = this.getMetricsFile();
        final MetricsFile<SequencingArtifactMetrics.PreAdapterDetailMetrics, Integer> preAdapterDetailMetricsFile = this.getMetricsFile();
        final MetricsFile<SequencingArtifactMetrics.BaitBiasSummaryMetrics, Integer> baitBiasSummaryMetricsFile = this.getMetricsFile();
        final MetricsFile<SequencingArtifactMetrics.BaitBiasDetailMetrics, Integer> baitBiasDetailMetricsFile = this.getMetricsFile();
        final MetricsFile<ErrorSummaryMetrics, Integer> errorSummaryMetricsFile = this.getMetricsFile();

        final boolean printAllContexts = this.CONTEXTS_TO_PRINT.isEmpty();
        for (final ArtifactCounter counter : this.artifactCounters.values()) {
            counter.finish();
            preAdapterSummaryMetricsFile.addAllMetrics(counter.getPreAdapterSummaryMetrics());
            baitBiasSummaryMetricsFile.addAllMetrics(counter.getBaitBiasSummaryMetrics());
            for (final SequencingArtifactMetrics.PreAdapterDetailMetrics detail : counter.getPreAdapterDetailMetrics()) {
                if (printAllContexts || this.CONTEXTS_TO_PRINT.contains(detail.CONTEXT)) {
                    preAdapterDetailMetricsFile.addMetric(detail);
                }
            }
            for (final SequencingArtifactMetrics.BaitBiasDetailMetrics detail : counter.getBaitBiasDetailMetrics()) {
                if (printAllContexts || this.CONTEXTS_TO_PRINT.contains(detail.CONTEXT)) {
                    baitBiasDetailMetricsFile.addMetric(detail);
                }
            }
        }

        // These four files must be written BEFORE the error summary is computed: the summary
        // step below rewrites REF_BASE/ALT_BASE on the pre-adapter detail beans in place.
        preAdapterDetailMetricsFile.write(this.preAdapterDetailsOut);
        preAdapterSummaryMetricsFile.write(this.preAdapterSummaryOut);
        baitBiasDetailMetricsFile.write(this.baitBiasDetailsOut);
        baitBiasSummaryMetricsFile.write(this.baitBiasSummaryOut);

        final List<SequencingArtifactMetrics.PreAdapterDetailMetrics> details = preAdapterDetailMetricsFile.getMetrics();
        if (!details.isEmpty()) {
            // Rows whose reference base is G or T are replaced by their complement, so each
            // substitution and its complementary form end up under the same key.
            for (final SequencingArtifactMetrics.PreAdapterDetailMetrics m : details) {
                if (m.REF_BASE == 'G' || m.REF_BASE == 'T') {
                    m.REF_BASE = (char)SequenceUtil.complement((byte)m.REF_BASE);
                    m.ALT_BASE = (char)SequenceUtil.complement((byte)m.ALT_BASE);
                }
            }

            final Map<String, List<SequencingArtifactMetrics.PreAdapterDetailMetrics>> byError =
                    details.stream().collect(Collectors.groupingBy(m -> m.REF_BASE + ">" + m.ALT_BASE));

            // TreeSet gives a stable, sorted row order in the output file.
            for (final String error : new TreeSet<String>(byError.keySet())) {
                final List<SequencingArtifactMetrics.PreAdapterDetailMetrics> ms = byError.get(error);
                final SequencingArtifactMetrics.PreAdapterDetailMetrics first = ms.get(0);
                final ErrorSummaryMetrics summary = new ErrorSummaryMetrics();
                summary.REF_BASE = first.REF_BASE;
                summary.ALT_BASE = first.ALT_BASE;
                summary.SUBSTITUTION = error;
                summary.REF_COUNT = ms.stream().mapToLong(m -> m.PRO_REF_BASES + m.CON_REF_BASES).sum();
                summary.ALT_COUNT = ms.stream().mapToLong(m -> m.PRO_ALT_BASES + m.CON_ALT_BASES).sum();
                summary.calculateDerivedFields();
                errorSummaryMetricsFile.addMetric(summary);
            }
        }
        errorSummaryMetricsFile.write(this.errorSummaryFile);
    }

    /**
     * Always answers {@code false}.
     * NOTE(review): presumably tells the SinglePassSamProgram base class not to pass reads
     * without a reference position to acceptRead() - confirm against the base class.
     *
     * @return {@code false}, unconditionally
     */
    @Override
    protected boolean usesNoRefReads() {
        return false;
    }
}

