/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.marbl.mhap.main;

import edu.umd.marbl.mhap.general.FastaData;
import edu.umd.marbl.mhap.general.Sequence;
import edu.umd.marbl.mhap.general.SequenceId;
import edu.umd.marbl.mhap.main.ValidBitVectorsFileBuilder;
import edu.umd.marbl.mhap.sketch.CountMin;
import edu.umd.marbl.mhap.sketch.KmerCounts;
import edu.umd.marbl.mhap.sketch.MinHashSearch;
import edu.umd.marbl.mhap.sketch.SequenceSketchStreamer;
import edu.umd.marbl.mhap.utils.MhapRuntimeException;
import edu.umd.marbl.mhap.utils.PackageInfo;
import edu.umd.marbl.mhap.utils.ParseOptions;
import edu.umd.marbl.mhap.utils.Utils;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.util.OpenBitSet;

public final class MhapMain {
    /** Second-stage filter score cutoff; read from {@code --threshold}. */
    private final double acceptScore;
    /** K-mer filter built from the {@code -f} file, or {@code null} when no filter file is given. */
    private final HashSet<Long> filter;
    /**
     * Valid k-mer hash bit set. Static: written by the constructor / {@link #setupBitVectorsFile}
     * and reset to {@code null} when {@code --valid-kmers} is not supplied.
     */
    private static OpenBitSet validKmersHashes;
    /** Path given with {@code -s}. */
    private final String inFile;
    /** K-mer size; read from {@code -k}. */
    private final int kmerSize;
    /** Read from {@code --max-shift}. */
    private final double maxShift;
    /** Read from {@code --min-store-length}. */
    private final int minStoreLength;
    /** Read from {@code --no-self}; when set, the self comparison is skipped if a {@code -q} path is given. */
    private final boolean noSelf;
    /** Read from {@code --num-hashes}. */
    private final int numHashes;
    /** Read from {@code --num-min-matches}. */
    private final int numMinMatches;
    /** Worker thread count; read from {@code --num-threads}. */
    protected final int numThreads;
    /** Path given with {@code -p}; a non-empty value selects the binary-conversion mode of {@link #computeMain()}. */
    private final String processFile;
    /** Read from {@code --max-seq-size}; passed through to the sketch streamer. */
    private final int subSequenceSize;
    /** Path given with {@code -q}. */
    private final String toFile;
    /** Read from {@code --weighted}. */
    private final boolean weighted;
    /** Path given with {@code --valid-kmers}; empty when the option is not used. */
    private final String validKmersFile;
    /** Only accessed through {@link #getOptions()} / {@link #setOptions(ParseOptions)}; never assigned elsewhere in this class. */
    private static ParseOptions options;
    /** K-mer counts computed from the {@code -s} input when no {@code -f} filter file is given; otherwise {@code null}. */
    private final KmerCounts kmerCounter;
    // Default option values. main() registers the options with the same literal values
    // (presumably these constants were inlined at compile time - the literals, not the
    // names, are what appear in the decompiled method bodies).
    private static final double DEFAULT_ACCEPT_SCORE = 0.04;
    private static final double DEFAULT_FILTER_CUTOFF = 1.0E-5;
    private static final int DEFAULT_KMER_SIZE = 16;
    private static final double DEFAULT_MAX_SHIFT_PERCENT = 0.2;
    private static final int DEFAULT_MIN_STORE_LENGTH = 0;
    private static final int DEFAULT_NUM_MIN_MATCHES = 3;
    // Assigned in the static initializer at the bottom of the class.
    private static final int DEFAULT_NUM_THREADS;
    private static final int DEFAULT_NUM_WORDS = 1024;
    private static final int DEFAULT_ORDERED_KMER_SIZE = 12;
    private static final int DEFAULT_SUB_SEQUENCE_SIZE = 100000;

    /**
     * Command-line entry point.
     *
     * <p>Registers all options, parses {@code stringArray}, applies the PacBio presets,
     * validates the resulting settings and then either regenerates a k-mer file from a
     * bit-vector file ({@code --generate-kmers-from-bitvector}) or runs
     * {@link #computeMain()}.
     *
     * <p>Validation failures print a message to standard output and terminate the JVM
     * with exit status 1; a failed/aborted option parse terminates with status 0.
     *
     * @param stringArray raw command-line arguments
     * @throws Exception propagated from option handling, the constructor or the bit-vector builder
     */
    public static void main(String[] stringArray) throws Exception {
        Locale.setDefault(Locale.US);
        ParseOptions parseOptions = new ParseOptions();
        parseOptions.addStartTextLine("MHAP: MinHash Alignment Protocol. A tool for overlapping long-read sequences in bioinformatics.");
        parseOptions.addStartTextLine("\tVersion: " + PackageInfo.VERSION + ", Build time: " + PackageInfo.BUILD_TIME);
        parseOptions.addStartTextLine("\tUsage 1 (direct execution): java -server -Xmx<memory> -jar <MHAP jar> -s<fasta/dat from/self file> [-q<fasta/dat to file>] [-f<kmer filter list, must be sorted>]");
        parseOptions.addStartTextLine("\tUsage 2 (generate precomputed binaries): java -server -Xmx<memory> -jar <MHAP jar> -p<directory of fasta files> -q <output directory> [-f<kmer filter list, must be sorted>]");
        parseOptions.addOption("-s", "Usage 1 only. The FASTA or binary dat file (see Usage 2) of reads that will be stored in a box, and that all subsequent reads will be compared to.", "");
        parseOptions.addOption("-q", "Usage 1: The FASTA file of reads, or a directory of files, that will be compared to the set of reads in the box (see -s). Usage 2: The output directory for the binary formatted dat files.", "");
        parseOptions.addOption("-p", "Usage 2 only. The directory containing FASTA files that should be converted to binary format for storage.", "");
        parseOptions.addOption("-f", "k-mer filter file used for filtering out highly repetative k-mers. Must be sorted in descending order of frequency (second column).", "");
        parseOptions.addOption("-k", "[int], k-mer size used for MinHashing. The k-mer size for second stage filter is always set to 12, and currently cannot be modified.", 16);
        parseOptions.addOption("--num-hashes", "[int], number of min-mers to be used in MinHashing.", 1024);
        parseOptions.addOption("--threshold", "[double], the threshold similarity score cutoff for the second stage sort-merge filter. This is based on the average number of k-mers matching in the overlapping region.", 0.04);
        parseOptions.addOption("--filter-threshold", "[double], the cutoff at which the k-mer in the k-mer filter file is considered repetitive. This value for a specific k-mer is specified in the second column in the filter file. If no filter file is provided, this option is ignored.", 1.0E-5);
        parseOptions.addOption("--max-seq-size", "[int], not currently used.", 100000);
        parseOptions.addOption("--max-shift", "[double], region size to the left and right of the estimated overlap, as derived from the median shift and sequence length, where a k-mer matches are still considered valid. Second stage filter only.", 0.2);
        parseOptions.addOption("--num-min-matches", "[int], minimum # min-mer that must be shared before computing second stage filter. Any sequences below that value are considered non-overlapping.", 3);
        parseOptions.addOption("--num-threads", "[int], number of threads to use for computation. Typically set to 2 x #cores.", DEFAULT_NUM_THREADS);
        parseOptions.addOption("--weighted", "Perform weighted MinHashing.", false);
        parseOptions.addOption("--min-store-length", "[int], The minimum length of the read that is stored in the box. Used to filter out short reads from FASTA file.", 0);
        parseOptions.addOption("--no-self", "Do not compute the overlaps between sequences inside a box. Should be used when the to and from sequences are coming from different files.", false);
        parseOptions.addOption("--store-full-id", "Store full IDs as seen in FASTA file, rather than storing just the sequence position in the file. Some FASTA files have long IDS, slowing output of results. IDs not stored in compressed files.", false);
        parseOptions.addOption("--pacbio_fast", "Set all the parameters for the PacBio fast setting. This is the current best guidance, and could change at any time without warning.", false);
        parseOptions.addOption("--pacbio_sensitive", "Set all the parameters for the PacBio sensitive settings. This is the current best guidance, and could change at any time without warning.", false);
        parseOptions.addOption("--pacbio_experimental", "Set all the parameters for the PacBio experimental settings. This is the current best guidance, and could change at any time without warning.", false);
        parseOptions.addOption("--valid-kmers", "File of valid kmers to be used as filter for the hashes", "");
        parseOptions.addOption("--generate-kmers-from-bitvector", "To be used with the --valid-kmers option. Takes the kmers file, generate the bitvectors file and create a new kmers file. Used for tests. Does not run MHAP, only creates kmers file", false);
        if (!parseOptions.process(stringArray)) {
            System.exit(0);
        }

        applyPacBioPresets(parseOptions);
        validateOptions(parseOptions);

        SequenceId.STORE_FULL_ID = parseOptions.get("--store-full-id").getBoolean();
        System.err.println("Running with these settings:");
        System.err.println("Version = " + PackageInfo.VERSION);
        System.err.println("Build time = " + PackageInfo.BUILD_TIME);
        System.err.println(parseOptions);

        if (parseOptions.get("--generate-kmers-from-bitvector").getBoolean()) {
            // Test utility mode: rebuild a k-mer file from "<valid-kmers>.bitvector"; MHAP itself is not run.
            ValidBitVectorsFileBuilder validBitVectorsFileBuilder = new ValidBitVectorsFileBuilder();
            String kmersPath = parseOptions.get("--valid-kmers").getString();
            String bitVectorPath = kmersPath + ".bitvector";
            int kmerLength = parseOptions.get("-k").getInteger();
            System.err.println("Running the creation of the kmers file for tests.");
            validBitVectorsFileBuilder.createKmersFileFromBinaryBitVectors(kmersPath, bitVectorPath, kmerLength);
        } else {
            MhapMain mhapMain = new MhapMain(parseOptions);
            mhapMain.computeMain();
        }
    }

    /**
     * Applies the {@code --pacbio_*} preset values to every option the user did not set explicitly.
     * At most one preset may be selected; selecting more than one prints the help menu and exits
     * with status 1 (previously only the fast+sensitive pair was rejected).
     */
    private static void applyPacBioPresets(ParseOptions parseOptions) {
        boolean fast = parseOptions.get("--pacbio_fast").getBoolean();
        boolean sensitive = parseOptions.get("--pacbio_sensitive").getBoolean();
        boolean experimental = parseOptions.get("--pacbio_experimental").getBoolean();
        int selected = (fast ? 1 : 0) + (sensitive ? 1 : 0) + (experimental ? 1 : 0);
        if (selected == 0) {
            return;
        }
        if (selected > 1) {
            System.out.println("Two default sequence type parameters cannot be set at the same time.");
            System.out.println(parseOptions.helpMenuString());
            System.exit(1);
        }
        if (!parseOptions.get("-k").isSet()) {
            parseOptions.setOptions("-k", experimental ? 14 : 16);
        }
        if (!parseOptions.get("--num-min-matches").isSet()) {
            parseOptions.setOptions("--num-min-matches", experimental ? 1 : 3);
        }
        if (!parseOptions.get("--num-hashes").isSet()) {
            // Exactly one preset is active here: fast uses 512, sensitive and experimental use 1256.
            parseOptions.setOptions("--num-hashes", fast ? 512 : 1256);
        }
    }

    /** Checks required options, path existence and numeric ranges; exits with status 1 on the first violation. */
    private static void validateOptions(ParseOptions parseOptions) {
        boolean hasStoreFile = !parseOptions.get("-s").getString().isEmpty();
        boolean hasProcessDir = !parseOptions.get("-p").getString().isEmpty();
        if (!hasStoreFile && !hasProcessDir) {
            System.out.println("Please set the -s or the -p options. See options below:");
            System.out.println(parseOptions.helpMenuString());
            System.exit(1);
        }
        if (hasProcessDir && parseOptions.get("-q").getString().isEmpty()) {
            System.out.println("Please set the -q option. See options below:");
            System.out.println(parseOptions.helpMenuString());
            System.exit(1);
        }
        exitIfPathMissing(parseOptions, "-p");
        exitIfPathMissing(parseOptions, "-s");
        exitIfPathMissing(parseOptions, "-q");
        exitIfPathMissing(parseOptions, "-f");
        exitIfPathMissing(parseOptions, "--valid-kmers");
        if (parseOptions.get("--num-threads").getInteger() <= 0) {
            exitWithError("Number of threads must be positive.");
        }
        if (parseOptions.get("-k").getInteger() <= 0) {
            exitWithError("k-mer size must be positive.");
        }
        if (parseOptions.get("--num-min-matches").getInteger() <= 0) {
            exitWithError("Minimum number of matches must be positive.");
        }
        if (parseOptions.get("--min-store-length").getInteger() < 0) {
            exitWithError("The minimum read length stored must be >=0.");
        }
        if (parseOptions.get("--max-shift").getDouble() < -1.0) {
            exitWithError("The minimum shift must be greater than -1.");
        }
        if (parseOptions.get("--threshold").getDouble() < 0.0) {
            exitWithError("The second stage filter cutoff must be >=0.");
        }
        // The generation mode derives both of its file names from --valid-kmers, so an empty value cannot work.
        if (parseOptions.get("--generate-kmers-from-bitvector").getBoolean()
                && parseOptions.get("--valid-kmers").getString().isEmpty()) {
            exitWithError("The --generate-kmers-from-bitvector option requires the --valid-kmers option to be set.");
        }
    }

    /** Exits with status 1 when the named option holds a non-empty path that does not exist on disk. */
    private static void exitIfPathMissing(ParseOptions parseOptions, String optionName) {
        String path = parseOptions.get(optionName).getString();
        if (!path.isEmpty() && !new File(path).exists()) {
            exitWithError("Could not find requested file/folder: " + path);
        }
    }

    /** Prints {@code message} to standard output (where the original checks reported) and exits with status 1. */
    private static void exitWithError(String message) {
        System.out.println(message);
        System.exit(1);
    }

    /**
     * Builds a run configuration from parsed command-line options.
     *
     * <p>Besides copying option values into fields, the constructor performs I/O:
     * <ul>
     *   <li>if {@code --valid-kmers} is set, loads (or creates) the companion
     *       {@code <file>.bitvector} via {@link #setupBitVectorsFile}; otherwise the static
     *       valid-k-mer bit set is reset to {@code null};</li>
     *   <li>if {@code -f} is set, reads the k-mer filter file; otherwise computes k-mer counts
     *       from the {@code -s} input with {@link #recordFastaKmerCounts}.</li>
     * </ul>
     *
     * <p>All progress messages go to standard error; standard output is left untouched here
     * because {@link #computeMain()} flushes it after every search, i.e. it appears to carry the results.
     *
     * @param parseOptions parsed options; must contain every option registered by {@code main}
     * @throws MhapRuntimeException if the k-mer filter file cannot be parsed
     * @throws Exception propagated from bit-vector setup or k-mer counting
     */
    public MhapMain(ParseOptions parseOptions) throws Exception {
        this.processFile = parseOptions.get("-p").getString();
        this.inFile = parseOptions.get("-s").getString();
        this.toFile = parseOptions.get("-q").getString();
        this.noSelf = parseOptions.get("--no-self").getBoolean();
        this.numThreads = parseOptions.get("--num-threads").getInteger();
        this.subSequenceSize = parseOptions.get("--max-seq-size").getInteger();
        this.numHashes = parseOptions.get("--num-hashes").getInteger();
        this.kmerSize = parseOptions.get("-k").getInteger();
        this.numMinMatches = parseOptions.get("--num-min-matches").getInteger();
        this.minStoreLength = parseOptions.get("--min-store-length").getInteger();
        this.maxShift = parseOptions.get("--max-shift").getDouble();
        this.acceptScore = parseOptions.get("--threshold").getDouble();
        this.weighted = parseOptions.get("--weighted").getBoolean();
        this.validKmersFile = parseOptions.get("--valid-kmers").getString();

        // Diagnostic only: written to stderr so it cannot end up inside the result stream on stdout.
        System.err.println("Reading");
        if (!this.validKmersFile.isEmpty()) {
            String bitVectorPath = this.validKmersFile + ".bitvector";
            this.setupBitVectorsFile(bitVectorPath, new File(bitVectorPath));
        } else {
            validKmersHashes = null;
        }

        String filterFile = parseOptions.get("-f").getString();
        double filterThreshold = parseOptions.get("--filter-threshold").getDouble();
        if (!filterFile.isEmpty()) {
            long startTime = System.nanoTime();
            System.err.println("Reading in filter file " + filterFile + ".");
            try {
                this.filter = Utils.createKmerFilter(filterFile, filterThreshold, this.kmerSize, 0);
            }
            catch (Exception exception) {
                throw new MhapRuntimeException("Could not parse k-mer filter file.", exception);
            }
            System.err.println("Time (s) to read filter file: " + (double)(System.nanoTime() - startTime) * 1.0E-9);
            this.kmerCounter = null;
        } else {
            this.filter = null;
            // NOTE(review): in the -p (conversion) mode -s may be empty, so inFile is "" here;
            // confirm that k-mer counting over an empty path is intended/handled.
            this.kmerCounter = this.recordFastaKmerCounts(this.inFile, filterThreshold);
        }
    }

    /**
     * Populates the static valid-k-mer bit set.
     *
     * <p>When {@code file} already exists the bit set is read back from {@code string}.
     * Otherwise it is built from the valid-k-mers file configured on this instance and then
     * written out to {@code string}.
     *
     * @param string path of the bit-vector file to read or create
     * @param file   the same bit-vector file as a {@link File}, used only for the existence check
     * @throws Exception propagated from the k-mer utilities or the bit-vector builder
     */
    public void setupBitVectorsFile(String string, File file) throws Exception {
        final ValidBitVectorsFileBuilder bitVectorBuilder = new ValidBitVectorsFileBuilder();

        // Fast path: a previously generated bit-vector file is simply loaded.
        if (file.exists()) {
            validKmersHashes = bitVectorBuilder.readValidBitVectorsFile(string);
            return;
        }

        // Slow path: derive the hashes from the k-mer list, then persist them.
        System.err.println("Reading valid kmer file " + this.validKmersFile + ".");
        Utils.createValidKmerFilter(this.validKmersFile, this.kmerSize, 0);
        validKmersHashes = Utils.getValidKmerHashes();
        bitVectorBuilder.createValidBitVectorsFile(string, validKmersHashes);
    }

    /**
     * Counts k-mer hashes over every sequence of a FASTA file and of its reverse complement.
     *
     * <p>{@code numThreads} workers pull sequences from a shared {@link FastaData} reader and
     * add each hash to a shared {@link CountMin} sketch. Progress is reported on standard
     * error every 5000 counted sequences.
     *
     * <p>Changes from the previous version: the file named by {@code string} is now actually
     * used (it was ignored in favour of the instance's {@code -s} file); a failure in any
     * worker is rethrown instead of leaving a silently incomplete count; and an interrupt
     * restores the thread's interrupt flag and keeps the cause.
     *
     * @param string path of the FASTA file to count; {@code null} falls back to the instance's {@code -s} file
     * @param d      filter cutoff handed to the resulting {@link KmerCounts}
     * @return the counts together with the number of sequences processed (forward and reverse)
     * @throws IOException if the FASTA reader cannot be created
     * @throws MhapRuntimeException if a worker fails or the calling thread is interrupted
     */
    public KmerCounts recordFastaKmerCounts(String string, double d) throws IOException {
        System.err.println("Computing k-mer counts...");
        final String fastaPath = string != null ? string : this.inFile;
        final FastaData fastaData = new FastaData(fastaPath, 0);
        final CountMin<Long> countMin = new CountMin<Long>(1.0E-5, 0.99999, 0);
        final AtomicInteger atomicInteger = new AtomicInteger();
        ExecutorService executorService = Executors.newFixedThreadPool(this.numThreads);
        ArrayList<Future<?>> workers = new ArrayList<Future<?>>(this.numThreads);
        for (int i = 0; i < this.numThreads; ++i) {
            Runnable runnable = new Runnable(){

                @Override
                public void run() {
                    try {
                        Sequence sequence = fastaData.dequeue();
                        while (sequence != null) {
                            long[] forwardHashes = Utils.computeSequenceHashesLong(sequence.getString(), MhapMain.this.kmerSize, 0);
                            for (long hash : forwardHashes) {
                                countMin.add(hash);
                            }
                            long[] reverseHashes = Utils.computeSequenceHashesLong(sequence.getReverseCompliment().getString(), MhapMain.this.kmerSize, 0);
                            for (long hash : reverseHashes) {
                                countMin.add(hash);
                            }
                            // Each dequeued read contributes two sequences: forward and reverse complement.
                            int n = atomicInteger.addAndGet(2);
                            if (n % 5000 == 0) {
                                System.err.println("Kmers counted for " + n + " sequences (including reverse compliment)...");
                            }
                            sequence = fastaData.dequeue();
                        }
                    }
                    catch (IOException iOException) {
                        throw new MhapRuntimeException(iOException);
                    }
                }
            };
            // submit() rather than execute(): the Future retains any exception thrown by the worker.
            workers.add(executorService.submit(runnable));
        }
        executorService.shutdown();
        try {
            for (Future<?> worker : workers) {
                worker.get();
            }
        }
        catch (InterruptedException interruptedException) {
            executorService.shutdownNow();
            Thread.currentThread().interrupt();
            throw new MhapRuntimeException("Unable to finish all tasks.", interruptedException);
        }
        catch (ExecutionException executionException) {
            executorService.shutdownNow();
            throw new MhapRuntimeException("K-mer counting failed in a worker thread.", executionException);
        }
        System.err.println("Computed k-mer counts for " + atomicInteger.get() + " sequences.");
        return new KmerCounts(countMin, atomicInteger.get(), d);
    }

    /**
     * Runs the mode selected by the options.
     *
     * <p><b>Conversion mode</b> ({@code -p} set): every input file (or every non-hidden entry
     * of the input directory) is sketched and written as {@code <name>.dat} into the
     * {@code -q} directory, then the method returns.
     *
     * <p><b>Search mode</b>: the {@code -s} file is sketched and indexed. Without {@code -q}
     * the index is searched against itself. With {@code -q} the self search runs unless
     * {@code --no-self} is set, and then each {@code -q} file (sorted) is searched against
     * the index. Timing and summary statistics are written to standard error.
     *
     * <p>A directory that cannot be listed now raises a descriptive
     * {@link MhapRuntimeException} instead of a {@code NullPointerException}.
     *
     * @throws IOException propagated from reading, sketching or writing sequence files
     * @throws MhapRuntimeException if a required file or directory is missing or unreadable
     */
    public void computeMain() throws IOException {
        long totalStart = System.nanoTime();
        long stepStart = System.nanoTime();
        long hashStart = System.nanoTime();
        if (this.processFile != null && !this.processFile.isEmpty()) {
            System.err.println("Processing FASTA files for binary compression...");
            File sourcePath = new File(this.processFile);
            if (!sourcePath.exists()) {
                throw new MhapRuntimeException("Process file does not exist.");
            }
            if (this.toFile == null || this.toFile.isEmpty()) {
                throw new MhapRuntimeException("Target directory must be defined.");
            }
            File targetDir = new File(this.toFile);
            if (!targetDir.exists() || !targetDir.isDirectory()) {
                throw new MhapRuntimeException("Target directory doesn't exit.");
            }
            for (File fastaFile : listVisibleFiles(sourcePath)) {
                stepStart = System.nanoTime();
                SequenceSketchStreamer streamer = this.getSequenceHashStreamer(fastaFile.getAbsolutePath(), 0);
                // Output name: input name with its last extension replaced by ".dat".
                String baseName = fastaFile.getName();
                int dot = baseName.lastIndexOf('.');
                if (dot > 0) {
                    baseName = baseName.substring(0, dot);
                }
                String datPath = targetDir.getPath() + File.separator + baseName + ".dat";
                streamer.writeToBinary(datPath, false, this.numThreads);
                System.err.println("Processed " + streamer.getNumberProcessed() + " sequences (fwd and rev).");
                System.err.println("Read, hashed, and stored file " + fastaFile.getPath() + " to " + datPath + ".");
                System.err.println("Time (s): " + (double)(System.nanoTime() - stepStart) * 1.0E-9);
            }
            System.err.println("Total time (s): " + (double)(System.nanoTime() - totalStart) * 1.0E-9);
            return;
        }

        System.err.println("Processing files for storage in reverse index...");
        // Running offset handed to each new streamer.
        int offset = 0;
        SequenceSketchStreamer streamer = this.getSequenceHashStreamer(this.inFile, offset);
        MinHashSearch minHashSearch = this.getMatchSearch(streamer);
        offset += streamer.getNumberProcessed() / 2;
        System.err.println("Processed " + streamer.getNumberProcessed() + " unique sequences (fwd and rev).");
        System.err.println("Time (s) to read and hash from file: " + (double)(System.nanoTime() - hashStart) * 1.0E-9);
        long scoringStart = System.nanoTime();
        if (this.toFile == null || this.toFile.isEmpty()) {
            stepStart = System.nanoTime();
            minHashSearch.findMatches();
            System.err.println("Time (s) to score and output to self: " + (double)(System.nanoTime() - stepStart) * 1.0E-9);
        } else {
            File toPath = new File(this.toFile);
            if (!toPath.exists()) {
                throw new MhapRuntimeException("To-file does not exist.");
            }
            ArrayList<File> toFiles = listVisibleFiles(toPath);
            Collections.sort(toFiles);
            stepStart = System.nanoTime();
            if (!this.noSelf) {
                minHashSearch.findMatches();
                System.out.flush();
                System.err.println("Time (s) to score and output to self: " + (double)(System.nanoTime() - stepStart) * 1.0E-9);
            }
            for (File queryFile : toFiles) {
                streamer = this.getSequenceHashStreamer(queryFile.getAbsolutePath(), offset);
                System.err.println("Opened fasta file " + queryFile.getCanonicalPath() + ".");
                stepStart = System.nanoTime();
                minHashSearch.findMatches(streamer);
                System.out.flush();
                offset += streamer.getNumberProcessed();
                System.err.println("Processed " + streamer.getNumberProcessed() + " to sequences.");
                System.err.println("Time (s) to score, hash to-file, and output: " + (double)(System.nanoTime() - stepStart) * 1.0E-9);
            }
        }
        System.out.flush();
        System.err.println("Total scoring time (s): " + (double)(System.nanoTime() - scoringStart) * 1.0E-9);
        System.err.println("Total time (s): " + (double)(System.nanoTime() - totalStart) * 1.0E-9);
        this.outputFinalStat(minHashSearch);
    }

    /**
     * Returns {@code path} itself when it is not a directory, otherwise its entries whose
     * names do not start with a dot, in the order reported by the file system.
     */
    private static ArrayList<File> listVisibleFiles(File path) {
        ArrayList<File> files = new ArrayList<File>();
        if (!path.isDirectory()) {
            files.add(path);
            return files;
        }
        File[] entries = path.listFiles(new FilenameFilter(){

            @Override
            public boolean accept(File dir, String name) {
                return !name.startsWith(".");
            }
        });
        // listFiles yields null on an I/O error or when the directory cannot be read.
        if (entries == null) {
            throw new MhapRuntimeException("Could not list files in directory: " + path.getPath());
        }
        Collections.addAll(files, entries);
        return files;
    }

    /**
     * Creates the search index over the sequences supplied by the given streamer, configured
     * with this instance's hash count, minimum-match count, thread count, minimum store
     * length, maximum shift and acceptance score.
     *
     * @param sequenceSketchStreamer source of the sketches to index
     * @return the newly constructed search structure
     * @throws IOException propagated from the {@link MinHashSearch} constructor
     */
    public MinHashSearch getMatchSearch(SequenceSketchStreamer sequenceSketchStreamer) throws IOException {
        final MinHashSearch search = new MinHashSearch(
                sequenceSketchStreamer,
                this.numHashes,
                this.numMinMatches,
                this.numThreads,
                false,
                this.minStoreLength,
                this.maxShift,
                this.acceptScore);
        return search;
    }

    /**
     * Opens a sketch streamer for a sequence file.
     *
     * <p>Paths ending in {@code .dat} are opened with the two-argument streamer constructor;
     * any other path is opened with the full set of sketching parameters held by this instance.
     *
     * @param string path of the file to stream
     * @param n      offset value forwarded to the streamer
     * @return a streamer over the file's sequences
     * @throws IOException propagated from the streamer constructor
     */
    public SequenceSketchStreamer getSequenceHashStreamer(String string, int n) throws IOException {
        if (string.endsWith(".dat")) {
            return new SequenceSketchStreamer(string, n);
        }
        // 12 equals the class's DEFAULT_ORDERED_KMER_SIZE constant.
        return new SequenceSketchStreamer(
                string,
                this.kmerSize,
                this.numHashes,
                this.subSequenceSize,
                12,
                this.filter,
                this.kmerCounter,
                this.weighted,
                n);
    }

    /**
     * Prints the end-of-run search statistics to standard error and flushes the stream.
     *
     * <p>Ratios are computed in floating point, so a zero denominator prints as
     * {@code NaN} or {@code Infinity} rather than raising an exception.
     *
     * @param minHashSearch the search whose counters are reported
     */
    protected void outputFinalStat(MinHashSearch minHashSearch) {
        System.err.println("MinHash search time (s): " + minHashSearch.getMinHashSearchTime());
        System.err.println("Total matches found: " + minHashSearch.getMatchesProcessed());

        // Counters widened to double once and reused in the ratios below.
        final double matches = (double)minHashSearch.getMatchesProcessed();
        final double lookups = (double)minHashSearch.getNumberSequencesSearched();
        final double elements = (double)minHashSearch.getNumberElementsProcessed();
        final double hits = (double)minHashSearch.getNumberSequencesHit();
        final double fullyCompared = (double)minHashSearch.getNumberSequencesFullyCompared();
        final double possibleHits = (double)((long)minHashSearch.size() * minHashSearch.getNumberSequencesSearched());

        System.err.println("Average number of matches per lookup: " + matches / lookups);
        System.err.println("Average number of table elements processed per lookup: " + elements / lookups);
        System.err.println("Average number of table elements processed per match: " + elements / matches);
        System.err.println("Average % of hashed sequences hit per lookup: " + hits / possibleHits * 100.0);
        System.err.println("Average % of hashed sequences hit that are matches: " + matches / hits * 100.0);
        System.err.println("Average % of hashed sequences fully compared that are matches: " + matches / fullyCompared * 100.0);
        System.err.flush();
    }

    /**
     * Returns the static valid-k-mer bit set.
     *
     * @return the bit set installed by {@link #setupBitVectorsFile}, or {@code null} when the
     *         most recently constructed instance had no valid-k-mers file (or none was built yet)
     */
    public static OpenBitSet getValidKmersHashes() {
        return validKmersHashes;
    }

    /**
     * Returns the statically stored options.
     *
     * @return the value last passed to {@link #setOptions(ParseOptions)}; {@code null} if it was
     *         never called (nothing else in this class assigns the field)
     */
    public static ParseOptions getOptions() {
        return options;
    }

    /**
     * Stores the given options in the static field read by {@link #getOptions()}.
     *
     * @param parseOptions the options to store; not validated
     */
    public static void setOptions(ParseOptions parseOptions) {
        options = parseOptions;
    }

    // Default thread count: twice the number of processors reported by the runtime at class-load time.
    static {
        DEFAULT_NUM_THREADS = Runtime.getRuntime().availableProcessors() * 2;
    }
}

