/*
 * Decompiled with CFR 0.152.
 */
package jsat.text;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import jsat.linear.SparseVector;
import jsat.linear.Vec;
import jsat.text.TextVectorCreator;
import jsat.text.tokenizer.Tokenizer;
import jsat.text.wordweighting.WordWeighting;

/**
 * A {@link TextVectorCreator} that turns a piece of text into a vector of
 * per-word occurrence counts and then hands that vector to a
 * {@link WordWeighting}.
 *
 * <p>The vector has one position per entry in the supplied word index; tokens
 * that have no entry in the index are ignored.
 */
public class BasicTextVectorCreator
implements TextVectorCreator {
    private static final long serialVersionUID = -8620485679300539556L;
    /** Splits raw input text into tokens. */
    private final Tokenizer tokenizer;
    /** Maps each known word to its position in the produced vector. */
    private final Map<String, Integer> wordIndex;
    /** Applied to the raw count vector before it is returned. */
    private final WordWeighting weighting;

    /**
     * Creates a new vector creator. The arguments are stored by reference; no
     * copies are made.
     *
     * @param tokenizer the tokenizer used to break input text into tokens
     * @param wordIndex the mapping from a word to its vector index; its size
     *                  determines the length of every vector produced
     * @param weighting the weighting applied to each count vector
     */
    public BasicTextVectorCreator(Tokenizer tokenizer, Map<String, Integer> wordIndex, WordWeighting weighting) {
        this.tokenizer = tokenizer;
        this.wordIndex = wordIndex;
        this.weighting = weighting;
    }

    /**
     * Convenience overload that allocates a fresh work buffer and token list
     * and delegates to {@link #newText(String, StringBuilder, List)}.
     *
     * @param text the text to convert
     * @return the weighted vector for {@code text}
     */
    @Override
    public Vec newText(String text) {
        return this.newText(text, new StringBuilder(), new ArrayList<String>());
    }

    /**
     * Tokenizes {@code input} and builds a weighted vector from the tokens
     * found in {@code storageSpace} afterwards.
     *
     * @param input        the text to convert
     * @param workSpace    scratch buffer passed through to the tokenizer
     * @param storageSpace list passed to the tokenizer; its contents after
     *                     tokenization are the tokens that get counted
     * @return a vector of length {@code wordIndex.size()} in which each known
     *         token has been counted and the weighting has been applied
     */
    @Override
    public Vec newText(String input, StringBuilder workSpace, List<String> storageSpace) {
        this.tokenizer.tokenize(input, workSpace, storageSpace);
        SparseVector vec = new SparseVector(this.wordIndex.size());
        for (String word : storageSpace) {
            // One lookup instead of containsKey + get. A null result covers both
            // "word not in the index" and "word mapped to null", so a null-valued
            // entry is skipped rather than failing on unboxing.
            Integer index = this.wordIndex.get(word);
            if (index == null) {
                continue;
            }
            vec.increment(index.intValue(), 1.0);
        }
        this.weighting.applyTo(vec);
        return vec;
    }
}

