/*
 * Decompiled with CFR 0.152.
 */
package jsat.text.wordweighting;

import java.util.List;
import jsat.linear.Vec;
import jsat.text.wordweighting.WordWeighting;

/**
 * Word weighting that scales each term count by a term-frequency factor
 * multiplied by an inverse-document-frequency factor.
 *
 * <p>The inverse document frequency of the term at {@code index} is computed as
 * {@code log(totalDocuments / df.get(index))}, where both quantities are the
 * ones handed to {@link #setWeight(List, List)}. The term-frequency factor is
 * selected by the {@link TermFrequencyWeight} given at construction.
 *
 * <p>Note: for {@link TermFrequencyWeight#DOC_NORMALIZED}, {@link #applyTo(Vec)}
 * stores the maximum of the vector being processed in an instance field that
 * {@link #indexFunc(double, int)} then reads, so concurrent {@code applyTo}
 * calls on the same instance would interfere with each other in that mode.
 */
public class TfIdf extends WordWeighting {
    private static final long serialVersionUID = 5749882005002311735L;

    /** Number of documents passed to the last {@link #setWeight(List, List)} call. */
    private double totalDocuments;

    /** Per-index document frequencies; {@code null} until {@link #setWeight(List, List)} is called. */
    private List<Integer> df;

    /** Maximum of the vector currently being weighted; only maintained for DOC_NORMALIZED. */
    private double docMax = 0.0;

    /** Strategy used to turn a raw term count into the term-frequency factor. */
    private TermFrequencyWeight tfWeighting;

    /**
     * Creates a TF-IDF weighting that uses {@link TermFrequencyWeight#LOG}.
     */
    public TfIdf() {
        this(TermFrequencyWeight.LOG);
    }

    /**
     * Creates a TF-IDF weighting with the given term-frequency strategy.
     *
     * @param tfWeighting how raw term counts are converted to the term-frequency factor
     * @throws NullPointerException if {@code tfWeighting} is {@code null}
     */
    public TfIdf(TermFrequencyWeight tfWeighting) {
        // Fail fast: a null strategy would otherwise only surface later as an
        // unexplained NullPointerException from the switch in indexFunc.
        if (tfWeighting == null) {
            throw new NullPointerException("tfWeighting must not be null");
        }
        this.tfWeighting = tfWeighting;
    }

    /**
     * Records the corpus statistics used for the inverse-document-frequency term.
     *
     * @param allDocuments the documents of the corpus; only its size is used
     * @param df document frequency for each index; the list is kept by reference, not copied
     */
    @Override
    public void setWeight(List<? extends Vec> allDocuments, List<Integer> df) {
        this.totalDocuments = allDocuments.size();
        this.df = df;
    }

    /**
     * Computes the TF-IDF weight for a single vector entry.
     *
     * @param value the raw value stored at {@code index}
     * @param index the index of the entry; negative indices yield {@code 0.0}
     * @return {@code 0.0} when {@code index} is negative, {@code value} is zero, or the
     *         recorded document frequency for {@code index} is not positive; otherwise
     *         the term-frequency factor multiplied by {@code log(totalDocuments / df)}
     * @throws IllegalStateException if a non-zero value is weighted before
     *         {@link #setWeight(List, List)} has supplied the document frequencies
     */
    @Override
    public double indexFunc(double value, int index) {
        if (index < 0 || value == 0.0) {
            return 0.0;
        }
        requireInitialized();

        int documentFrequency = this.df.get(index);
        if (documentFrequency <= 0) {
            // log(N / 0) is +Infinity, which would put an infinite weight in the
            // vector. An index with no recorded document frequency gets no weight.
            return 0.0;
        }

        double tf;
        switch (this.tfWeighting) {
            case BOOLEAN: {
                tf = 1.0;
                break;
            }
            case LOG: {
                tf = 1.0 + Math.log(value);
                break;
            }
            case DOC_NORMALIZED: {
                // docMax was captured by applyTo for the vector being processed.
                tf = value / this.docMax;
                break;
            }
            default: {
                // Fallback for any strategy without a dedicated case: raw count.
                tf = value;
            }
        }
        double idf = Math.log(this.totalDocuments / documentFrequency);
        return tf * idf;
    }

    /**
     * Re-weights every entry of {@code vec} in place using {@link #indexFunc(double, int)}.
     *
     * @param vec the vector to weight; it is modified through {@code applyIndexFunction}
     * @throws IllegalStateException if {@link #setWeight(List, List)} has not supplied
     *         the document frequencies yet
     */
    @Override
    public void applyTo(Vec vec) {
        requireInitialized();
        if (this.tfWeighting == TermFrequencyWeight.DOC_NORMALIZED) {
            // Capture the normalizer before the entries are rewritten.
            this.docMax = vec.max();
        }
        vec.applyIndexFunction(this);
    }

    /** Throws if the document frequencies have not been provided via setWeight. */
    private void requireInitialized() {
        if (this.df == null) {
            throw new IllegalStateException("TF-IDF weightings haven't been initialized, setWeight method must be called before first use.");
        }
    }

    /**
     * Strategies for converting a raw term count into the term-frequency factor.
     */
    public enum TermFrequencyWeight {
        /** The factor is {@code 1.0} for every non-zero count. */
        BOOLEAN,
        /** The factor is {@code 1 + log(count)}. */
        LOG,
        /** The factor is the count divided by the maximum value of the vector being weighted. */
        DOC_NORMALIZED;
    }
}

