001 package calhoun.analysis.crf.solver;
002
003 import java.util.Arrays;
004 import java.util.List;
005
006 import org.apache.commons.logging.Log;
007 import org.apache.commons.logging.LogFactory;
008
009 import calhoun.analysis.crf.CRFObjectiveFunctionGradient;
010 import calhoun.analysis.crf.CRFTraining;
011 import calhoun.analysis.crf.ModelManager;
012 import calhoun.analysis.crf.io.TrainingSequence;
013 import calhoun.util.ErrorException;
014 import cern.colt.matrix.impl.DenseDoubleMatrix1D;
015 import flanagan.math.Minimisation;
016 import flanagan.math.MinimisationFunction;
017
018 /** uses a nelder-mead algorithm (the simplex method) to do a general function optimization objective function.
019 * This optimization method does not use the gradient but requires many iterations and so is mainly useful in
020 * debugging. Use the {@link StandardOptimizer} for most problems.
021 * <p>
022 * This optimizer has several configuration properties that allow control over the optimization process:
023 * <ul>
024 * <li> <b><code>maxIters</code></b> - The maximum number of iterations (objective function evaluations) to attempt
 * <li> <b><code>requireConvergence</code></b> - if true, throws an error if convergence is not reached in the maximum number
 * of iterations.  Otherwise, the current feature weights are returned when <code>maxIters</code> is reached.
027 * <li> <b><code>stepSize</code></b> - the size of the initial changes made to the weights when exploring the objective function.
028 * <li> <b><code>starts</code></b> - an initial set of guesses at feature weights. Defaults to 1.0
029 * </ul>
030 * */
031 public class SimplexOptimizer implements CRFTraining {
032 @SuppressWarnings("unused")
033 private static final Log log = LogFactory.getLog(SimplexOptimizer.class);
034
035 // Configuration
036 CRFObjectiveFunctionGradient gradFunc;
037 int maxIters = 500;
038 double stepSize = 0.5;
039 boolean requireConvergence = true;
040 double[] starts = null;
041
042 public double[] optimize(ModelManager fm, List<? extends TrainingSequence<?>> data) {
043 gradFunc.setTrainingData(fm, data);
044 final int nFeatures = fm.getNumFeatures();
045 MinimisationFunction mFunc = new MinimisationFunction() {
046 double[] grad = new double[nFeatures];
047
048 public double function(double[] d) {
049 return -gradFunc.apply(d, grad);
050 }
051 };
052
053 Minimisation m = new Minimisation();
054 DenseDoubleMatrix1D steps = new DenseDoubleMatrix1D(nFeatures);
055 m.setNmax(maxIters);
056 if(starts == null) {
057 starts = new double[nFeatures];
058 Arrays.fill(starts, 1.0);
059 }
060 steps.assign(stepSize);
061 m.nelderMead(mFunc, starts, steps.toArray());
062 if(requireConvergence && !m.getConvStatus()) {
063 throw new ErrorException("Convergence not reached.");
064 }
065 //m.print("test/working/nelder.txt");
066 return m.getParamValues();
067 }
068
069 /** returns the configured objective function gradient which will be
070 * used by the optimizer during the training process.
071 * @return the configured objective function gradient
072 */
073 public CRFObjectiveFunctionGradient getObjectiveFunction() {
074 return gradFunc;
075 }
076
077 /** sets the objective function gradient. Called automatically during configuration. */
078 public void setObjectiveFunction(CRFObjectiveFunctionGradient objectiveFunction) {
079 this.gradFunc = objectiveFunction;
080 }
081
082 public int getMaxIters() {
083 return maxIters;
084 }
085
086 public void setMaxIters(int maxIters) {
087 this.maxIters = maxIters;
088 }
089
090 public boolean isRequireConvergence() {
091 return requireConvergence;
092 }
093
094 public void setRequireConvergence(boolean requireConvergence) {
095 this.requireConvergence = requireConvergence;
096 }
097
098 public double[] getStarts() {
099 return starts;
100 }
101
102 public void setStarts(double[] starts) {
103 this.starts = starts;
104 }
105
106 public double getStepSize() {
107 return stepSize;
108 }
109
110 public void setStepSize(double stepSize) {
111 this.stepSize = stepSize;
112 }
113 }