001 /*
002 * Java Genetic Algorithm Library (jenetics-5.1.0).
003 * Copyright (c) 2007-2019 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020 package io.jenetics.prog.regression;
021
022 import static java.lang.Math.pow;
023 import static java.lang.String.format;
024 import static java.util.Objects.requireNonNull;
025
026 import java.lang.reflect.Array;
027 import java.util.ArrayList;
028 import java.util.Arrays;
029 import java.util.List;
030 import java.util.function.Function;
031 import java.util.function.Predicate;
032 import java.util.stream.Collectors;
033 import java.util.stream.Stream;
034
035 import io.jenetics.Genotype;
036 import io.jenetics.engine.Codec;
037 import io.jenetics.engine.Problem;
038 import io.jenetics.util.ISeq;
039
040 import io.jenetics.ext.util.Tree;
041
042 import io.jenetics.prog.ProgramChromosome;
043 import io.jenetics.prog.ProgramGene;
044 import io.jenetics.prog.op.Op;
045 import io.jenetics.prog.op.Program;
046
047 /**
048 * This class implements a <em>symbolic</em> regression problem. The example
049 * below shows a typical usage of the {@code Regression} class.
050 *
051 * <pre>{@code
052 * public class SymbolicRegression {
053 * private static final ISeq<Op<Double>> OPERATIONS =
054 * ISeq.of(MathOp.ADD, MathOp.SUB, MathOp.MUL);
055 *
056 * private static final ISeq<Op<Double>> TERMINALS = ISeq.of(
057 * Var.of("x", 0),
058 * EphemeralConst.of(() -> (double)RandomRegistry.getRandom().nextInt(10))
059 * );
060 *
061 * private static final Regression<Double> REGRESSION = Regression.of(
062 * Regression.codecOf(OPERATIONS, TERMINALS, 5),
063 * Error.of(LossFunction::mse),
064 * Sample.ofDouble(-1.0, -8.0000),
065 * // ...
066 * Sample.ofDouble(0.9, 1.3860),
067 * Sample.ofDouble(1.0, 2.0000)
068 * );
069 *
070 * public static void main(final String[] args) {
071 * final Engine<ProgramGene<Double>, Double> engine = Engine
072 * .builder(REGRESSION)
073 * .minimizing()
074 * .alterers(
075 * new SingleNodeCrossover<>(0.1),
076 * new Mutator<>())
077 * .build();
078 *
079 * final EvolutionResult<ProgramGene<Double>, Double> result = engine.stream()
080 * .limit(Limits.byFitnessThreshold(0.01))
081 * .collect(EvolutionResult.toBestEvolutionResult());
082 *
083 * final ProgramGene<Double> program = result.getBestPhenotype()
084 * .getGenotype()
085 * .getGene();
086 *
087 * final TreeNode<Op<Double>> tree = program.toTreeNode();
088 * MathExpr.rewrite(tree); // Simplify result program.
089 * System.out.println("Generations: " + result.getTotalGenerations());
090 * System.out.println("Function: " + new MathExpr(tree));
091 * System.out.println("Error: " + REGRESSION.error(tree));
092 * }
093 * }
094 * }</pre>
095 *
096 * @param <T> the operation type
097 *
098 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
099 * @version 5.0
100 * @since 5.0
101 */
102 public final class Regression<T>
103 implements Problem<Tree<Op<T>, ?>, ProgramGene<T>, Double>
104 {
105
106 private final Codec<Tree<Op<T>, ?>, ProgramGene<T>> _codec;
107 private final Error<T> _error;
108 private final Samples<T> _samples;
109
110
111 /**
112 * Create a new <em>symbolic</em> regression problem with the given data.
113 *
114 * @param codec the codec used for the for the problem
115 * @param error the error function
116 * @param samples the sample values used for finding a regression. Since
117 * the samples are given via a <em>supplier</em> they can be changed
118 * during the evolution process
119 */
120 private Regression(
121 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec,
122 final Error<T> error,
123 final Samples<T> samples
124 ) {
125 _codec = requireNonNull(codec);
126 _error = requireNonNull(error);
127 _samples = requireNonNull(samples);
128 }
129
130 @Override
131 public Function<Tree<Op<T>, ?>, Double> fitness() {
132 return this::error;
133 }
134
135 @Override
136 public Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec() {
137 return _codec;
138 }
139
140 /**
141 * Return an immutable list of the symbolic regression's points.
142 *
143 * @return the sample points
144 */
145 public List<Sample<T>> samples() {
146 return _samples;
147 }
148
149 /**
150 * Calculates the actual error for the given {@code program}.
151 *
152 * @param program the program to calculate the error value for
153 * @return the overall error value of the program
154 */
155 public double error(final Tree<Op<T>, ?> program) {
156 @SuppressWarnings("unchecked")
157 final T[] calculated = Stream.of(_samples.arguments())
158 .map(args -> Program.eval(program, args))
159 .toArray(size -> (T[])Array.newInstance(_samples.type(), size));
160
161 return _error.apply(program, calculated, _samples.results());
162 }
163
164 /* *************************************************************************
165 * Factory methods.
166 * ************************************************************************/
167
168 /**
169 * Create a new regression problem instance with the given parameters.
170 *
171 * @see #codecOf(ISeq, ISeq, int)
172 * @see #codecOf(ISeq, ISeq, int, Predicate)
173 *
174 * @param <T> the operation type
175 * @param codec the problem codec to use
176 * @param error the error function
177 * @param samples the sample points used for regression analysis
178 * @return a new regression problem instance
179 * @throws IllegalArgumentException if the given {@code samples} is empty
180 * @throws NullPointerException if on of the arguments is {@code null}
181 */
182 public static <T> Regression<T> of(
183 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec,
184 final Error<T> error,
185 final Iterable<Sample<T>> samples
186 ) {
187 if (!samples.iterator().hasNext()) {
188 throw new IllegalArgumentException("Sample list must not be empty.");
189 }
190
191 final List<Sample<T>> s = new ArrayList<>();
192 samples.forEach(s::add);
193
194 return new Regression<>(codec, error, new Samples<>(s));
195 }
196
197 /**
198 * Create a new regression problem instance with the given parameters.
199 *
200 * @see #codecOf(ISeq, ISeq, int)
201 * @see #codecOf(ISeq, ISeq, int, Predicate)
202 *
203 * @param <T> the operation type
204 * @param codec the problem codec to use
205 * @param error the error function
206 * @param samples the sample points used for regression analysis
207 * @return a new regression problem instance
208 * @throws IllegalArgumentException if the given {@code samples} is empty
209 * @throws NullPointerException if on of the arguments is {@code null}
210 */
211 @SafeVarargs
212 public static <T> Regression<T> of(
213 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec,
214 final Error<T> error,
215 final Sample<T>... samples
216 ) {
217 return of(codec, error, Arrays.asList(samples));
218 }
219
220
221 /* *************************************************************************
222 * Codec factory methods.
223 * ************************************************************************/
224
225 /**
226 * Create a new <em>codec</em>, usable for <em>symbolic regression</em>
227 * problems, with the given parameters.
228 *
229 * @param <T> the operation type
230 * @param operations the operations used for the symbolic regression
231 * @param terminals the terminal operations of the program tree
232 * @param depth the maximal tree depth (height) of newly created program
233 * trees
234 * @param validator the chromosome validator. A typical validator would
235 * check the size of the tree and if the tree is too large, mark it
236 * at <em>invalid</em>. The <em>validator</em> may be {@code null}.
237 * @return a new codec, usable for symbolic regression
238 * @throws IllegalArgumentException if the tree {@code depth} is not in the
239 * valid range of {@code [0, 30)}
240 * @throws NullPointerException if the {@code operations} or {@code terminals}
241 * are {@code null}
242 */
243 public static <T> Codec<Tree<Op<T>, ?>, ProgramGene<T>>
244 codecOf(
245 final ISeq<Op<T>> operations,
246 final ISeq<Op<T>> terminals,
247 final int depth,
248 final Predicate<? super ProgramChromosome<T>> validator
249 ) {
250 if (depth >= 30 || depth < 0) {
251 throw new IllegalArgumentException(format(
252 "Tree depth out of range [0, 30): %d", depth
253 ));
254 }
255
256 return Codec.of(
257 Genotype.of(ProgramChromosome.of(
258 depth,
259 validator,
260 operations,
261 terminals
262 )),
263 Genotype::getGene
264 );
265 }
266
267 /**
268 * Create a new <em>codec</em>, usable for <em>symbolic regression</em>
269 * problems, with the given parameters.
270 *
271 * @param <T> the operation type
272 * @param operations the operations used for the symbolic regression
273 * @param terminals the terminal operations of the program tree
274 * @param depth the maximal tree depth (height) of newly created program
275 * trees
276 * @return a new codec, usable for symbolic regression
277 * @throws IllegalArgumentException if the tree {@code depth} is not in the
278 * valid range of {@code [0, 30)}
279 * @throws NullPointerException if the {@code operations} or {@code terminals}
280 * are {@code null}
281 */
282 public static <T> Codec<Tree<Op<T>, ?>, ProgramGene<T>>
283 codecOf(
284 final ISeq<Op<T>> operations,
285 final ISeq<Op<T>> terminals,
286 final int depth
287 ) {
288 // Average arity of tree nodes.
289 final double k = operations.stream()
290 .collect(Collectors.averagingDouble(Op::arity));
291
292 // The average node count between treeDepth and treeDepth + 1.
293 // 2^(k + 1) - 1 + 2^(k + 2) - 1)/2 == 3*2^k - 1
294 final int max = (int)(3*pow(k, depth) - 1);
295
296 return codecOf(
297 operations,
298 terminals,
299 depth,
300 ch -> ch.getRoot().size() <= max
301 );
302 }
303
304 }
|