001 /*
002 * Java Genetic Algorithm Library (jenetics-6.1.0).
003 * Copyright (c) 2007-2020 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020 package io.jenetics.prog.regression;
021
022 import static java.lang.Math.pow;
023 import static java.lang.String.format;
024 import static java.util.Objects.requireNonNull;
025
026 import java.util.ArrayList;
027 import java.util.List;
028 import java.util.function.Function;
029 import java.util.function.Predicate;
030 import java.util.stream.Collectors;
031
032 import io.jenetics.Genotype;
033 import io.jenetics.engine.Codec;
034 import io.jenetics.engine.Problem;
035 import io.jenetics.util.ISeq;
036
037 import io.jenetics.ext.util.Tree;
038
039 import io.jenetics.prog.ProgramChromosome;
040 import io.jenetics.prog.ProgramGene;
041 import io.jenetics.prog.op.Op;
042 import io.jenetics.prog.regression.Sampling.Result;
043
044 /**
045 * This class implements a <em>symbolic</em> regression problem. The example
046 * below shows a typical usage of the {@code Regression} class.
047 *
048 * <pre>{@code
049 * public class SymbolicRegression {
050 * private static final ISeq<Op<Double>> OPERATIONS =
051 * ISeq.of(MathOp.ADD, MathOp.SUB, MathOp.MUL);
052 *
053 * private static final ISeq<Op<Double>> TERMINALS = ISeq.of(
054 * Var.of("x", 0),
055 * EphemeralConst.of(() -> (double)RandomRegistry.random().nextInt(10))
056 * );
057 *
058 * private static final Regression<Double> REGRESSION = Regression.of(
059 * Regression.codecOf(OPERATIONS, TERMINALS, 5),
060 * Error.of(LossFunction::mse),
061 * Sample.ofDouble(-1.0, -8.0000),
062 * // ...
063 * Sample.ofDouble(0.9, 1.3860),
064 * Sample.ofDouble(1.0, 2.0000)
065 * );
066 *
067 * public static void main(final String[] args) {
068 * final Engine<ProgramGene<Double>, Double> engine = Engine
069 * .builder(REGRESSION)
070 * .minimizing()
071 * .alterers(
072 * new SingleNodeCrossover<>(0.1),
073 * new Mutator<>())
074 * .build();
075 *
076 * final EvolutionResult<ProgramGene<Double>, Double> result = engine.stream()
077 * .limit(Limits.byFitnessThreshold(0.01))
078 * .collect(EvolutionResult.toBestEvolutionResult());
079 *
080 * final ProgramGene<Double> program = result.bestPhenotype()
081 * .genotype()
082 * .gene();
083 *
084 * final TreeNode<Op<Double>> tree = program.toTreeNode();
085 * MathExpr.rewrite(tree); // Simplify result program.
086 * System.out.println("Generations: " + result.totalGenerations());
087 * System.out.println("Function: " + new MathExpr(tree));
088 * System.out.println("Error: " + REGRESSION.error(tree));
089 * }
090 * }
091 * }</pre>
092 *
093 * @see SampleBuffer
094 * @see Sampling
095 *
096 * @param <T> the operation type
097 *
098 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
099 * @version 6.0
100 * @since 5.0
101 */
102 public final class Regression<T>
103 implements Problem<Tree<Op<T>, ?>, ProgramGene<T>, Double>
104 {
105
106 private final Codec<Tree<Op<T>, ?>, ProgramGene<T>> _codec;
107 private final Error<T> _error;
108 private final Sampling<T> _sampling;
109
110
111 /**
112 * Create a new <em>symbolic</em> regression problem with the given data.
113 *
114 * @param codec the codec used for the for the problem
115 * @param error the error function
116 * @param sampling the sample values used for finding a regression.
117 */
118 private Regression(
119 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec,
120 final Error<T> error,
121 final Sampling<T> sampling
122 ) {
123 _codec = requireNonNull(codec);
124 _error = requireNonNull(error);
125 _sampling = requireNonNull(sampling);
126 }
127
128 @Override
129 public Function<Tree<Op<T>, ?>, Double> fitness() {
130 return this::error;
131 }
132
133 @Override
134 public Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec() {
135 return _codec;
136 }
137
138 /**
139 * Calculates the actual error for the given {@code program}.
140 *
141 * @param program the program to calculate the error value for
142 * @return the overall error value of the program
143 */
144 public double error(final Tree<? extends Op<T>, ?> program) {
145 final Result<T> result = _sampling.eval(program);
146 return result != null
147 ? _error.apply(program, result.calculated(), result.expected())
148 : Double.MAX_VALUE;
149 }
150
151 /* *************************************************************************
152 * Factory methods.
153 * ************************************************************************/
154
155 /**
156 * Create a new regression problem instance with the given parameters.
157 *
158 * @see #codecOf(ISeq, ISeq, int)
159 * @see #codecOf(ISeq, ISeq, int, Predicate)
160 *
161 * @param <T> the operation type
162 * @param codec the problem codec to use
163 * @param error the error function
164 * @param sampling the sampling function
165 * @return a new regression problem instance
166 * @throws NullPointerException if on of the arguments is {@code null}
167 */
168 public static <T> Regression<T> of(
169 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec,
170 final Error<T> error,
171 final Sampling<T> sampling
172 ) {
173 return new Regression<>(codec, error, sampling);
174 }
175
176 /**
177 * Create a new regression problem instance with the given parameters.
178 *
179 * @see #codecOf(ISeq, ISeq, int)
180 * @see #codecOf(ISeq, ISeq, int, Predicate)
181 *
182 * @param <T> the operation type
183 * @param codec the problem codec to use
184 * @param error the error function
185 * @param samples the sample points used for regression analysis
186 * @return a new regression problem instance
187 * @throws IllegalArgumentException if the given {@code samples} is empty
188 * @throws NullPointerException if on of the arguments is {@code null}
189 */
190 public static <T> Regression<T> of(
191 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec,
192 final Error<T> error,
193 final Iterable<? extends Sample<T>> samples
194 ) {
195 if (!samples.iterator().hasNext()) {
196 throw new IllegalArgumentException("Sample list must not be empty.");
197 }
198
199 final List<Sample<T>> s = new ArrayList<>();
200 samples.forEach(s::add);
201
202 return new Regression<>(codec, error, new SampleList<>(s));
203 }
204
205 /**
206 * Create a new regression problem instance with the given parameters.
207 *
208 * @see #codecOf(ISeq, ISeq, int)
209 * @see #codecOf(ISeq, ISeq, int, Predicate)
210 *
211 * @param <T> the operation type
212 * @param codec the problem codec to use
213 * @param error the error function
214 * @param samples the sample points used for regression analysis
215 * @return a new regression problem instance
216 * @throws IllegalArgumentException if the given {@code samples} is empty
217 * @throws NullPointerException if on of the arguments is {@code null}
218 */
219 @SafeVarargs
220 public static <T> Regression<T> of(
221 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec,
222 final Error<T> error,
223 final Sample<T>... samples
224 ) {
225 return of(codec, error, List.of(samples));
226 }
227
228
229 /* *************************************************************************
230 * Codec factory methods.
231 * ************************************************************************/
232
233 /**
234 * Create a new <em>codec</em>, usable for <em>symbolic regression</em>
235 * problems, with the given parameters.
236 *
237 * @param <T> the operation type
238 * @param operations the operations used for the symbolic regression
239 * @param terminals the terminal operations of the program tree
240 * @param depth the maximal tree depth (height) of newly created program
241 * trees
242 * @param validator the chromosome validator. A typical validator would
243 * check the size of the tree and if the tree is too large, mark it
244 * at <em>invalid</em>. The <em>validator</em> may be {@code null}.
245 * @return a new codec, usable for symbolic regression
246 * @throws IllegalArgumentException if the tree {@code depth} is not in the
247 * valid range of {@code [0, 30)}
248 * @throws NullPointerException if the {@code operations} or {@code terminals}
249 * are {@code null}
250 */
251 public static <T> Codec<Tree<Op<T>, ?>, ProgramGene<T>>
252 codecOf(
253 final ISeq<Op<T>> operations,
254 final ISeq<Op<T>> terminals,
255 final int depth,
256 final Predicate<? super ProgramChromosome<T>> validator
257 ) {
258 if (depth >= 30 || depth < 0) {
259 throw new IllegalArgumentException(format(
260 "Tree depth out of range [0, 30): %d", depth
261 ));
262 }
263
264 return Codec.of(
265 Genotype.of(
266 ProgramChromosome.of(
267 depth,
268 validator,
269 operations,
270 terminals
271 )
272 ),
273 Genotype::gene
274 );
275 }
276
277 /**
278 * Create a new <em>codec</em>, usable for <em>symbolic regression</em>
279 * problems, with the given parameters.
280 *
281 * @param <T> the operation type
282 * @param operations the operations used for the symbolic regression
283 * @param terminals the terminal operations of the program tree
284 * @param depth the maximal tree depth (height) of newly created program
285 * trees
286 * @return a new codec, usable for symbolic regression
287 * @throws IllegalArgumentException if the tree {@code depth} is not in the
288 * valid range of {@code [0, 30)}
289 * @throws NullPointerException if the {@code operations} or {@code terminals}
290 * are {@code null}
291 */
292 public static <T> Codec<Tree<Op<T>, ?>, ProgramGene<T>>
293 codecOf(
294 final ISeq<Op<T>> operations,
295 final ISeq<Op<T>> terminals,
296 final int depth
297 ) {
298 // Average arity of tree nodes.
299 final double k = operations.stream()
300 .collect(Collectors.averagingDouble(Op::arity));
301
302 // The average node count between treeDepth and treeDepth + 1.
303 // 2^(k + 1) - 1 + 2^(k + 2) - 1)/2 == 3*2^k - 1
304 final int max = (int)(3*pow(k, depth) - 1);
305
306 return codecOf(
307 operations,
308 terminals,
309 depth,
310 ch -> ch.root().size() <= max
311 );
312 }
313
314 }
|