001/* 002 * Java Genetic Algorithm Library (jenetics-8.1.0). 003 * Copyright (c) 2007-2024 Franz Wilhelmstötter 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 * Author: 018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com) 019 */ 020package io.jenetics.prog.regression; 021 022import static java.lang.Math.pow; 023import static java.lang.String.format; 024import static java.util.Objects.requireNonNull; 025 026import java.util.ArrayList; 027import java.util.List; 028import java.util.function.Function; 029import java.util.function.Predicate; 030import java.util.stream.Collectors; 031 032import io.jenetics.Genotype; 033import io.jenetics.engine.Codec; 034import io.jenetics.engine.Problem; 035import io.jenetics.util.ISeq; 036 037import io.jenetics.ext.util.Tree; 038 039import io.jenetics.prog.ProgramChromosome; 040import io.jenetics.prog.ProgramGene; 041import io.jenetics.prog.op.Op; 042import io.jenetics.prog.regression.Sampling.Result; 043 044/** 045 * This class implements a <em>symbolic</em> regression problem. The example 046 * below shows a typical usage of the {@code Regression} class. 047 * 048 * {@snippet lang="java": 049 * public class SymbolicRegression { 050 * private static final ISeq<Op<Double>> OPERATIONS = 051 * ISeq.of(MathOp.ADD, MathOp.SUB, MathOp.MUL); 052 * 053 * private static final ISeq<Op<Double>> TERMINALS = ISeq.of( 054 * Var.of("x", 0), 055 * EphemeralConst.of(() -> (double)RandomRegistry.random().nextInt(10)) 056 * ); 057 * 058 * private static final Regression<Double> REGRESSION = Regression.of( 059 * Regression.codecOf(OPERATIONS, TERMINALS, 5), 060 * Error.of(LossFunction::mse), 061 * Sample.ofDouble(-1.0, -8.0000), 062 * // ... 063 * Sample.ofDouble(0.9, 1.3860), 064 * Sample.ofDouble(1.0, 2.0000) 065 * ); 066 * 067 * public static void main(final String[] args) { 068 * final Engine<ProgramGene<Double>, Double> engine = Engine 069 * .builder(REGRESSION) 070 * .minimizing() 071 * .alterers( 072 * new SingleNodeCrossover<>(0.1), 073 * new Mutator<>()) 074 * .build(); 075 * 076 * final EvolutionResult<ProgramGene<Double>, Double> result = engine.stream() 077 * .limit(Limits.byFitnessThreshold(0.01)) 078 * .collect(EvolutionResult.toBestEvolutionResult()); 079 * 080 * final ProgramGene<Double> program = result.bestPhenotype() 081 * .genotype() 082 * .gene(); 083 * 084 * final TreeNode<Op<Double>> tree = program.toTreeNode(); 085 * MathExpr.rewrite(tree); // Simplify result program. 086 * System.out.println("Generations: " + result.totalGenerations()); 087 * System.out.println("Function: " + new MathExpr(tree)); 088 * System.out.println("Error: " + REGRESSION.error(tree)); 089 * } 090 * } 091 * } 092 * 093 * @see SampleBuffer 094 * @see Sampling 095 * 096 * @param <T> the operation type 097 * 098 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a> 099 * @version 6.0 100 * @since 5.0 101 */ 102public final class Regression<T> 103 implements Problem<Tree<Op<T>, ?>, ProgramGene<T>, Double> 104{ 105 106 private final Codec<Tree<Op<T>, ?>, ProgramGene<T>> _codec; 107 private final Error<T> _error; 108 private final Sampling<T> _sampling; 109 110 111 /** 112 * Create a new <em>symbolic</em> regression problem with the given data. 113 * 114 * @param codec the codec used for the problem 115 * @param error the error function 116 * @param sampling the sample values used for finding a regression. 117 */ 118 private Regression( 119 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec, 120 final Error<T> error, 121 final Sampling<T> sampling 122 ) { 123 _codec = requireNonNull(codec); 124 _error = requireNonNull(error); 125 _sampling = requireNonNull(sampling); 126 } 127 128 @Override 129 public Function<Tree<Op<T>, ?>, Double> fitness() { 130 return this::error; 131 } 132 133 @Override 134 public Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec() { 135 return _codec; 136 } 137 138 /** 139 * Calculates the actual error for the given {@code program}. 140 * 141 * @param program the program to calculate the error value for 142 * @return the overall error value of the program 143 */ 144 public double error(final Tree<? extends Op<T>, ?> program) { 145 final Result<T> result = _sampling.eval(program); 146 return result != null 147 ? _error.apply(program, result.calculated(), result.expected()) 148 : Double.MAX_VALUE; 149 } 150 151 /* ************************************************************************* 152 * Factory methods. 153 * ************************************************************************/ 154 155 /** 156 * Create a new regression problem instance with the given parameters. 157 * 158 * @see #codecOf(ISeq, ISeq, int) 159 * @see #codecOf(ISeq, ISeq, int, Predicate) 160 * 161 * @param <T> the operation type 162 * @param codec the problem codec to use 163 * @param error the error function 164 * @param sampling the sampling function 165 * @return a new regression problem instance 166 * @throws NullPointerException if on of the arguments is {@code null} 167 */ 168 public static <T> Regression<T> of( 169 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec, 170 final Error<T> error, 171 final Sampling<T> sampling 172 ) { 173 return new Regression<>(codec, error, sampling); 174 } 175 176 /** 177 * Create a new regression problem instance with the given parameters. 178 * 179 * @see #codecOf(ISeq, ISeq, int) 180 * @see #codecOf(ISeq, ISeq, int, Predicate) 181 * 182 * @param <T> the operation type 183 * @param codec the problem codec to use 184 * @param error the error function 185 * @param samples the sample points used for regression analysis 186 * @return a new regression problem instance 187 * @throws IllegalArgumentException if the given {@code samples} is empty 188 * @throws NullPointerException if one of the arguments is {@code null} 189 */ 190 public static <T> Regression<T> of( 191 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec, 192 final Error<T> error, 193 final Iterable<? extends Sample<T>> samples 194 ) { 195 if (!samples.iterator().hasNext()) { 196 throw new IllegalArgumentException("Sample list must not be empty."); 197 } 198 199 final List<Sample<T>> s = new ArrayList<>(); 200 samples.forEach(s::add); 201 202 return new Regression<>(codec, error, new SampleList<>(s)); 203 } 204 205 /** 206 * Create a new regression problem instance with the given parameters. 207 * 208 * @see #codecOf(ISeq, ISeq, int) 209 * @see #codecOf(ISeq, ISeq, int, Predicate) 210 * 211 * @param <T> the operation type 212 * @param codec the problem codec to use 213 * @param error the error function 214 * @param samples the sample points used for regression analysis 215 * @return a new regression problem instance 216 * @throws IllegalArgumentException if the given {@code samples} is empty 217 * @throws NullPointerException if one of the arguments is {@code null} 218 */ 219 @SafeVarargs 220 public static <T> Regression<T> of( 221 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec, 222 final Error<T> error, 223 final Sample<T>... samples 224 ) { 225 return of(codec, error, List.of(samples)); 226 } 227 228 229 /* ************************************************************************* 230 * Codec factory methods. 231 * ************************************************************************/ 232 233 /** 234 * Create a new <em>codec</em>, usable for <em>symbolic regression</em> 235 * problems, with the given parameters. 236 * 237 * @param <T> the operation type 238 * @param operations the operations used for the symbolic regression 239 * @param terminals the terminal operations of the program tree 240 * @param depth the maximal tree depth (height) of newly created program 241 * trees 242 * @param validator the chromosome validator. A typical validator would 243 * check the size of the tree and if the tree is too large, mark it 244 * at <em>invalid</em>. The <em>validator</em> may be {@code null}. 245 * @return a new codec, usable for symbolic regression 246 * @throws IllegalArgumentException if the tree {@code depth} is not in the 247 * valid range of {@code [0, 30)} 248 * @throws NullPointerException if the {@code operations} or {@code terminals} 249 * are {@code null} 250 */ 251 public static <T> Codec<Tree<Op<T>, ?>, ProgramGene<T>> 252 codecOf( 253 final ISeq<Op<T>> operations, 254 final ISeq<Op<T>> terminals, 255 final int depth, 256 final Predicate<? super ProgramChromosome<T>> validator 257 ) { 258 if (depth >= 30 || depth < 0) { 259 throw new IllegalArgumentException(format( 260 "Tree depth out of range [0, 30): %d", depth 261 )); 262 } 263 264 return Codec.of( 265 Genotype.of( 266 ProgramChromosome.of( 267 depth, 268 validator, 269 operations, 270 terminals 271 ) 272 ), 273 Genotype::gene 274 ); 275 } 276 277 /** 278 * Create a new <em>codec</em>, usable for <em>symbolic regression</em> 279 * problems, with the given parameters. 280 * 281 * @param <T> the operation type 282 * @param operations the operations used for the symbolic regression 283 * @param terminals the terminal operations of the program tree 284 * @param depth the maximal tree depth (height) of newly created program 285 * trees 286 * @return a new codec, usable for symbolic regression 287 * @throws IllegalArgumentException if the tree {@code depth} is not in the 288 * valid range of {@code [0, 30)} 289 * @throws NullPointerException if the {@code operations} or {@code terminals} 290 * are {@code null} 291 */ 292 public static <T> Codec<Tree<Op<T>, ?>, ProgramGene<T>> 293 codecOf( 294 final ISeq<Op<T>> operations, 295 final ISeq<Op<T>> terminals, 296 final int depth 297 ) { 298 // Average arity of tree nodes. 299 final double k = operations.stream() 300 .collect(Collectors.averagingDouble(Op::arity)); 301 302 // The average node count between treeDepth and treeDepth + 1. 303 // 2^(k + 1) - 1 + 2^(k + 2) - 1)/2 == 3*2^k - 1 304 final int max = (int)(3*pow(k, depth) - 1); 305 306 return codecOf( 307 operations, 308 terminals, 309 depth, 310 ch -> ch.root().size() <= max 311 ); 312 } 313 314}