001/* 002 * Java Genetic Algorithm Library (jenetics-8.3.0). 003 * Copyright (c) 2007-2025 Franz Wilhelmstötter 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 * Author: 018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com) 019 */ 020package io.jenetics.prog.regression; 021 022import static java.lang.Math.pow; 023import static java.lang.String.format; 024import static java.util.Objects.requireNonNull; 025 026import java.util.ArrayList; 027import java.util.List; 028import java.util.function.Function; 029import java.util.function.Predicate; 030import java.util.stream.Collectors; 031 032import io.jenetics.Genotype; 033import io.jenetics.engine.Codec; 034import io.jenetics.engine.Problem; 035import io.jenetics.util.ISeq; 036 037import io.jenetics.ext.util.Tree; 038 039import io.jenetics.prog.ProgramChromosome; 040import io.jenetics.prog.ProgramGene; 041import io.jenetics.prog.op.Op; 042import io.jenetics.prog.regression.Sampling.Result; 043 044/** 045 * This class implements a <em>symbolic</em> regression problem. The example 046 * below shows a typical usage of the {@code Regression} class. 047 * {@snippet lang="java": 048 * public class SymbolicRegression { 049 * private static final ISeq<Op<Double>> OPERATIONS = 050 * ISeq.of(MathOp.ADD, MathOp.SUB, MathOp.MUL); 051 * 052 * private static final ISeq<Op<Double>> TERMINALS = ISeq.of( 053 * Var.of("x", 0), 054 * EphemeralConst.of(() -> (double)RandomRegistry.random().nextInt(10)) 055 * ); 056 * 057 * private static final Regression<Double> REGRESSION = Regression.of( 058 * Regression.codecOf(OPERATIONS, TERMINALS, 5), 059 * Error.of(LossFunction::mse), 060 * Sample.ofDouble(-1.0, -8.0000), 061 * // ... 062 * Sample.ofDouble(0.9, 1.3860), 063 * Sample.ofDouble(1.0, 2.0000) 064 * ); 065 * 066 * public static void main(final String[] args) { 067 * final Engine<ProgramGene<Double>, Double> engine = Engine 068 * .builder(REGRESSION) 069 * .minimizing() 070 * .alterers( 071 * new SingleNodeCrossover<>(0.1), 072 * new Mutator<>()) 073 * .build(); 074 * 075 * final EvolutionResult<ProgramGene<Double>, Double> result = engine.stream() 076 * .limit(Limits.byFitnessThreshold(0.01)) 077 * .collect(EvolutionResult.toBestEvolutionResult()); 078 * 079 * final ProgramGene<Double> program = result.bestPhenotype() 080 * .genotype() 081 * .gene(); 082 * 083 * final TreeNode<Op<Double>> tree = program.toTreeNode(); 084 * MathExpr.rewrite(tree); // Simplify result program. 085 * System.out.println("Generations: " + result.totalGenerations()); 086 * System.out.println("Function: " + new MathExpr(tree)); 087 * System.out.println("Error: " + REGRESSION.error(tree)); 088 * } 089 * } 090 * } 091 * 092 * @see SampleBuffer 093 * @see Sampling 094 * 095 * @param <T> the operation type 096 * 097 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a> 098 * @version 6.0 099 * @since 5.0 100 */ 101public final class Regression<T> 102 implements Problem<Tree<Op<T>, ?>, ProgramGene<T>, Double> 103{ 104 105 private final Codec<Tree<Op<T>, ?>, ProgramGene<T>> _codec; 106 private final Error<T> _error; 107 private final Sampling<T> _sampling; 108 109 110 /** 111 * Create a new <em>symbolic</em> regression problem with the given data. 112 * 113 * @param codec the codec used for the problem 114 * @param error the error function 115 * @param sampling the sample values used for finding a regression. 116 */ 117 private Regression( 118 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec, 119 final Error<T> error, 120 final Sampling<T> sampling 121 ) { 122 _codec = requireNonNull(codec); 123 _error = requireNonNull(error); 124 _sampling = requireNonNull(sampling); 125 } 126 127 @Override 128 public Function<Tree<Op<T>, ?>, Double> fitness() { 129 return this::error; 130 } 131 132 @Override 133 public Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec() { 134 return _codec; 135 } 136 137 /** 138 * Calculates the actual error for the given {@code program}. 139 * 140 * @param program the program to calculate the error value for 141 * @return the overall error value of the program 142 */ 143 public double error(final Tree<? extends Op<T>, ?> program) { 144 final Result<T> result = _sampling.eval(program); 145 return result != null 146 ? _error.apply(program, result.calculated(), result.expected()) 147 : Double.MAX_VALUE; 148 } 149 150 /* ************************************************************************* 151 * Factory methods. 152 * ************************************************************************/ 153 154 /** 155 * Create a new regression problem instance with the given parameters. 156 * 157 * @see #codecOf(ISeq, ISeq, int) 158 * @see #codecOf(ISeq, ISeq, int, Predicate) 159 * 160 * @param <T> the operation type 161 * @param codec the problem codec to use 162 * @param error the error function 163 * @param sampling the sampling function 164 * @return a new regression problem instance 165 * @throws NullPointerException if on of the arguments is {@code null} 166 */ 167 public static <T> Regression<T> of( 168 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec, 169 final Error<T> error, 170 final Sampling<T> sampling 171 ) { 172 return new Regression<>(codec, error, sampling); 173 } 174 175 /** 176 * Create a new regression problem instance with the given parameters. 177 * 178 * @see #codecOf(ISeq, ISeq, int) 179 * @see #codecOf(ISeq, ISeq, int, Predicate) 180 * 181 * @param <T> the operation type 182 * @param codec the problem codec to use 183 * @param error the error function 184 * @param samples the sample points used for regression analysis 185 * @return a new regression problem instance 186 * @throws IllegalArgumentException if the given {@code samples} is empty 187 * @throws NullPointerException if one of the arguments is {@code null} 188 */ 189 public static <T> Regression<T> of( 190 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec, 191 final Error<T> error, 192 final Iterable<? extends Sample<T>> samples 193 ) { 194 if (!samples.iterator().hasNext()) { 195 throw new IllegalArgumentException("Sample list must not be empty."); 196 } 197 198 final List<Sample<T>> s = new ArrayList<>(); 199 samples.forEach(s::add); 200 201 return new Regression<>(codec, error, new SampleList<>(s)); 202 } 203 204 /** 205 * Create a new regression problem instance with the given parameters. 206 * 207 * @see #codecOf(ISeq, ISeq, int) 208 * @see #codecOf(ISeq, ISeq, int, Predicate) 209 * 210 * @param <T> the operation type 211 * @param codec the problem codec to use 212 * @param error the error function 213 * @param samples the sample points used for regression analysis 214 * @return a new regression problem instance 215 * @throws IllegalArgumentException if the given {@code samples} is empty 216 * @throws NullPointerException if one of the arguments is {@code null} 217 */ 218 @SafeVarargs 219 public static <T> Regression<T> of( 220 final Codec<Tree<Op<T>, ?>, ProgramGene<T>> codec, 221 final Error<T> error, 222 final Sample<T>... samples 223 ) { 224 return of(codec, error, List.of(samples)); 225 } 226 227 228 /* ************************************************************************* 229 * Codec factory methods. 230 * ************************************************************************/ 231 232 /** 233 * Create a new <em>codec</em>, usable for <em>symbolic regression</em> 234 * problems, with the given parameters. 235 * 236 * @param <T> the operation type 237 * @param operations the operations used for the symbolic regression 238 * @param terminals the terminal operations of the program tree 239 * @param depth the maximal tree depth (height) of newly created program 240 * trees 241 * @param validator the chromosome validator. A typical validator would 242 * check the size of the tree and if the tree is too large, mark it 243 * at <em>invalid</em>. The <em>validator</em> may be {@code null}. 244 * @return a new codec, usable for symbolic regression 245 * @throws IllegalArgumentException if the tree {@code depth} is not in the 246 * valid range of {@code [0, 30)} 247 * @throws NullPointerException if the {@code operations} or {@code terminals} 248 * are {@code null} 249 */ 250 public static <T> Codec<Tree<Op<T>, ?>, ProgramGene<T>> 251 codecOf( 252 final ISeq<Op<T>> operations, 253 final ISeq<Op<T>> terminals, 254 final int depth, 255 final Predicate<? super ProgramChromosome<T>> validator 256 ) { 257 if (depth >= 30 || depth < 0) { 258 throw new IllegalArgumentException(format( 259 "Tree depth out of range [0, 30): %d", depth 260 )); 261 } 262 263 return Codec.of( 264 Genotype.of( 265 ProgramChromosome.of( 266 depth, 267 validator, 268 operations, 269 terminals 270 ) 271 ), 272 Genotype::gene 273 ); 274 } 275 276 /** 277 * Create a new <em>codec</em>, usable for <em>symbolic regression</em> 278 * problems, with the given parameters. 279 * 280 * @param <T> the operation type 281 * @param operations the operations used for the symbolic regression 282 * @param terminals the terminal operations of the program tree 283 * @param depth the maximal tree depth (height) of newly created program 284 * trees 285 * @return a new codec, usable for symbolic regression 286 * @throws IllegalArgumentException if the tree {@code depth} is not in the 287 * valid range of {@code [0, 30)} 288 * @throws NullPointerException if the {@code operations} or {@code terminals} 289 * are {@code null} 290 */ 291 public static <T> Codec<Tree<Op<T>, ?>, ProgramGene<T>> 292 codecOf( 293 final ISeq<Op<T>> operations, 294 final ISeq<Op<T>> terminals, 295 final int depth 296 ) { 297 // Average arity of tree nodes. 298 final double k = operations.stream() 299 .collect(Collectors.averagingDouble(Op::arity)); 300 301 // The average node count between treeDepth and treeDepth + 1. 302 // 2^(k + 1) - 1 + 2^(k + 2) - 1)/2 == 3*2^k - 1 303 final int max = (int)(3*pow(k, depth) - 1); 304 305 return codecOf( 306 operations, 307 terminals, 308 depth, 309 ch -> ch.root().size() <= max 310 ); 311 } 312 313}