001/* 002 * Java Genetic Algorithm Library (jenetics-7.2.0). 003 * Copyright (c) 2007-2023 Franz Wilhelmstötter 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 * Author: 018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com) 019 */ 020package io.jenetics.ext.grammar; 021 022import java.util.function.Function; 023 024import io.jenetics.BitChromosome; 025import io.jenetics.BitGene; 026import io.jenetics.Genotype; 027import io.jenetics.IntegerChromosome; 028import io.jenetics.IntegerGene; 029import io.jenetics.engine.Codec; 030import io.jenetics.util.IntRange; 031 032import io.jenetics.ext.grammar.Cfg.Rule; 033 034/** 035 * This class defines factories for different CFG ↔ Chromosome mappings 036 * (encodings). The classical mapping codec, with a bit-chromosome can be created 037 * in the following way. 038 * <pre>{@code 039 * final Cfg<String> cfg = ...; 040 * final Codec<List<Terminal<String>>, BitGene> codec = singleBitChromosomeMapper( 041 * cfg, 042 * 1000, 043 * index -> new SentenceGenerator<>(index, 1000) 044 * ); 045 * }</pre> 046 * This codec creates a mapping for the given grammar {@code cfg} and uses 047 * bit-chromosomes with length {@code 1000}. The result of the mapping will be a 048 * list of terminal symbols which has been created by the given 049 * {@link SentenceGenerator}. The sentence generator creates sentences with a 050 * maximal length of {@code 1000}. If no sentence could be created within this 051 * limit, an empty list of terminal symbols is returned. 052 * 053 * @see <a href="https://www.brinckerhoff.org/tmp/grammatica_evolution_ieee_tec_2001.pdf"> 054 * Grammatical Evolution</a> 055 * 056 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a> 057 * @since 7.1 058 * @version 7.1 059 */ 060public final class Mappers { 061 private Mappers() { 062 } 063 064 /** 065 * Return a classic mapping codec. It uses a bit-chromosome for creating the 066 * grammar results. The codons are created by dividing the chromosome in 067 * 8-bit junks, as described in <a href="https://www.brinckerhoff.org/tmp/grammatica_evolution_ieee_tec_2001.pdf"> 068 * Grammatical Evolution</a> by Michael O’Neill and Conor Ryan. 069 * 070 * <pre>{@code 071 * final Cfg<String> cfg = ...; 072 * final Codec<List<Terminal<String>>, BitGene> codec = singleBitChromosomeMapper( 073 * cfg, 074 * 1000, 075 * index -> new SentenceGenerator<>(index, 1000) 076 * ); 077 * }</pre> 078 * 079 * @see #singleIntegerChromosomeMapper(Cfg, IntRange, IntRange, Function) 080 * 081 * @param cfg the encoding grammar 082 * @param length the length of the bit-chromosome 083 * @param generator sentence generator function from a given 084 * {@link SymbolIndex} 085 * @param <T> the terminal token type of the grammar 086 * @param <R> the result type of the mapper 087 * @return a new mapping codec for the given {@code cfg} 088 */ 089 public static <T, R> Codec<R, BitGene> 090 singleBitChromosomeMapper( 091 final Cfg<? extends T> cfg, 092 final int length, 093 final Function<? super SymbolIndex, ? extends Generator<T, R>> generator 094 ) { 095 return Codec.of( 096 Genotype.of(BitChromosome.of(length)), 097 gt -> generator 098 .apply(Codons.ofBitGenes(gt.chromosome())) 099 .generate(cfg) 100 ); 101 } 102 103 /** 104 * Create a mapping codec, similar as in {@link #singleBitChromosomeMapper(Cfg, int, Function)}. 105 * The only difference is that the codons are encoded directly, via an 106 * integer-chromosome, so that no gene split is necessary. 107 * 108 * <pre>{@code 109 * final Cfg<String> cfg = ...; 110 * final Codec<List<Terminal<String>>, IntegerGene> codec = singleIntegerChromosomeMapper( 111 * cfg, 112 * IntRange.of(0, 256), // Value range of chromosomes. 113 * IntRange.of(100), // Length (range) ot the chromosome. 114 * index -> new SentenceGenerator<>(index, 1000) 115 * ); 116 * }</pre> 117 * 118 * @param cfg the encoding grammar 119 * @param range the value range of the integer genes 120 * @param length the length range of the integer-chromosome 121 * @param generator sentence generator function from a given 122 * {@link SymbolIndex} 123 * @param <T> the terminal token type of the grammar 124 * @param <R> the result type of the mapper 125 * @return a new mapping codec for the given {@code cfg} 126 */ 127 public static <T, R> Codec<R, IntegerGene> 128 singleIntegerChromosomeMapper( 129 final Cfg<? extends T> cfg, 130 final IntRange range, 131 final IntRange length, 132 final Function<? super SymbolIndex, ? extends Generator<T, R>> generator 133 ) { 134 return Codec.of( 135 Genotype.of(IntegerChromosome.of(range, length)), 136 gt -> generator 137 .apply(Codons.ofIntegerGenes(gt.chromosome())) 138 .generate(cfg) 139 ); 140 } 141 142 /** 143 * Create a mapping codec, similar as in {@link #singleBitChromosomeMapper(Cfg, int, Function)}. 144 * The only difference is that the codons are encoded directly, via an 145 * integer-chromosome, so that no gene split is necessary. 146 * 147 * <pre>{@code 148 * final Cfg<String> cfg = ...; 149 * final Codec<List<Terminal<String>>, IntegerGene> codec = singleIntegerChromosomeMapper( 150 * cfg, 151 * IntRange.of(0, 256), // Value range of chromosomes. 152 * 100, // Length (range) ot the chromosome. 153 * index -> new SentenceGenerator<>(index, 1000) 154 * ); 155 * }</pre> 156 * 157 * @param cfg the encoding grammar 158 * @param range the value range of the integer genes 159 * @param length the length range of the integer-chromosome 160 * @param generator sentence generator function from a given 161 * {@link SymbolIndex} 162 * @param <T> the terminal token type of the grammar 163 * @param <R> the result type of the mapper 164 * @return a new mapping codec for the given {@code cfg} 165 */ 166 public static <T, R> Codec<R, IntegerGene> 167 singleIntegerChromosomeMapper( 168 final Cfg<? extends T> cfg, 169 final IntRange range, 170 final int length, 171 final Function<? super SymbolIndex, ? extends Generator<T, R>> generator 172 ) { 173 return singleIntegerChromosomeMapper( 174 cfg, range, IntRange.of(length), generator 175 ); 176 } 177 178 /** 179 * Codec for creating <em>results</em> from a given grammar. The creation of 180 * the grammar result is controlled by a given genotype. This encoding uses 181 * separate <em>codons</em>, backed up by a {@link IntegerChromosome}, for 182 * every rule. The length of the chromosome is defined as a function of the 183 * encoded rules. This means that the following CFG, 184 * 185 * <pre>{@code 186 * (0) (1) 187 * (0) <expr> ::= (<expr><op><expr>) | <var> 188 * (0) (1) (2) (3) 189 * (1) <op> ::= + | - | * | / 190 * (0) (1) (2) (3) (4) 191 * (2) <var> ::= x | 1 | 2 | 3 | 4 192 * }</pre> 193 * 194 * will be represented by the following {@link Genotype} 195 * <pre>{@code 196 * Genotype.of( 197 * IntegerChromosome.of(IntRange.of(0, 2), length.apply(cfg.rules().get(0))), 198 * IntegerChromosome.of(IntRange.of(0, 4), length.apply(cfg.rules().get(1))), 199 * IntegerChromosome.of(IntRange.of(0, 5), length.apply(cfg.rules().get(2))) 200 * ) 201 * }</pre> 202 * 203 * The {@code length} function lets you defining the number of codons as 204 * function of the rule the chromosome is encoding. 205 * 206 * <pre>{@code 207 * final Cfg<String> cfg = Bnf.parse(...); 208 * final Codec<List<Terminal<String>>, IntegerGene> codec = multiIntegerChromosomeMapper( 209 * cfg, 210 * // The chromosome length is 25 times the 211 * // number of rule alternatives. 212 * rule -> IntRange.of(rule.alternatives().size()*25), 213 * // Using the standard sentence generator 214 * // with a maximal sentence length of 500. 215 * index -> new SentenceGenerator<>(index, 500) 216 * ); 217 * }</pre> 218 * 219 * @param cfg the encoding grammar 220 * @param length the length of the chromosome which is used for selecting 221 * rules and symbols. The input parameter for this function is the 222 * actual rule. This way it is possible to define the chromosome 223 * length dependent on the selectable alternatives. 224 * @param generator sentence generator function from a given 225 * {@link SymbolIndex} 226 * @param <T> the terminal token type of the grammar 227 * @param <R> the result type of the mapper 228 * @return a new mapping codec for the given {@code cfg} 229 */ 230 public static <T, R> Codec<R, IntegerGene> 231 multiIntegerChromosomeMapper( 232 final Cfg<? extends T> cfg, 233 final Function<? super Rule<?>, IntRange> length, 234 final Function<? super SymbolIndex, ? extends Generator<T, R>> generator 235 ) { 236 return new MultiIntegerChromosomeMapper<>(cfg, length, generator); 237 } 238 239 240}