001/*
002 * Java Genetic Algorithm Library (jenetics-7.2.0).
003 * Copyright (c) 2007-2023 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020package io.jenetics.ext.grammar;
021
022import java.util.function.Function;
023
024import io.jenetics.BitChromosome;
025import io.jenetics.BitGene;
026import io.jenetics.Genotype;
027import io.jenetics.IntegerChromosome;
028import io.jenetics.IntegerGene;
029import io.jenetics.engine.Codec;
030import io.jenetics.util.IntRange;
031
032import io.jenetics.ext.grammar.Cfg.Rule;
033
034/**
035 * This class defines factories for different CFG ↔ Chromosome mappings
036 * (encodings). The classical mapping codec, with a bit-chromosome can be created
037 * in the following way.
038 * <pre>{@code
039 * final Cfg<String> cfg = ...;
040 * final Codec<List<Terminal<String>>, BitGene> codec = singleBitChromosomeMapper(
041 *     cfg,
042 *     1000,
043 *     index -> new SentenceGenerator<>(index, 1000)
044 * );
045 * }</pre>
046 * This codec creates a mapping for the given grammar {@code cfg} and uses
047 * bit-chromosomes with length {@code 1000}. The result of the mapping will be a
048 * list of terminal symbols which has been created by the given
049 * {@link SentenceGenerator}. The sentence generator creates sentences with a
050 * maximal length of {@code 1000}. If no sentence could be created within this
051 * limit, an empty list of terminal symbols is returned.
052 *
053 * @see <a href="https://www.brinckerhoff.org/tmp/grammatica_evolution_ieee_tec_2001.pdf">
054 *       Grammatical Evolution</a>
055 *
056 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
057 * @since 7.1
058 * @version 7.1
059 */
060public final class Mappers {
061        private Mappers() {
062        }
063
064        /**
065         * Return a classic mapping codec. It uses a bit-chromosome for creating the
066         * grammar results. The codons are created by dividing the chromosome in
067         * 8-bit junks, as described in <a href="https://www.brinckerhoff.org/tmp/grammatica_evolution_ieee_tec_2001.pdf">
068         * Grammatical Evolution</a> by Michael O’Neill and Conor Ryan.
069         *
070         * <pre>{@code
071         * final Cfg<String> cfg = ...;
072         * final Codec<List<Terminal<String>>, BitGene> codec = singleBitChromosomeMapper(
073         *     cfg,
074         *     1000,
075         *     index -> new SentenceGenerator<>(index, 1000)
076         * );
077         * }</pre>
078         *
079         * @see #singleIntegerChromosomeMapper(Cfg, IntRange, IntRange, Function)
080         *
081         * @param cfg the encoding grammar
082         * @param length the length of the bit-chromosome
083         * @param generator sentence generator function from a given
084         *        {@link SymbolIndex}
085         * @param <T> the terminal token type of the grammar
086         * @param <R> the result type of the mapper
087         * @return a new mapping codec for the given {@code cfg}
088         */
089        public static <T, R> Codec<R, BitGene>
090        singleBitChromosomeMapper(
091                final Cfg<? extends T> cfg,
092                final int length,
093                final Function<? super SymbolIndex, ? extends Generator<T, R>> generator
094        ) {
095                return Codec.of(
096                        Genotype.of(BitChromosome.of(length)),
097                        gt -> generator
098                                .apply(Codons.ofBitGenes(gt.chromosome()))
099                                .generate(cfg)
100                );
101        }
102
103        /**
104         * Create a mapping codec, similar as in {@link #singleBitChromosomeMapper(Cfg, int, Function)}.
105         * The only difference is that the codons are encoded directly, via an
106         * integer-chromosome, so that no gene split is necessary.
107         *
108         * <pre>{@code
109         * final Cfg<String> cfg = ...;
110         * final Codec<List<Terminal<String>>, IntegerGene> codec = singleIntegerChromosomeMapper(
111         *     cfg,
112         *     IntRange.of(0, 256), // Value range of chromosomes.
113         *     IntRange.of(100),   // Length (range) ot the chromosome.
114         *     index -> new SentenceGenerator<>(index, 1000)
115         * );
116         * }</pre>
117         *
118         * @param cfg the encoding grammar
119         * @param range the value range of the integer genes
120         * @param length the length range of the integer-chromosome
121         * @param generator sentence generator function from a given
122         *        {@link SymbolIndex}
123         * @param <T> the terminal token type of the grammar
124         * @param <R> the result type of the mapper
125         * @return a new mapping codec for the given {@code cfg}
126         */
127        public static <T, R> Codec<R, IntegerGene>
128        singleIntegerChromosomeMapper(
129                final Cfg<? extends T> cfg,
130                final IntRange range,
131                final IntRange length,
132                final Function<? super SymbolIndex, ? extends Generator<T, R>> generator
133        ) {
134                return Codec.of(
135                        Genotype.of(IntegerChromosome.of(range, length)),
136                        gt -> generator
137                                .apply(Codons.ofIntegerGenes(gt.chromosome()))
138                                .generate(cfg)
139                );
140        }
141
142        /**
143         * Create a mapping codec, similar as in {@link #singleBitChromosomeMapper(Cfg, int, Function)}.
144         * The only difference is that the codons are encoded directly, via an
145         * integer-chromosome, so that no gene split is necessary.
146         *
147         * <pre>{@code
148         * final Cfg<String> cfg = ...;
149         * final Codec<List<Terminal<String>>, IntegerGene> codec = singleIntegerChromosomeMapper(
150         *     cfg,
151         *     IntRange.of(0, 256), // Value range of chromosomes.
152         *     100,                 // Length (range) ot the chromosome.
153         *     index -> new SentenceGenerator<>(index, 1000)
154         * );
155         * }</pre>
156         *
157         * @param cfg the encoding grammar
158         * @param range the value range of the integer genes
159         * @param length the length range of the integer-chromosome
160         * @param generator sentence generator function from a given
161         *        {@link SymbolIndex}
162         * @param <T> the terminal token type of the grammar
163         * @param <R> the result type of the mapper
164         * @return a new mapping codec for the given {@code cfg}
165         */
166        public static <T, R> Codec<R, IntegerGene>
167        singleIntegerChromosomeMapper(
168                final Cfg<? extends T> cfg,
169                final IntRange range,
170                final int length,
171                final Function<? super SymbolIndex, ? extends Generator<T, R>> generator
172        ) {
173                return singleIntegerChromosomeMapper(
174                        cfg, range, IntRange.of(length), generator
175                );
176        }
177
178        /**
179         * Codec for creating <em>results</em> from a given grammar. The creation of
180         * the grammar result is controlled by a given genotype. This encoding uses
181         * separate <em>codons</em>, backed up by a {@link IntegerChromosome}, for
182         * every rule. The length of the chromosome is defined as a function of the
183         * encoded rules. This means that the following CFG,
184         *
185         * <pre>{@code
186         *                       (0)            (1)
187         * (0) <expr> ::= (<expr><op><expr>) | <var>
188         *               (0) (1) (2) (3)
189         * (1) <op>   ::= + | - | * | /
190         *               (0) (1) (2) (3) (4)
191         * (2) <var>  ::= x | 1 | 2 | 3 | 4
192         * }</pre>
193         *
194         * will be represented by the following {@link Genotype}
195         * <pre>{@code
196         * Genotype.of(
197         *     IntegerChromosome.of(IntRange.of(0, 2), length.apply(cfg.rules().get(0))),
198         *     IntegerChromosome.of(IntRange.of(0, 4), length.apply(cfg.rules().get(1))),
199         *     IntegerChromosome.of(IntRange.of(0, 5), length.apply(cfg.rules().get(2)))
200         * )
201         * }</pre>
202         *
203         * The {@code length} function lets you defining the number of codons as
204         * function of the rule the chromosome is encoding.
205         *
206         * <pre>{@code
207         * final Cfg<String> cfg = Bnf.parse(...);
208         * final Codec<List<Terminal<String>>, IntegerGene> codec = multiIntegerChromosomeMapper(
209         *     cfg,
210         *     // The chromosome length is 25 times the
211         *     // number of rule alternatives.
212         *     rule -> IntRange.of(rule.alternatives().size()*25),
213         *     // Using the standard sentence generator
214         *     // with a maximal sentence length of 500.
215         *     index -> new SentenceGenerator<>(index, 500)
216         * );
217         * }</pre>
218         *
219         * @param cfg the encoding grammar
220         * @param length the length of the chromosome which is used for selecting
221         *        rules and symbols. The input parameter for this function is the
222         *        actual rule. This way it is possible to define the chromosome
223         *        length dependent on the selectable alternatives.
224         * @param generator sentence generator function from a given
225         *        {@link SymbolIndex}
226         * @param <T> the terminal token type of the grammar
227         * @param <R> the result type of the mapper
228         * @return a new mapping codec for the given {@code cfg}
229         */
230        public static <T, R> Codec<R, IntegerGene>
231        multiIntegerChromosomeMapper(
232                final Cfg<? extends T> cfg,
233                final Function<? super Rule<?>, IntRange> length,
234                final Function<? super SymbolIndex, ? extends Generator<T, R>> generator
235        ) {
236                return new MultiIntegerChromosomeMapper<>(cfg, length, generator);
237        }
238
239
240}