001/*
002 * Java Genetic Algorithm Library (jenetics-7.2.0).
003 * Copyright (c) 2007-2023 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020package io.jenetics.ext.grammar;
021
022import static java.util.Objects.requireNonNull;
023import static java.util.stream.Collectors.joining;
024import static io.jenetics.ext.grammar.SentenceGenerator.Expansion.LEFT_MOST;
025
026import java.util.ArrayList;
027import java.util.List;
028import java.util.ListIterator;
029
030import io.jenetics.ext.grammar.Cfg.NonTerminal;
031import io.jenetics.ext.grammar.Cfg.Symbol;
032import io.jenetics.ext.grammar.Cfg.Terminal;
033
034/**
035 * Standard implementation of a sentence generator. The generator can generate
036 * sentences by expanding the grammar in a {@link Expansion#LEFT_MOST} or
037 * {@link Expansion#LEFT_TO_RIGHT} order.
038 * <p>
039 * The following code snippet shows how to create a random sentence from a
040 * given grammar:
041 * <pre>{@code
042 * final Cfg<String> cfg = Bnf.parse("""
043 *     <expr> ::= ( <expr> <op> <expr> ) | <num> | <var> |  <fun> ( <arg>, <arg> )
044 *     <fun>  ::= FUN1 | FUN2
045 *     <arg>  ::= <expr> | <var> | <num>
046 *     <op>   ::= + | - | * | /
047 *     <var>  ::= x | y
048 *     <num>  ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
049 *     """
050 * );
051 *
052 * final var random = RandomGenerator.of("L64X256MixRandom");
053 * final var generator = new SentenceGenerator<String>(
054 *     SymbolIndex.of(random),
055 *     1_000
056 * );
057 * final List<Terminal<String>> sentence = generator.generate(cfg);
058 * final String string = sentence.stream()
059 *     .map(Symbol::name)
060 *     .collect(Collectors.joining());
061 *
062 * System.out.println(string);
063 * }</pre>
064 * <em>Some sample output:</em>
065 * <pre>{@code
066 * > ((x-FUN1(5,5))+8)
067 * > (FUN2(y,5)-FUN2(0,x))
068 * > x
069 * > FUN2(x,x)
070 * > 5
071 * > FUN2(y,FUN2((FUN1(5,FUN1(y,2))*9),y))
072 * > ((FUN1(x,5)*9)*(x/(y*FUN2(x,y))))
073 * > (9-(y*(x+x)))
074 * > }</pre>
075 *
076 * @see DerivationTreeGenerator
077 *
078 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
079 * @since 7.1
080 * @version 7.1
081 */
082public final class SentenceGenerator<T>
083        implements Generator<T, List<Terminal<T>>>
084{
085
086        /**
087         * Defines the expansion strategy used when generating the sentences.
088         */
089        public enum Expansion {
090
091                /**
092                 * The symbol replacement always starting from the leftmost nonterminal
093                 * as described in
094                 * <a href="https://www.brinckerhoff.org/tmp/grammatica_evolution_ieee_tec_2001.pdf">Grammatical Evolution</a>.
095                 */
096                LEFT_MOST,
097
098                /**
099                 * The symbol replacement is performed from left to right and is repeated
100                 * until all non-terminal symbols have been expanded.
101                 */
102                LEFT_TO_RIGHT;
103        }
104
105        private final SymbolIndex _index;
106        private final Expansion _expansion;
107        private final int _limit;
108
109        /**
110         * Create a new sentence generator from the given parameters.
111         *
112         * @param index the symbol index function used for generating the sentences
113         * @param expansion the sentence generation strategy to use for generating
114         *        the sentences
115         * @param limit the maximal allowed sentence length. If the generated
116         *        sentence exceeds this length, the generation is interrupted and
117         *        an empty sentence (empty list) is returned.
118         */
119        public SentenceGenerator(
120                final SymbolIndex index,
121                final Expansion expansion,
122                final int limit
123        ) {
124                _index = requireNonNull(index);
125                _expansion = requireNonNull(expansion);
126                _limit = limit;
127        }
128
129        /**
130         * Create a new sentence generator from the given parameters.
131         *
132         * @param index the symbol index function used for generating the sentences
133         * @param limit the maximal allowed sentence length. If the generated
134         *        sentence exceeds this length, the generation is interrupted and
135         *        an empty sentence (empty list) is returned.
136         */
137        public SentenceGenerator(
138                final SymbolIndex index,
139                final int limit
140        ) {
141                this(index, LEFT_MOST, limit);
142        }
143
144        /**
145         * Generates a new sentence from the given grammar, <em>cfg</em>.
146         *
147         * @param cfg the generating grammar
148         * @return a newly created terminal list (sentence), or an empty list if
149         *         the length of the sentence exceeds the defined sentence limit
150         */
151        @Override
152        public List<Terminal<T>> generate(final Cfg<? extends T> cfg) {
153                final var sentence = new ArrayList<Symbol<T>>();
154                generate(Cfg.upcast(cfg), sentence);
155
156                // The 'generate' step guarantees that the list only
157                // contains terminal symbols. So this cast is safe.
158                @SuppressWarnings("unchecked")
159                final var result = (List<Terminal<T>>)(Object)sentence;
160                return List.copyOf(result);
161        }
162
163        void generate(final Cfg<T> cfg, final List<Symbol<T>> symbols) {
164                symbols.add(cfg.start());
165
166                boolean proceed;
167                do {
168                        proceed = false;
169
170
171                        final ListIterator<Symbol<T>> sit = symbols.listIterator();
172                        while (sit.hasNext() &&
173                                (_expansion == Expansion.LEFT_TO_RIGHT || !proceed))
174                        {
175                                if (sit.next() instanceof NonTerminal<T> nt) {
176                                        sit.remove();
177                                        Generator.select(nt, cfg, _index).forEach(sit::add);
178                                        proceed = true;
179                                }
180                        }
181
182                        if (symbols.size() > _limit) {
183                                symbols.clear();
184                                proceed = false;
185                        }
186                } while (proceed);
187        }
188
189        /**
190         * Converts a list of symbols to a string, by concatenating the names of
191         * the given symbols.
192         *
193         * @param sentence the symbol list to covert
194         * @return the converted sentences
195         */
196        public static String toString(final List<? extends Symbol<?>> sentence) {
197                return sentence.stream().map(Symbol::name).collect(joining());
198        }
199
200}