001/*
002 * Java Genetic Algorithm Library (jenetics-8.0.0).
003 * Copyright (c) 2007-2024 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020package io.jenetics.ext.grammar;
021
022import static java.util.Objects.requireNonNull;
023import static java.util.stream.Collectors.joining;
024import static io.jenetics.ext.grammar.SentenceGenerator.Expansion.LEFT_MOST;
025
026import java.util.ArrayList;
027import java.util.List;
028import java.util.ListIterator;
029
030import io.jenetics.ext.grammar.Cfg.NonTerminal;
031import io.jenetics.ext.grammar.Cfg.Symbol;
032import io.jenetics.ext.grammar.Cfg.Terminal;
033
034/**
035 * Standard implementation of a sentence generator. The generator can generate
036 * sentences by expanding the grammar in a {@link Expansion#LEFT_MOST} or
037 * {@link Expansion#LEFT_TO_RIGHT} order.
038 * <p>
039 * The following code snippet shows how to create a random sentence from a
040 * given grammar:
041 * {@snippet lang="java":
042 * final Cfg<String> cfg = Bnf.parse("""
043 *     <expr> ::= ( <expr> <op> <expr> ) | <num> | <var> |  <fun> ( <arg>, <arg> )
044 *     <fun>  ::= FUN1 | FUN2
045 *     <arg>  ::= <expr> | <var> | <num>
046 *     <op>   ::= + | - | * | /
047 *     <var>  ::= x | y
048 *     <num>  ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
049 *     """
050 * );
051 *
052 * final var random = RandomGenerator.of("L64X256MixRandom");
053 * final var generator = new SentenceGenerator<String>(
054 *     SymbolIndex.of(random),
055 *     1_000
056 * );
057 * final List<Terminal<String>> sentence = generator.generate(cfg);
058 * final String string = sentence.stream()
059 *     .map(Symbol::name)
060 *     .collect(Collectors.joining());
061 *
062 * System.out.println(string);
063 * }
064 * <em>Some sample output:</em>
065 * <pre>{@code
066 * > ((x-FUN1(5,5))+8)
067 * > (FUN2(y,5)-FUN2(0,x))
068 * > x
069 * > FUN2(x,x)
070 * > 5
071 * > FUN2(y,FUN2((FUN1(5,FUN1(y,2))*9),y))
072 * > ((FUN1(x,5)*9)*(x/(y*FUN2(x,y))))
073 * > (9-(y*(x+x)))
074 * >
075 * }</pre>
076 *
077 * @see DerivationTreeGenerator
078 *
079 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
080 * @since 7.1
081 * @version 7.1
082 */
083public final class SentenceGenerator<T>
084        implements Generator<T, List<Terminal<T>>>
085{
086
087        /**
088         * Defines the expansion strategy used when generating the sentences.
089         */
090        public enum Expansion {
091
092                /**
093                 * The symbol replacement always starting from the leftmost nonterminal
094                 * as described in
095                 * <a href="https://www.brinckerhoff.org/tmp/grammatica_evolution_ieee_tec_2001.pdf">Grammatical Evolution</a>.
096                 */
097                LEFT_MOST,
098
099                /**
100                 * The symbol replacement is performed from left to right and is repeated
101                 * until all non-terminal symbols have been expanded.
102                 */
103                LEFT_TO_RIGHT;
104        }
105
106        private final SymbolIndex _index;
107        private final Expansion _expansion;
108        private final int _limit;
109
110        /**
111         * Create a new sentence generator from the given parameters.
112         *
113         * @param index the symbol index function used for generating the sentences
114         * @param expansion the sentence generation strategy to use for generating
115         *        the sentences
116         * @param limit the maximal allowed sentence length. If the generated
117         *        sentence exceeds this length, the generation is interrupted and
118         *        an empty sentence (empty list) is returned.
119         */
120        public SentenceGenerator(
121                final SymbolIndex index,
122                final Expansion expansion,
123                final int limit
124        ) {
125                _index = requireNonNull(index);
126                _expansion = requireNonNull(expansion);
127                _limit = limit;
128        }
129
130        /**
131         * Create a new sentence generator from the given parameters.
132         *
133         * @param index the symbol index function used for generating the sentences
134         * @param limit the maximal allowed sentence length. If the generated
135         *        sentence exceeds this length, the generation is interrupted and
136         *        an empty sentence (empty list) is returned.
137         */
138        public SentenceGenerator(
139                final SymbolIndex index,
140                final int limit
141        ) {
142                this(index, LEFT_MOST, limit);
143        }
144
145        /**
146         * Generates a new sentence from the given grammar, <em>cfg</em>.
147         *
148         * @param cfg the generating grammar
149         * @return a newly created terminal list (sentence), or an empty list if
150         *         the length of the sentence exceeds the defined sentence limit
151         */
152        @Override
153        public List<Terminal<T>> generate(final Cfg<? extends T> cfg) {
154                final var sentence = new ArrayList<Symbol<T>>();
155                generate(Cfg.upcast(cfg), sentence);
156
157                // The 'generate' step guarantees that the list only
158                // contains terminal symbols. So this cast is safe.
159                @SuppressWarnings("unchecked")
160                final var result = (List<Terminal<T>>)(Object)sentence;
161                return List.copyOf(result);
162        }
163
164        void generate(final Cfg<T> cfg, final List<Symbol<T>> symbols) {
165                symbols.add(cfg.start());
166
167                boolean proceed;
168                do {
169                        proceed = false;
170
171
172                        final ListIterator<Symbol<T>> sit = symbols.listIterator();
173                        while (sit.hasNext() &&
174                                (_expansion == Expansion.LEFT_TO_RIGHT || !proceed))
175                        {
176                                if (sit.next() instanceof NonTerminal<T> nt) {
177                                        sit.remove();
178                                        Generator.select(nt, cfg, _index).forEach(sit::add);
179                                        proceed = true;
180                                }
181                        }
182
183                        if (symbols.size() > _limit) {
184                                symbols.clear();
185                                proceed = false;
186                        }
187                } while (proceed);
188        }
189
190        /**
191         * Converts a list of symbols to a string, by concatenating the names of
192         * the given symbols.
193         *
194         * @param sentence the symbol list to covert
195         * @return the converted sentences
196         */
197        public static String toString(final List<? extends Symbol<?>> sentence) {
198                return sentence.stream().map(Symbol::name).collect(joining());
199        }
200
201}