001/*
002 * Java Genetic Algorithm Library (jenetics-8.2.0).
003 * Copyright (c) 2007-2025 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020package io.jenetics.ext.grammar;
021
022import static java.lang.String.format;
023import static java.util.Objects.requireNonNull;
024
025import java.util.ArrayList;
026import java.util.Collection;
027import java.util.HashMap;
028import java.util.HashSet;
029import java.util.List;
030import java.util.Optional;
031import java.util.Set;
032import java.util.function.Consumer;
033import java.util.function.Function;
034import java.util.stream.Collectors;
035import java.util.stream.Stream;
036
037/**
038 * Represents a <em>context-free</em> grammar
039 * (<a href="https://en.wikipedia.org/wiki/Context-free_grammar"><b>CFG</b></a>).
040 * <p>
041 * <b>Formal definition</b>
042 * <p>
043 * A context-free grammar {@code G} is defined by the 4-tuple
044 * {@code G = (N, T, R, S)}, where
045 * <ul>
046 *     <li>{@code N} is a finite set; each element {@code n ∈ N} is called a
047 *     non-terminal ({@link NonTerminal}) character or a variable. Each
048 *     variable represents a different type of phrase or clause in the sentence.
049 *     Variables are also sometimes called syntactic categories. Each variable
050 *     defines a sub-language of the language defined by {@code G}.
051 *     </li>
052 *     <li>{@code T} is a finite set of terminals ({@link Terminal}) disjoint
053 *     from {@code N}, which make up the actual content of the sentence. The set
054 *     of terminals is the alphabet of the language defined by the grammar
055 *     {@code G}.
056 *     </li>
057 *     <li>{@code R} is a finite relation in {@code N × (N ∪ T)∗}, where the
058 *     asterisk represents the <a href="https://en.wikipedia.org/wiki/Kleene_star">
059 *     Kleene star</a> operation. The members of {@code R} are called the
060 *     (rewrite) rules ({@link Rule}) or productions of the grammar.
061 *     </li>
062 *     <li>{@code S} is the start variable (or start symbol), used to represent
063 *     the whole sentence (or program). It must be an element of {@code N}
064 *     ({@link NonTerminal}).
065 *     </li>
066 * </ul>
067 *
068 * <b>Creating <em>Cfg</em> object from a given BNF grammar</b>
069 * {@snippet class="Snippets" region="parseBnf"}
070 * <p>
071 * <b>Creating <em>Cfg</em> programmatically</b>
072 * {@snippet class="Snippets" region="cfgWithoutBuilder"}
073 * <p>
074 * <b>Creating <em>Cfg</em> programmatically with builders</b>
075 * <p>
076 * Using the CFG builder makes it easier to define annotation for symbols without
077 * pushing the Java generics to its edges.
078 * {@snippet class="Snippets" region="cfgWithBuilder"}
079 * <p>
080 * Annotating CFG elements can be used to influence the {@link Generator} classes,
081 * which creates <em>sentences</em> from a given grammar.
082 *
083 * @see Bnf#parse(String)
084 *
085 * @param <T> the terminal symbol value type
086 *
087 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
088 * @since 7.1
089 * @version 8.2
090 */
091public final class Cfg<T> {
092
093        /* *************************************************************************
094         * Inner classes and interfaces.
095         * ************************************************************************/
096
097        /**
098         * Interface for annotatable CFG types.
099         *
100         * @since 8.2
101         * @version 8.2
102         *
103         * @param <T> the terminal symbol value type
104         */
105        public sealed interface Annotatable<T> {
106                /**
107                 * Return the element annotation, might be {@code null}.
108                 *
109                 * @return the annotation of the element, or {@code null} if the element
110                 *         has none
111                 */
112                Object annotation();
113
114                /**
115                 * Create a new copy of the CFG element, with the given
116                 * {@code annotation}.
117                 *
118                 * @param annotation the annotation of the newly created element,
119                 *        may be {@code null}
120                 * @return a copy of the element with the given {@code annotation}
121                 */
122                Annotatable<T> at(Object annotation);
123        }
124
125        /**
126         * Represents an element of a CFG
127         *
128         * @since 8.2
129         * @version 8.2
130         *
131         * @param <T> the terminal symbol value type
132         */
133        public sealed interface Element<T> extends Annotatable<T> {
134        }
135
136        /**
137         * Represents the <em>symbols</em> the BNF grammar consists.
138         *
139         * @param <T> the terminal symbol value type
140         */
141        public sealed interface Symbol<T> extends Element<T> {
142
143                /**
144                 * Return the name of the symbol.
145                 *
146                 * @return the name of the symbol
147                 */
148                String name();
149
150                @Override
151                Symbol<T> at(Object annotation);
152
153        }
154
155        /**
156         * Represents the non-terminal symbols of the grammar ({@code NT}).
157         *
158         * @param <T> the terminal symbol value type
159         */
160        public record NonTerminal<T>(String name, Object annotation)
161                implements Symbol<T>
162        {
163
164                /**
165                 * @param name the name of the non-terminal symbol
166                 * @throws IllegalArgumentException if the given {@code name} is not
167                 *         a valid <em>non-terminal</em> name
168                 * @throws NullPointerException if one of the arguments is {@code null}
169                 */
170                public NonTerminal {
171                        if (name.isEmpty()) {
172                                throw new IllegalArgumentException(
173                                        "Non-terminal value must not be empty."
174                                );
175                        }
176                }
177
178                public NonTerminal(final String name) {
179                        this(name, null);
180                }
181
182                @Override
183                public NonTerminal<T> at(final Object annotation) {
184                        return annotation == this.annotation
185                                ? this
186                                : new NonTerminal<>(name, annotation);
187                }
188
189        }
190
191        /**
192         * Represents a terminal symbols of the grammar ({@code T}).
193         *
194         * @param <T> the terminal symbol value type
195         */
196        public record Terminal<T>(String name, T value, Object annotation)
197                implements Symbol<T>
198        {
199
200                /**
201                 * @param name the name of the terminal symbol
202                 * @param value the value of the terminal symbol
203                 * @throws IllegalArgumentException if the given terminal {@code name}
204                 *         is empty
205                 */
206                public Terminal {
207                        if (name.isEmpty()) {
208                                throw new IllegalArgumentException(
209                                        "Terminal value must not be empty."
210                                );
211                        }
212                }
213
214                public Terminal(final String name, final T value) {
215                        this(name, value, null);
216                }
217
218                @Override
219                public Terminal<T> at(final Object annotation) {
220                        return annotation == this.annotation
221                                ? this
222                                : new Terminal<>(name, value, annotation);
223                }
224
225                /**
226                 * Return a new terminal symbol where the name of the symbol is equal
227                 * to its value.
228                 *
229                 * @param name the name (and value) of the terminal symbol
230                 * @return a new terminal symbol with the given {@code name}
231                 * @throws IllegalArgumentException if the given terminal {@code name}
232                 *         is empty
233                 */
234                public static Terminal<String> of(final String name) {
235                        return new Terminal<>(name, name);
236                }
237
238        }
239
240        /**
241         * Represents one <em>expression</em> (list of alternative symbols) a
242         * production rule consists of.
243         *
244         * @param <T> the terminal symbol value type
245         */
246        public record Expression<T>(List<Symbol<T>> symbols, Object annotation)
247                implements Element<T>
248        {
249
250                /**
251                 * @param symbols the expression symbols
252                 * @throws IllegalArgumentException if the list of {@code symbols} is
253                 *         empty
254                 */
255                public Expression {
256                        if (symbols.isEmpty()) {
257                                throw new IllegalArgumentException(
258                                        "The list of symbols must not be empty."
259                                );
260                        }
261                        symbols = List.copyOf(symbols);
262                }
263
264                public Expression(final List<Symbol<T>> symbols) {
265                        this(symbols, null);
266                }
267
268                @Override
269                public Expression<T> at(final Object annotation) {
270                        return annotation == this.annotation
271                                ? this
272                                : new Expression<>(symbols, annotation);
273                }
274
275                /**
276                 * Builder class for building rule expressions.
277                 *
278                 * @since 8.2
279                 *
280                 * @param <T> the terminal symbol value type
281                 */
282                public static final class Builder<T> {
283                        private final List<Symbol<T>> symbols = new ArrayList<>();
284                        private Object annotation;
285
286                        private Builder() {
287                        }
288
289                        /**
290                         * Add the given {@code symbol} to the expression.
291                         *
292                         * @param symbol the expression symbol
293                         * @return {@code this} builder for method chaining
294                         */
295                        public Builder<T> add(final Symbol<T> symbol) {
296                                symbols.add(requireNonNull(symbol));
297                                return this;
298                        }
299
300                        /**
301                         * Add a non-terminal symbol to the expression.
302                         *
303                         * @param name the symbol name
304                         * @param annotation the symbol annotation
305                         * @return {@code this} builder for method chaining
306                         */
307                        public Builder<T> N(final String name, final Object annotation) {
308                                return add(new NonTerminal<>(name, annotation));
309                        }
310
311                        /**
312                         * Add a non-terminal symbol to the expression.
313                         *
314                         * @param name the symbol name
315                         * @return {@code this} builder for method chaining
316                         */
317                        public Builder<T> N(final String name) {
318                                return N(name, null);
319                        }
320
321                        /**
322                         * Add a non-terminal symbol to the expression.
323                         *
324                         * @param name the symbol name
325                         * @param value the symbol value
326                         * @param annotation the symbol annotation
327                         * @return {@code this} builder for method chaining
328                         */
329                        public Builder<T>
330                        T(final String name, final T value, final Object annotation) {
331                                return add(new Terminal<>(name, value, annotation));
332                        }
333
334                        /**
335                         * Add a non-terminal symbol to the expression.
336                         *
337                         * @param name the symbol name
338                         * @param value the symbol value
339                         * @return {@code this} builder for method chaining
340                         */
341                        public Builder<T> T(final String name, final T value) {
342                                return add(new Terminal<>(name, value, null));
343                        }
344
345                        /**
346                         * Add a non-terminal symbol to the expression.
347                         *
348                         * @param name the symbol name
349                         * @throws ClassCastException if {@code T} is not a string
350                         * @return {@code this} builder for method chaining
351                         */
352                        public Builder<T> T(final String name) {
353                                @SuppressWarnings("unchecked")
354                                final T value = (T)name;
355                                return add(new Terminal<>(name, value, null));
356                        }
357
358                        /**
359                         * Set the expression annotation.
360                         *
361                         * @param annotation the expression annotation
362                         *  @return {@code this} builder for method chaining
363                         */
364                        public Builder<T> at(final Object annotation) {
365                                this.annotation = annotation;
366                                return this;
367                        }
368
369                        /**
370                         * Create a new rule object.
371                         *
372                         * @return a new rule object
373                         */
374                        public Expression<T> build() {
375                                return new Expression<>(symbols, annotation);
376                        }
377
378                }
379
380        }
381
382        /**
383         * Represents a production rule of the grammar ({@code R}).
384         *
385         * @param <T> the terminal symbol value type
386         */
387        public record Rule<T>(
388                NonTerminal<T> start,
389                List<Expression<T>> alternatives,
390                Object annotation
391        )
392                implements Annotatable<T>
393        {
394
395                /**
396                 * Creates a new rule object.
397                 *
398                 * @param start the start symbol of the rule
399                 * @param alternatives the list of alternative rule expressions
400                 * @throws IllegalArgumentException if the given list of
401                 *         {@code alternatives} is empty
402                 * @throws NullPointerException if one of the arguments is {@code null}
403                 */
404                public Rule {
405                        requireNonNull(start);
406                        if (alternatives.isEmpty()) {
407                                throw new IllegalArgumentException(
408                                        "Rule alternatives must not be empty."
409                                );
410                        }
411                        alternatives = List.copyOf(alternatives);
412                }
413
414                public Rule(NonTerminal<T> start, List<Expression<T>> alternatives) {
415                        this(start, alternatives, null);
416                }
417
418                @Override
419                public Rule<T> at(final Object annotation) {
420                        return annotation == this.annotation
421                                ? this
422                                : new Rule<>(start, alternatives, annotation);
423                }
424
425                /**
426                 * Builder class for building CFG rules.
427                 *
428                 * @since 8.2
429                 *
430                 * @param <T> the terminal symbol value type
431                 */
432                public static final class Builder<T> {
433                        private final NonTerminal<T> start;
434                        private final List<Expression<T>> expressions = new ArrayList<>();
435                        private Object annotation;
436
437                        private Builder(final NonTerminal<T> start) {
438                                this.start = requireNonNull(start);
439                        }
440
441                        /**
442                         * Builder method for creating a rule expression.
443                         *
444                         * @param builder the expression builder
445                         * @return {@code this} builder for method chaining
446                         */
447                        public Builder<T>
448                        E(final Consumer<? super Expression.Builder<T>> builder) {
449                                final var eb = new Expression.Builder<T>();
450                                builder.accept(eb);
451                                expressions.add(eb.build());
452
453                                return this;
454                        }
455
456                        /**
457                         * Builder method for creating an expression with the given
458                         * {@code symbols}.
459                         *
460                         * @param symbols the expression symbols
461                         * @throws IllegalArgumentException if the list of {@code symbols}
462                         *         is empty
463                         * @return {@code this} builder for method chaining
464                         */
465                        @SafeVarargs
466                        public final Builder<T> E(final Symbol<T>... symbols) {
467                                expressions.add(Cfg.E(symbols));
468                                return this;
469                        }
470
471                        /**
472                         * Add an expression which consists solely of the given non-terminal
473                         * symbol.
474                         *
475                         * @param name the symbol name
476                         * @param annotation the symbol annotation
477                         * @return {@code this} builder for method chaining
478                         */
479                        public Builder<T> N(final String name, final Object annotation) {
480                                return E(new NonTerminal<>(name, annotation));
481                        }
482
483                        /**
484                         * Add an expression which consists solely of the given non-terminal
485                         * symbol.
486                         *
487                         * @param name the symbol name
488                         * @return {@code this} builder for method chaining
489                         */
490                        public Builder<T> N(final String name) {
491                                return N(name, null);
492                        }
493
494                        /**
495                         * Add an expression which consists solely of the given terminal
496                         * symbol.
497                         *
498                         * @param name the symbol name
499                         * @param value the symbol value
500                         * @param annotation the symbol annotation
501                         * @return {@code this} builder for method chaining
502                         */
503                        public Builder<T> T(
504                                final String name,
505                                final T value,
506                                final Object annotation
507                        ) {
508                                return E(Cfg.T(name, value, annotation));
509                        }
510
511                        /**
512                         * Add an expression which consists solely of the given terminal
513                         * symbol.
514                         *
515                         * @param name the symbol name
516                         * @param value the symbol value
517                         * @return {@code this} builder for method chaining
518                         */
519                        public Builder<T> T(final String name, final T value) {
520                                return E(Cfg.T(name, value));
521                        }
522
523                        /**
524                         * Add an expression which consists solely of the given terminal
525                         * symbol.
526                         *
527                         * @param name the symbol name
528                         * @return {@code this} builder for method chaining
529                         */
530                        public Builder<T> T(final T name) {
531                                return E(Cfg.T(name.toString(), name));
532                        }
533
534                        /**
535                         * Sets the rule annotation.
536                         *
537                         * @param annotation the rule annotation
538                         * @return {@code this} builder for method chaining
539                         */
540                        public Builder<T> at(Object annotation) {
541                                this.annotation = annotation;
542                                return this;
543                        }
544
545                        /**
546                         * Create a new rule object.
547                         *
548                         * @return a new rule object
549                         */
550                        public Rule<T> build() {
551                                return new Rule<>(start, expressions, annotation);
552                        }
553
554                }
555
556        }
557
558        /* *************************************************************************
559         * CFG implementation.
560         * ************************************************************************/
561
562        private final List<NonTerminal<T>> nonTerminals;
563        private final List<Terminal<T>> terminals;
564        private final List<Rule<T>> rules;
565        private final NonTerminal<T> start;
566
567        /**
568         * Create a new <em>context-free</em> grammar object.
569         *
570         * @param nonTerminals the non-terminal symbols of {@code this} grammar
571         * @param terminals the terminal symbols of {@code this} grammar
572         * @param rules the <em>production</em> rules of {@code this} grammar
573         * @param start the start symbol of {@code this} grammar
574         * @throws NullPointerException if one of the arguments is {@code null}
575         * @throws IllegalArgumentException if a rule is defined more than once, the
576         *         start symbol points to a missing rule or the rules uses symbols
577         *         not defined in the list of {@link #nonTerminals()} or
578         *         {@link #terminals()}
579         * @deprecated This constructor will be removed, use {@link #of(Rule[])} or
580         *             {@link #of(List)} instead.
581         */
582        @Deprecated(forRemoval = true, since = "8.2")
583        public Cfg(
584                List<NonTerminal<T>> nonTerminals,
585                List<Terminal<T>> terminals,
586                List<Rule<T>> rules,
587                NonTerminal<T> start
588        ) {
589                if (rules.isEmpty()) {
590                        throw new IllegalArgumentException(
591                                "The given list of rules must not be empty."
592                        );
593                }
594
595                // Check the uniqueness of the rules.
596                final var duplicatedRules = rules.stream()
597                        .collect(Collectors.groupingBy(Rule::start))
598                        .values().stream()
599                        .filter(values -> values.size() > 1)
600                        .map(rule -> rule.getFirst().start.name)
601                        .toList();
602
603                if (!duplicatedRules.isEmpty()) {
604                        throw new IllegalArgumentException(
605                                "Found duplicate rule: %s.".formatted(duplicatedRules)
606                        );
607                }
608
609                // Check if start symbol points to an existing rule.
610                final var startRule = rules.stream()
611                        .filter(r -> start.equals(r.start))
612                        .findFirst();
613                if (startRule.isEmpty()) {
614                        throw new IllegalArgumentException(
615                                "No rule found for start symbol %s.".formatted(start)
616                        );
617                }
618
619                // Check that all symbols used in the given rules are also defined
620                // in the list of non-terminals and terminals.
621                final Set<Symbol<T>> required = rules.stream()
622                        .flatMap(Cfg::ruleSymbols)
623                        .map(s -> s.at(null))
624                        .collect(Collectors.toUnmodifiableSet());
625
626                final Set<Symbol<T>> available = Stream
627                        .concat(nonTerminals.stream(), terminals.stream())
628                        .map(s -> s.at(null))
629                        .collect(Collectors.toUnmodifiableSet());
630
631                final var missing = new HashSet<>(required);
632                missing.removeAll(available);
633
634                if (!missing.isEmpty()) {
635                        throw new IllegalArgumentException(
636                                "Unknown symbols defined in rules: " + missing
637                        );
638                }
639
640                // Check if the name of terminals and non-terminals are distinct.
641                final var terminalNames = terminals.stream()
642                        .map(Symbol::name)
643                        .collect(Collectors.toSet());
644
645                final var nonTerminalNames = nonTerminals.stream()
646                        .map(Symbol::name)
647                        .collect(Collectors.toSet());
648
649                terminalNames.retainAll(nonTerminalNames);
650                if (!terminalNames.isEmpty()) {
651                        throw new IllegalArgumentException(format(
652                                "Terminal and non-terminal symbols with same name: %s",
653                                terminalNames.stream().sorted().toList()
654                        ));
655                }
656
657                this.nonTerminals = List.copyOf(nonTerminals);
658                this.terminals = List.copyOf(terminals);
659                this.rules = List.copyOf(rules);
660                this.start = requireNonNull(start);
661        }
662
663        /**
664         * Return the non-terminal symbols of {@code this} grammar. The returned
665         * symbols have no annotation.
666         *
667         * @return the non-terminal symbols of {@code this} grammar
668         */
669        public List<NonTerminal<T>> nonTerminals() {
670                return nonTerminals;
671        }
672
673        /**
674         * Return the terminal symbols of {@code this} grammar. The returned
675         * symbols have no annotation.
676         *
677         * @return the terminal symbols of {@code this} grammar
678         */
679        public List<Terminal<T>> terminals() {
680                return terminals;
681        }
682
683        /**
684         * Return the rules of {@code this} grammar.
685         *
686         * @return the rules of {@code this} grammar
687         */
688        public List<Rule<T>> rules() {
689                return rules;
690        }
691
692        /**
693         * Return the start symbol of {@code this} grammar.
694         *
695         * @return the start symbol of {@code this} grammar
696         */
697        public NonTerminal<T> start() {
698                return start;
699        }
700
701        /**
702         * Return the rule for the given {@code start} symbol.
703         *
704         * @param start the start symbol of the rule
705         * @return the rule for the given {@code start} symbol
706         * @throws NullPointerException if the given {@code start} symbol is
707         *         {@code null}
708         */
709        public Optional<Rule<T>> rule(final NonTerminal<?> start) {
710                requireNonNull(start);
711                for (var rule : rules) {
712                        if (rule.start().name().equals(start.name())) {
713                                return Optional.of(rule);
714                        }
715                }
716                return Optional.empty();
717        }
718
719        /**
720         * Maps the values of the terminal symbols from type {@code T} to type
721         * {@code A}.
722         *
723         * @param mapper the mapper function
724         * @param <A> the new value type of the terminal symbols
725         * @return the mapped grammar
726         * @throws NullPointerException if the given mapper is {@code null}
727         */
728        public <A> Cfg<A>
729        map(final Function<? super Terminal<? extends T>, ? extends A> mapper) {
730                requireNonNull(mapper);
731
732                final var cache = new HashMap<Terminal<T>, Terminal<A>>();
733                final Function<Terminal<T>, Terminal<A>> mapping = t -> cache
734                        .computeIfAbsent(t, t2 -> new Terminal<>(t2.name(), mapper.apply(t2)));
735
736                @SuppressWarnings("unchecked")
737                final List<Rule<A>> rules = rules().stream()
738                        .map(rule -> new Rule<>(
739                                (NonTerminal<A>)rule.start(),
740                                rule.alternatives().stream()
741                                        .map(expr -> new Expression<>(
742                                                expr.symbols().stream()
743                                                        .map(sym ->
744                                                                sym instanceof Cfg.Terminal<T> t
745                                                                        ? mapping.apply(t) : (Symbol<A>)sym
746                                                        )
747                                                        .toList()
748                                                ))
749                                        .toList()
750                                ))
751                        .toList();
752
753                return Cfg.of(rules);
754        }
755
756        @Override
757        public int hashCode() {
758                return rules.hashCode();
759        }
760
761        @Override
762        public boolean equals(final Object obj) {
763                return obj instanceof Cfg<?> cfg && rules.equals(cfg.rules());
764        }
765
766        @Override
767        public String toString() {
768                return "Cfg[nonTerminals=%s, terminals=%s, rules=%s, start=%s]"
769                        .formatted(nonTerminals, terminals, rules, start);
770        }
771
772
773        /* *************************************************************************
774         * Factory methods
775         * ************************************************************************/
776
777        /**
778         * Create a grammar object with the given rules. Duplicated rules are merged
779         * into one rule. The <em>start</em> symbol of the first rule is chosen as
780         * the start symbol of the created CFG
781         *
782         * @param rules the rules the grammar consists of
783         * @throws IllegalArgumentException if the rule list is empty or a rule is
784         *         defined more than once, the start symbol points to a missing rule
785         *         or the rules uses symbols not defined in the list of
786         *         {@link #nonTerminals()} or {@link #terminals()}
787         * @throws NullPointerException if the list of rules is {@code null}
788         */
789        public static <T> Cfg<T> of(final List<? extends Rule<? extends T>> rules) {
790                @SuppressWarnings("unchecked")
791                final var rules0 = (List<Rule<T>>)List.copyOf(rules);
792
793                // Rules must not be empty.
794                if (rules0.isEmpty()) {
795                        throw new IllegalArgumentException(
796                                "The list of rules must not be empty."
797                        );
798                }
799
800                final List<Symbol<T>> symbols = rules0.stream()
801                        .flatMap(Cfg::ruleSymbols)
802                        .distinct()
803                        .toList();
804
805                final List<NonTerminal<T>> nonTerminals = symbols.stream()
806                        .map(rule -> rule.at(null))
807                        .distinct()
808                        .filter(NonTerminal.class::isInstance)
809                        .map(nt -> (NonTerminal<T>)nt)
810                        .toList();
811
812                final List<Terminal<T>> terminals = symbols.stream()
813                        .map(rule -> rule.at(null))
814                        .distinct()
815                        .filter(Terminal.class::isInstance)
816                        .map(nt -> (Terminal<T>)nt)
817                        .toList();
818
819                final Set<Symbol<T>> allSymbols = new HashSet<>(symbols);
820                allSymbols.addAll(nonTerminals);
821                allSymbols.addAll(terminals);
822
823                return new Cfg<>(
824                        nonTerminals.stream()
825                                .map(s -> (NonTerminal<T>)select(s, allSymbols))
826                                .toList(),
827                        terminals.stream()
828                                .map(s -> (Terminal<T>)select(s, allSymbols))
829                                .toList(),
830                        rules0.stream()
831                                .map(r -> rebuild(r, allSymbols))
832                                .toList(),
833                        (NonTerminal<T>)select(rules0.getFirst().start(), allSymbols)
834                );
835        }
836
837        /**
838         * Create a grammar object with the given rules.
839         *
840         * @param rules the rules the grammar consists of
841         * @throws IllegalArgumentException if the list of rules is empty or there
842         *         are duplicate rules
843         * @throws NullPointerException if the list of rules is {@code null}
844         */
845        @SafeVarargs
846        public static <T> Cfg<T> of(final Rule<? extends T>... rules) {
847                return Cfg.of(List.of(rules));
848        }
849
850        private static <T> Stream<Symbol<T>> ruleSymbols(final Rule<T> rule) {
851                return Stream.concat(
852                        Stream.of(rule.start),
853                        rule.alternatives.stream()
854                                .flatMap(expr -> expr.symbols().stream())
855                );
856        }
857
858        private static <T> Rule<T> rebuild(
859                final Rule<T> rule,
860                final Collection<Symbol<T>> symbols
861        ) {
862                return new Rule<>(
863                        (NonTerminal<T>)select(rule.start, symbols),
864                        rule.alternatives.stream()
865                                .map(e -> rebuild(e, symbols))
866                                .toList()
867                );
868        }
869
870        private static <T> Expression<T>
871        rebuild(final Expression<T> expression, final Collection<Symbol<T>> symbols) {
872                return new Expression<>(
873                        expression.symbols.stream()
874                                .map(s -> select(s, symbols))
875                                .toList()
876                );
877        }
878
879        private static <T> Symbol<T> select(
880                final Symbol<T> symbol,
881                final Collection<Symbol<T>> symbols
882        ) {
883                for (var s : symbols) {
884                        if (s.equals(symbol)) {
885                                return s;
886                        }
887                }
888                throw new AssertionError("Symbol not found: " + symbol);
889        }
890
891        @SuppressWarnings("unchecked")
892        static <A, B extends A> Cfg<A> upcast(final Cfg<B> seq) {
893                return (Cfg<A>)seq;
894        }
895
896
897        /* *************************************************************************
898         * Static factory methods for rule creation: DSL methods.
899         * ************************************************************************/
900
901        /**
902         * Factory method for creating a terminal symbol with the given
903         * {@code name} and {@code value}.
904         *
905         * @param name the name of the terminal symbol
906         * @param value the value of the terminal symbol
907         * @param <T> the terminal symbol value type
908         * @return a new terminal symbol
909         */
910        public static <T> Terminal<T> T(final String name, final T value) {
911                return new Terminal<>(name, value);
912        }
913
914        /**
915         * Factory method for creating a terminal symbol with the given
916         * {@code name} and {@code value}.
917         *
918         * @since 8.2
919         *
920         * @param name the name of the terminal symbol
921         * @param value the value of the terminal symbol
922         * @param annotation the terminal annotation
923         * @param <T> the terminal symbol value type
924         * @return a new terminal symbol
925         */
926        public static <T> Terminal<T> T(
927                final String name,
928                final T value,
929                final Object annotation
930        ) {
931                return new Terminal<>(name, value, annotation);
932        }
933
934        /**
935         * Factory method for creating a terminal symbol with the given
936         * {@code name}.
937         *
938         * @param name the name of the terminal symbol
939         * @return a new terminal symbol
940         */
941        public static Terminal<String> T(final String name) {
942                return new Terminal<>(name, name);
943        }
944
945        /**
946         * Factory method for creating non-terminal symbols.
947         *
948         * @param name the name of the symbol.
949         * @param <T> the terminal symbol value type
950         * @return a new non-terminal symbol
951         */
952        public static <T> NonTerminal<T> N(final String name) {
953                return new NonTerminal<>(name);
954        }
955
956        /**
957         * Factory method for creating non-terminal symbols.
958         *
959         * @since 8.2
960         *
961         * @param name the name of the symbol.
962         * @param annotation the annotation of the symbol
963         * @param <T> the terminal symbol value type
964         * @return a new non-terminal symbol
965         */
966        public static <T> NonTerminal<T> N(
967                final String name,
968                final Object annotation
969        ) {
970                return new NonTerminal<>(name, annotation);
971        }
972
973        /**
974         * Factory method for creating an expression with the given
975         * {@code symbols}.
976         *
977         * @param symbols the expression symbols
978         * @throws IllegalArgumentException if the list of {@code symbols} is
979         *         empty
980         * @param <T> the terminal symbol value type
981         * @return a new expression
982         */
983        @SafeVarargs
984        public static <T> Expression<T> E(final Symbol<T>... symbols) {
985                return new Expression<>(List.of(symbols));
986        }
987
988        /**
989         * Factory method for creating a new rule. The {@code elements} array doesn't
990         * allow {@link Rule} objects.
991         *
992         * @param name the name of start symbol of the rule
993         * @param elements the list of alternative rule expressions
994         * @throws IllegalArgumentException if the given list of
995         *         {@code alternatives} is empty
996         * @throws NullPointerException if one of the arguments is {@code null}
997         * @param <T> the terminal symbol value type
998         * @return a new rule
999         */
1000        @SafeVarargs
1001        public static <T> Rule<T> R(
1002                final String name,
1003                final Element<T>... elements
1004        ) {
1005                return R(new NonTerminal<>(name), elements);
1006        }
1007
1008        /**
1009         * Factory method for creating a new rule. The {@code elements} array doesn't
1010         * allow {@link Rule} objects.
1011         *
1012         * @param name the name of start symbol of the rule
1013         * @param alternatives the list of alternative rule expressions
1014         * @throws IllegalArgumentException if the given list of
1015         *         {@code alternatives} is empty
1016         * @throws NullPointerException if one of the arguments is {@code null}
1017         * @param <T> the terminal symbol value type
1018         * @return a new rule
1019         * @deprecated Will be removed, use {@link #R(String, Element[])} instead
1020         */
1021        @Deprecated(forRemoval = true, since = "8.2")
1022        @SafeVarargs
1023        public static <T> Rule<T> R(
1024                final String name,
1025                final Expression<T>... alternatives
1026        ) {
1027                return R(new NonTerminal<>(name), alternatives);
1028        }
1029
1030        /**
1031         * Factory method for creating a new rule. The {@code elements} array doesn't
1032         * allow {@link Rule} objects.
1033         *
1034         * @since 8.2
1035         *
1036         * @param start the start symbol of the rule
1037         * @param elements the list of alternative rule expressions
1038         * @throws NullPointerException if one of the arguments is {@code null}
1039         * @param <T> the terminal symbol value type
1040         * @return a new rule
1041         */
1042        @SafeVarargs
1043        public static <T> Rule<T> R(
1044                final NonTerminal<T> start,
1045                final Element<T>... elements
1046        ) {
1047                return new Rule<>(
1048                        start,
1049                        Stream.of(elements)
1050                                .map(expression -> switch (expression) {
1051                                        case Expression<T> e -> e;
1052                                        case Symbol<T> s -> new Expression<>(List.of(s));
1053                                })
1054                                .toList()
1055                );
1056        }
1057
1058        /**
1059         * Builder class for building {@link Cfg} objects.
1060         *
1061         * @since 8.2
1062         *
1063         * @param <T> the terminal symbol value type
1064         */
1065        public static final class Builder<T> {
1066                private final List<Rule<T>> rules = new ArrayList<>();
1067
1068                private Builder() {
1069                }
1070
1071                /**
1072                 * Building a new rule for the CFG.
1073                 *
1074                 * @param start the rule start
1075                 * @param builder the rule builder
1076                 * @return {@code this} builder for method chaining
1077                 */
1078                public Builder<T> R(
1079                        final NonTerminal<T> start,
1080                        final Consumer<? super Rule.Builder<T>> builder
1081                ) {
1082                        final var rb = new Rule.Builder<T>(start);
1083                        builder.accept(rb);
1084                        rules.add(rb.build());
1085
1086                        return this;
1087                }
1088
1089                /**
1090                 * Building a new rule for the CFG.
1091                 *
1092                 * @param name the rule name
1093                 * @param builder the rule builder
1094                 * @return {@code this} builder for method chaining
1095                 */
1096                public Builder<T> R(
1097                        final String name,
1098                        final Consumer<? super Rule.Builder<T>> builder
1099                ) {
1100                        return R(new NonTerminal<>(name), builder);
1101                }
1102
1103                /**
1104                 * Creates a new CFG object.
1105                 *
1106                 * @see #of(List)
1107                 *
1108                 * @throws IllegalArgumentException if the rule list is empty or a rule
1109                 *         is defined more than once, the start symbol points to a
1110                 *         missing rule or the rules uses symbols not defined in the
1111                 *         list of {@link #nonTerminals()} or {@link #terminals()}
1112                 * @return a newly created CFG object
1113                 */
1114                public Cfg<T> build() {
1115                        return Cfg.of(rules);
1116                }
1117
1118        }
1119
1120        /**
1121         * Return a new CFG builder.
1122         *
1123         * @since 8.2
1124         *
1125         * @return a new CFG builder
1126         * @param <T> the terminal symbol value type
1127         */
1128        public static <T> Builder<T> builder() {
1129                return new Builder<>();
1130        }
1131
1132}