001/*
002 * Java Genetic Algorithm Library (jenetics-9.0.0).
003 * Copyright (c) 2007-2026 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020package io.jenetics.ext.grammar;
021
022import static java.lang.String.format;
023import static java.util.Objects.requireNonNull;
024
025import java.util.ArrayList;
026import java.util.Collection;
027import java.util.HashMap;
028import java.util.HashSet;
029import java.util.List;
030import java.util.Optional;
031import java.util.Set;
032import java.util.function.Consumer;
033import java.util.function.Function;
034import java.util.stream.Collectors;
035import java.util.stream.Stream;
036
037/**
038 * Represents a <em>context-free</em> grammar
039 * (<a href="https://en.wikipedia.org/wiki/Context-free_grammar"><b>CFG</b></a>).
040 * <p>
041 * <b>Formal definition</b>
042 * <p>
043 * A context-free grammar {@code G} is defined by the 4-tuple
044 * {@code G = (N, T, R, S)}, where
045 * <ul>
046 *     <li>{@code N} is a finite set; each element {@code n ∈ N} is called a
047 *     non-terminal ({@link NonTerminal}) character or a variable. Each
048 *     variable represents a different type of phrase or clause in the sentence.
049 *     Variables are also sometimes called syntactic categories. Each variable
050 *     defines a sub-language of the language defined by {@code G}.
051 *     </li>
052 *     <li>{@code T} is a finite set of terminals ({@link Terminal}) disjoint
053 *     from {@code N}, which make up the actual content of the sentence. The set
054 *     of terminals is the alphabet of the language defined by the grammar
055 *     {@code G}.
056 *     </li>
057 *     <li>{@code R} is a finite relation in {@code N × (N ∪ T)∗}, where the
058 *     asterisk represents the <a href="https://en.wikipedia.org/wiki/Kleene_star">
059 *     Kleene star</a> operation. The members of {@code R} are called the
060 *     (rewrite) rules ({@link Rule}) or productions of the grammar.
061 *     </li>
062 *     <li>{@code S} is the start variable (or start symbol), used to represent
063 *     the whole sentence (or program). It must be an element of {@code N}
064 *     ({@link NonTerminal}).
065 *     </li>
066 * </ul>
067 *
068 * <b>Creating <em>Cfg</em> object from a given BNF grammar</b>
069 * {@snippet class="Snippets" region="parseBnf"}
070 * <p>
071 * <b>Creating <em>Cfg</em> programmatically</b>
072 * {@snippet class="Snippets" region="cfgWithoutBuilder"}
073 * <p>
074 * <b>Creating <em>Cfg</em> programmatically with builders</b>
075 * <p>
076 * Using the CFG builder makes it easier to define annotation for symbols without
077 * pushing the Java generics to its edges.
078 * {@snippet class="Snippets" region="cfgWithBuilder"}
079 * <p>
080 * Annotating CFG elements can be used to influence the {@link Generator} classes,
081 * which creates <em>sentences</em> from a given grammar.
082 *
083 * @see Bnf#parse(CharSequence)
084 *
085 * @param <T> the terminal symbol value type
086 *
087 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
088 * @since 7.1
089 * @version 8.2
090 */
091public final class Cfg<T> {
092
093        /* *************************************************************************
094         * Inner classes and interfaces.
095         * ************************************************************************/
096
097        /**
098         * Interface for annotatable CFG types.
099         *
100         * @since 8.2
101         * @version 8.2
102         *
103         * @param <T> the terminal symbol value type
104         */
105        public sealed interface Annotatable<T> {
106                /**
107                 * Return the element annotation, might be {@code null}.
108                 *
109                 * @return the annotation of the element, or {@code null} if the element
110                 *         has none
111                 */
112                Object annotation();
113
114                /**
115                 * Create a new copy of the CFG element, with the given
116                 * {@code annotation}.
117                 *
118                 * @param annotation the annotation of the newly created element,
119                 *        may be {@code null}
120                 * @return a copy of the element with the given {@code annotation}
121                 */
122                Annotatable<T> at(Object annotation);
123        }
124
125        /**
126         * Represents an element of a CFG
127         *
128         * @since 8.2
129         * @version 8.2
130         *
131         * @param <T> the terminal symbol value type
132         */
133        public sealed interface Element<T> extends Annotatable<T> {
134        }
135
136
137        /**
138         * Represents the <em>symbols</em> the BNF grammar consists.
139         *
140         * @param <T> the terminal symbol value type
141         */
142        public sealed interface Symbol<T> extends Element<T> {
143
144                /**
145                 * Return the name of the symbol.
146                 *
147                 * @return the name of the symbol
148                 */
149                String name();
150
151                @Override
152                Symbol<T> at(Object annotation);
153
154        }
155
156        /**
157         * Represents the non-terminal symbols of the grammar ({@code NT}).
158         *
159         * @param <T> the terminal symbol value type
160         */
161        public record NonTerminal<T>(String name, Object annotation)
162                implements Symbol<T>
163        {
164
165                /**
166                 * @param name the name of the non-terminal symbol
167                 * @throws IllegalArgumentException if the given {@code name} is not
168                 *         a valid <em>non-terminal</em> name
169                 * @throws NullPointerException if one of the arguments is {@code null}
170                 */
171                public NonTerminal {
172                        if (name.isEmpty()) {
173                                throw new IllegalArgumentException(
174                                        "Non-terminal value must not be empty."
175                                );
176                        }
177                }
178
179                public NonTerminal(final String name) {
180                        this(name, null);
181                }
182
183                @Override
184                public NonTerminal<T> at(final Object annotation) {
185                        return annotation == this.annotation
186                                ? this
187                                : new NonTerminal<>(name, annotation);
188                }
189
190        }
191
192        /**
193         * Represents a terminal symbols of the grammar ({@code T}).
194         *
195         * @param <T> the terminal symbol value type
196         */
197        public record Terminal<T>(String name, T value, Object annotation)
198                implements Symbol<T>
199        {
200
201                /**
202                 * @param name the name of the terminal symbol
203                 * @param value the value of the terminal symbol
204                 * @throws IllegalArgumentException if the given terminal {@code name}
205                 *         is empty
206                 */
207                public Terminal {
208                        if (name.isEmpty()) {
209                                throw new IllegalArgumentException(
210                                        "Terminal value must not be empty."
211                                );
212                        }
213                }
214
215                public Terminal(final String name, final T value) {
216                        this(name, value, null);
217                }
218
219                @Override
220                public Terminal<T> at(final Object annotation) {
221                        return annotation == this.annotation
222                                ? this
223                                : new Terminal<>(name, value, annotation);
224                }
225
226                /**
227                 * Return a new terminal symbol where the name of the symbol is equal
228                 * to its value.
229                 *
230                 * @param name the name (and value) of the terminal symbol
231                 * @return a new terminal symbol with the given {@code name}
232                 * @throws IllegalArgumentException if the given terminal {@code name}
233                 *         is empty
234                 */
235                public static Terminal<String> of(final String name) {
236                        return new Terminal<>(name, name);
237                }
238
239        }
240
241        /**
242         * Represents one <em>expression</em> (list of alternative symbols) a
243         * production rule consists of.
244         *
245         * @param <T> the terminal symbol value type
246         */
247        public record Expression<T>(List<Symbol<T>> symbols, Object annotation)
248                implements Element<T>
249        {
250
251                /**
252                 * @param symbols the expression symbols
253                 * @throws IllegalArgumentException if the list of {@code symbols} is
254                 *         empty
255                 */
256                public Expression {
257                        if (symbols.isEmpty()) {
258                                throw new IllegalArgumentException(
259                                        "The list of symbols must not be empty."
260                                );
261                        }
262                        symbols = List.copyOf(symbols);
263                }
264
265                public Expression(final List<Symbol<T>> symbols) {
266                        this(symbols, null);
267                }
268
269                @Override
270                public Expression<T> at(final Object annotation) {
271                        return annotation == this.annotation
272                                ? this
273                                : new Expression<>(symbols, annotation);
274                }
275
276                /**
277                 * Builder class for building rule expressions.
278                 *
279                 * @since 8.2
280                 *
281                 * @param <T> the terminal symbol value type
282                 */
283                public static final class Builder<T> {
284                        private final List<Symbol<T>> symbols = new ArrayList<>();
285                        private Object annotation;
286
287                        private Builder() {
288                        }
289
290                        /**
291                         * Add the given {@code symbol} to the expression.
292                         *
293                         * @param symbol the expression symbol
294                         * @return {@code this} builder for method chaining
295                         */
296                        public Builder<T> add(final Symbol<T> symbol) {
297                                symbols.add(requireNonNull(symbol));
298                                return this;
299                        }
300
301                        /**
302                         * Add a non-terminal symbol to the expression.
303                         *
304                         * @param name the symbol name
305                         * @param annotation the symbol annotation
306                         * @return {@code this} builder for method chaining
307                         */
308                        public Builder<T> N(final String name, final Object annotation) {
309                                return add(new NonTerminal<>(name, annotation));
310                        }
311
312                        /**
313                         * Add a non-terminal symbol to the expression.
314                         *
315                         * @param name the symbol name
316                         * @return {@code this} builder for method chaining
317                         */
318                        public Builder<T> N(final String name) {
319                                return N(name, null);
320                        }
321
322                        /**
323                         * Add a non-terminal symbol to the expression.
324                         *
325                         * @param name the symbol name
326                         * @param value the symbol value
327                         * @param annotation the symbol annotation
328                         * @return {@code this} builder for method chaining
329                         */
330                        public Builder<T>
331                        T(final String name, final T value, final Object annotation) {
332                                return add(new Terminal<>(name, value, annotation));
333                        }
334
335                        /**
336                         * Add a non-terminal symbol to the expression.
337                         *
338                         * @param name the symbol name
339                         * @param value the symbol value
340                         * @return {@code this} builder for method chaining
341                         */
342                        public Builder<T> T(final String name, final T value) {
343                                return add(new Terminal<>(name, value, null));
344                        }
345
346                        /**
347                         * Add a non-terminal symbol to the expression.
348                         *
349                         * @param name the symbol name
350                         * @throws ClassCastException if {@code T} is not a string
351                         * @return {@code this} builder for method chaining
352                         */
353                        public Builder<T> T(final String name) {
354                                @SuppressWarnings("unchecked")
355                                final T value = (T)name;
356                                return add(new Terminal<>(name, value, null));
357                        }
358
359                        /**
360                         * Set the expression annotation.
361                         *
362                         * @param annotation the expression annotation
363                         *  @return {@code this} builder for method chaining
364                         */
365                        public Builder<T> at(final Object annotation) {
366                                this.annotation = annotation;
367                                return this;
368                        }
369
370                        /**
371                         * Create a new rule object.
372                         *
373                         * @return a new rule object
374                         */
375                        public Expression<T> build() {
376                                return new Expression<>(symbols, annotation);
377                        }
378
379                }
380
381        }
382
383        /**
384         * Represents a production rule of the grammar ({@code R}).
385         *
386         * @param <T> the terminal symbol value type
387         */
388        public record Rule<T>(
389                NonTerminal<T> start,
390                List<Expression<T>> alternatives,
391                Object annotation
392        )
393                implements Annotatable<T>
394        {
395
396                /**
397                 * Creates a new rule object.
398                 *
399                 * @param start the start symbol of the rule
400                 * @param alternatives the list of alternative rule expressions
401                 * @throws IllegalArgumentException if the given list of
402                 *         {@code alternatives} is empty
403                 * @throws NullPointerException if one of the arguments is {@code null}
404                 */
405                public Rule {
406                        requireNonNull(start);
407                        if (alternatives.isEmpty()) {
408                                throw new IllegalArgumentException(
409                                        "Rule alternatives must not be empty."
410                                );
411                        }
412                        alternatives = List.copyOf(alternatives);
413                }
414
415                public Rule(NonTerminal<T> start, List<Expression<T>> alternatives) {
416                        this(start, alternatives, null);
417                }
418
419                @Override
420                public Rule<T> at(final Object annotation) {
421                        return annotation == this.annotation
422                                ? this
423                                : new Rule<>(start, alternatives, annotation);
424                }
425
426                /**
427                 * Builder class for building CFG rules.
428                 *
429                 * @since 8.2
430                 *
431                 * @param <T> the terminal symbol value type
432                 */
433                public static final class Builder<T> {
434                        private final NonTerminal<T> start;
435                        private final List<Expression<T>> expressions = new ArrayList<>();
436                        private Object annotation;
437
438                        private Builder(final NonTerminal<T> start) {
439                                this.start = requireNonNull(start);
440                        }
441
442                        /**
443                         * Builder method for creating a rule expression.
444                         *
445                         * @param builder the expression builder
446                         * @return {@code this} builder for method chaining
447                         */
448                        public Builder<T>
449                        E(final Consumer<? super Expression.Builder<T>> builder) {
450                                final var eb = new Expression.Builder<T>();
451                                builder.accept(eb);
452                                expressions.add(eb.build());
453
454                                return this;
455                        }
456
457                        /**
458                         * Builder method for creating an expression with the given
459                         * {@code symbols}.
460                         *
461                         * @param symbols the expression symbols
462                         * @throws IllegalArgumentException if the list of {@code symbols}
463                         *         is empty
464                         * @return {@code this} builder for method chaining
465                         */
466                        @SafeVarargs
467                        public final Builder<T> E(final Symbol<T>... symbols) {
468                                expressions.add(Cfg.E(symbols));
469                                return this;
470                        }
471
472                        /**
473                         * Add an expression which consists solely of the given non-terminal
474                         * symbol.
475                         *
476                         * @param name the symbol name
477                         * @param annotation the symbol annotation
478                         * @return {@code this} builder for method chaining
479                         */
480                        public Builder<T> N(final String name, final Object annotation) {
481                                return E(new NonTerminal<>(name, annotation));
482                        }
483
484                        /**
485                         * Add an expression which consists solely of the given non-terminal
486                         * symbol.
487                         *
488                         * @param name the symbol name
489                         * @return {@code this} builder for method chaining
490                         */
491                        public Builder<T> N(final String name) {
492                                return N(name, null);
493                        }
494
495                        /**
496                         * Add an expression which consists solely of the given terminal
497                         * symbol.
498                         *
499                         * @param name the symbol name
500                         * @param value the symbol value
501                         * @param annotation the symbol annotation
502                         * @return {@code this} builder for method chaining
503                         */
504                        public Builder<T> T(
505                                final String name,
506                                final T value,
507                                final Object annotation
508                        ) {
509                                return E(Cfg.T(name, value, annotation));
510                        }
511
512                        /**
513                         * Add an expression which consists solely of the given terminal
514                         * symbol.
515                         *
516                         * @param name the symbol name
517                         * @param value the symbol value
518                         * @return {@code this} builder for method chaining
519                         */
520                        public Builder<T> T(final String name, final T value) {
521                                return E(Cfg.T(name, value));
522                        }
523
524                        /**
525                         * Add an expression which consists solely of the given terminal
526                         * symbol.
527                         *
528                         * @param name the symbol name
529                         * @return {@code this} builder for method chaining
530                         */
531                        public Builder<T> T(final T name) {
532                                return E(Cfg.T(name.toString(), name));
533                        }
534
535                        /**
536                         * Sets the rule annotation.
537                         *
538                         * @param annotation the rule annotation
539                         * @return {@code this} builder for method chaining
540                         */
541                        public Builder<T> at(Object annotation) {
542                                this.annotation = annotation;
543                                return this;
544                        }
545
546                        /**
547                         * Create a new rule object.
548                         *
549                         * @return a new rule object
550                         */
551                        public Rule<T> build() {
552                                return new Rule<>(start, expressions, annotation);
553                        }
554
555                }
556
557        }
558
559        /* *************************************************************************
560         * CFG implementation.
561         * ************************************************************************/
562
563        private final List<NonTerminal<T>> nonTerminals;
564        private final List<Terminal<T>> terminals;
565        private final List<Rule<T>> rules;
566        private final NonTerminal<T> start;
567
568        /**
569         * Create a new <em>context-free</em> grammar object.
570         *
571         * @param nonTerminals the non-terminal symbols of {@code this} grammar
572         * @param terminals the terminal symbols of {@code this} grammar
573         * @param rules the <em>production</em> rules of {@code this} grammar
574         * @param start the start symbol of {@code this} grammar
575         * @throws NullPointerException if one of the arguments is {@code null}
576         * @throws IllegalArgumentException if a rule is defined more than once, the
577         *         start symbol points to a missing rule or the rules uses symbols
578         *         not defined in the list of {@link #nonTerminals()} or
579         *         {@link #terminals()}
580         */
581        Cfg(
582                List<NonTerminal<T>> nonTerminals,
583                List<Terminal<T>> terminals,
584                List<Rule<T>> rules,
585                NonTerminal<T> start
586        ) {
587                if (rules.isEmpty()) {
588                        throw new IllegalArgumentException(
589                                "The given list of rules must not be empty."
590                        );
591                }
592
593                // Check the uniqueness of the rules.
594                final var duplicatedRules = rules.stream()
595                        .collect(Collectors.groupingBy(Rule::start))
596                        .values().stream()
597                        .filter(values -> values.size() > 1)
598                        .map(rule -> rule.getFirst().start.name)
599                        .toList();
600
601                if (!duplicatedRules.isEmpty()) {
602                        throw new IllegalArgumentException(
603                                "Found duplicate rule: %s.".formatted(duplicatedRules)
604                        );
605                }
606
607                // Check if start symbol points to an existing rule.
608                final var startRule = rules.stream()
609                        .filter(r -> start.equals(r.start))
610                        .findFirst();
611                if (startRule.isEmpty()) {
612                        throw new IllegalArgumentException(
613                                "No rule found for start symbol %s.".formatted(start)
614                        );
615                }
616
617                // Check that all symbols used in the given rules are also defined
618                // in the list of non-terminals and terminals.
619                final Set<Symbol<T>> required = rules.stream()
620                        .flatMap(Cfg::ruleSymbols)
621                        .map(s -> s.at(null))
622                        .collect(Collectors.toUnmodifiableSet());
623
624                final Set<Symbol<T>> available = Stream
625                        .concat(nonTerminals.stream(), terminals.stream())
626                        .map(s -> s.at(null))
627                        .collect(Collectors.toUnmodifiableSet());
628
629                final var missing = new HashSet<>(required);
630                missing.removeAll(available);
631
632                if (!missing.isEmpty()) {
633                        throw new IllegalArgumentException(
634                                "Unknown symbols defined in rules: " + missing
635                        );
636                }
637
638                // Check if the name of terminals and non-terminals are distinct.
639                final var terminalNames = terminals.stream()
640                        .map(Symbol::name)
641                        .collect(Collectors.toSet());
642
643                final var nonTerminalNames = nonTerminals.stream()
644                        .map(Symbol::name)
645                        .collect(Collectors.toSet());
646
647                terminalNames.retainAll(nonTerminalNames);
648                if (!terminalNames.isEmpty()) {
649                        throw new IllegalArgumentException(format(
650                                "Terminal and non-terminal symbols with same name: %s",
651                                terminalNames.stream().sorted().toList()
652                        ));
653                }
654
655                this.nonTerminals = List.copyOf(nonTerminals);
656                this.terminals = List.copyOf(terminals);
657                this.rules = List.copyOf(rules);
658                this.start = requireNonNull(start);
659        }
660
661        /**
662         * Return the non-terminal symbols of {@code this} grammar. The returned
663         * symbols have no annotation.
664         *
665         * @return the non-terminal symbols of {@code this} grammar
666         */
667        public List<NonTerminal<T>> nonTerminals() {
668                return nonTerminals;
669        }
670
671        /**
672         * Return the terminal symbols of {@code this} grammar. The returned
673         * symbols have no annotation.
674         *
675         * @return the terminal symbols of {@code this} grammar
676         */
677        public List<Terminal<T>> terminals() {
678                return terminals;
679        }
680
681        /**
682         * Return the rules of {@code this} grammar.
683         *
684         * @return the rules of {@code this} grammar
685         */
686        public List<Rule<T>> rules() {
687                return rules;
688        }
689
690        /**
691         * Return the start symbol of {@code this} grammar.
692         *
693         * @return the start symbol of {@code this} grammar
694         */
695        public NonTerminal<T> start() {
696                return start;
697        }
698
699        /**
700         * Return the rule for the given {@code start} symbol.
701         *
702         * @param start the start symbol of the rule
703         * @return the rule for the given {@code start} symbol
704         * @throws NullPointerException if the given {@code start} symbol is
705         *         {@code null}
706         */
707        public Optional<Rule<T>> rule(final NonTerminal<?> start) {
708                requireNonNull(start);
709                for (var rule : rules) {
710                        if (rule.start().name().equals(start.name())) {
711                                return Optional.of(rule);
712                        }
713                }
714                return Optional.empty();
715        }
716
717        /**
718         * Maps the values of the terminal symbols from type {@code T} to type
719         * {@code A}.
720         *
721         * @param mapper the mapper function
722         * @param <A> the new value type of the terminal symbols
723         * @return the mapped grammar
724         * @throws NullPointerException if the given mapper is {@code null}
725         */
726        public <A> Cfg<A>
727        map(final Function<? super Terminal<? extends T>, ? extends A> mapper) {
728                requireNonNull(mapper);
729
730                final var cache = new HashMap<Terminal<T>, Terminal<A>>();
731                final Function<Terminal<T>, Terminal<A>> mapping = t -> cache
732                        .computeIfAbsent(t, t2 -> new Terminal<>(t2.name(), mapper.apply(t2)));
733
734                return flatMap(mapping);
735        }
736
737        /**
738         * Maps the values of the terminal symbols from type {@code T} to type
739         * {@code A}.
740         *
741         * @since 9.0
742         *
743         * @param mapper the mapper function
744         * @param <A> the new value type of the terminal symbols
745         * @return the mapped grammar
746         * @throws NullPointerException if the given mapper is {@code null}
747         */
748        public <A> Cfg<A> flatMap(
749                final Function<
750                        ? super Terminal<T>,
751                        ? extends Terminal<? extends A>
752                > mapper
753        ) {
754                requireNonNull(mapper);
755
756                final var cache = new HashMap<Terminal<T>, Terminal<A>>();
757
758                @SuppressWarnings("unchecked")
759                final Function<Terminal<T>, Terminal<A>>
760                        fn = (Function<Terminal<T>, Terminal<A>>)mapper;
761
762                final Function<Terminal<T>, Terminal<A>> mapping =
763                        t -> cache.computeIfAbsent(t, fn);
764
765                @SuppressWarnings("unchecked")
766                final List<Rule<A>> rules = rules().stream()
767                        .map(rule -> new Rule<>(
768                                (NonTerminal<A>)rule.start(),
769                                rule.alternatives().stream()
770                                        .map(expr -> new Expression<>(
771                                                expr.symbols().stream()
772                                                        .map(sym ->
773                                                                sym instanceof Cfg.Terminal<T> t
774                                                                        ? mapping.apply(t) : (Symbol<A>)sym
775                                                        )
776                                                        .toList()
777                                                ))
778                                        .toList()
779                                ))
780                        .toList();
781
782                return Cfg.of(rules);
783        }
784
785        @Override
786        public int hashCode() {
787                return rules.hashCode();
788        }
789
790        @Override
791        public boolean equals(final Object obj) {
792                return obj instanceof Cfg<?> cfg && rules.equals(cfg.rules());
793        }
794
795        @Override
796        public String toString() {
797                return "Cfg[nonTerminals=%s, terminals=%s, rules=%s, start=%s]"
798                        .formatted(nonTerminals, terminals, rules, start);
799        }
800
801
802        /* *************************************************************************
803         * Factory methods
804         * ************************************************************************/
805
806        /**
807         * Create a grammar object with the given rules. Duplicated rules are merged
808         * into one rule. The <em>start</em> symbol of the first rule is chosen as
809         * the start symbol of the created CFG
810         *
811         * @param rules the rules the grammar consists of
812         * @throws IllegalArgumentException if the rule list is empty or a rule is
813         *         defined more than once, the start symbol points to a missing rule
814         *         or the rules uses symbols not defined in the list of
815         *         {@link #nonTerminals()} or {@link #terminals()}
816         * @throws NullPointerException if the list of rules is {@code null}
817         */
818        public static <T> Cfg<T> of(final List<? extends Rule<? extends T>> rules) {
819                @SuppressWarnings("unchecked")
820                final var rules0 = (List<Rule<T>>)List.copyOf(rules);
821
822                // Rules must not be empty.
823                if (rules0.isEmpty()) {
824                        throw new IllegalArgumentException(
825                                "The list of rules must not be empty."
826                        );
827                }
828
829                final List<Symbol<T>> symbols = rules0.stream()
830                        .flatMap(Cfg::ruleSymbols)
831                        .distinct()
832                        .toList();
833
834                final List<NonTerminal<T>> nonTerminals = symbols.stream()
835                        .map(rule -> rule.at(null))
836                        .distinct()
837                        .filter(NonTerminal.class::isInstance)
838                        .map(nt -> (NonTerminal<T>)nt)
839                        .toList();
840
841                final List<Terminal<T>> terminals = symbols.stream()
842                        .map(rule -> rule.at(null))
843                        .distinct()
844                        .filter(Terminal.class::isInstance)
845                        .map(nt -> (Terminal<T>)nt)
846                        .toList();
847
848                final Set<Symbol<T>> allSymbols = new HashSet<>(symbols);
849                allSymbols.addAll(nonTerminals);
850                allSymbols.addAll(terminals);
851
852                return new Cfg<>(
853                        nonTerminals.stream()
854                                .map(s -> (NonTerminal<T>)select(s, allSymbols))
855                                .toList(),
856                        terminals.stream()
857                                .map(s -> (Terminal<T>)select(s, allSymbols))
858                                .toList(),
859                        rules0.stream()
860                                .map(r -> rebuild(r, allSymbols))
861                                .toList(),
862                        (NonTerminal<T>)select(rules0.getFirst().start(), allSymbols)
863                );
864        }
865
866        /**
867         * Create a grammar object with the given rules.
868         *
869         * @param rules the rules the grammar consists of
870         * @throws IllegalArgumentException if the list of rules is empty or there
871         *         are duplicate rules
872         * @throws NullPointerException if the list of rules is {@code null}
873         */
874        @SafeVarargs
875        public static <T> Cfg<T> of(final Rule<? extends T>... rules) {
876                return Cfg.of(List.of(rules));
877        }
878
879        private static <T> Stream<Symbol<T>> ruleSymbols(final Rule<T> rule) {
880                return Stream.concat(
881                        Stream.of(rule.start),
882                        rule.alternatives.stream()
883                                .flatMap(expr -> expr.symbols().stream())
884                );
885        }
886
887        private static <T> Rule<T> rebuild(
888                final Rule<T> rule,
889                final Collection<Symbol<T>> symbols
890        ) {
891                return new Rule<>(
892                        (NonTerminal<T>)select(rule.start, symbols),
893                        rule.alternatives.stream()
894                                .map(e -> rebuild(e, symbols))
895                                .toList()
896                );
897        }
898
899        private static <T> Expression<T>
900        rebuild(final Expression<T> expression, final Collection<Symbol<T>> symbols) {
901                return new Expression<>(
902                        expression.symbols.stream()
903                                .map(s -> select(s, symbols))
904                                .toList()
905                );
906        }
907
908        private static <T> Symbol<T> select(
909                final Symbol<T> symbol,
910                final Collection<Symbol<T>> symbols
911        ) {
912                for (var s : symbols) {
913                        if (s.equals(symbol)) {
914                                return s;
915                        }
916                }
917                throw new AssertionError("Symbol not found: " + symbol);
918        }
919
920        @SuppressWarnings("unchecked")
921        static <A, B extends A> Cfg<A> upcast(final Cfg<B> seq) {
922                return (Cfg<A>)seq;
923        }
924
925
926        /* *************************************************************************
927         * Static factory methods for rule creation: DSL methods.
928         * ************************************************************************/
929
930        /**
931         * Factory method for creating a terminal symbol with the given
932         * {@code name} and {@code value}.
933         *
934         * @param name the name of the terminal symbol
935         * @param value the value of the terminal symbol
936         * @param <T> the terminal symbol value type
937         * @return a new terminal symbol
938         */
939        public static <T> Terminal<T> T(final String name, final T value) {
940                return new Terminal<>(name, value);
941        }
942
943        /**
944         * Factory method for creating a terminal symbol with the given
945         * {@code name} and {@code value}.
946         *
947         * @since 8.2
948         *
949         * @param name the name of the terminal symbol
950         * @param value the value of the terminal symbol
951         * @param annotation the terminal annotation
952         * @param <T> the terminal symbol value type
953         * @return a new terminal symbol
954         */
955        public static <T> Terminal<T> T(
956                final String name,
957                final T value,
958                final Object annotation
959        ) {
960                return new Terminal<>(name, value, annotation);
961        }
962
963        /**
964         * Factory method for creating a terminal symbol with the given
965         * {@code name}.
966         *
967         * @since 9.0
968         *
969         * @param value the value of the terminal symbol
970         * @return a new terminal symbol
971         */
972        public static <T> Terminal<T> T(final T value) {
973                return new Terminal<>(String.valueOf(value), value);
974        }
975
976        /**
977         * Factory method for creating non-terminal symbols.
978         *
979         * @param name the name of the symbol.
980         * @param <T> the terminal symbol value type
981         * @return a new non-terminal symbol
982         */
983        public static <T> NonTerminal<T> N(final String name) {
984                return new NonTerminal<>(name);
985        }
986
987        /**
988         * Factory method for creating non-terminal symbols.
989         *
990         * @since 8.2
991         *
992         * @param name the name of the symbol.
993         * @param annotation the annotation of the symbol
994         * @param <T> the terminal symbol value type
995         * @return a new non-terminal symbol
996         */
997        public static <T> NonTerminal<T> N(
998                final String name,
999                final Object annotation
1000        ) {
1001                return new NonTerminal<>(name, annotation);
1002        }
1003
1004        /**
1005         * Factory method for creating an expression with the given
1006         * {@code symbols}.
1007         *
1008         * @param symbols the expression symbols
1009         * @throws IllegalArgumentException if the list of {@code symbols} is
1010         *         empty
1011         * @param <T> the terminal symbol value type
1012         * @return a new expression
1013         */
1014        @SafeVarargs
1015        public static <T> Expression<T> E(final Symbol<T>... symbols) {
1016                return new Expression<>(List.of(symbols));
1017        }
1018
1019        /**
1020         * Factory method for creating a new rule. The {@code elements} array doesn't
1021         * allow {@link Rule} objects.
1022         *
1023         * @param name the name of start symbol of the rule
1024         * @param elements the list of alternative rule expressions
1025         * @throws IllegalArgumentException if the given list of
1026         *         {@code alternatives} is empty
1027         * @throws NullPointerException if one of the arguments is {@code null}
1028         * @param <T> the terminal symbol value type
1029         * @return a new rule
1030         */
1031        @SafeVarargs
1032        public static <T> Rule<T> R(
1033                final String name,
1034                final Element<T>... elements
1035        ) {
1036                return R(new NonTerminal<>(name), elements);
1037        }
1038
1039        /**
1040         * Factory method for creating a new rule. The {@code elements} array doesn't
1041         * allow {@link Rule} objects.
1042         *
1043         * @since 8.2
1044         *
1045         * @param start the start symbol of the rule
1046         * @param elements the list of alternative rule expressions
1047         * @throws NullPointerException if one of the arguments is {@code null}
1048         * @param <T> the terminal symbol value type
1049         * @return a new rule
1050         */
1051        @SafeVarargs
1052        public static <T> Rule<T> R(
1053                final NonTerminal<T> start,
1054                final Element<T>... elements
1055        ) {
1056                return new Rule<>(
1057                        start,
1058                        Stream.of(elements)
1059                                .map(expression -> switch (expression) {
1060                                        case Expression<T> e -> e;
1061                                        case Symbol<T> s -> new Expression<>(List.of(s));
1062                                })
1063                                .toList()
1064                );
1065        }
1066
1067        /**
1068         * Builder class for building {@link Cfg} objects.
1069         *
1070         * @since 8.2
1071         *
1072         * @param <T> the terminal symbol value type
1073         */
1074        public static final class Builder<T> {
1075                private final List<Rule<T>> rules = new ArrayList<>();
1076
1077                private Builder() {
1078                }
1079
1080                /**
1081                 * Building a new rule for the CFG.
1082                 *
1083                 * @param start the rule start
1084                 * @param builder the rule builder
1085                 * @return {@code this} builder for method chaining
1086                 */
1087                public Builder<T> R(
1088                        final NonTerminal<T> start,
1089                        final Consumer<? super Rule.Builder<T>> builder
1090                ) {
1091                        final var rb = new Rule.Builder<T>(start);
1092                        builder.accept(rb);
1093                        rules.add(rb.build());
1094
1095                        return this;
1096                }
1097
1098                /**
1099                 * Building a new rule for the CFG.
1100                 *
1101                 * @param name the rule name
1102                 * @param builder the rule builder
1103                 * @return {@code this} builder for method chaining
1104                 */
1105                public Builder<T> R(
1106                        final String name,
1107                        final Consumer<? super Rule.Builder<T>> builder
1108                ) {
1109                        return R(new NonTerminal<>(name), builder);
1110                }
1111
1112                /**
1113                 * Creates a new CFG object.
1114                 *
1115                 * @see #of(List)
1116                 *
1117                 * @throws IllegalArgumentException if the rule list is empty or a rule
1118                 *         is defined more than once, the start symbol points to a
1119                 *         missing rule or the rules uses symbols not defined in the
1120                 *         list of {@link #nonTerminals()} or {@link #terminals()}
1121                 * @return a newly created CFG object
1122                 */
1123                public Cfg<T> build() {
1124                        return Cfg.of(rules);
1125                }
1126
1127        }
1128
1129        /**
1130         * Return a new CFG builder.
1131         *
1132         * @since 8.2
1133         *
1134         * @return a new CFG builder
1135         * @param <T> the terminal symbol value type
1136         */
1137        public static <T> Builder<T> builder() {
1138                return new Builder<>();
1139        }
1140
1141}