001/* 002 * Java Genetic Algorithm Library (jenetics-9.0.0). 003 * Copyright (c) 2007-2026 Franz Wilhelmstötter 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 * Author: 018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com) 019 */ 020package io.jenetics.ext.grammar; 021 022import static java.lang.String.format; 023import static java.util.Objects.requireNonNull; 024 025import java.util.ArrayList; 026import java.util.Collection; 027import java.util.HashMap; 028import java.util.HashSet; 029import java.util.List; 030import java.util.Optional; 031import java.util.Set; 032import java.util.function.Consumer; 033import java.util.function.Function; 034import java.util.stream.Collectors; 035import java.util.stream.Stream; 036 037/** 038 * Represents a <em>context-free</em> grammar 039 * (<a href="https://en.wikipedia.org/wiki/Context-free_grammar"><b>CFG</b></a>). 040 * <p> 041 * <b>Formal definition</b> 042 * <p> 043 * A context-free grammar {@code G} is defined by the 4-tuple 044 * {@code G = (N, T, R, S)}, where 045 * <ul> 046 * <li>{@code N} is a finite set; each element {@code n ∈ N} is called a 047 * non-terminal ({@link NonTerminal}) character or a variable. Each 048 * variable represents a different type of phrase or clause in the sentence. 049 * Variables are also sometimes called syntactic categories. Each variable 050 * defines a sub-language of the language defined by {@code G}. 051 * </li> 052 * <li>{@code T} is a finite set of terminals ({@link Terminal}) disjoint 053 * from {@code N}, which make up the actual content of the sentence. The set 054 * of terminals is the alphabet of the language defined by the grammar 055 * {@code G}. 056 * </li> 057 * <li>{@code R} is a finite relation in {@code N × (N ∪ T)∗}, where the 058 * asterisk represents the <a href="https://en.wikipedia.org/wiki/Kleene_star"> 059 * Kleene star</a> operation. The members of {@code R} are called the 060 * (rewrite) rules ({@link Rule}) or productions of the grammar. 061 * </li> 062 * <li>{@code S} is the start variable (or start symbol), used to represent 063 * the whole sentence (or program). It must be an element of {@code N} 064 * ({@link NonTerminal}). 065 * </li> 066 * </ul> 067 * 068 * <b>Creating <em>Cfg</em> object from a given BNF grammar</b> 069 * {@snippet class="Snippets" region="parseBnf"} 070 * <p> 071 * <b>Creating <em>Cfg</em> programmatically</b> 072 * {@snippet class="Snippets" region="cfgWithoutBuilder"} 073 * <p> 074 * <b>Creating <em>Cfg</em> programmatically with builders</b> 075 * <p> 076 * Using the CFG builder makes it easier to define annotation for symbols without 077 * pushing the Java generics to its edges. 078 * {@snippet class="Snippets" region="cfgWithBuilder"} 079 * <p> 080 * Annotating CFG elements can be used to influence the {@link Generator} classes, 081 * which creates <em>sentences</em> from a given grammar. 082 * 083 * @see Bnf#parse(CharSequence) 084 * 085 * @param <T> the terminal symbol value type 086 * 087 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a> 088 * @since 7.1 089 * @version 8.2 090 */ 091public final class Cfg<T> { 092 093 /* ************************************************************************* 094 * Inner classes and interfaces. 095 * ************************************************************************/ 096 097 /** 098 * Interface for annotatable CFG types. 099 * 100 * @since 8.2 101 * @version 8.2 102 * 103 * @param <T> the terminal symbol value type 104 */ 105 public sealed interface Annotatable<T> { 106 /** 107 * Return the element annotation, might be {@code null}. 108 * 109 * @return the annotation of the element, or {@code null} if the element 110 * has none 111 */ 112 Object annotation(); 113 114 /** 115 * Create a new copy of the CFG element, with the given 116 * {@code annotation}. 117 * 118 * @param annotation the annotation of the newly created element, 119 * may be {@code null} 120 * @return a copy of the element with the given {@code annotation} 121 */ 122 Annotatable<T> at(Object annotation); 123 } 124 125 /** 126 * Represents an element of a CFG 127 * 128 * @since 8.2 129 * @version 8.2 130 * 131 * @param <T> the terminal symbol value type 132 */ 133 public sealed interface Element<T> extends Annotatable<T> { 134 } 135 136 137 /** 138 * Represents the <em>symbols</em> the BNF grammar consists. 139 * 140 * @param <T> the terminal symbol value type 141 */ 142 public sealed interface Symbol<T> extends Element<T> { 143 144 /** 145 * Return the name of the symbol. 146 * 147 * @return the name of the symbol 148 */ 149 String name(); 150 151 @Override 152 Symbol<T> at(Object annotation); 153 154 } 155 156 /** 157 * Represents the non-terminal symbols of the grammar ({@code NT}). 158 * 159 * @param <T> the terminal symbol value type 160 */ 161 public record NonTerminal<T>(String name, Object annotation) 162 implements Symbol<T> 163 { 164 165 /** 166 * @param name the name of the non-terminal symbol 167 * @throws IllegalArgumentException if the given {@code name} is not 168 * a valid <em>non-terminal</em> name 169 * @throws NullPointerException if one of the arguments is {@code null} 170 */ 171 public NonTerminal { 172 if (name.isEmpty()) { 173 throw new IllegalArgumentException( 174 "Non-terminal value must not be empty." 175 ); 176 } 177 } 178 179 public NonTerminal(final String name) { 180 this(name, null); 181 } 182 183 @Override 184 public NonTerminal<T> at(final Object annotation) { 185 return annotation == this.annotation 186 ? this 187 : new NonTerminal<>(name, annotation); 188 } 189 190 } 191 192 /** 193 * Represents a terminal symbols of the grammar ({@code T}). 194 * 195 * @param <T> the terminal symbol value type 196 */ 197 public record Terminal<T>(String name, T value, Object annotation) 198 implements Symbol<T> 199 { 200 201 /** 202 * @param name the name of the terminal symbol 203 * @param value the value of the terminal symbol 204 * @throws IllegalArgumentException if the given terminal {@code name} 205 * is empty 206 */ 207 public Terminal { 208 if (name.isEmpty()) { 209 throw new IllegalArgumentException( 210 "Terminal value must not be empty." 211 ); 212 } 213 } 214 215 public Terminal(final String name, final T value) { 216 this(name, value, null); 217 } 218 219 @Override 220 public Terminal<T> at(final Object annotation) { 221 return annotation == this.annotation 222 ? this 223 : new Terminal<>(name, value, annotation); 224 } 225 226 /** 227 * Return a new terminal symbol where the name of the symbol is equal 228 * to its value. 229 * 230 * @param name the name (and value) of the terminal symbol 231 * @return a new terminal symbol with the given {@code name} 232 * @throws IllegalArgumentException if the given terminal {@code name} 233 * is empty 234 */ 235 public static Terminal<String> of(final String name) { 236 return new Terminal<>(name, name); 237 } 238 239 } 240 241 /** 242 * Represents one <em>expression</em> (list of alternative symbols) a 243 * production rule consists of. 244 * 245 * @param <T> the terminal symbol value type 246 */ 247 public record Expression<T>(List<Symbol<T>> symbols, Object annotation) 248 implements Element<T> 249 { 250 251 /** 252 * @param symbols the expression symbols 253 * @throws IllegalArgumentException if the list of {@code symbols} is 254 * empty 255 */ 256 public Expression { 257 if (symbols.isEmpty()) { 258 throw new IllegalArgumentException( 259 "The list of symbols must not be empty." 260 ); 261 } 262 symbols = List.copyOf(symbols); 263 } 264 265 public Expression(final List<Symbol<T>> symbols) { 266 this(symbols, null); 267 } 268 269 @Override 270 public Expression<T> at(final Object annotation) { 271 return annotation == this.annotation 272 ? this 273 : new Expression<>(symbols, annotation); 274 } 275 276 /** 277 * Builder class for building rule expressions. 278 * 279 * @since 8.2 280 * 281 * @param <T> the terminal symbol value type 282 */ 283 public static final class Builder<T> { 284 private final List<Symbol<T>> symbols = new ArrayList<>(); 285 private Object annotation; 286 287 private Builder() { 288 } 289 290 /** 291 * Add the given {@code symbol} to the expression. 292 * 293 * @param symbol the expression symbol 294 * @return {@code this} builder for method chaining 295 */ 296 public Builder<T> add(final Symbol<T> symbol) { 297 symbols.add(requireNonNull(symbol)); 298 return this; 299 } 300 301 /** 302 * Add a non-terminal symbol to the expression. 303 * 304 * @param name the symbol name 305 * @param annotation the symbol annotation 306 * @return {@code this} builder for method chaining 307 */ 308 public Builder<T> N(final String name, final Object annotation) { 309 return add(new NonTerminal<>(name, annotation)); 310 } 311 312 /** 313 * Add a non-terminal symbol to the expression. 314 * 315 * @param name the symbol name 316 * @return {@code this} builder for method chaining 317 */ 318 public Builder<T> N(final String name) { 319 return N(name, null); 320 } 321 322 /** 323 * Add a non-terminal symbol to the expression. 324 * 325 * @param name the symbol name 326 * @param value the symbol value 327 * @param annotation the symbol annotation 328 * @return {@code this} builder for method chaining 329 */ 330 public Builder<T> 331 T(final String name, final T value, final Object annotation) { 332 return add(new Terminal<>(name, value, annotation)); 333 } 334 335 /** 336 * Add a non-terminal symbol to the expression. 337 * 338 * @param name the symbol name 339 * @param value the symbol value 340 * @return {@code this} builder for method chaining 341 */ 342 public Builder<T> T(final String name, final T value) { 343 return add(new Terminal<>(name, value, null)); 344 } 345 346 /** 347 * Add a non-terminal symbol to the expression. 348 * 349 * @param name the symbol name 350 * @throws ClassCastException if {@code T} is not a string 351 * @return {@code this} builder for method chaining 352 */ 353 public Builder<T> T(final String name) { 354 @SuppressWarnings("unchecked") 355 final T value = (T)name; 356 return add(new Terminal<>(name, value, null)); 357 } 358 359 /** 360 * Set the expression annotation. 361 * 362 * @param annotation the expression annotation 363 * @return {@code this} builder for method chaining 364 */ 365 public Builder<T> at(final Object annotation) { 366 this.annotation = annotation; 367 return this; 368 } 369 370 /** 371 * Create a new rule object. 372 * 373 * @return a new rule object 374 */ 375 public Expression<T> build() { 376 return new Expression<>(symbols, annotation); 377 } 378 379 } 380 381 } 382 383 /** 384 * Represents a production rule of the grammar ({@code R}). 385 * 386 * @param <T> the terminal symbol value type 387 */ 388 public record Rule<T>( 389 NonTerminal<T> start, 390 List<Expression<T>> alternatives, 391 Object annotation 392 ) 393 implements Annotatable<T> 394 { 395 396 /** 397 * Creates a new rule object. 398 * 399 * @param start the start symbol of the rule 400 * @param alternatives the list of alternative rule expressions 401 * @throws IllegalArgumentException if the given list of 402 * {@code alternatives} is empty 403 * @throws NullPointerException if one of the arguments is {@code null} 404 */ 405 public Rule { 406 requireNonNull(start); 407 if (alternatives.isEmpty()) { 408 throw new IllegalArgumentException( 409 "Rule alternatives must not be empty." 410 ); 411 } 412 alternatives = List.copyOf(alternatives); 413 } 414 415 public Rule(NonTerminal<T> start, List<Expression<T>> alternatives) { 416 this(start, alternatives, null); 417 } 418 419 @Override 420 public Rule<T> at(final Object annotation) { 421 return annotation == this.annotation 422 ? this 423 : new Rule<>(start, alternatives, annotation); 424 } 425 426 /** 427 * Builder class for building CFG rules. 428 * 429 * @since 8.2 430 * 431 * @param <T> the terminal symbol value type 432 */ 433 public static final class Builder<T> { 434 private final NonTerminal<T> start; 435 private final List<Expression<T>> expressions = new ArrayList<>(); 436 private Object annotation; 437 438 private Builder(final NonTerminal<T> start) { 439 this.start = requireNonNull(start); 440 } 441 442 /** 443 * Builder method for creating a rule expression. 444 * 445 * @param builder the expression builder 446 * @return {@code this} builder for method chaining 447 */ 448 public Builder<T> 449 E(final Consumer<? super Expression.Builder<T>> builder) { 450 final var eb = new Expression.Builder<T>(); 451 builder.accept(eb); 452 expressions.add(eb.build()); 453 454 return this; 455 } 456 457 /** 458 * Builder method for creating an expression with the given 459 * {@code symbols}. 460 * 461 * @param symbols the expression symbols 462 * @throws IllegalArgumentException if the list of {@code symbols} 463 * is empty 464 * @return {@code this} builder for method chaining 465 */ 466 @SafeVarargs 467 public final Builder<T> E(final Symbol<T>... symbols) { 468 expressions.add(Cfg.E(symbols)); 469 return this; 470 } 471 472 /** 473 * Add an expression which consists solely of the given non-terminal 474 * symbol. 475 * 476 * @param name the symbol name 477 * @param annotation the symbol annotation 478 * @return {@code this} builder for method chaining 479 */ 480 public Builder<T> N(final String name, final Object annotation) { 481 return E(new NonTerminal<>(name, annotation)); 482 } 483 484 /** 485 * Add an expression which consists solely of the given non-terminal 486 * symbol. 487 * 488 * @param name the symbol name 489 * @return {@code this} builder for method chaining 490 */ 491 public Builder<T> N(final String name) { 492 return N(name, null); 493 } 494 495 /** 496 * Add an expression which consists solely of the given terminal 497 * symbol. 498 * 499 * @param name the symbol name 500 * @param value the symbol value 501 * @param annotation the symbol annotation 502 * @return {@code this} builder for method chaining 503 */ 504 public Builder<T> T( 505 final String name, 506 final T value, 507 final Object annotation 508 ) { 509 return E(Cfg.T(name, value, annotation)); 510 } 511 512 /** 513 * Add an expression which consists solely of the given terminal 514 * symbol. 515 * 516 * @param name the symbol name 517 * @param value the symbol value 518 * @return {@code this} builder for method chaining 519 */ 520 public Builder<T> T(final String name, final T value) { 521 return E(Cfg.T(name, value)); 522 } 523 524 /** 525 * Add an expression which consists solely of the given terminal 526 * symbol. 527 * 528 * @param name the symbol name 529 * @return {@code this} builder for method chaining 530 */ 531 public Builder<T> T(final T name) { 532 return E(Cfg.T(name.toString(), name)); 533 } 534 535 /** 536 * Sets the rule annotation. 537 * 538 * @param annotation the rule annotation 539 * @return {@code this} builder for method chaining 540 */ 541 public Builder<T> at(Object annotation) { 542 this.annotation = annotation; 543 return this; 544 } 545 546 /** 547 * Create a new rule object. 548 * 549 * @return a new rule object 550 */ 551 public Rule<T> build() { 552 return new Rule<>(start, expressions, annotation); 553 } 554 555 } 556 557 } 558 559 /* ************************************************************************* 560 * CFG implementation. 561 * ************************************************************************/ 562 563 private final List<NonTerminal<T>> nonTerminals; 564 private final List<Terminal<T>> terminals; 565 private final List<Rule<T>> rules; 566 private final NonTerminal<T> start; 567 568 /** 569 * Create a new <em>context-free</em> grammar object. 570 * 571 * @param nonTerminals the non-terminal symbols of {@code this} grammar 572 * @param terminals the terminal symbols of {@code this} grammar 573 * @param rules the <em>production</em> rules of {@code this} grammar 574 * @param start the start symbol of {@code this} grammar 575 * @throws NullPointerException if one of the arguments is {@code null} 576 * @throws IllegalArgumentException if a rule is defined more than once, the 577 * start symbol points to a missing rule or the rules uses symbols 578 * not defined in the list of {@link #nonTerminals()} or 579 * {@link #terminals()} 580 */ 581 Cfg( 582 List<NonTerminal<T>> nonTerminals, 583 List<Terminal<T>> terminals, 584 List<Rule<T>> rules, 585 NonTerminal<T> start 586 ) { 587 if (rules.isEmpty()) { 588 throw new IllegalArgumentException( 589 "The given list of rules must not be empty." 590 ); 591 } 592 593 // Check the uniqueness of the rules. 594 final var duplicatedRules = rules.stream() 595 .collect(Collectors.groupingBy(Rule::start)) 596 .values().stream() 597 .filter(values -> values.size() > 1) 598 .map(rule -> rule.getFirst().start.name) 599 .toList(); 600 601 if (!duplicatedRules.isEmpty()) { 602 throw new IllegalArgumentException( 603 "Found duplicate rule: %s.".formatted(duplicatedRules) 604 ); 605 } 606 607 // Check if start symbol points to an existing rule. 608 final var startRule = rules.stream() 609 .filter(r -> start.equals(r.start)) 610 .findFirst(); 611 if (startRule.isEmpty()) { 612 throw new IllegalArgumentException( 613 "No rule found for start symbol %s.".formatted(start) 614 ); 615 } 616 617 // Check that all symbols used in the given rules are also defined 618 // in the list of non-terminals and terminals. 619 final Set<Symbol<T>> required = rules.stream() 620 .flatMap(Cfg::ruleSymbols) 621 .map(s -> s.at(null)) 622 .collect(Collectors.toUnmodifiableSet()); 623 624 final Set<Symbol<T>> available = Stream 625 .concat(nonTerminals.stream(), terminals.stream()) 626 .map(s -> s.at(null)) 627 .collect(Collectors.toUnmodifiableSet()); 628 629 final var missing = new HashSet<>(required); 630 missing.removeAll(available); 631 632 if (!missing.isEmpty()) { 633 throw new IllegalArgumentException( 634 "Unknown symbols defined in rules: " + missing 635 ); 636 } 637 638 // Check if the name of terminals and non-terminals are distinct. 639 final var terminalNames = terminals.stream() 640 .map(Symbol::name) 641 .collect(Collectors.toSet()); 642 643 final var nonTerminalNames = nonTerminals.stream() 644 .map(Symbol::name) 645 .collect(Collectors.toSet()); 646 647 terminalNames.retainAll(nonTerminalNames); 648 if (!terminalNames.isEmpty()) { 649 throw new IllegalArgumentException(format( 650 "Terminal and non-terminal symbols with same name: %s", 651 terminalNames.stream().sorted().toList() 652 )); 653 } 654 655 this.nonTerminals = List.copyOf(nonTerminals); 656 this.terminals = List.copyOf(terminals); 657 this.rules = List.copyOf(rules); 658 this.start = requireNonNull(start); 659 } 660 661 /** 662 * Return the non-terminal symbols of {@code this} grammar. The returned 663 * symbols have no annotation. 664 * 665 * @return the non-terminal symbols of {@code this} grammar 666 */ 667 public List<NonTerminal<T>> nonTerminals() { 668 return nonTerminals; 669 } 670 671 /** 672 * Return the terminal symbols of {@code this} grammar. The returned 673 * symbols have no annotation. 674 * 675 * @return the terminal symbols of {@code this} grammar 676 */ 677 public List<Terminal<T>> terminals() { 678 return terminals; 679 } 680 681 /** 682 * Return the rules of {@code this} grammar. 683 * 684 * @return the rules of {@code this} grammar 685 */ 686 public List<Rule<T>> rules() { 687 return rules; 688 } 689 690 /** 691 * Return the start symbol of {@code this} grammar. 692 * 693 * @return the start symbol of {@code this} grammar 694 */ 695 public NonTerminal<T> start() { 696 return start; 697 } 698 699 /** 700 * Return the rule for the given {@code start} symbol. 701 * 702 * @param start the start symbol of the rule 703 * @return the rule for the given {@code start} symbol 704 * @throws NullPointerException if the given {@code start} symbol is 705 * {@code null} 706 */ 707 public Optional<Rule<T>> rule(final NonTerminal<?> start) { 708 requireNonNull(start); 709 for (var rule : rules) { 710 if (rule.start().name().equals(start.name())) { 711 return Optional.of(rule); 712 } 713 } 714 return Optional.empty(); 715 } 716 717 /** 718 * Maps the values of the terminal symbols from type {@code T} to type 719 * {@code A}. 720 * 721 * @param mapper the mapper function 722 * @param <A> the new value type of the terminal symbols 723 * @return the mapped grammar 724 * @throws NullPointerException if the given mapper is {@code null} 725 */ 726 public <A> Cfg<A> 727 map(final Function<? super Terminal<? extends T>, ? extends A> mapper) { 728 requireNonNull(mapper); 729 730 final var cache = new HashMap<Terminal<T>, Terminal<A>>(); 731 final Function<Terminal<T>, Terminal<A>> mapping = t -> cache 732 .computeIfAbsent(t, t2 -> new Terminal<>(t2.name(), mapper.apply(t2))); 733 734 return flatMap(mapping); 735 } 736 737 /** 738 * Maps the values of the terminal symbols from type {@code T} to type 739 * {@code A}. 740 * 741 * @since 9.0 742 * 743 * @param mapper the mapper function 744 * @param <A> the new value type of the terminal symbols 745 * @return the mapped grammar 746 * @throws NullPointerException if the given mapper is {@code null} 747 */ 748 public <A> Cfg<A> flatMap( 749 final Function< 750 ? super Terminal<T>, 751 ? extends Terminal<? extends A> 752 > mapper 753 ) { 754 requireNonNull(mapper); 755 756 final var cache = new HashMap<Terminal<T>, Terminal<A>>(); 757 758 @SuppressWarnings("unchecked") 759 final Function<Terminal<T>, Terminal<A>> 760 fn = (Function<Terminal<T>, Terminal<A>>)mapper; 761 762 final Function<Terminal<T>, Terminal<A>> mapping = 763 t -> cache.computeIfAbsent(t, fn); 764 765 @SuppressWarnings("unchecked") 766 final List<Rule<A>> rules = rules().stream() 767 .map(rule -> new Rule<>( 768 (NonTerminal<A>)rule.start(), 769 rule.alternatives().stream() 770 .map(expr -> new Expression<>( 771 expr.symbols().stream() 772 .map(sym -> 773 sym instanceof Cfg.Terminal<T> t 774 ? mapping.apply(t) : (Symbol<A>)sym 775 ) 776 .toList() 777 )) 778 .toList() 779 )) 780 .toList(); 781 782 return Cfg.of(rules); 783 } 784 785 @Override 786 public int hashCode() { 787 return rules.hashCode(); 788 } 789 790 @Override 791 public boolean equals(final Object obj) { 792 return obj instanceof Cfg<?> cfg && rules.equals(cfg.rules()); 793 } 794 795 @Override 796 public String toString() { 797 return "Cfg[nonTerminals=%s, terminals=%s, rules=%s, start=%s]" 798 .formatted(nonTerminals, terminals, rules, start); 799 } 800 801 802 /* ************************************************************************* 803 * Factory methods 804 * ************************************************************************/ 805 806 /** 807 * Create a grammar object with the given rules. Duplicated rules are merged 808 * into one rule. The <em>start</em> symbol of the first rule is chosen as 809 * the start symbol of the created CFG 810 * 811 * @param rules the rules the grammar consists of 812 * @throws IllegalArgumentException if the rule list is empty or a rule is 813 * defined more than once, the start symbol points to a missing rule 814 * or the rules uses symbols not defined in the list of 815 * {@link #nonTerminals()} or {@link #terminals()} 816 * @throws NullPointerException if the list of rules is {@code null} 817 */ 818 public static <T> Cfg<T> of(final List<? extends Rule<? extends T>> rules) { 819 @SuppressWarnings("unchecked") 820 final var rules0 = (List<Rule<T>>)List.copyOf(rules); 821 822 // Rules must not be empty. 823 if (rules0.isEmpty()) { 824 throw new IllegalArgumentException( 825 "The list of rules must not be empty." 826 ); 827 } 828 829 final List<Symbol<T>> symbols = rules0.stream() 830 .flatMap(Cfg::ruleSymbols) 831 .distinct() 832 .toList(); 833 834 final List<NonTerminal<T>> nonTerminals = symbols.stream() 835 .map(rule -> rule.at(null)) 836 .distinct() 837 .filter(NonTerminal.class::isInstance) 838 .map(nt -> (NonTerminal<T>)nt) 839 .toList(); 840 841 final List<Terminal<T>> terminals = symbols.stream() 842 .map(rule -> rule.at(null)) 843 .distinct() 844 .filter(Terminal.class::isInstance) 845 .map(nt -> (Terminal<T>)nt) 846 .toList(); 847 848 final Set<Symbol<T>> allSymbols = new HashSet<>(symbols); 849 allSymbols.addAll(nonTerminals); 850 allSymbols.addAll(terminals); 851 852 return new Cfg<>( 853 nonTerminals.stream() 854 .map(s -> (NonTerminal<T>)select(s, allSymbols)) 855 .toList(), 856 terminals.stream() 857 .map(s -> (Terminal<T>)select(s, allSymbols)) 858 .toList(), 859 rules0.stream() 860 .map(r -> rebuild(r, allSymbols)) 861 .toList(), 862 (NonTerminal<T>)select(rules0.getFirst().start(), allSymbols) 863 ); 864 } 865 866 /** 867 * Create a grammar object with the given rules. 868 * 869 * @param rules the rules the grammar consists of 870 * @throws IllegalArgumentException if the list of rules is empty or there 871 * are duplicate rules 872 * @throws NullPointerException if the list of rules is {@code null} 873 */ 874 @SafeVarargs 875 public static <T> Cfg<T> of(final Rule<? extends T>... rules) { 876 return Cfg.of(List.of(rules)); 877 } 878 879 private static <T> Stream<Symbol<T>> ruleSymbols(final Rule<T> rule) { 880 return Stream.concat( 881 Stream.of(rule.start), 882 rule.alternatives.stream() 883 .flatMap(expr -> expr.symbols().stream()) 884 ); 885 } 886 887 private static <T> Rule<T> rebuild( 888 final Rule<T> rule, 889 final Collection<Symbol<T>> symbols 890 ) { 891 return new Rule<>( 892 (NonTerminal<T>)select(rule.start, symbols), 893 rule.alternatives.stream() 894 .map(e -> rebuild(e, symbols)) 895 .toList() 896 ); 897 } 898 899 private static <T> Expression<T> 900 rebuild(final Expression<T> expression, final Collection<Symbol<T>> symbols) { 901 return new Expression<>( 902 expression.symbols.stream() 903 .map(s -> select(s, symbols)) 904 .toList() 905 ); 906 } 907 908 private static <T> Symbol<T> select( 909 final Symbol<T> symbol, 910 final Collection<Symbol<T>> symbols 911 ) { 912 for (var s : symbols) { 913 if (s.equals(symbol)) { 914 return s; 915 } 916 } 917 throw new AssertionError("Symbol not found: " + symbol); 918 } 919 920 @SuppressWarnings("unchecked") 921 static <A, B extends A> Cfg<A> upcast(final Cfg<B> seq) { 922 return (Cfg<A>)seq; 923 } 924 925 926 /* ************************************************************************* 927 * Static factory methods for rule creation: DSL methods. 928 * ************************************************************************/ 929 930 /** 931 * Factory method for creating a terminal symbol with the given 932 * {@code name} and {@code value}. 933 * 934 * @param name the name of the terminal symbol 935 * @param value the value of the terminal symbol 936 * @param <T> the terminal symbol value type 937 * @return a new terminal symbol 938 */ 939 public static <T> Terminal<T> T(final String name, final T value) { 940 return new Terminal<>(name, value); 941 } 942 943 /** 944 * Factory method for creating a terminal symbol with the given 945 * {@code name} and {@code value}. 946 * 947 * @since 8.2 948 * 949 * @param name the name of the terminal symbol 950 * @param value the value of the terminal symbol 951 * @param annotation the terminal annotation 952 * @param <T> the terminal symbol value type 953 * @return a new terminal symbol 954 */ 955 public static <T> Terminal<T> T( 956 final String name, 957 final T value, 958 final Object annotation 959 ) { 960 return new Terminal<>(name, value, annotation); 961 } 962 963 /** 964 * Factory method for creating a terminal symbol with the given 965 * {@code name}. 966 * 967 * @since 9.0 968 * 969 * @param value the value of the terminal symbol 970 * @return a new terminal symbol 971 */ 972 public static <T> Terminal<T> T(final T value) { 973 return new Terminal<>(String.valueOf(value), value); 974 } 975 976 /** 977 * Factory method for creating non-terminal symbols. 978 * 979 * @param name the name of the symbol. 980 * @param <T> the terminal symbol value type 981 * @return a new non-terminal symbol 982 */ 983 public static <T> NonTerminal<T> N(final String name) { 984 return new NonTerminal<>(name); 985 } 986 987 /** 988 * Factory method for creating non-terminal symbols. 989 * 990 * @since 8.2 991 * 992 * @param name the name of the symbol. 993 * @param annotation the annotation of the symbol 994 * @param <T> the terminal symbol value type 995 * @return a new non-terminal symbol 996 */ 997 public static <T> NonTerminal<T> N( 998 final String name, 999 final Object annotation 1000 ) { 1001 return new NonTerminal<>(name, annotation); 1002 } 1003 1004 /** 1005 * Factory method for creating an expression with the given 1006 * {@code symbols}. 1007 * 1008 * @param symbols the expression symbols 1009 * @throws IllegalArgumentException if the list of {@code symbols} is 1010 * empty 1011 * @param <T> the terminal symbol value type 1012 * @return a new expression 1013 */ 1014 @SafeVarargs 1015 public static <T> Expression<T> E(final Symbol<T>... symbols) { 1016 return new Expression<>(List.of(symbols)); 1017 } 1018 1019 /** 1020 * Factory method for creating a new rule. The {@code elements} array doesn't 1021 * allow {@link Rule} objects. 1022 * 1023 * @param name the name of start symbol of the rule 1024 * @param elements the list of alternative rule expressions 1025 * @throws IllegalArgumentException if the given list of 1026 * {@code alternatives} is empty 1027 * @throws NullPointerException if one of the arguments is {@code null} 1028 * @param <T> the terminal symbol value type 1029 * @return a new rule 1030 */ 1031 @SafeVarargs 1032 public static <T> Rule<T> R( 1033 final String name, 1034 final Element<T>... elements 1035 ) { 1036 return R(new NonTerminal<>(name), elements); 1037 } 1038 1039 /** 1040 * Factory method for creating a new rule. The {@code elements} array doesn't 1041 * allow {@link Rule} objects. 1042 * 1043 * @since 8.2 1044 * 1045 * @param start the start symbol of the rule 1046 * @param elements the list of alternative rule expressions 1047 * @throws NullPointerException if one of the arguments is {@code null} 1048 * @param <T> the terminal symbol value type 1049 * @return a new rule 1050 */ 1051 @SafeVarargs 1052 public static <T> Rule<T> R( 1053 final NonTerminal<T> start, 1054 final Element<T>... elements 1055 ) { 1056 return new Rule<>( 1057 start, 1058 Stream.of(elements) 1059 .map(expression -> switch (expression) { 1060 case Expression<T> e -> e; 1061 case Symbol<T> s -> new Expression<>(List.of(s)); 1062 }) 1063 .toList() 1064 ); 1065 } 1066 1067 /** 1068 * Builder class for building {@link Cfg} objects. 1069 * 1070 * @since 8.2 1071 * 1072 * @param <T> the terminal symbol value type 1073 */ 1074 public static final class Builder<T> { 1075 private final List<Rule<T>> rules = new ArrayList<>(); 1076 1077 private Builder() { 1078 } 1079 1080 /** 1081 * Building a new rule for the CFG. 1082 * 1083 * @param start the rule start 1084 * @param builder the rule builder 1085 * @return {@code this} builder for method chaining 1086 */ 1087 public Builder<T> R( 1088 final NonTerminal<T> start, 1089 final Consumer<? super Rule.Builder<T>> builder 1090 ) { 1091 final var rb = new Rule.Builder<T>(start); 1092 builder.accept(rb); 1093 rules.add(rb.build()); 1094 1095 return this; 1096 } 1097 1098 /** 1099 * Building a new rule for the CFG. 1100 * 1101 * @param name the rule name 1102 * @param builder the rule builder 1103 * @return {@code this} builder for method chaining 1104 */ 1105 public Builder<T> R( 1106 final String name, 1107 final Consumer<? super Rule.Builder<T>> builder 1108 ) { 1109 return R(new NonTerminal<>(name), builder); 1110 } 1111 1112 /** 1113 * Creates a new CFG object. 1114 * 1115 * @see #of(List) 1116 * 1117 * @throws IllegalArgumentException if the rule list is empty or a rule 1118 * is defined more than once, the start symbol points to a 1119 * missing rule or the rules uses symbols not defined in the 1120 * list of {@link #nonTerminals()} or {@link #terminals()} 1121 * @return a newly created CFG object 1122 */ 1123 public Cfg<T> build() { 1124 return Cfg.of(rules); 1125 } 1126 1127 } 1128 1129 /** 1130 * Return a new CFG builder. 1131 * 1132 * @since 8.2 1133 * 1134 * @return a new CFG builder 1135 * @param <T> the terminal symbol value type 1136 */ 1137 public static <T> Builder<T> builder() { 1138 return new Builder<>(); 1139 } 1140 1141}