001/* 002 * Java Genetic Algorithm Library (jenetics-8.2.0). 003 * Copyright (c) 2007-2025 Franz Wilhelmstötter 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 * Author: 018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com) 019 */ 020package io.jenetics.ext.grammar; 021 022import static java.lang.String.format; 023import static java.util.Objects.requireNonNull; 024 025import java.util.ArrayList; 026import java.util.Collection; 027import java.util.HashMap; 028import java.util.HashSet; 029import java.util.List; 030import java.util.Optional; 031import java.util.Set; 032import java.util.function.Consumer; 033import java.util.function.Function; 034import java.util.stream.Collectors; 035import java.util.stream.Stream; 036 037/** 038 * Represents a <em>context-free</em> grammar 039 * (<a href="https://en.wikipedia.org/wiki/Context-free_grammar"><b>CFG</b></a>). 040 * <p> 041 * <b>Formal definition</b> 042 * <p> 043 * A context-free grammar {@code G} is defined by the 4-tuple 044 * {@code G = (N, T, R, S)}, where 045 * <ul> 046 * <li>{@code N} is a finite set; each element {@code n ∈ N} is called a 047 * non-terminal ({@link NonTerminal}) character or a variable. Each 048 * variable represents a different type of phrase or clause in the sentence. 049 * Variables are also sometimes called syntactic categories. Each variable 050 * defines a sub-language of the language defined by {@code G}. 051 * </li> 052 * <li>{@code T} is a finite set of terminals ({@link Terminal}) disjoint 053 * from {@code N}, which make up the actual content of the sentence. The set 054 * of terminals is the alphabet of the language defined by the grammar 055 * {@code G}. 056 * </li> 057 * <li>{@code R} is a finite relation in {@code N × (N ∪ T)∗}, where the 058 * asterisk represents the <a href="https://en.wikipedia.org/wiki/Kleene_star"> 059 * Kleene star</a> operation. The members of {@code R} are called the 060 * (rewrite) rules ({@link Rule}) or productions of the grammar. 061 * </li> 062 * <li>{@code S} is the start variable (or start symbol), used to represent 063 * the whole sentence (or program). It must be an element of {@code N} 064 * ({@link NonTerminal}). 065 * </li> 066 * </ul> 067 * 068 * <b>Creating <em>Cfg</em> object from a given BNF grammar</b> 069 * {@snippet class="Snippets" region="parseBnf"} 070 * <p> 071 * <b>Creating <em>Cfg</em> programmatically</b> 072 * {@snippet class="Snippets" region="cfgWithoutBuilder"} 073 * <p> 074 * <b>Creating <em>Cfg</em> programmatically with builders</b> 075 * <p> 076 * Using the CFG builder makes it easier to define annotation for symbols without 077 * pushing the Java generics to its edges. 078 * {@snippet class="Snippets" region="cfgWithBuilder"} 079 * <p> 080 * Annotating CFG elements can be used to influence the {@link Generator} classes, 081 * which creates <em>sentences</em> from a given grammar. 082 * 083 * @see Bnf#parse(String) 084 * 085 * @param <T> the terminal symbol value type 086 * 087 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a> 088 * @since 7.1 089 * @version 8.2 090 */ 091public final class Cfg<T> { 092 093 /* ************************************************************************* 094 * Inner classes and interfaces. 095 * ************************************************************************/ 096 097 /** 098 * Interface for annotatable CFG types. 099 * 100 * @since 8.2 101 * @version 8.2 102 * 103 * @param <T> the terminal symbol value type 104 */ 105 public sealed interface Annotatable<T> { 106 /** 107 * Return the element annotation, might be {@code null}. 108 * 109 * @return the annotation of the element, or {@code null} if the element 110 * has none 111 */ 112 Object annotation(); 113 114 /** 115 * Create a new copy of the CFG element, with the given 116 * {@code annotation}. 117 * 118 * @param annotation the annotation of the newly created element, 119 * may be {@code null} 120 * @return a copy of the element with the given {@code annotation} 121 */ 122 Annotatable<T> at(Object annotation); 123 } 124 125 /** 126 * Represents an element of a CFG 127 * 128 * @since 8.2 129 * @version 8.2 130 * 131 * @param <T> the terminal symbol value type 132 */ 133 public sealed interface Element<T> extends Annotatable<T> { 134 } 135 136 /** 137 * Represents the <em>symbols</em> the BNF grammar consists. 138 * 139 * @param <T> the terminal symbol value type 140 */ 141 public sealed interface Symbol<T> extends Element<T> { 142 143 /** 144 * Return the name of the symbol. 145 * 146 * @return the name of the symbol 147 */ 148 String name(); 149 150 @Override 151 Symbol<T> at(Object annotation); 152 153 } 154 155 /** 156 * Represents the non-terminal symbols of the grammar ({@code NT}). 157 * 158 * @param <T> the terminal symbol value type 159 */ 160 public record NonTerminal<T>(String name, Object annotation) 161 implements Symbol<T> 162 { 163 164 /** 165 * @param name the name of the non-terminal symbol 166 * @throws IllegalArgumentException if the given {@code name} is not 167 * a valid <em>non-terminal</em> name 168 * @throws NullPointerException if one of the arguments is {@code null} 169 */ 170 public NonTerminal { 171 if (name.isEmpty()) { 172 throw new IllegalArgumentException( 173 "Non-terminal value must not be empty." 174 ); 175 } 176 } 177 178 public NonTerminal(final String name) { 179 this(name, null); 180 } 181 182 @Override 183 public NonTerminal<T> at(final Object annotation) { 184 return annotation == this.annotation 185 ? this 186 : new NonTerminal<>(name, annotation); 187 } 188 189 } 190 191 /** 192 * Represents a terminal symbols of the grammar ({@code T}). 193 * 194 * @param <T> the terminal symbol value type 195 */ 196 public record Terminal<T>(String name, T value, Object annotation) 197 implements Symbol<T> 198 { 199 200 /** 201 * @param name the name of the terminal symbol 202 * @param value the value of the terminal symbol 203 * @throws IllegalArgumentException if the given terminal {@code name} 204 * is empty 205 */ 206 public Terminal { 207 if (name.isEmpty()) { 208 throw new IllegalArgumentException( 209 "Terminal value must not be empty." 210 ); 211 } 212 } 213 214 public Terminal(final String name, final T value) { 215 this(name, value, null); 216 } 217 218 @Override 219 public Terminal<T> at(final Object annotation) { 220 return annotation == this.annotation 221 ? this 222 : new Terminal<>(name, value, annotation); 223 } 224 225 /** 226 * Return a new terminal symbol where the name of the symbol is equal 227 * to its value. 228 * 229 * @param name the name (and value) of the terminal symbol 230 * @return a new terminal symbol with the given {@code name} 231 * @throws IllegalArgumentException if the given terminal {@code name} 232 * is empty 233 */ 234 public static Terminal<String> of(final String name) { 235 return new Terminal<>(name, name); 236 } 237 238 } 239 240 /** 241 * Represents one <em>expression</em> (list of alternative symbols) a 242 * production rule consists of. 243 * 244 * @param <T> the terminal symbol value type 245 */ 246 public record Expression<T>(List<Symbol<T>> symbols, Object annotation) 247 implements Element<T> 248 { 249 250 /** 251 * @param symbols the expression symbols 252 * @throws IllegalArgumentException if the list of {@code symbols} is 253 * empty 254 */ 255 public Expression { 256 if (symbols.isEmpty()) { 257 throw new IllegalArgumentException( 258 "The list of symbols must not be empty." 259 ); 260 } 261 symbols = List.copyOf(symbols); 262 } 263 264 public Expression(final List<Symbol<T>> symbols) { 265 this(symbols, null); 266 } 267 268 @Override 269 public Expression<T> at(final Object annotation) { 270 return annotation == this.annotation 271 ? this 272 : new Expression<>(symbols, annotation); 273 } 274 275 /** 276 * Builder class for building rule expressions. 277 * 278 * @since 8.2 279 * 280 * @param <T> the terminal symbol value type 281 */ 282 public static final class Builder<T> { 283 private final List<Symbol<T>> symbols = new ArrayList<>(); 284 private Object annotation; 285 286 private Builder() { 287 } 288 289 /** 290 * Add the given {@code symbol} to the expression. 291 * 292 * @param symbol the expression symbol 293 * @return {@code this} builder for method chaining 294 */ 295 public Builder<T> add(final Symbol<T> symbol) { 296 symbols.add(requireNonNull(symbol)); 297 return this; 298 } 299 300 /** 301 * Add a non-terminal symbol to the expression. 302 * 303 * @param name the symbol name 304 * @param annotation the symbol annotation 305 * @return {@code this} builder for method chaining 306 */ 307 public Builder<T> N(final String name, final Object annotation) { 308 return add(new NonTerminal<>(name, annotation)); 309 } 310 311 /** 312 * Add a non-terminal symbol to the expression. 313 * 314 * @param name the symbol name 315 * @return {@code this} builder for method chaining 316 */ 317 public Builder<T> N(final String name) { 318 return N(name, null); 319 } 320 321 /** 322 * Add a non-terminal symbol to the expression. 323 * 324 * @param name the symbol name 325 * @param value the symbol value 326 * @param annotation the symbol annotation 327 * @return {@code this} builder for method chaining 328 */ 329 public Builder<T> 330 T(final String name, final T value, final Object annotation) { 331 return add(new Terminal<>(name, value, annotation)); 332 } 333 334 /** 335 * Add a non-terminal symbol to the expression. 336 * 337 * @param name the symbol name 338 * @param value the symbol value 339 * @return {@code this} builder for method chaining 340 */ 341 public Builder<T> T(final String name, final T value) { 342 return add(new Terminal<>(name, value, null)); 343 } 344 345 /** 346 * Add a non-terminal symbol to the expression. 347 * 348 * @param name the symbol name 349 * @throws ClassCastException if {@code T} is not a string 350 * @return {@code this} builder for method chaining 351 */ 352 public Builder<T> T(final String name) { 353 @SuppressWarnings("unchecked") 354 final T value = (T)name; 355 return add(new Terminal<>(name, value, null)); 356 } 357 358 /** 359 * Set the expression annotation. 360 * 361 * @param annotation the expression annotation 362 * @return {@code this} builder for method chaining 363 */ 364 public Builder<T> at(final Object annotation) { 365 this.annotation = annotation; 366 return this; 367 } 368 369 /** 370 * Create a new rule object. 371 * 372 * @return a new rule object 373 */ 374 public Expression<T> build() { 375 return new Expression<>(symbols, annotation); 376 } 377 378 } 379 380 } 381 382 /** 383 * Represents a production rule of the grammar ({@code R}). 384 * 385 * @param <T> the terminal symbol value type 386 */ 387 public record Rule<T>( 388 NonTerminal<T> start, 389 List<Expression<T>> alternatives, 390 Object annotation 391 ) 392 implements Annotatable<T> 393 { 394 395 /** 396 * Creates a new rule object. 397 * 398 * @param start the start symbol of the rule 399 * @param alternatives the list of alternative rule expressions 400 * @throws IllegalArgumentException if the given list of 401 * {@code alternatives} is empty 402 * @throws NullPointerException if one of the arguments is {@code null} 403 */ 404 public Rule { 405 requireNonNull(start); 406 if (alternatives.isEmpty()) { 407 throw new IllegalArgumentException( 408 "Rule alternatives must not be empty." 409 ); 410 } 411 alternatives = List.copyOf(alternatives); 412 } 413 414 public Rule(NonTerminal<T> start, List<Expression<T>> alternatives) { 415 this(start, alternatives, null); 416 } 417 418 @Override 419 public Rule<T> at(final Object annotation) { 420 return annotation == this.annotation 421 ? this 422 : new Rule<>(start, alternatives, annotation); 423 } 424 425 /** 426 * Builder class for building CFG rules. 427 * 428 * @since 8.2 429 * 430 * @param <T> the terminal symbol value type 431 */ 432 public static final class Builder<T> { 433 private final NonTerminal<T> start; 434 private final List<Expression<T>> expressions = new ArrayList<>(); 435 private Object annotation; 436 437 private Builder(final NonTerminal<T> start) { 438 this.start = requireNonNull(start); 439 } 440 441 /** 442 * Builder method for creating a rule expression. 443 * 444 * @param builder the expression builder 445 * @return {@code this} builder for method chaining 446 */ 447 public Builder<T> 448 E(final Consumer<? super Expression.Builder<T>> builder) { 449 final var eb = new Expression.Builder<T>(); 450 builder.accept(eb); 451 expressions.add(eb.build()); 452 453 return this; 454 } 455 456 /** 457 * Builder method for creating an expression with the given 458 * {@code symbols}. 459 * 460 * @param symbols the expression symbols 461 * @throws IllegalArgumentException if the list of {@code symbols} 462 * is empty 463 * @return {@code this} builder for method chaining 464 */ 465 @SafeVarargs 466 public final Builder<T> E(final Symbol<T>... symbols) { 467 expressions.add(Cfg.E(symbols)); 468 return this; 469 } 470 471 /** 472 * Add an expression which consists solely of the given non-terminal 473 * symbol. 474 * 475 * @param name the symbol name 476 * @param annotation the symbol annotation 477 * @return {@code this} builder for method chaining 478 */ 479 public Builder<T> N(final String name, final Object annotation) { 480 return E(new NonTerminal<>(name, annotation)); 481 } 482 483 /** 484 * Add an expression which consists solely of the given non-terminal 485 * symbol. 486 * 487 * @param name the symbol name 488 * @return {@code this} builder for method chaining 489 */ 490 public Builder<T> N(final String name) { 491 return N(name, null); 492 } 493 494 /** 495 * Add an expression which consists solely of the given terminal 496 * symbol. 497 * 498 * @param name the symbol name 499 * @param value the symbol value 500 * @param annotation the symbol annotation 501 * @return {@code this} builder for method chaining 502 */ 503 public Builder<T> T( 504 final String name, 505 final T value, 506 final Object annotation 507 ) { 508 return E(Cfg.T(name, value, annotation)); 509 } 510 511 /** 512 * Add an expression which consists solely of the given terminal 513 * symbol. 514 * 515 * @param name the symbol name 516 * @param value the symbol value 517 * @return {@code this} builder for method chaining 518 */ 519 public Builder<T> T(final String name, final T value) { 520 return E(Cfg.T(name, value)); 521 } 522 523 /** 524 * Add an expression which consists solely of the given terminal 525 * symbol. 526 * 527 * @param name the symbol name 528 * @return {@code this} builder for method chaining 529 */ 530 public Builder<T> T(final T name) { 531 return E(Cfg.T(name.toString(), name)); 532 } 533 534 /** 535 * Sets the rule annotation. 536 * 537 * @param annotation the rule annotation 538 * @return {@code this} builder for method chaining 539 */ 540 public Builder<T> at(Object annotation) { 541 this.annotation = annotation; 542 return this; 543 } 544 545 /** 546 * Create a new rule object. 547 * 548 * @return a new rule object 549 */ 550 public Rule<T> build() { 551 return new Rule<>(start, expressions, annotation); 552 } 553 554 } 555 556 } 557 558 /* ************************************************************************* 559 * CFG implementation. 560 * ************************************************************************/ 561 562 private final List<NonTerminal<T>> nonTerminals; 563 private final List<Terminal<T>> terminals; 564 private final List<Rule<T>> rules; 565 private final NonTerminal<T> start; 566 567 /** 568 * Create a new <em>context-free</em> grammar object. 569 * 570 * @param nonTerminals the non-terminal symbols of {@code this} grammar 571 * @param terminals the terminal symbols of {@code this} grammar 572 * @param rules the <em>production</em> rules of {@code this} grammar 573 * @param start the start symbol of {@code this} grammar 574 * @throws NullPointerException if one of the arguments is {@code null} 575 * @throws IllegalArgumentException if a rule is defined more than once, the 576 * start symbol points to a missing rule or the rules uses symbols 577 * not defined in the list of {@link #nonTerminals()} or 578 * {@link #terminals()} 579 * @deprecated This constructor will be removed, use {@link #of(Rule[])} or 580 * {@link #of(List)} instead. 581 */ 582 @Deprecated(forRemoval = true, since = "8.2") 583 public Cfg( 584 List<NonTerminal<T>> nonTerminals, 585 List<Terminal<T>> terminals, 586 List<Rule<T>> rules, 587 NonTerminal<T> start 588 ) { 589 if (rules.isEmpty()) { 590 throw new IllegalArgumentException( 591 "The given list of rules must not be empty." 592 ); 593 } 594 595 // Check the uniqueness of the rules. 596 final var duplicatedRules = rules.stream() 597 .collect(Collectors.groupingBy(Rule::start)) 598 .values().stream() 599 .filter(values -> values.size() > 1) 600 .map(rule -> rule.getFirst().start.name) 601 .toList(); 602 603 if (!duplicatedRules.isEmpty()) { 604 throw new IllegalArgumentException( 605 "Found duplicate rule: %s.".formatted(duplicatedRules) 606 ); 607 } 608 609 // Check if start symbol points to an existing rule. 610 final var startRule = rules.stream() 611 .filter(r -> start.equals(r.start)) 612 .findFirst(); 613 if (startRule.isEmpty()) { 614 throw new IllegalArgumentException( 615 "No rule found for start symbol %s.".formatted(start) 616 ); 617 } 618 619 // Check that all symbols used in the given rules are also defined 620 // in the list of non-terminals and terminals. 621 final Set<Symbol<T>> required = rules.stream() 622 .flatMap(Cfg::ruleSymbols) 623 .map(s -> s.at(null)) 624 .collect(Collectors.toUnmodifiableSet()); 625 626 final Set<Symbol<T>> available = Stream 627 .concat(nonTerminals.stream(), terminals.stream()) 628 .map(s -> s.at(null)) 629 .collect(Collectors.toUnmodifiableSet()); 630 631 final var missing = new HashSet<>(required); 632 missing.removeAll(available); 633 634 if (!missing.isEmpty()) { 635 throw new IllegalArgumentException( 636 "Unknown symbols defined in rules: " + missing 637 ); 638 } 639 640 // Check if the name of terminals and non-terminals are distinct. 641 final var terminalNames = terminals.stream() 642 .map(Symbol::name) 643 .collect(Collectors.toSet()); 644 645 final var nonTerminalNames = nonTerminals.stream() 646 .map(Symbol::name) 647 .collect(Collectors.toSet()); 648 649 terminalNames.retainAll(nonTerminalNames); 650 if (!terminalNames.isEmpty()) { 651 throw new IllegalArgumentException(format( 652 "Terminal and non-terminal symbols with same name: %s", 653 terminalNames.stream().sorted().toList() 654 )); 655 } 656 657 this.nonTerminals = List.copyOf(nonTerminals); 658 this.terminals = List.copyOf(terminals); 659 this.rules = List.copyOf(rules); 660 this.start = requireNonNull(start); 661 } 662 663 /** 664 * Return the non-terminal symbols of {@code this} grammar. The returned 665 * symbols have no annotation. 666 * 667 * @return the non-terminal symbols of {@code this} grammar 668 */ 669 public List<NonTerminal<T>> nonTerminals() { 670 return nonTerminals; 671 } 672 673 /** 674 * Return the terminal symbols of {@code this} grammar. The returned 675 * symbols have no annotation. 676 * 677 * @return the terminal symbols of {@code this} grammar 678 */ 679 public List<Terminal<T>> terminals() { 680 return terminals; 681 } 682 683 /** 684 * Return the rules of {@code this} grammar. 685 * 686 * @return the rules of {@code this} grammar 687 */ 688 public List<Rule<T>> rules() { 689 return rules; 690 } 691 692 /** 693 * Return the start symbol of {@code this} grammar. 694 * 695 * @return the start symbol of {@code this} grammar 696 */ 697 public NonTerminal<T> start() { 698 return start; 699 } 700 701 /** 702 * Return the rule for the given {@code start} symbol. 703 * 704 * @param start the start symbol of the rule 705 * @return the rule for the given {@code start} symbol 706 * @throws NullPointerException if the given {@code start} symbol is 707 * {@code null} 708 */ 709 public Optional<Rule<T>> rule(final NonTerminal<?> start) { 710 requireNonNull(start); 711 for (var rule : rules) { 712 if (rule.start().name().equals(start.name())) { 713 return Optional.of(rule); 714 } 715 } 716 return Optional.empty(); 717 } 718 719 /** 720 * Maps the values of the terminal symbols from type {@code T} to type 721 * {@code A}. 722 * 723 * @param mapper the mapper function 724 * @param <A> the new value type of the terminal symbols 725 * @return the mapped grammar 726 * @throws NullPointerException if the given mapper is {@code null} 727 */ 728 public <A> Cfg<A> 729 map(final Function<? super Terminal<? extends T>, ? extends A> mapper) { 730 requireNonNull(mapper); 731 732 final var cache = new HashMap<Terminal<T>, Terminal<A>>(); 733 final Function<Terminal<T>, Terminal<A>> mapping = t -> cache 734 .computeIfAbsent(t, t2 -> new Terminal<>(t2.name(), mapper.apply(t2))); 735 736 @SuppressWarnings("unchecked") 737 final List<Rule<A>> rules = rules().stream() 738 .map(rule -> new Rule<>( 739 (NonTerminal<A>)rule.start(), 740 rule.alternatives().stream() 741 .map(expr -> new Expression<>( 742 expr.symbols().stream() 743 .map(sym -> 744 sym instanceof Cfg.Terminal<T> t 745 ? mapping.apply(t) : (Symbol<A>)sym 746 ) 747 .toList() 748 )) 749 .toList() 750 )) 751 .toList(); 752 753 return Cfg.of(rules); 754 } 755 756 @Override 757 public int hashCode() { 758 return rules.hashCode(); 759 } 760 761 @Override 762 public boolean equals(final Object obj) { 763 return obj instanceof Cfg<?> cfg && rules.equals(cfg.rules()); 764 } 765 766 @Override 767 public String toString() { 768 return "Cfg[nonTerminals=%s, terminals=%s, rules=%s, start=%s]" 769 .formatted(nonTerminals, terminals, rules, start); 770 } 771 772 773 /* ************************************************************************* 774 * Factory methods 775 * ************************************************************************/ 776 777 /** 778 * Create a grammar object with the given rules. Duplicated rules are merged 779 * into one rule. The <em>start</em> symbol of the first rule is chosen as 780 * the start symbol of the created CFG 781 * 782 * @param rules the rules the grammar consists of 783 * @throws IllegalArgumentException if the rule list is empty or a rule is 784 * defined more than once, the start symbol points to a missing rule 785 * or the rules uses symbols not defined in the list of 786 * {@link #nonTerminals()} or {@link #terminals()} 787 * @throws NullPointerException if the list of rules is {@code null} 788 */ 789 public static <T> Cfg<T> of(final List<? extends Rule<? extends T>> rules) { 790 @SuppressWarnings("unchecked") 791 final var rules0 = (List<Rule<T>>)List.copyOf(rules); 792 793 // Rules must not be empty. 794 if (rules0.isEmpty()) { 795 throw new IllegalArgumentException( 796 "The list of rules must not be empty." 797 ); 798 } 799 800 final List<Symbol<T>> symbols = rules0.stream() 801 .flatMap(Cfg::ruleSymbols) 802 .distinct() 803 .toList(); 804 805 final List<NonTerminal<T>> nonTerminals = symbols.stream() 806 .map(rule -> rule.at(null)) 807 .distinct() 808 .filter(NonTerminal.class::isInstance) 809 .map(nt -> (NonTerminal<T>)nt) 810 .toList(); 811 812 final List<Terminal<T>> terminals = symbols.stream() 813 .map(rule -> rule.at(null)) 814 .distinct() 815 .filter(Terminal.class::isInstance) 816 .map(nt -> (Terminal<T>)nt) 817 .toList(); 818 819 final Set<Symbol<T>> allSymbols = new HashSet<>(symbols); 820 allSymbols.addAll(nonTerminals); 821 allSymbols.addAll(terminals); 822 823 return new Cfg<>( 824 nonTerminals.stream() 825 .map(s -> (NonTerminal<T>)select(s, allSymbols)) 826 .toList(), 827 terminals.stream() 828 .map(s -> (Terminal<T>)select(s, allSymbols)) 829 .toList(), 830 rules0.stream() 831 .map(r -> rebuild(r, allSymbols)) 832 .toList(), 833 (NonTerminal<T>)select(rules0.getFirst().start(), allSymbols) 834 ); 835 } 836 837 /** 838 * Create a grammar object with the given rules. 839 * 840 * @param rules the rules the grammar consists of 841 * @throws IllegalArgumentException if the list of rules is empty or there 842 * are duplicate rules 843 * @throws NullPointerException if the list of rules is {@code null} 844 */ 845 @SafeVarargs 846 public static <T> Cfg<T> of(final Rule<? extends T>... rules) { 847 return Cfg.of(List.of(rules)); 848 } 849 850 private static <T> Stream<Symbol<T>> ruleSymbols(final Rule<T> rule) { 851 return Stream.concat( 852 Stream.of(rule.start), 853 rule.alternatives.stream() 854 .flatMap(expr -> expr.symbols().stream()) 855 ); 856 } 857 858 private static <T> Rule<T> rebuild( 859 final Rule<T> rule, 860 final Collection<Symbol<T>> symbols 861 ) { 862 return new Rule<>( 863 (NonTerminal<T>)select(rule.start, symbols), 864 rule.alternatives.stream() 865 .map(e -> rebuild(e, symbols)) 866 .toList() 867 ); 868 } 869 870 private static <T> Expression<T> 871 rebuild(final Expression<T> expression, final Collection<Symbol<T>> symbols) { 872 return new Expression<>( 873 expression.symbols.stream() 874 .map(s -> select(s, symbols)) 875 .toList() 876 ); 877 } 878 879 private static <T> Symbol<T> select( 880 final Symbol<T> symbol, 881 final Collection<Symbol<T>> symbols 882 ) { 883 for (var s : symbols) { 884 if (s.equals(symbol)) { 885 return s; 886 } 887 } 888 throw new AssertionError("Symbol not found: " + symbol); 889 } 890 891 @SuppressWarnings("unchecked") 892 static <A, B extends A> Cfg<A> upcast(final Cfg<B> seq) { 893 return (Cfg<A>)seq; 894 } 895 896 897 /* ************************************************************************* 898 * Static factory methods for rule creation: DSL methods. 899 * ************************************************************************/ 900 901 /** 902 * Factory method for creating a terminal symbol with the given 903 * {@code name} and {@code value}. 904 * 905 * @param name the name of the terminal symbol 906 * @param value the value of the terminal symbol 907 * @param <T> the terminal symbol value type 908 * @return a new terminal symbol 909 */ 910 public static <T> Terminal<T> T(final String name, final T value) { 911 return new Terminal<>(name, value); 912 } 913 914 /** 915 * Factory method for creating a terminal symbol with the given 916 * {@code name} and {@code value}. 917 * 918 * @since 8.2 919 * 920 * @param name the name of the terminal symbol 921 * @param value the value of the terminal symbol 922 * @param annotation the terminal annotation 923 * @param <T> the terminal symbol value type 924 * @return a new terminal symbol 925 */ 926 public static <T> Terminal<T> T( 927 final String name, 928 final T value, 929 final Object annotation 930 ) { 931 return new Terminal<>(name, value, annotation); 932 } 933 934 /** 935 * Factory method for creating a terminal symbol with the given 936 * {@code name}. 937 * 938 * @param name the name of the terminal symbol 939 * @return a new terminal symbol 940 */ 941 public static Terminal<String> T(final String name) { 942 return new Terminal<>(name, name); 943 } 944 945 /** 946 * Factory method for creating non-terminal symbols. 947 * 948 * @param name the name of the symbol. 949 * @param <T> the terminal symbol value type 950 * @return a new non-terminal symbol 951 */ 952 public static <T> NonTerminal<T> N(final String name) { 953 return new NonTerminal<>(name); 954 } 955 956 /** 957 * Factory method for creating non-terminal symbols. 958 * 959 * @since 8.2 960 * 961 * @param name the name of the symbol. 962 * @param annotation the annotation of the symbol 963 * @param <T> the terminal symbol value type 964 * @return a new non-terminal symbol 965 */ 966 public static <T> NonTerminal<T> N( 967 final String name, 968 final Object annotation 969 ) { 970 return new NonTerminal<>(name, annotation); 971 } 972 973 /** 974 * Factory method for creating an expression with the given 975 * {@code symbols}. 976 * 977 * @param symbols the expression symbols 978 * @throws IllegalArgumentException if the list of {@code symbols} is 979 * empty 980 * @param <T> the terminal symbol value type 981 * @return a new expression 982 */ 983 @SafeVarargs 984 public static <T> Expression<T> E(final Symbol<T>... symbols) { 985 return new Expression<>(List.of(symbols)); 986 } 987 988 /** 989 * Factory method for creating a new rule. The {@code elements} array doesn't 990 * allow {@link Rule} objects. 991 * 992 * @param name the name of start symbol of the rule 993 * @param elements the list of alternative rule expressions 994 * @throws IllegalArgumentException if the given list of 995 * {@code alternatives} is empty 996 * @throws NullPointerException if one of the arguments is {@code null} 997 * @param <T> the terminal symbol value type 998 * @return a new rule 999 */ 1000 @SafeVarargs 1001 public static <T> Rule<T> R( 1002 final String name, 1003 final Element<T>... elements 1004 ) { 1005 return R(new NonTerminal<>(name), elements); 1006 } 1007 1008 /** 1009 * Factory method for creating a new rule. The {@code elements} array doesn't 1010 * allow {@link Rule} objects. 1011 * 1012 * @param name the name of start symbol of the rule 1013 * @param alternatives the list of alternative rule expressions 1014 * @throws IllegalArgumentException if the given list of 1015 * {@code alternatives} is empty 1016 * @throws NullPointerException if one of the arguments is {@code null} 1017 * @param <T> the terminal symbol value type 1018 * @return a new rule 1019 * @deprecated Will be removed, use {@link #R(String, Element[])} instead 1020 */ 1021 @Deprecated(forRemoval = true, since = "8.2") 1022 @SafeVarargs 1023 public static <T> Rule<T> R( 1024 final String name, 1025 final Expression<T>... alternatives 1026 ) { 1027 return R(new NonTerminal<>(name), alternatives); 1028 } 1029 1030 /** 1031 * Factory method for creating a new rule. The {@code elements} array doesn't 1032 * allow {@link Rule} objects. 1033 * 1034 * @since 8.2 1035 * 1036 * @param start the start symbol of the rule 1037 * @param elements the list of alternative rule expressions 1038 * @throws NullPointerException if one of the arguments is {@code null} 1039 * @param <T> the terminal symbol value type 1040 * @return a new rule 1041 */ 1042 @SafeVarargs 1043 public static <T> Rule<T> R( 1044 final NonTerminal<T> start, 1045 final Element<T>... elements 1046 ) { 1047 return new Rule<>( 1048 start, 1049 Stream.of(elements) 1050 .map(expression -> switch (expression) { 1051 case Expression<T> e -> e; 1052 case Symbol<T> s -> new Expression<>(List.of(s)); 1053 }) 1054 .toList() 1055 ); 1056 } 1057 1058 /** 1059 * Builder class for building {@link Cfg} objects. 1060 * 1061 * @since 8.2 1062 * 1063 * @param <T> the terminal symbol value type 1064 */ 1065 public static final class Builder<T> { 1066 private final List<Rule<T>> rules = new ArrayList<>(); 1067 1068 private Builder() { 1069 } 1070 1071 /** 1072 * Building a new rule for the CFG. 1073 * 1074 * @param start the rule start 1075 * @param builder the rule builder 1076 * @return {@code this} builder for method chaining 1077 */ 1078 public Builder<T> R( 1079 final NonTerminal<T> start, 1080 final Consumer<? super Rule.Builder<T>> builder 1081 ) { 1082 final var rb = new Rule.Builder<T>(start); 1083 builder.accept(rb); 1084 rules.add(rb.build()); 1085 1086 return this; 1087 } 1088 1089 /** 1090 * Building a new rule for the CFG. 1091 * 1092 * @param name the rule name 1093 * @param builder the rule builder 1094 * @return {@code this} builder for method chaining 1095 */ 1096 public Builder<T> R( 1097 final String name, 1098 final Consumer<? super Rule.Builder<T>> builder 1099 ) { 1100 return R(new NonTerminal<>(name), builder); 1101 } 1102 1103 /** 1104 * Creates a new CFG object. 1105 * 1106 * @see #of(List) 1107 * 1108 * @throws IllegalArgumentException if the rule list is empty or a rule 1109 * is defined more than once, the start symbol points to a 1110 * missing rule or the rules uses symbols not defined in the 1111 * list of {@link #nonTerminals()} or {@link #terminals()} 1112 * @return a newly created CFG object 1113 */ 1114 public Cfg<T> build() { 1115 return Cfg.of(rules); 1116 } 1117 1118 } 1119 1120 /** 1121 * Return a new CFG builder. 1122 * 1123 * @since 8.2 1124 * 1125 * @return a new CFG builder 1126 * @param <T> the terminal symbol value type 1127 */ 1128 public static <T> Builder<T> builder() { 1129 return new Builder<>(); 1130 } 1131 1132}