001/* 002 * Java Genetic Algorithm Library (jenetics-9.0.0). 003 * Copyright (c) 2007-2026 Franz Wilhelmstötter 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 * Author: 018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com) 019 */ 020package io.jenetics.ext.grammar; 021 022import static java.lang.Character.isDigit; 023import static java.lang.Character.isWhitespace; 024import static io.jenetics.ext.internal.parser.CharSequenceTokenizer.isAlphabetic; 025 026import java.util.stream.Collectors; 027 028import io.jenetics.ext.internal.parser.ParsingException; 029 030/** 031 * This class contains methods for parsing and formatting <em>context-free</em> 032 * grammars in 033 * <a href="https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form">BNF</a> 034 * format. 035 * {@snippet lang="java": 036 * final Cfg<String> grammar = Bnf.parse(""" 037 * <expr> ::= <num> | <var> | '(' <expr> <op> <expr> ')' 038 * <op> ::= + | - | * | / 039 * <var> ::= x | y 040 * <num> ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 041 * """ 042 * ); 043 * } 044 * 045 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a> 046 * @since 7.1 047 * @version 7.1 048 */ 049public final class Bnf { 050 051 private Bnf() {} 052 053 static boolean isSymbolChar(final int ch) { 054 return switch (ch) { 055 case '<', '>', '|', ':', '=' -> true; 056 default -> false; 057 }; 058 } 059 060 static boolean isStringChar(final char c) { 061 return !isWhitespace(c) && !isSymbolChar(c); 062 } 063 064 static boolean isIdChar(final char c) { 065 return isAlphabetic(c) || isDigit(c) || (c == '-'); 066 } 067 068 /** 069 * Parses the given BNF {@code grammar} string to a {@link Cfg} object. The 070 * following example shows the grammar of a simple arithmetic expression. 071 * 072 * <pre>{@code 073 * <expr> ::= <num> | <var> | '(' <expr> <op> <expr> ')' 074 * <op> ::= + | - | * | / 075 * <var> ::= x | y 076 * <num> ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 077 * }</pre> 078 * 079 * @param grammar the BNF {@code grammar} string 080 * @return the parsed {@code BNF} object 081 * @throws ParsingException if the given <em>grammar</em> is invalid 082 * @throws NullPointerException it the given {@code grammar} string is 083 * {@code null} 084 */ 085 public static Cfg<String> parse(final CharSequence grammar) { 086 final var tokenizer = new BnfTokenizer(grammar); 087 final var parser = new BnfParser(tokenizer); 088 089 return parser.parse(); 090 } 091 092 /** 093 * Formats the given <em>CFG</em> as BNF grammar string. 094 * 095 * @param grammar the CFG to format as BNF 096 * @return the BNF formatted grammar string 097 * @throws NullPointerException if the give {@code grammar} is {@code null} 098 */ 099 public static String format(final Cfg<?> grammar) { 100 return grammar.rules().stream() 101 .map(Bnf::format) 102 .collect(Collectors.joining("\n")); 103 } 104 105 private static String format(final Cfg.Rule<?> rule) { 106 return String.format( 107 "%s ::= %s", 108 format(rule.start()), 109 rule.alternatives().stream() 110 .map(Bnf::format) 111 .collect(Collectors.joining("\n | ")) 112 ); 113 } 114 115 private static String format(final Cfg.Expression<?> expr) { 116 return expr.symbols().stream() 117 .map(Bnf::format) 118 .collect(Collectors.joining(" ")); 119 } 120 121 private static String format(final Cfg.Symbol<?> symbol) { 122 return switch (symbol) { 123 case Cfg.NonTerminal<?> nt -> String.format("<%s>", nt.name()); 124 case Cfg.Terminal<?> t -> "'" + t.name() 125 .replace("\\", "\\\\") 126 .replace("'", "\\'") + "'"; 127 }; 128 } 129 130}