001/* 002 * Java Genetic Algorithm Library (jenetics-7.2.0). 003 * Copyright (c) 2007-2023 Franz Wilhelmstötter 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 * Author: 018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com) 019 */ 020package io.jenetics.ext.grammar; 021 022import static java.lang.Character.isDigit; 023import static java.lang.Character.isWhitespace; 024import static io.jenetics.ext.internal.parser.CharSequenceTokenizer.isAlphabetic; 025 026import java.util.stream.Collectors; 027 028import io.jenetics.ext.internal.parser.ParsingException; 029 030/** 031 * This class contains methods for parsing and formatting <em>context-free</em> 032 * grammars in 033 * <a href="https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form">BNF</a> 034 * format. 035 * <pre>{@code 036 * final Cfg<String> grammar = Bnf.parse(""" 037 * <expr> ::= <num> | <var> | '(' <expr> <op> <expr> ')' 038 * <op> ::= + | - | * | / 039 * <var> ::= x | y 040 * <num> ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 041 * """ 042 * ); 043 * }</pre> 044 * 045 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a> 046 * @since 7.1 047 * @version 7.1 048 */ 049public final class Bnf { 050 private Bnf() {} 051 052 static boolean isSymbolChar(final int ch) { 053 return switch (ch) { 054 case '<', '>', '|', ':', '=' -> true; 055 default -> false; 056 }; 057 } 058 059 static boolean isStringChar(final char c) { 060 return !isWhitespace(c) && !isSymbolChar(c); 061 } 062 063 static boolean isIdChar(final char c) { 064 return isAlphabetic(c) || isDigit(c) || (c == '-'); 065 } 066 067 /** 068 * Parses the given BNF {@code grammar} string to a {@link Cfg} object. The 069 * following example shows the grammar of a simple arithmetic expression. 070 * 071 * <pre>{@code 072 * <expr> ::= <num> | <var> | '(' <expr> <op> <expr> ')' 073 * <op> ::= + | - | * | / 074 * <var> ::= x | y 075 * <num> ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 076 * }</pre> 077 * 078 * @param grammar the BNF {@code grammar} string 079 * @return the parsed {@code BNF} object 080 * @throws ParsingException if the given <em>grammar</em> is invalid 081 * @throws NullPointerException it the given {@code grammar} string is 082 * {@code null} 083 */ 084 public static Cfg<String> parse(final String grammar) { 085 final var tokenizer = new BnfTokenizer(grammar); 086 final var parser = new BnfParser(tokenizer); 087 088 return parser.parse(); 089 } 090 091 /** 092 * Formats the given <em>CFG</em> as BNF grammar string. 093 * 094 * @param grammar the CFG to format as BNF 095 * @return the BNF formatted grammar string 096 * @throws NullPointerException if the give {@code grammar} is {@code null} 097 */ 098 public static String format(final Cfg<?> grammar) { 099 return grammar.rules().stream() 100 .map(Bnf::format) 101 .collect(Collectors.joining("\n")); 102 } 103 104 private static String format(final Cfg.Rule<?> rule) { 105 return String.format( 106 "%s ::= %s", 107 format(rule.start()), 108 rule.alternatives().stream() 109 .map(Bnf::format) 110 .collect(Collectors.joining("\n | ")) 111 ); 112 } 113 114 private static String format(final Cfg.Expression<?> expr) { 115 return expr.symbols().stream() 116 .map(Bnf::format) 117 .collect(Collectors.joining(" ")); 118 } 119 120 private static String format(final Cfg.Symbol<?> symbol) { 121 if (symbol instanceof Cfg.NonTerminal<?> nt) { 122 return String.format("<%s>", nt.name()); 123 } else if (symbol instanceof Cfg.Terminal<?> t) { 124 return "'" + t.name() 125 .replace("\\", "\\\\") 126 .replace("'", "\\'") + "'"; 127 } 128 throw new AssertionError(); 129 } 130 131}