001/*
002 * Java Genetic Algorithm Library (jenetics-7.2.0).
003 * Copyright (c) 2007-2023 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020package io.jenetics.ext.grammar;
021
022import static java.lang.Character.isDigit;
023import static java.lang.Character.isWhitespace;
024import static io.jenetics.ext.internal.parser.CharSequenceTokenizer.isAlphabetic;
025
026import java.util.stream.Collectors;
027
028import io.jenetics.ext.internal.parser.ParsingException;
029
030/**
031 * This class contains methods for parsing and formatting <em>context-free</em>
032 * grammars in
033 * <a href="https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form">BNF</a>
034 * format.
035 * <pre>{@code
036 * final Cfg<String> grammar = Bnf.parse("""
037 *     <expr> ::= <num> | <var> | '(' <expr> <op> <expr> ')'
038 *     <op>   ::= + | - | * | /
039 *     <var>  ::= x | y
040 *     <num>  ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
041 *     """
042 * );
043 * }</pre>
044 *
045 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
046 * @since 7.1
047 * @version 7.1
048 */
049public final class Bnf {
050        private Bnf() {}
051
052        static boolean isSymbolChar(final int ch) {
053                return switch (ch) {
054                        case '<', '>', '|', ':', '=' -> true;
055                        default -> false;
056                };
057        }
058
059        static boolean isStringChar(final char c) {
060                return !isWhitespace(c) && !isSymbolChar(c);
061        }
062
063        static boolean isIdChar(final char c) {
064                return isAlphabetic(c) || isDigit(c) || (c == '-');
065        }
066
067        /**
068         * Parses the given BNF {@code grammar} string to a {@link Cfg} object. The
069         * following example shows the grammar of a simple arithmetic expression.
070         *
071         * <pre>{@code
072         * <expr> ::= <num> | <var> | '(' <expr> <op> <expr> ')'
073         * <op>   ::= + | - | * | /
074         * <var>  ::= x | y
075         * <num>  ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
076         * }</pre>
077         *
078         * @param grammar the BNF {@code grammar} string
079         * @return the parsed {@code BNF} object
080         * @throws ParsingException if the given <em>grammar</em> is invalid
081         * @throws NullPointerException it the given {@code grammar} string is
082         *         {@code null}
083         */
084        public static Cfg<String> parse(final String grammar) {
085                final var tokenizer = new BnfTokenizer(grammar);
086                final var parser = new BnfParser(tokenizer);
087
088                return parser.parse();
089        }
090
091        /**
092         * Formats the given <em>CFG</em> as BNF grammar string.
093         *
094         * @param grammar the CFG to format as BNF
095         * @return the BNF formatted grammar string
096         * @throws NullPointerException if the give {@code grammar} is {@code null}
097         */
098        public static String format(final Cfg<?> grammar) {
099                return grammar.rules().stream()
100                        .map(Bnf::format)
101                        .collect(Collectors.joining("\n"));
102        }
103
104        private static String format(final Cfg.Rule<?> rule) {
105                return String.format(
106                        "%s ::= %s",
107                        format(rule.start()),
108                        rule.alternatives().stream()
109                                .map(Bnf::format)
110                                .collect(Collectors.joining("\n    | "))
111                );
112        }
113
114        private static String format(final Cfg.Expression<?> expr) {
115                return expr.symbols().stream()
116                        .map(Bnf::format)
117                        .collect(Collectors.joining(" "));
118        }
119
120        private static String format(final Cfg.Symbol<?> symbol) {
121                if (symbol instanceof Cfg.NonTerminal<?> nt) {
122                        return String.format("<%s>", nt.name());
123                } else if (symbol instanceof Cfg.Terminal<?> t) {
124                        return "'" + t.name()
125                                .replace("\\", "\\\\")
126                                .replace("'", "\\'") + "'";
127                }
128                throw new AssertionError();
129        }
130
131}