001/*
002 * Java Genetic Algorithm Library (jenetics-7.1.0).
003 * Copyright (c) 2007-2022 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020package io.jenetics.util;
021
022import static java.util.Objects.requireNonNull;
023
024import java.io.Serial;
025import java.io.Serializable;
026import java.util.Arrays;
027import java.util.regex.PatternSyntaxException;
028import java.util.stream.Collector;
029
030import io.jenetics.internal.collection.Array;
031import io.jenetics.internal.collection.ArrayISeq;
032import io.jenetics.internal.collection.CharStore;
033
034/**
035 * This class is used for holding the valid characters of an
036 * {@link io.jenetics.CharacterGene}. It is not a character sequence in the
037 * classical sense. The characters of this sequence are sorted and doesn't
038 * contain duplicate values, like a set.
039 *
040 * <pre>{@code
041 * final CharSeq cs1 = new CharSeq("abcdeaafg");
042 * final CharSeq cs2 = new CharSeq("gfedcbabb");
043 * assert(cs1.equals(cs2));
044 * }</pre>
045 *
046 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
047 * @since 1.0
048 * @version 2.0
049 */
050public final class CharSeq
051        extends CharSeqBase
052        implements
053                CharSequence,
054                ISeq<Character>,
055                Comparable<CharSeq>,
056                Serializable
057{
058        @Serial
059        private static final long serialVersionUID = 2L;
060
061        /**
062         * Create a new (distinct) CharSeq from the given {@code characters}. The
063         * given {@link CharSequence} is sorted and duplicate values are removed
064         *
065         * @see #CharSeq(CharSequence)
066         *
067         * @param characters the characters.
068         * @throws NullPointerException if the {@code characters} are {@code null}.
069         */
070        public CharSeq(final char[] characters) {
071                super(distinct(characters.clone()));
072        }
073
074        /**
075         * Create a new (distinct) CharSeq from the given {@code characters}. The
076         * given {@link CharSequence} is sorted and duplicate values are removed.
077         *
078         * @param characters the characters.
079         * @throws NullPointerException if the {@code characters} are {@code null}.
080         */
081        public CharSeq(final CharSequence characters) {
082                super(distinct(toCharArray(characters)));
083        }
084
085        private static char[] toCharArray(final CharSequence characters) {
086                requireNonNull(characters, "Characters");
087
088                final char[] chars = new char[characters.length()];
089                for (int i = chars.length; --i >= 0;) {
090                        chars[i] = characters.charAt(i);
091                }
092
093                return chars;
094        }
095
096        private static char[] distinct(final char[] chars) {
097                Arrays.sort(chars);
098
099                int j = 0;
100                for (int i = 1; i < chars.length; ++i) {
101                        if (chars[j] != chars[i]) {
102                                chars[++j] = chars[i];
103                        }
104                }
105
106                final int size = Math.min(chars.length, j + 1);
107                final char[] array = new char[size];
108                System.arraycopy(chars, 0, array, 0, size);
109                return array;
110        }
111
112        @Override
113        public boolean contains(final Object object) {
114                return object instanceof Character && contains((Character)object);
115        }
116
117        /**
118         * Test whether this character set contains the given character {@code c}.
119         *
120         * @param c the character to test.
121         * @return {@code true} if this character set contains the given character,
122         *          {@code false} otherwise.
123         * @throws NullPointerException if the given character {@code c} is
124         *          {@code null}.
125         */
126        public boolean contains(final Character c) {
127                return contains(c.charValue());
128        }
129
130        /**
131         * Test whether this character set contains the given character {@code c}.
132         *
133         * @param c the character to test.
134         * @return {@code true} if this character set contains the given character,
135         *          {@code false} otherwise.
136         */
137        public boolean contains(final char c) {
138                return Arrays.binarySearch(array, c) >= 0;
139        }
140
141        @Override
142        public char charAt(final int index) {
143                return array[index];
144        }
145
146        @Override
147        public int length() {
148                return array.length;
149        }
150
151        @Override
152        public CharSeq subSequence(final int start, final int end) {
153                return new CharSeq(new String(array, start, end - start));
154        }
155
156        /**
157         * Test whether this character set is empty.
158         *
159         * @return {@code true} if this character set is empty, {@code false}
160         *          otherwise.
161         */
162        public boolean isEmpty() {
163                return array.length == 0;
164        }
165
166        @Override
167        public int hashCode() {
168                return 17 + 31*Arrays.hashCode(array);
169        }
170
171        @Override
172        public boolean equals(final Object obj) {
173                return obj == this ||
174                        obj instanceof CharSeq other &&
175                        Arrays.equals(other.array, array);
176        }
177
178        @Override
179        public int compareTo(final CharSeq set) {
180                int result = 0;
181
182                final int n = Math.min(array.length, set.array.length);
183                for (int i = 0; i < n && result == 0; ++i) {
184                        result = array[i] - set.array[i];
185                }
186                if (result == 0) {
187                        result = array.length - set.array.length;
188                }
189
190                return result;
191        }
192
193        @Override
194        public String toString() {
195                return new String(array);
196        }
197
198        /**
199         * Expands the character range for the given {@code pattern}. E.g
200         * {@code a-zA-Z0-1} will return a string containing all upper and lower
201         * case characters (from a to z) and all digits form 0 to 9.
202         *
203         * @param pattern the {@code pattern} to expand.
204         * @return the expanded pattern.
205         * @throws PatternSyntaxException if the pattern could not be expanded.
206         * @throws NullPointerException if the pattern is {@code null}.
207         */
208        public static String expand(final CharSequence pattern) {
209                requireNonNull(pattern, "Pattern");
210                final StringBuilder out = new StringBuilder();
211
212                for (int i = 0, n = pattern.length(); i < n; ++i) {
213                        if (pattern.charAt(i) == '\\') {
214                                ++i;
215                                if (i < pattern.length()) {
216                                        out.append(pattern.charAt(i));
217                                }
218                        } else if (pattern.charAt(i) == '-') {
219                                if (i <= 0 || i >= (pattern.length() - 1)) {
220                                        throw new PatternSyntaxException(
221                                                "Dangling range operator '-'", pattern.toString(),
222                                                pattern.length() - 1
223                                        );
224                                }
225
226                                final String range = expand(
227                                        pattern.charAt(i - 1),
228                                        pattern.charAt(i + 1)
229                                );
230                                out.append(range);
231
232                                ++i;
233                        } else if (i + 1 == n || pattern.charAt(i + 1) != '-') {
234                                out.append(pattern.charAt(i));
235                        }
236                }
237
238                return out.toString();
239        }
240
241        /**
242         * Expands the characters between {@code a} and {@code b}.
243         *
244         * @param a the start character.
245         * @param b the stop character.
246         * @return the expanded characters.
247         */
248        public static String expand(final char a, final char b) {
249                final StringBuilder out = new StringBuilder();
250
251                if (a < b) {
252                        char c = a;
253                        while (c <= b) {
254                                out.append(c);
255                                c = (char) (c + 1);
256                        }
257                } else if (a > b) {
258                        char c = a;
259                        while (c >= b) {
260                                out.append(c);
261                                c = (char)(c - 1);
262                        }
263                }
264
265                return out.toString();
266        }
267
268        /**
269         * Expands the character range for the given {@code pattern}. E.g.
270         * {@code a-zA-Z0-1} will return a string containing all upper and lower
271         * case characters (from a to z) and all digits form 0 to 9.
272         *
273         * @see #expand(CharSequence)
274         *
275         * @param pattern the {@code pattern} to expand.
276         * @return the expanded pattern.
277         * @throws PatternSyntaxException if the pattern could not be expanded.
278         * @throws NullPointerException if the pattern is {@code null}.
279         */
280        public static CharSeq of(final CharSequence pattern) {
281                return new CharSeq(expand(pattern));
282        }
283
284        /**
285         * Expands the characters between {@code a} and {@code b}.
286         *
287         * @see #expand(char, char)
288         *
289         * @param a the start character.
290         * @param b the stop character.
291         * @return the expanded characters.
292         */
293        public static CharSeq of(final char a, final char b) {
294                return new CharSeq(expand(a, b));
295        }
296
297        /**
298         * Helper method for creating a sequence of characters from the given
299         * {@code CharSequence}. The returned sequence will contain all characters
300         * in the original order.
301         *
302         * @param chars the char sequence to convert.
303         * @return a sequence which will contain all given chars in the original
304         *         order.
305         */
306        public static ISeq<Character> toISeq(final CharSequence chars) {
307                final MSeq<Character> seq = MSeq.ofLength(chars.length());
308                for (int i = 0; i < chars.length(); ++i) {
309                        seq.set(i, chars.charAt(i));
310                }
311
312                return seq.toISeq();
313        }
314
315        public static Collector<Character, ?, CharSeq> toCharSeq() {
316                return Collector.of(
317                        StringBuilder::new,
318                        StringBuilder::append,
319                        (a, b) -> {a.append(b); return a;},
320                        CharSeq::new
321                );
322        }
323
324}
325
326abstract class CharSeqBase extends ArrayISeq<Character> {
327
328        @Serial
329        private static final long serialVersionUID = 1L;
330
331        final char[] array;
332
333        protected CharSeqBase(final char[] characters) {
334                super(Array.of(CharStore.of(characters)).seal());
335                array = characters;
336        }
337
338}