001/*
002 * Java Genetic Algorithm Library (jenetics-8.1.0).
003 * Copyright (c) 2007-2024 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020package io.jenetics.util;
021
022import static java.util.Objects.requireNonNull;
023
024import java.io.Serial;
025import java.io.Serializable;
026import java.util.Arrays;
027import java.util.regex.PatternSyntaxException;
028import java.util.stream.Collector;
029
030import io.jenetics.internal.collection.Array;
031import io.jenetics.internal.collection.ArrayISeq;
032import io.jenetics.internal.collection.CharStore;
033
034/**
035 * This class is used for holding the valid characters of an
036 * {@link io.jenetics.CharacterGene}. It is not a character sequence in the
037 * classical sense. The characters of this sequence are sorted and doesn't
038 * contain duplicate values, like a set.
039 * {@snippet lang="java":
040 * final CharSeq cs1 = new CharSeq("abcdeaafg");
041 * final CharSeq cs2 = new CharSeq("gfedcbabb");
042 * assert(cs1.equals(cs2));
043 * }
044 *
045 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
046 * @since 1.0
047 * @version 2.0
048 */
049public final class CharSeq
050        extends CharSeqBase
051        implements
052                CharSequence,
053                ISeq<Character>,
054                Comparable<CharSeq>,
055                Serializable
056{
057        @Serial
058        private static final long serialVersionUID = 2L;
059
060        /**
061         * Create a new (distinct) CharSeq from the given {@code characters}. The
062         * given {@link CharSequence} is sorted and duplicate values are removed
063         *
064         * @see #CharSeq(CharSequence)
065         *
066         * @param characters the characters.
067         * @throws NullPointerException if the {@code characters} are {@code null}.
068         */
069        public CharSeq(final char[] characters) {
070                super(distinct(characters.clone()));
071        }
072
073        /**
074         * Create a new (distinct) CharSeq from the given {@code characters}. The
075         * given {@link CharSequence} is sorted and duplicate values are removed.
076         *
077         * @param characters the characters.
078         * @throws NullPointerException if the {@code characters} are {@code null}.
079         */
080        public CharSeq(final CharSequence characters) {
081                super(distinct(toCharArray(characters)));
082        }
083
084        private static char[] toCharArray(final CharSequence characters) {
085                requireNonNull(characters, "Characters");
086
087                final char[] chars = new char[characters.length()];
088                for (int i = chars.length; --i >= 0;) {
089                        chars[i] = characters.charAt(i);
090                }
091
092                return chars;
093        }
094
095        private static char[] distinct(final char[] chars) {
096                Arrays.sort(chars);
097
098                int j = 0;
099                for (int i = 1; i < chars.length; ++i) {
100                        if (chars[j] != chars[i]) {
101                                chars[++j] = chars[i];
102                        }
103                }
104
105                final int size = Math.min(chars.length, j + 1);
106                final char[] array = new char[size];
107                System.arraycopy(chars, 0, array, 0, size);
108                return array;
109        }
110
111        @Override
112        public boolean contains(final Object object) {
113                return object instanceof Character && contains((Character)object);
114        }
115
116        /**
117         * Test whether this character set contains the given character {@code c}.
118         *
119         * @param c the character to test.
120         * @return {@code true} if this character set contains the given character,
121         *          {@code false} otherwise.
122         * @throws NullPointerException if the given character {@code c} is
123         *          {@code null}.
124         */
125        public boolean contains(final Character c) {
126                return contains(c.charValue());
127        }
128
129        /**
130         * Test whether this character set contains the given character {@code c}.
131         *
132         * @param c the character to test.
133         * @return {@code true} if this character set contains the given character,
134         *          {@code false} otherwise.
135         */
136        public boolean contains(final char c) {
137                return Arrays.binarySearch(array, c) >= 0;
138        }
139
140        @Override
141        public char charAt(final int index) {
142                return array[index];
143        }
144
145        @Override
146        public int length() {
147                return array.length;
148        }
149
150        @Override
151        public CharSeq subSequence(final int start, final int end) {
152                return new CharSeq(new String(array, start, end - start));
153        }
154
155        /**
156         * Test whether this character set is empty.
157         *
158         * @return {@code true} if this character set is empty, {@code false}
159         *          otherwise.
160         */
161        public boolean isEmpty() {
162                return array.length == 0;
163        }
164
165        @Override
166        public int hashCode() {
167                return 17 + 31*Arrays.hashCode(array);
168        }
169
170        @Override
171        public boolean equals(final Object obj) {
172                return obj == this ||
173                        obj instanceof CharSeq other &&
174                        Arrays.equals(other.array, array);
175        }
176
177        @Override
178        public int compareTo(final CharSeq set) {
179                int result = 0;
180
181                final int n = Math.min(array.length, set.array.length);
182                for (int i = 0; i < n && result == 0; ++i) {
183                        result = array[i] - set.array[i];
184                }
185                if (result == 0) {
186                        result = array.length - set.array.length;
187                }
188
189                return result;
190        }
191
192        @Override
193        public String toString() {
194                return new String(array);
195        }
196
197        /**
198         * Expands the character range for the given {@code pattern}. E.g.
199         * {@code a-zA-Z0-1} will return a string containing all upper and lower
200         * case characters (from a to z), and all digits form 0 to 9.
201         *
202         * @param pattern the {@code pattern} to expand.
203         * @return the expanded pattern.
204         * @throws PatternSyntaxException if the pattern could not be expanded.
205         * @throws NullPointerException if the pattern is {@code null}.
206         */
207        public static String expand(final CharSequence pattern) {
208                requireNonNull(pattern, "Pattern");
209                final StringBuilder out = new StringBuilder();
210
211                for (int i = 0, n = pattern.length(); i < n; ++i) {
212                        if (pattern.charAt(i) == '\\') {
213                                ++i;
214                                if (i < pattern.length()) {
215                                        out.append(pattern.charAt(i));
216                                }
217                        } else if (pattern.charAt(i) == '-') {
218                                if (i <= 0 || i >= (pattern.length() - 1)) {
219                                        throw new PatternSyntaxException(
220                                                "Dangling range operator '-'", pattern.toString(),
221                                                pattern.length() - 1
222                                        );
223                                }
224
225                                final String range = expand(
226                                        pattern.charAt(i - 1),
227                                        pattern.charAt(i + 1)
228                                );
229                                out.append(range);
230
231                                ++i;
232                        } else if (i + 1 == n || pattern.charAt(i + 1) != '-') {
233                                out.append(pattern.charAt(i));
234                        }
235                }
236
237                return out.toString();
238        }
239
240        /**
241         * Expands the characters between {@code a} and {@code b}.
242         *
243         * @param a the start character.
244         * @param b the stop character.
245         * @return the expanded characters.
246         */
247        public static String expand(final char a, final char b) {
248                final StringBuilder out = new StringBuilder();
249
250                if (a < b) {
251                        char c = a;
252                        while (c <= b) {
253                                out.append(c);
254                                c = (char) (c + 1);
255                        }
256                } else if (a > b) {
257                        char c = a;
258                        while (c >= b) {
259                                out.append(c);
260                                c = (char)(c - 1);
261                        }
262                }
263
264                return out.toString();
265        }
266
267        /**
268         * Expands the character range for the given {@code pattern}. E.g.
269         * {@code a-zA-Z0-1} will return a string containing all upper and lower
270         * case characters (from a to z), and all digits form 0 to 9.
271         *
272         * @see #expand(CharSequence)
273         *
274         * @param pattern the {@code pattern} to expand.
275         * @return the expanded pattern.
276         * @throws PatternSyntaxException if the pattern could not be expanded.
277         * @throws NullPointerException if the pattern is {@code null}.
278         */
279        public static CharSeq of(final CharSequence pattern) {
280                return new CharSeq(expand(pattern));
281        }
282
283        /**
284         * Expands the characters between {@code a} and {@code b}.
285         *
286         * @see #expand(char, char)
287         *
288         * @param a the start character.
289         * @param b the stop character.
290         * @return the expanded characters.
291         */
292        public static CharSeq of(final char a, final char b) {
293                return new CharSeq(expand(a, b));
294        }
295
296        /**
297         * Helper method for creating a sequence of characters from the given
298         * {@code CharSequence}. The returned sequence will contain all characters
299         * in the original order.
300         *
301         * @param chars the char sequence to convert.
302         * @return a sequence which will contain all given chars in the original
303         *         order.
304         */
305        public static ISeq<Character> toISeq(final CharSequence chars) {
306                final MSeq<Character> seq = MSeq.ofLength(chars.length());
307                for (int i = 0; i < chars.length(); ++i) {
308                        seq.set(i, chars.charAt(i));
309                }
310
311                return seq.toISeq();
312        }
313
314        public static Collector<Character, ?, CharSeq> toCharSeq() {
315                return Collector.of(
316                        StringBuilder::new,
317                        StringBuilder::append,
318                        (a, b) -> {a.append(b); return a;},
319                        CharSeq::new
320                );
321        }
322
323}
324
325abstract class CharSeqBase extends ArrayISeq<Character> {
326
327        @Serial
328        private static final long serialVersionUID = 1L;
329
330        final char[] array;
331
332        protected CharSeqBase(final char[] characters) {
333                super(Array.of(CharStore.of(characters)).seal());
334                array = characters;
335        }
336
337}