001 /*
002 * Java Genetic Algorithm Library (jenetics-8.0.0).
003 * Copyright (c) 2007-2024 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020 package io.jenetics.util;
021
022 import static java.util.Objects.requireNonNull;
023
024 import java.io.Serial;
025 import java.io.Serializable;
026 import java.util.Arrays;
027 import java.util.regex.PatternSyntaxException;
028 import java.util.stream.Collector;
029
030 import io.jenetics.internal.collection.Array;
031 import io.jenetics.internal.collection.ArrayISeq;
032 import io.jenetics.internal.collection.CharStore;
033
034 /**
035 * This class is used for holding the valid characters of an
036 * {@link io.jenetics.CharacterGene}. It is not a character sequence in the
037 * classical sense. The characters of this sequence are sorted and doesn't
038 * contain duplicate values, like a set.
039 *
040 * {@snippet lang="java":
041 * final CharSeq cs1 = new CharSeq("abcdeaafg");
042 * final CharSeq cs2 = new CharSeq("gfedcbabb");
043 * assert(cs1.equals(cs2));
044 * }
045 *
046 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
047 * @since 1.0
048 * @version 2.0
049 */
050 public final class CharSeq
051 extends CharSeqBase
052 implements
053 CharSequence,
054 ISeq<Character>,
055 Comparable<CharSeq>,
056 Serializable
057 {
058 @Serial
059 private static final long serialVersionUID = 2L;
060
061 /**
062 * Create a new (distinct) CharSeq from the given {@code characters}. The
063 * given {@link CharSequence} is sorted and duplicate values are removed
064 *
065 * @see #CharSeq(CharSequence)
066 *
067 * @param characters the characters.
068 * @throws NullPointerException if the {@code characters} are {@code null}.
069 */
070 public CharSeq(final char[] characters) {
071 super(distinct(characters.clone()));
072 }
073
074 /**
075 * Create a new (distinct) CharSeq from the given {@code characters}. The
076 * given {@link CharSequence} is sorted and duplicate values are removed.
077 *
078 * @param characters the characters.
079 * @throws NullPointerException if the {@code characters} are {@code null}.
080 */
081 public CharSeq(final CharSequence characters) {
082 super(distinct(toCharArray(characters)));
083 }
084
085 private static char[] toCharArray(final CharSequence characters) {
086 requireNonNull(characters, "Characters");
087
088 final char[] chars = new char[characters.length()];
089 for (int i = chars.length; --i >= 0;) {
090 chars[i] = characters.charAt(i);
091 }
092
093 return chars;
094 }
095
096 private static char[] distinct(final char[] chars) {
097 Arrays.sort(chars);
098
099 int j = 0;
100 for (int i = 1; i < chars.length; ++i) {
101 if (chars[j] != chars[i]) {
102 chars[++j] = chars[i];
103 }
104 }
105
106 final int size = Math.min(chars.length, j + 1);
107 final char[] array = new char[size];
108 System.arraycopy(chars, 0, array, 0, size);
109 return array;
110 }
111
112 @Override
113 public boolean contains(final Object object) {
114 return object instanceof Character && contains((Character)object);
115 }
116
117 /**
118 * Test whether this character set contains the given character {@code c}.
119 *
120 * @param c the character to test.
121 * @return {@code true} if this character set contains the given character,
122 * {@code false} otherwise.
123 * @throws NullPointerException if the given character {@code c} is
124 * {@code null}.
125 */
126 public boolean contains(final Character c) {
127 return contains(c.charValue());
128 }
129
130 /**
131 * Test whether this character set contains the given character {@code c}.
132 *
133 * @param c the character to test.
134 * @return {@code true} if this character set contains the given character,
135 * {@code false} otherwise.
136 */
137 public boolean contains(final char c) {
138 return Arrays.binarySearch(array, c) >= 0;
139 }
140
141 @Override
142 public char charAt(final int index) {
143 return array[index];
144 }
145
146 @Override
147 public int length() {
148 return array.length;
149 }
150
151 @Override
152 public CharSeq subSequence(final int start, final int end) {
153 return new CharSeq(new String(array, start, end - start));
154 }
155
156 /**
157 * Test whether this character set is empty.
158 *
159 * @return {@code true} if this character set is empty, {@code false}
160 * otherwise.
161 */
162 public boolean isEmpty() {
163 return array.length == 0;
164 }
165
166 @Override
167 public int hashCode() {
168 return 17 + 31*Arrays.hashCode(array);
169 }
170
171 @Override
172 public boolean equals(final Object obj) {
173 return obj == this ||
174 obj instanceof CharSeq other &&
175 Arrays.equals(other.array, array);
176 }
177
178 @Override
179 public int compareTo(final CharSeq set) {
180 int result = 0;
181
182 final int n = Math.min(array.length, set.array.length);
183 for (int i = 0; i < n && result == 0; ++i) {
184 result = array[i] - set.array[i];
185 }
186 if (result == 0) {
187 result = array.length - set.array.length;
188 }
189
190 return result;
191 }
192
193 @Override
194 public String toString() {
195 return new String(array);
196 }
197
198 /**
199 * Expands the character range for the given {@code pattern}. E.g.
200 * {@code a-zA-Z0-1} will return a string containing all upper and lower
201 * case characters (from a to z), and all digits form 0 to 9.
202 *
203 * @param pattern the {@code pattern} to expand.
204 * @return the expanded pattern.
205 * @throws PatternSyntaxException if the pattern could not be expanded.
206 * @throws NullPointerException if the pattern is {@code null}.
207 */
208 public static String expand(final CharSequence pattern) {
209 requireNonNull(pattern, "Pattern");
210 final StringBuilder out = new StringBuilder();
211
212 for (int i = 0, n = pattern.length(); i < n; ++i) {
213 if (pattern.charAt(i) == '\\') {
214 ++i;
215 if (i < pattern.length()) {
216 out.append(pattern.charAt(i));
217 }
218 } else if (pattern.charAt(i) == '-') {
219 if (i <= 0 || i >= (pattern.length() - 1)) {
220 throw new PatternSyntaxException(
221 "Dangling range operator '-'", pattern.toString(),
222 pattern.length() - 1
223 );
224 }
225
226 final String range = expand(
227 pattern.charAt(i - 1),
228 pattern.charAt(i + 1)
229 );
230 out.append(range);
231
232 ++i;
233 } else if (i + 1 == n || pattern.charAt(i + 1) != '-') {
234 out.append(pattern.charAt(i));
235 }
236 }
237
238 return out.toString();
239 }
240
241 /**
242 * Expands the characters between {@code a} and {@code b}.
243 *
244 * @param a the start character.
245 * @param b the stop character.
246 * @return the expanded characters.
247 */
248 public static String expand(final char a, final char b) {
249 final StringBuilder out = new StringBuilder();
250
251 if (a < b) {
252 char c = a;
253 while (c <= b) {
254 out.append(c);
255 c = (char) (c + 1);
256 }
257 } else if (a > b) {
258 char c = a;
259 while (c >= b) {
260 out.append(c);
261 c = (char)(c - 1);
262 }
263 }
264
265 return out.toString();
266 }
267
268 /**
269 * Expands the character range for the given {@code pattern}. E.g.
270 * {@code a-zA-Z0-1} will return a string containing all upper and lower
271 * case characters (from a to z), and all digits form 0 to 9.
272 *
273 * @see #expand(CharSequence)
274 *
275 * @param pattern the {@code pattern} to expand.
276 * @return the expanded pattern.
277 * @throws PatternSyntaxException if the pattern could not be expanded.
278 * @throws NullPointerException if the pattern is {@code null}.
279 */
280 public static CharSeq of(final CharSequence pattern) {
281 return new CharSeq(expand(pattern));
282 }
283
284 /**
285 * Expands the characters between {@code a} and {@code b}.
286 *
287 * @see #expand(char, char)
288 *
289 * @param a the start character.
290 * @param b the stop character.
291 * @return the expanded characters.
292 */
293 public static CharSeq of(final char a, final char b) {
294 return new CharSeq(expand(a, b));
295 }
296
297 /**
298 * Helper method for creating a sequence of characters from the given
299 * {@code CharSequence}. The returned sequence will contain all characters
300 * in the original order.
301 *
302 * @param chars the char sequence to convert.
303 * @return a sequence which will contain all given chars in the original
304 * order.
305 */
306 public static ISeq<Character> toISeq(final CharSequence chars) {
307 final MSeq<Character> seq = MSeq.ofLength(chars.length());
308 for (int i = 0; i < chars.length(); ++i) {
309 seq.set(i, chars.charAt(i));
310 }
311
312 return seq.toISeq();
313 }
314
315 public static Collector<Character, ?, CharSeq> toCharSeq() {
316 return Collector.of(
317 StringBuilder::new,
318 StringBuilder::append,
319 (a, b) -> {a.append(b); return a;},
320 CharSeq::new
321 );
322 }
323
324 }
325
326 abstract class CharSeqBase extends ArrayISeq<Character> {
327
328 @Serial
329 private static final long serialVersionUID = 1L;
330
331 final char[] array;
332
333 protected CharSeqBase(final char[] characters) {
334 super(Array.of(CharStore.of(characters)).seal());
335 array = characters;
336 }
337
338 }
|