001 /*
002 * Java Genetic Algorithm Library (jenetics-4.3.0).
003 * Copyright (c) 2007-2018 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020 package io.jenetics.util;
021
022 import static java.util.Objects.requireNonNull;
023
024 import java.io.Serializable;
025 import java.util.Arrays;
026 import java.util.regex.PatternSyntaxException;
027 import java.util.stream.Collector;
028
029 import io.jenetics.internal.collection.Array;
030 import io.jenetics.internal.collection.ArrayISeq;
031 import io.jenetics.internal.collection.CharStore;
032
033 /**
034 * This class is used for holding the valid characters of an
035 * {@link io.jenetics.CharacterGene}. It is not a character sequence in the
036 * classical sense. The characters of this sequence are sorted and doesn't
037 * contain duplicate values, like a set.
038 *
039 * <pre>{@code
040 * final CharSeq cs1 = new CharSeq("abcdeaafg");
041 * final CharSeq cs2 = new CharSeq("gfedcbabb");
042 * assert(cs1.equals(cs2));
043 * }</pre>
044 *
045 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
046 * @since 1.0
047 * @version 2.0
048 */
049 public final class CharSeq
050 extends CharSeqBase
051 implements
052 CharSequence,
053 ISeq<Character>,
054 Comparable<CharSeq>,
055 Serializable
056 {
057 private static final long serialVersionUID = 2L;
058
059 /**
060 * Create a new (distinct) CharSeq from the given {@code characters}. The
061 * given {@link CharSequence} is sorted and duplicate values are removed
062 *
063 * @see #CharSeq(CharSequence)
064 *
065 * @param characters the characters.
066 * @throws NullPointerException if the {@code characters} are {@code null}.
067 */
068 public CharSeq(final char[] characters) {
069 super(distinct(characters.clone()));
070 }
071
072 /**
073 * Create a new (distinct) CharSeq from the given {@code characters}. The
074 * given {@link CharSequence} is sorted and duplicate values are removed.
075 *
076 * @param characters the characters.
077 * @throws NullPointerException if the {@code characters} are {@code null}.
078 */
079 public CharSeq(final CharSequence characters) {
080 super(distinct(toCharArray(characters)));
081 }
082
083 private static char[] toCharArray(final CharSequence characters) {
084 requireNonNull(characters, "Characters");
085
086 final char[] chars = new char[characters.length()];
087 for (int i = chars.length; --i >= 0;) {
088 chars[i] = characters.charAt(i);
089 }
090
091 return chars;
092 }
093
094 private static char[] distinct(final char[] chars) {
095 Arrays.sort(chars);
096
097 int j = 0;
098 for (int i = 1; i < chars.length; ++i) {
099 if (chars[j] != chars[i]) {
100 chars[++j] = chars[i];
101 }
102 }
103
104 final int size = Math.min(chars.length, j + 1);
105 final char[] array = new char[size];
106 System.arraycopy(chars, 0, array, 0, size);
107 return array;
108 }
109
110 @Override
111 public boolean contains(final Object object) {
112 return object instanceof Character && contains((Character)object);
113 }
114
115 /**
116 * Test whether this character set contains the given character {@code c}.
117 *
118 * @param c the character to test.
119 * @return {@code true} if this character set contains the given character,
120 * {@code false} otherwise.
121 * @throws NullPointerException if the given character {@code c} is
122 * {@code null}.
123 */
124 public boolean contains(final Character c) {
125 return contains(c.charValue());
126 }
127
128 /**
129 * Test whether this character set contains the given character {@code c}.
130 *
131 * @param c the character to test.
132 * @return {@code true} if this character set contains the given character,
133 * {@code false} otherwise.
134 */
135 public boolean contains(final char c) {
136 return Arrays.binarySearch(array, c) >= 0;
137 }
138
139 @Override
140 public char charAt(final int index) {
141 return array[index];
142 }
143
144 @Override
145 public int length() {
146 return array.length;
147 }
148
149 @Override
150 public CharSeq subSequence(final int start, final int end) {
151 return new CharSeq(new String(array, start, end - start));
152 }
153
154 /**
155 * Test whether this character set is empty.
156 *
157 * @return {@code true} if this character set is empty, {@code false}
158 * otherwise.
159 */
160 public boolean isEmpty() {
161 return array.length == 0;
162 }
163
164 @Override
165 public int hashCode() {
166 return 17 + 31*Arrays.hashCode(array);
167 }
168
169 @Override
170 public boolean equals(final Object obj) {
171 return obj == this ||
172 obj instanceof CharSeq &&
173 Arrays.equals(((CharSeq)obj).array, array);
174 }
175
176 @Override
177 public int compareTo(final CharSeq set) {
178 int result = 0;
179
180 final int n = Math.min(array.length, set.array.length);
181 for (int i = 0; i < n && result == 0; ++i) {
182 result = array[i] - set.array[i];
183 }
184 if (result == 0) {
185 result = array.length - set.array.length;
186 }
187
188 return result;
189 }
190
191 @Override
192 public String toString() {
193 return new String(array);
194 }
195
196 /**
197 * Expands the character range for the given {@code pattern}. E.g
198 * {@code a-zA-Z0-1} will return a string containing all upper and lower
199 * case characters (from a to z) and all digits form 0 to 9.
200 *
201 * @param pattern the {@code pattern} to expand.
202 * @return the expanded pattern.
203 * @throws PatternSyntaxException if the pattern could not be expanded.
204 * @throws NullPointerException if the pattern is {@code null}.
205 */
206 public static String expand(final CharSequence pattern) {
207 requireNonNull(pattern, "Pattern");
208 final StringBuilder out = new StringBuilder();
209
210 for (int i = 0, n = pattern.length(); i < n; ++i) {
211 if (pattern.charAt(i) == '\\') {
212 ++i;
213 if (i < pattern.length()) {
214 out.append(pattern.charAt(i));
215 }
216 } else if (pattern.charAt(i) == '-') {
217 if (i <= 0 || i >= (pattern.length() - 1)) {
218 throw new PatternSyntaxException(
219 "Dangling range operator '-'", pattern.toString(),
220 pattern.length() - 1
221 );
222 }
223
224 final String range = expand(
225 pattern.charAt(i - 1),
226 pattern.charAt(i + 1)
227 );
228 out.append(range);
229
230 ++i;
231 } else if (i + 1 == n || pattern.charAt(i + 1) != '-') {
232 out.append(pattern.charAt(i));
233 }
234 }
235
236 return out.toString();
237 }
238
239 /**
240 * Expands the characters between {@code a} and {@code b}.
241 *
242 * @param a the start character.
243 * @param b the stop character.
244 * @return the expanded characters.
245 */
246 public static String expand(final char a, final char b) {
247 final StringBuilder out = new StringBuilder();
248
249 if (a < b) {
250 char c = a;
251 while (c <= b) {
252 out.append(c);
253 c = (char) (c + 1);
254 }
255 } else if (a > b) {
256 char c = a;
257 while (c >= b) {
258 out.append(c);
259 c = (char)(c - 1);
260 }
261 }
262
263 return out.toString();
264 }
265
266 /**
267 * Expands the character range for the given {@code pattern}. E.g.
268 * {@code a-zA-Z0-1} will return a string containing all upper and lower
269 * case characters (from a to z) and all digits form 0 to 9.
270 *
271 * @see #expand(CharSequence)
272 *
273 * @param pattern the {@code pattern} to expand.
274 * @return the expanded pattern.
275 * @throws PatternSyntaxException if the pattern could not be expanded.
276 * @throws NullPointerException if the pattern is {@code null}.
277 */
278 public static CharSeq of(final CharSequence pattern) {
279 return new CharSeq(expand(pattern));
280 }
281
282 /**
283 * Expands the characters between {@code a} and {@code b}.
284 *
285 * @see #expand(char, char)
286 *
287 * @param a the start character.
288 * @param b the stop character.
289 * @return the expanded characters.
290 */
291 public static CharSeq of(final char a, final char b) {
292 return new CharSeq(expand(a, b));
293 }
294
295 /**
296 * Helper method for creating a sequence of characters from the given
297 * {@code CharSequence}. The returned sequence will contain all characters
298 * in the original order.
299 *
300 * @param chars the char sequence to convert.
301 * @return a sequence which will contain all given chars in the original
302 * order.
303 */
304 public static ISeq<Character> toISeq(final CharSequence chars) {
305 final MSeq<Character> seq = MSeq.ofLength(chars.length());
306 for (int i = 0; i < chars.length(); ++i) {
307 seq.set(i, chars.charAt(i));
308 }
309
310 return seq.toISeq();
311 }
312
313 public static Collector<Character, ?, CharSeq> toCharSeq() {
314 return Collector.of(
315 StringBuilder::new,
316 StringBuilder::append,
317 (a, b) -> {a.append(b); return a;},
318 CharSeq::new
319 );
320 }
321
322 }
323
324 abstract class CharSeqBase extends ArrayISeq<Character> {
325 private static final long serialVersionUID = 1L;
326
327 final char[] array;
328
329 protected CharSeqBase(final char[] characters) {
330 super(Array.of(CharStore.of(characters)).seal());
331 array = characters;
332 }
333
334 }
|