001 /*
002 * Java Genetic Algorithm Library (jenetics-3.8.0).
003 * Copyright (c) 2007-2017 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmx.at)
019 */
020 package org.jenetics.util;
021
022 import static java.util.Objects.requireNonNull;
023 import static org.jenetics.internal.util.Equality.eq;
024
025 import java.io.Serializable;
026 import java.util.Arrays;
027 import java.util.regex.PatternSyntaxException;
028 import java.util.stream.Collector;
029
030 import org.jenetics.internal.collection.Array;
031 import org.jenetics.internal.collection.ArrayISeq;
032 import org.jenetics.internal.collection.CharStore;
033 import org.jenetics.internal.util.Equality;
034 import org.jenetics.internal.util.Hash;
035
036 /**
037 * This class is used for holding the valid characters of an
038 * {@link org.jenetics.CharacterGene}. It is not a character sequence in the
039 * classical sense. The characters of this sequence are sorted and doesn't
040 * contain duplicate values, like a set.
041 *
042 * <pre>{@code
043 * final CharSeq cs1 = new CharSeq("abcdeaafg");
044 * final CharSeq cs2 = new CharSeq("gfedcbabb");
045 * assert(cs1.equals(cs2));
046 * }</pre>
047 *
048 * @author <a href="mailto:franz.wilhelmstoetter@gmx.at">Franz Wilhelmstötter</a>
049 * @since 1.0
050 * @version 2.0
051 */
052 public final class CharSeq
053 extends CharSeqBase
054 implements
055 CharSequence,
056 ISeq<Character>,
057 Comparable<CharSeq>,
058 Serializable
059 {
060 private static final long serialVersionUID = 2L;
061
062 /**
063 * Create a new (distinct) CharSeq from the given {@code characters}. The
064 * given {@link CharSequence} is sorted and duplicate values are removed
065 *
066 * @see #CharSeq(CharSequence)
067 *
068 * @param characters the characters.
069 * @throws NullPointerException if the {@code characters} are {@code null}.
070 */
071 public CharSeq(final char[] characters) {
072 super(distinct(characters.clone()));
073 }
074
075 /**
076 * Create a new (distinct) CharSeq from the given {@code characters}. The
077 * given {@link CharSequence} is sorted and duplicate values are removed.
078 *
079 * @param characters the characters.
080 * @throws NullPointerException if the {@code characters} are {@code null}.
081 */
082 public CharSeq(final CharSequence characters) {
083 super(distinct(toCharArray(characters)));
084 }
085
086 private static char[] toCharArray(final CharSequence characters) {
087 requireNonNull(characters, "Characters");
088
089 final char[] chars = new char[characters.length()];
090 for (int i = chars.length; --i >= 0;) {
091 chars[i] = characters.charAt(i);
092 }
093
094 return chars;
095 }
096
097 private static char[] distinct(final char[] chars) {
098 Arrays.sort(chars);
099
100 int j = 0;
101 for (int i = 1; i < chars.length; ++i) {
102 if (chars[j] != chars[i]) {
103 chars[++j] = chars[i];
104 }
105 }
106
107 final int size = Math.min(chars.length, j + 1);
108 final char[] array = new char[size];
109 System.arraycopy(chars, 0, array, 0, size);
110 return array;
111 }
112
113 @Override
114 public boolean contains(final Object object) {
115 return object instanceof Character && contains((Character)object);
116 }
117
118 /**
119 * Test whether this character set contains the given character {@code c}.
120 *
121 * @param c the character to test.
122 * @return {@code true} if this character set contains the given character,
123 * {@code false} otherwise.
124 * @throws NullPointerException if the given character {@code c} is
125 * {@code null}.
126 */
127 public boolean contains(final Character c) {
128 return contains(c.charValue());
129 }
130
131 /**
132 * Test whether this character set contains the given character {@code c}.
133 *
134 * @param c the character to test.
135 * @return {@code true} if this character set contains the given character,
136 * {@code false} otherwise.
137 */
138 public boolean contains(final char c) {
139 return Arrays.binarySearch(array, c) >= 0;
140 }
141
142 @Override
143 public char charAt(final int index) {
144 return array[index];
145 }
146
147 @Override
148 public int length() {
149 return array.length;
150 }
151
152 @Override
153 public CharSeq subSequence(final int start, final int end) {
154 return new CharSeq(new String(array, start, end - start));
155 }
156
157 /**
158 * Test whether this character set is empty.
159 *
160 * @return {@code true} if this character set is empty, {@code false}
161 * otherwise.
162 */
163 public boolean isEmpty() {
164 return array.length == 0;
165 }
166
167 @Override
168 public int hashCode() {
169 return Hash.of(getClass()).and(array).value();
170 }
171
172 @Override
173 public boolean equals(final Object obj) {
174 return Equality.of(this, obj).test(ch ->
175 eq(array, ch.array)
176 );
177 }
178
179 @Override
180 public int compareTo(final CharSeq set) {
181 int result = 0;
182
183 final int n = Math.min(array.length, set.array.length);
184 for (int i = 0; i < n && result == 0; ++i) {
185 result = array[i] - set.array[i];
186 }
187 if (result == 0) {
188 result = array.length - set.array.length;
189 }
190
191 return result;
192 }
193
194 @Override
195 public String toString() {
196 return new String(array);
197 }
198
199 /**
200 * Expands the character range for the given {@code pattern}. E.g
201 * {@code a-zA-Z0-1} will return a string containing all upper and lower
202 * case characters (from a to z) and all digits form 0 to 9.
203 *
204 * @param pattern the {@code pattern} to expand.
205 * @return the expanded pattern.
206 * @throws PatternSyntaxException if the pattern could not be expanded.
207 * @throws NullPointerException if the pattern is {@code null}.
208 */
209 public static String expand(final CharSequence pattern) {
210 requireNonNull(pattern, "Pattern");
211 final StringBuilder out = new StringBuilder();
212
213 for (int i = 0, n = pattern.length(); i < n; ++i) {
214 if (pattern.charAt(i) == '\\') {
215 ++i;
216 if (i < pattern.length()) {
217 out.append(pattern.charAt(i));
218 }
219 } else if (pattern.charAt(i) == '-') {
220 if (i <= 0 || i >= (pattern.length() - 1)) {
221 throw new PatternSyntaxException(
222 "Dangling range operator '-'", pattern.toString(),
223 pattern.length() - 1
224 );
225 }
226
227 final String range = expand(
228 pattern.charAt(i - 1),
229 pattern.charAt(i + 1)
230 );
231 out.append(range);
232
233 ++i;
234 } else if (i + 1 == n || pattern.charAt(i + 1) != '-') {
235 out.append(pattern.charAt(i));
236 }
237 }
238
239 return out.toString();
240 }
241
242 /**
243 * Expands the characters between {@code a} and {@code b}.
244 *
245 * @param a the start character.
246 * @param b the stop character.
247 * @return the expanded characters.
248 */
249 public static String expand(final char a, final char b) {
250 final StringBuilder out = new StringBuilder();
251
252 if (a < b) {
253 char c = a;
254 while (c <= b) {
255 out.append(c);
256 c = (char) (c + 1);
257 }
258 } else if (a > b) {
259 char c = a;
260 while (c >= b) {
261 out.append(c);
262 c = (char)(c - 1);
263 }
264 }
265
266 return out.toString();
267 }
268
269 /**
270 * Expands the character range for the given {@code pattern}. E.g.
271 * {@code a-zA-Z0-1} will return a string containing all upper and lower
272 * case characters (from a to z) and all digits form 0 to 9.
273 *
274 * @see #expand(CharSequence)
275 *
276 * @param pattern the {@code pattern} to expand.
277 * @return the expanded pattern.
278 * @throws PatternSyntaxException if the pattern could not be expanded.
279 * @throws NullPointerException if the pattern is {@code null}.
280 */
281 public static CharSeq of(final CharSequence pattern) {
282 return new CharSeq(expand(pattern));
283 }
284
285 /**
286 * Expands the characters between {@code a} and {@code b}.
287 *
288 * @see #expand(char, char)
289 *
290 * @param a the start character.
291 * @param b the stop character.
292 * @return the expanded characters.
293 */
294 public static CharSeq of(final char a, final char b) {
295 return new CharSeq(expand(a, b));
296 }
297
298 /**
299 * Helper method for creating a sequence of characters from the given
300 * {@code CharSequence}. The returned sequence will contain all characters
301 * in the original order.
302 *
303 * @param chars the char sequence to convert.
304 * @return a sequence which will contain all given chars in the original
305 * order.
306 */
307 public static ISeq<Character> toISeq(final CharSequence chars) {
308 final MSeq<Character> seq = MSeq.ofLength(chars.length());
309 for (int i = 0; i < chars.length(); ++i) {
310 seq.set(i, chars.charAt(i));
311 }
312
313 return seq.toISeq();
314 }
315
316 public static Collector<Character, ?, CharSeq> toCharSeq() {
317 return Collector.of(
318 StringBuilder::new,
319 StringBuilder::append,
320 (a, b) -> {a.append(b); return a;},
321 CharSeq::new
322 );
323 }
324
325 }
326
327 abstract class CharSeqBase extends ArrayISeq<Character> {
328 private static final long serialVersionUID = 1L;
329
330 final char[] array;
331
332 protected CharSeqBase(final char[] characters) {
333 super(Array.of(CharStore.of(characters)).seal());
334 array = characters;
335 }
336 }
|