001/* 002 * Java Genetic Algorithm Library (jenetics-7.2.0). 003 * Copyright (c) 2007-2023 Franz Wilhelmstötter 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 * Author: 018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com) 019 */ 020package io.jenetics.util; 021 022import static java.util.Objects.requireNonNull; 023 024import java.io.Serial; 025import java.io.Serializable; 026import java.util.Arrays; 027import java.util.regex.PatternSyntaxException; 028import java.util.stream.Collector; 029 030import io.jenetics.internal.collection.Array; 031import io.jenetics.internal.collection.ArrayISeq; 032import io.jenetics.internal.collection.CharStore; 033 034/** 035 * This class is used for holding the valid characters of an 036 * {@link io.jenetics.CharacterGene}. It is not a character sequence in the 037 * classical sense. The characters of this sequence are sorted and doesn't 038 * contain duplicate values, like a set. 039 * 040 * <pre>{@code 041 * final CharSeq cs1 = new CharSeq("abcdeaafg"); 042 * final CharSeq cs2 = new CharSeq("gfedcbabb"); 043 * assert(cs1.equals(cs2)); 044 * }</pre> 045 * 046 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a> 047 * @since 1.0 048 * @version 2.0 049 */ 050public final class CharSeq 051 extends CharSeqBase 052 implements 053 CharSequence, 054 ISeq<Character>, 055 Comparable<CharSeq>, 056 Serializable 057{ 058 @Serial 059 private static final long serialVersionUID = 2L; 060 061 /** 062 * Create a new (distinct) CharSeq from the given {@code characters}. The 063 * given {@link CharSequence} is sorted and duplicate values are removed 064 * 065 * @see #CharSeq(CharSequence) 066 * 067 * @param characters the characters. 068 * @throws NullPointerException if the {@code characters} are {@code null}. 069 */ 070 public CharSeq(final char[] characters) { 071 super(distinct(characters.clone())); 072 } 073 074 /** 075 * Create a new (distinct) CharSeq from the given {@code characters}. The 076 * given {@link CharSequence} is sorted and duplicate values are removed. 077 * 078 * @param characters the characters. 079 * @throws NullPointerException if the {@code characters} are {@code null}. 080 */ 081 public CharSeq(final CharSequence characters) { 082 super(distinct(toCharArray(characters))); 083 } 084 085 private static char[] toCharArray(final CharSequence characters) { 086 requireNonNull(characters, "Characters"); 087 088 final char[] chars = new char[characters.length()]; 089 for (int i = chars.length; --i >= 0;) { 090 chars[i] = characters.charAt(i); 091 } 092 093 return chars; 094 } 095 096 private static char[] distinct(final char[] chars) { 097 Arrays.sort(chars); 098 099 int j = 0; 100 for (int i = 1; i < chars.length; ++i) { 101 if (chars[j] != chars[i]) { 102 chars[++j] = chars[i]; 103 } 104 } 105 106 final int size = Math.min(chars.length, j + 1); 107 final char[] array = new char[size]; 108 System.arraycopy(chars, 0, array, 0, size); 109 return array; 110 } 111 112 @Override 113 public boolean contains(final Object object) { 114 return object instanceof Character && contains((Character)object); 115 } 116 117 /** 118 * Test whether this character set contains the given character {@code c}. 119 * 120 * @param c the character to test. 121 * @return {@code true} if this character set contains the given character, 122 * {@code false} otherwise. 123 * @throws NullPointerException if the given character {@code c} is 124 * {@code null}. 125 */ 126 public boolean contains(final Character c) { 127 return contains(c.charValue()); 128 } 129 130 /** 131 * Test whether this character set contains the given character {@code c}. 132 * 133 * @param c the character to test. 134 * @return {@code true} if this character set contains the given character, 135 * {@code false} otherwise. 136 */ 137 public boolean contains(final char c) { 138 return Arrays.binarySearch(array, c) >= 0; 139 } 140 141 @Override 142 public char charAt(final int index) { 143 return array[index]; 144 } 145 146 @Override 147 public int length() { 148 return array.length; 149 } 150 151 @Override 152 public CharSeq subSequence(final int start, final int end) { 153 return new CharSeq(new String(array, start, end - start)); 154 } 155 156 /** 157 * Test whether this character set is empty. 158 * 159 * @return {@code true} if this character set is empty, {@code false} 160 * otherwise. 161 */ 162 public boolean isEmpty() { 163 return array.length == 0; 164 } 165 166 @Override 167 public int hashCode() { 168 return 17 + 31*Arrays.hashCode(array); 169 } 170 171 @Override 172 public boolean equals(final Object obj) { 173 return obj == this || 174 obj instanceof CharSeq other && 175 Arrays.equals(other.array, array); 176 } 177 178 @Override 179 public int compareTo(final CharSeq set) { 180 int result = 0; 181 182 final int n = Math.min(array.length, set.array.length); 183 for (int i = 0; i < n && result == 0; ++i) { 184 result = array[i] - set.array[i]; 185 } 186 if (result == 0) { 187 result = array.length - set.array.length; 188 } 189 190 return result; 191 } 192 193 @Override 194 public String toString() { 195 return new String(array); 196 } 197 198 /** 199 * Expands the character range for the given {@code pattern}. E.g. 200 * {@code a-zA-Z0-1} will return a string containing all upper and lower 201 * case characters (from a to z), and all digits form 0 to 9. 202 * 203 * @param pattern the {@code pattern} to expand. 204 * @return the expanded pattern. 205 * @throws PatternSyntaxException if the pattern could not be expanded. 206 * @throws NullPointerException if the pattern is {@code null}. 207 */ 208 public static String expand(final CharSequence pattern) { 209 requireNonNull(pattern, "Pattern"); 210 final StringBuilder out = new StringBuilder(); 211 212 for (int i = 0, n = pattern.length(); i < n; ++i) { 213 if (pattern.charAt(i) == '\\') { 214 ++i; 215 if (i < pattern.length()) { 216 out.append(pattern.charAt(i)); 217 } 218 } else if (pattern.charAt(i) == '-') { 219 if (i <= 0 || i >= (pattern.length() - 1)) { 220 throw new PatternSyntaxException( 221 "Dangling range operator '-'", pattern.toString(), 222 pattern.length() - 1 223 ); 224 } 225 226 final String range = expand( 227 pattern.charAt(i - 1), 228 pattern.charAt(i + 1) 229 ); 230 out.append(range); 231 232 ++i; 233 } else if (i + 1 == n || pattern.charAt(i + 1) != '-') { 234 out.append(pattern.charAt(i)); 235 } 236 } 237 238 return out.toString(); 239 } 240 241 /** 242 * Expands the characters between {@code a} and {@code b}. 243 * 244 * @param a the start character. 245 * @param b the stop character. 246 * @return the expanded characters. 247 */ 248 public static String expand(final char a, final char b) { 249 final StringBuilder out = new StringBuilder(); 250 251 if (a < b) { 252 char c = a; 253 while (c <= b) { 254 out.append(c); 255 c = (char) (c + 1); 256 } 257 } else if (a > b) { 258 char c = a; 259 while (c >= b) { 260 out.append(c); 261 c = (char)(c - 1); 262 } 263 } 264 265 return out.toString(); 266 } 267 268 /** 269 * Expands the character range for the given {@code pattern}. E.g. 270 * {@code a-zA-Z0-1} will return a string containing all upper and lower 271 * case characters (from a to z), and all digits form 0 to 9. 272 * 273 * @see #expand(CharSequence) 274 * 275 * @param pattern the {@code pattern} to expand. 276 * @return the expanded pattern. 277 * @throws PatternSyntaxException if the pattern could not be expanded. 278 * @throws NullPointerException if the pattern is {@code null}. 279 */ 280 public static CharSeq of(final CharSequence pattern) { 281 return new CharSeq(expand(pattern)); 282 } 283 284 /** 285 * Expands the characters between {@code a} and {@code b}. 286 * 287 * @see #expand(char, char) 288 * 289 * @param a the start character. 290 * @param b the stop character. 291 * @return the expanded characters. 292 */ 293 public static CharSeq of(final char a, final char b) { 294 return new CharSeq(expand(a, b)); 295 } 296 297 /** 298 * Helper method for creating a sequence of characters from the given 299 * {@code CharSequence}. The returned sequence will contain all characters 300 * in the original order. 301 * 302 * @param chars the char sequence to convert. 303 * @return a sequence which will contain all given chars in the original 304 * order. 305 */ 306 public static ISeq<Character> toISeq(final CharSequence chars) { 307 final MSeq<Character> seq = MSeq.ofLength(chars.length()); 308 for (int i = 0; i < chars.length(); ++i) { 309 seq.set(i, chars.charAt(i)); 310 } 311 312 return seq.toISeq(); 313 } 314 315 public static Collector<Character, ?, CharSeq> toCharSeq() { 316 return Collector.of( 317 StringBuilder::new, 318 StringBuilder::append, 319 (a, b) -> {a.append(b); return a;}, 320 CharSeq::new 321 ); 322 } 323 324} 325 326abstract class CharSeqBase extends ArrayISeq<Character> { 327 328 @Serial 329 private static final long serialVersionUID = 1L; 330 331 final char[] array; 332 333 protected CharSeqBase(final char[] characters) { 334 super(Array.of(CharStore.of(characters)).seal()); 335 array = characters; 336 } 337 338}