001/* 002 * Java Genetic Algorithm Library (jenetics-8.1.0). 003 * Copyright (c) 2007-2024 Franz Wilhelmstötter 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 * Author: 018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com) 019 */ 020package io.jenetics.util; 021 022import static java.util.Objects.requireNonNull; 023 024import java.io.Serial; 025import java.io.Serializable; 026import java.util.Arrays; 027import java.util.regex.PatternSyntaxException; 028import java.util.stream.Collector; 029 030import io.jenetics.internal.collection.Array; 031import io.jenetics.internal.collection.ArrayISeq; 032import io.jenetics.internal.collection.CharStore; 033 034/** 035 * This class is used for holding the valid characters of an 036 * {@link io.jenetics.CharacterGene}. It is not a character sequence in the 037 * classical sense. The characters of this sequence are sorted and doesn't 038 * contain duplicate values, like a set. 039 * {@snippet lang="java": 040 * final CharSeq cs1 = new CharSeq("abcdeaafg"); 041 * final CharSeq cs2 = new CharSeq("gfedcbabb"); 042 * assert(cs1.equals(cs2)); 043 * } 044 * 045 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a> 046 * @since 1.0 047 * @version 2.0 048 */ 049public final class CharSeq 050 extends CharSeqBase 051 implements 052 CharSequence, 053 ISeq<Character>, 054 Comparable<CharSeq>, 055 Serializable 056{ 057 @Serial 058 private static final long serialVersionUID = 2L; 059 060 /** 061 * Create a new (distinct) CharSeq from the given {@code characters}. The 062 * given {@link CharSequence} is sorted and duplicate values are removed 063 * 064 * @see #CharSeq(CharSequence) 065 * 066 * @param characters the characters. 067 * @throws NullPointerException if the {@code characters} are {@code null}. 068 */ 069 public CharSeq(final char[] characters) { 070 super(distinct(characters.clone())); 071 } 072 073 /** 074 * Create a new (distinct) CharSeq from the given {@code characters}. The 075 * given {@link CharSequence} is sorted and duplicate values are removed. 076 * 077 * @param characters the characters. 078 * @throws NullPointerException if the {@code characters} are {@code null}. 079 */ 080 public CharSeq(final CharSequence characters) { 081 super(distinct(toCharArray(characters))); 082 } 083 084 private static char[] toCharArray(final CharSequence characters) { 085 requireNonNull(characters, "Characters"); 086 087 final char[] chars = new char[characters.length()]; 088 for (int i = chars.length; --i >= 0;) { 089 chars[i] = characters.charAt(i); 090 } 091 092 return chars; 093 } 094 095 private static char[] distinct(final char[] chars) { 096 Arrays.sort(chars); 097 098 int j = 0; 099 for (int i = 1; i < chars.length; ++i) { 100 if (chars[j] != chars[i]) { 101 chars[++j] = chars[i]; 102 } 103 } 104 105 final int size = Math.min(chars.length, j + 1); 106 final char[] array = new char[size]; 107 System.arraycopy(chars, 0, array, 0, size); 108 return array; 109 } 110 111 @Override 112 public boolean contains(final Object object) { 113 return object instanceof Character && contains((Character)object); 114 } 115 116 /** 117 * Test whether this character set contains the given character {@code c}. 118 * 119 * @param c the character to test. 120 * @return {@code true} if this character set contains the given character, 121 * {@code false} otherwise. 122 * @throws NullPointerException if the given character {@code c} is 123 * {@code null}. 124 */ 125 public boolean contains(final Character c) { 126 return contains(c.charValue()); 127 } 128 129 /** 130 * Test whether this character set contains the given character {@code c}. 131 * 132 * @param c the character to test. 133 * @return {@code true} if this character set contains the given character, 134 * {@code false} otherwise. 135 */ 136 public boolean contains(final char c) { 137 return Arrays.binarySearch(array, c) >= 0; 138 } 139 140 @Override 141 public char charAt(final int index) { 142 return array[index]; 143 } 144 145 @Override 146 public int length() { 147 return array.length; 148 } 149 150 @Override 151 public CharSeq subSequence(final int start, final int end) { 152 return new CharSeq(new String(array, start, end - start)); 153 } 154 155 /** 156 * Test whether this character set is empty. 157 * 158 * @return {@code true} if this character set is empty, {@code false} 159 * otherwise. 160 */ 161 public boolean isEmpty() { 162 return array.length == 0; 163 } 164 165 @Override 166 public int hashCode() { 167 return 17 + 31*Arrays.hashCode(array); 168 } 169 170 @Override 171 public boolean equals(final Object obj) { 172 return obj == this || 173 obj instanceof CharSeq other && 174 Arrays.equals(other.array, array); 175 } 176 177 @Override 178 public int compareTo(final CharSeq set) { 179 int result = 0; 180 181 final int n = Math.min(array.length, set.array.length); 182 for (int i = 0; i < n && result == 0; ++i) { 183 result = array[i] - set.array[i]; 184 } 185 if (result == 0) { 186 result = array.length - set.array.length; 187 } 188 189 return result; 190 } 191 192 @Override 193 public String toString() { 194 return new String(array); 195 } 196 197 /** 198 * Expands the character range for the given {@code pattern}. E.g. 199 * {@code a-zA-Z0-1} will return a string containing all upper and lower 200 * case characters (from a to z), and all digits form 0 to 9. 201 * 202 * @param pattern the {@code pattern} to expand. 203 * @return the expanded pattern. 204 * @throws PatternSyntaxException if the pattern could not be expanded. 205 * @throws NullPointerException if the pattern is {@code null}. 206 */ 207 public static String expand(final CharSequence pattern) { 208 requireNonNull(pattern, "Pattern"); 209 final StringBuilder out = new StringBuilder(); 210 211 for (int i = 0, n = pattern.length(); i < n; ++i) { 212 if (pattern.charAt(i) == '\\') { 213 ++i; 214 if (i < pattern.length()) { 215 out.append(pattern.charAt(i)); 216 } 217 } else if (pattern.charAt(i) == '-') { 218 if (i <= 0 || i >= (pattern.length() - 1)) { 219 throw new PatternSyntaxException( 220 "Dangling range operator '-'", pattern.toString(), 221 pattern.length() - 1 222 ); 223 } 224 225 final String range = expand( 226 pattern.charAt(i - 1), 227 pattern.charAt(i + 1) 228 ); 229 out.append(range); 230 231 ++i; 232 } else if (i + 1 == n || pattern.charAt(i + 1) != '-') { 233 out.append(pattern.charAt(i)); 234 } 235 } 236 237 return out.toString(); 238 } 239 240 /** 241 * Expands the characters between {@code a} and {@code b}. 242 * 243 * @param a the start character. 244 * @param b the stop character. 245 * @return the expanded characters. 246 */ 247 public static String expand(final char a, final char b) { 248 final StringBuilder out = new StringBuilder(); 249 250 if (a < b) { 251 char c = a; 252 while (c <= b) { 253 out.append(c); 254 c = (char) (c + 1); 255 } 256 } else if (a > b) { 257 char c = a; 258 while (c >= b) { 259 out.append(c); 260 c = (char)(c - 1); 261 } 262 } 263 264 return out.toString(); 265 } 266 267 /** 268 * Expands the character range for the given {@code pattern}. E.g. 269 * {@code a-zA-Z0-1} will return a string containing all upper and lower 270 * case characters (from a to z), and all digits form 0 to 9. 271 * 272 * @see #expand(CharSequence) 273 * 274 * @param pattern the {@code pattern} to expand. 275 * @return the expanded pattern. 276 * @throws PatternSyntaxException if the pattern could not be expanded. 277 * @throws NullPointerException if the pattern is {@code null}. 278 */ 279 public static CharSeq of(final CharSequence pattern) { 280 return new CharSeq(expand(pattern)); 281 } 282 283 /** 284 * Expands the characters between {@code a} and {@code b}. 285 * 286 * @see #expand(char, char) 287 * 288 * @param a the start character. 289 * @param b the stop character. 290 * @return the expanded characters. 291 */ 292 public static CharSeq of(final char a, final char b) { 293 return new CharSeq(expand(a, b)); 294 } 295 296 /** 297 * Helper method for creating a sequence of characters from the given 298 * {@code CharSequence}. The returned sequence will contain all characters 299 * in the original order. 300 * 301 * @param chars the char sequence to convert. 302 * @return a sequence which will contain all given chars in the original 303 * order. 304 */ 305 public static ISeq<Character> toISeq(final CharSequence chars) { 306 final MSeq<Character> seq = MSeq.ofLength(chars.length()); 307 for (int i = 0; i < chars.length(); ++i) { 308 seq.set(i, chars.charAt(i)); 309 } 310 311 return seq.toISeq(); 312 } 313 314 public static Collector<Character, ?, CharSeq> toCharSeq() { 315 return Collector.of( 316 StringBuilder::new, 317 StringBuilder::append, 318 (a, b) -> {a.append(b); return a;}, 319 CharSeq::new 320 ); 321 } 322 323} 324 325abstract class CharSeqBase extends ArrayISeq<Character> { 326 327 @Serial 328 private static final long serialVersionUID = 1L; 329 330 final char[] array; 331 332 protected CharSeqBase(final char[] characters) { 333 super(Array.of(CharStore.of(characters)).seal()); 334 array = characters; 335 } 336 337}