001/*
002 * Java Genetic Algorithm Library (jenetics-8.3.0).
003 * Copyright (c) 2007-2025 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020package io.jenetics.ext.util;
021
022import static java.util.Objects.requireNonNull;
023
024import java.io.IOException;
025import java.io.UncheckedIOException;
026import java.nio.CharBuffer;
027import java.util.Arrays;
028import java.util.List;
029import java.util.Objects;
030import java.util.function.Function;
031import java.util.function.Supplier;
032import java.util.stream.Collector;
033import java.util.stream.Collectors;
034import java.util.stream.Stream;
035
036import io.jenetics.internal.util.Lifecycle.IOValue;
037
038/**
039 * This class contains helper classes, which are the building blocks for handling
040 * CSV files.
041 * <ul>
042 *     <li>{@link LineReader}: This class allows you to read the lines of a
 *     CSV file. The result is a {@link Stream} of CSV lines, which are not
 *     yet split into columns.</li>
045 *     <li>{@link LineSplitter}: This class is responsible for splitting one
046 *     CSV line into column values.</li>
047 *     <li>{@link ColumnIndexes}: Allows defining the projection/embedding of
048 *     the split/joined column values.</li>
049 *     <li>{@link ColumnJoiner}: Joining a column array into a CSV line, which
050 *     can be joined into a whole CSV string.</li>
051 * </ul>
052 * <p>
053 * Additionally, this class contains a set of helper methods for CSV handling
054 * using default configurations.
055 * <p>
056 * <b>Reading and splitting CSV lines</b>
057 * {@snippet class="Snippets" region="readRows"}
058 * <p>
059 * <b>Joining columns and creating CSV string</b>
060 * {@snippet class="Snippets" region="CsvSupportSnippets.collect"}
061 * <p>
062 * <b>Parsing CSV string</b>
063 * {@snippet class="Snippets" region="parseCsv"}
064 * <p>
065 * <b>Parsing double values, given as CSV string</b>
066 * <p>
067 * Another example is to parse double values, which are given as CSV string and
068 * use this data for running a regression analysis.
069 * {@snippet class="Snippets" region="DoublesParsingSnippets.parseDoubles"}
070 *
071 * @see <a href="https://tools.ietf.org/html/rfc4180">RFC-4180</a>
072 *
073 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
074 * @version 8.2
075 * @since 8.1
076 */
077public final class CsvSupport {
078
079        /**
080         * Holds the CSV column <em>separator</em> character.
081         *
082         * @param value the separator character
083         *
084         * @version 8.1
085         * @since 8.1
086         */
087        public record Separator(char value) {
088
089                /**
090                 * The default separator character, '{@code ,}'.
091                 */
092                public static final Separator DEFAULT = new Separator(',');
093
094                /**
095                 * Creates a new Separator char object.
096                 *
097                 * @param value the separator character
098                 * @throws IllegalArgumentException if the given separator character is
099                 *         a line break character
100                 */
101                public Separator {
102                        if (isLineBreak(value)) {
103                                throw new IllegalArgumentException(
104                                        "Given separator char is a line break character."
105                                );
106                        }
107                }
108        }
109
110        /**
111         * Holds the CSV column <em>quote</em> character. The following excerpt from
112         * <a href="https://tools.ietf.org/html/rfc4180">RFC-4180</a> defines when
113         * a quote character has to be used.
114         * <pre>
115         *     5.  Each field may or may not be enclosed in double quotes (however
116         *         some programs, such as Microsoft Excel, do not use double quotes
117         *         at all).  If fields are not enclosed with double quotes, then
118         *         double quotes may not appear inside the fields.  For example:
119         *
120         *         "aaa","bbb","ccc" CRLF
121         *         zzz,yyy,xxx
122         *
123         *     6.  Fields containing line breaks (CRLF), double quotes, and commas
124         *         should be enclosed in double-quotes.  For example:
125         *
126         *         "aaa","b CRLF
127         *         bb","ccc" CRLF
128         *         zzz,yyy,xxx
129         *
130         *     7.  If double-quotes are used to enclose fields, then a double-quote
131         *         appearing inside a field must be escaped by preceding it with
132         *         another double quote.  For example:
133         *
134         *         "aaa","b""bb","ccc"
135         * </pre>
136         *
137         * @param value the quote character
138         *
139         * @version 8.1
140         * @since 8.1
141         */
142        public record Quote(char value) {
143
144                /**
145                 * The default quote character, '{@code "}'.
146                 */
147                public static final Quote DEFAULT = new Quote('"');
148
149                /**
150                 * The zero '\0' character.
151                 */
152                public static final Quote ZERO = new Quote('\0');
153
154                /**
155                 * Creates a new Quote char object.
156                 *
157                 * @param value the quote character
158                 * @throws IllegalArgumentException if the given quote character is
159                 *         a line break character
160                 */
161                public Quote {
162                        if (isLineBreak(value)) {
163                                throw new IllegalArgumentException(
164                                        "Given quote char is a line break character."
165                                );
166                        }
167                }
168        }
169
170        /**
171         * Holds the column indexes, which should be part of the split or join
172         * operation. When used in the {@link LineSplitter}, it lets you filter the
173         * split column and define its order. When used in the {@link ColumnJoiner},
174         * it can be used to define the column index in the resulting CSV for a
175         * given row array.
176         *
177         * @apiNote
178         * The column indexes is <em>thread-safe</em> and can be shared between
179         * different threads.
180         *
181         * @see LineSplitter
182         * @see ColumnJoiner
183         *
184         * @param values the column indexes which are part of the split result
185         *
186         * @version 8.1
187         * @since 8.1
188         */
189        public record ColumnIndexes(int... values) {
190
191                /**
192                 * Indicating that <em>all</em> columns should be part of the split
193                 * result.
194                 */
195                public static final ColumnIndexes ALL = new ColumnIndexes();
196
197                /**
198                 * Create a new column indexes object.
199                 *
200                 * @param values the column indexes
201                 */
202                public ColumnIndexes {
203                        values = values.clone();
204                }
205
206                @Override
207                public int[] values() {
208                        return values.clone();
209                }
210
211                @Override
212                public int hashCode() {
213                        return Arrays.hashCode(values);
214                }
215
216                @Override
217                public boolean equals(final Object obj) {
218                        return obj instanceof ColumnIndexes ci &&
219                                Arrays.equals(values, ci.values);
220                }
221
222                @Override
223                public String toString() {
224                        return Arrays.toString(values);
225                }
226        }
227
        /**
         * The newline string used for writing the CSV file: {@code \r\n}
         * (carriage return followed by line feed).
         */
        public static final String EOL = "\r\n";


        // Private constructor: prevents instantiation from outside this class.
        private CsvSupport() {
        }
236
237        private static boolean isLineBreak(final char c) {
238                return switch (c) {
239                        case '\n', '\r' -> true;
240                        default -> false;
241                };
242        }
243
244        /**
245         * Splits the CSV file, given by the {@code reader}, into a  {@link Stream}
246         * of CSV lines. The CSV is split at line breaks, as long as they are not
247         * part of a quoted column. For reading the CSV lines, the default quote
248         * character, {@link Quote#DEFAULT}, is used.
249         *
250         * @apiNote
251         * The returned stream must be closed by the caller, which also closes the
252         * CSV {@code reader}.
253         *
254         * @see #readAllLines(Readable)
255         *
256         * @param reader the CSV source reader. The reader is automatically closed
257         *        when the returned line stream is closed.
258         * @return the stream of CSV lines
259         * @throws NullPointerException if the given {@code reader} is {@code null}
260         */
261        public static Stream<String> lines(final Readable reader) {
262                return LineReader.DEFAULT.read(reader);
263        }
264
265        /**
266         * Splits the CSV file, given by the {@code reader}, into a  {@code Stream}
267         * of CSV rows. The CSV is split at line breaks, as long as they are not
268         * part of a quoted column. For reading the CSV lines, the default quote
269         * character, {@link Quote#DEFAULT}, is used. Then each line is split into
270         * its columns using the default separator character.
271         *
272         * @apiNote
273         * The returned stream must be closed by the caller, which also closes the
274         * CSV {@code reader}.
275         *
276         * @see #readAllRows(Readable)
277         *
278         * @param reader the CSV source reader. The reader is automatically closed
279         *        when the returned line stream is closed.
280         * @return the stream of CSV rows
281         * @throws NullPointerException if the given {@code reader} is {@code null}
282         */
283        public static Stream<String[]> rows(final Readable reader) {
284                final var splitter = new LineSplitter();
285                return lines(reader).map(splitter::split);
286        }
287
288        /**
289         * Splits the CSV file, given by the {@code reader}, into a  {@code List}
290         * of CSV lines. The CSV is split at line breaks, as long as they are not
291         * part of a quoted column. For reading the CSV lines, the default quote
292         * character, {@link Quote#DEFAULT}, is used.
293         *
294         * @see #lines(Readable)
295         *
296         * @param reader the reader stream to split into CSV lines
297         * @return the list of CSV lines
298         * @throws NullPointerException if the given {@code reader} is {@code null}
299         * @throws IOException if reading the CSV lines fails
300         */
301        public static List<String> readAllLines(final Readable reader)
302                throws IOException
303        {
304                try (var lines = lines(reader)) {
305                        return lines.toList();
306                } catch (UncheckedIOException e) {
307                        throw e.getCause();
308                }
309        }
310
311        /**
312         * Splits the CSV file, given by the {@code reader}, into a  {@code List}
313         * of CSV lines. The CSV is split at line breaks, as long as they are not
314         * part of a quoted column. For reading the CSV lines, the default quote
315         * character, {@link Quote#DEFAULT}, is used. Then each line is split into
316         * its columns using the default separator character.
317         *
318         * @see #rows(Readable)
319         *
320         * @param reader the reader stream to split into CSV lines
321         * @return the list of CSV rows
322         * @throws NullPointerException if the given {@code reader} is {@code null}
323         * @throws IOException if reading the CSV lines fails
324         */
325        public static List<String[]> readAllRows(final Readable reader)
326                throws IOException
327        {
328                try (var rows = rows(reader)) {
329                        return rows.toList();
330                } catch (UncheckedIOException e) {
331                        throw e.getCause();
332                }
333        }
334
335        /**
336         * Parses the given CSV string into a list of <em>records</em>. The records
337         * are created from a <em>row</em> ({@code String[]} array) by applying the
338         * given {@code mapper}.
339         *
340         * @param csv the CSV string to parse
341         * @param mapper the record mapper
342         * @return the parsed record list
343         * @param <T> the record type
344         */
345        public static <T> List<T> parse(
346                final CharSequence csv,
347                final Function<? super String[], ? extends T> mapper
348        ) {
349                requireNonNull(csv);
350                requireNonNull(mapper);
351
352                try (var rows = rows(CharBuffer.wrap(csv))) {
353                        return rows
354                                .map(mapper)
355                                .collect(Collectors.toUnmodifiableList());
356                }
357        }
358
359        /**
360         * Parses the given CSV string into a list of rows.
361         *
362         * @param csv the CSV string to parse
363         * @return the parsed CSV rows
364         */
365        public static List<String[]> parse(final CharSequence csv) {
366                return parse(csv, Function.identity());
367        }
368
369        /**
370         * Parses the given CSV string into a list of {@code double[]} array rows.
371         *
372         * @param csv the CSV string to parse
373         * @return the parsed double data
374         */
375        public static List<double[]> parseDoubles(final CharSequence csv) {
376                return parse(csv, CsvSupport::toDoubles);
377        }
378
379        private static double[] toDoubles(final String[] values) {
380                final var result = new double[values.length];
381                for (int i = 0; i < result.length; ++i) {
382                        result[i] = Double.parseDouble(values[i].trim());
383                }
384                return result;
385        }
386
387        /**
388         * Splits a given CSV {@code line} into columns. The default values for the
389         * separator and quote character are used ({@link Separator#DEFAULT},
390         * {@link Quote#DEFAULT}) for splitting the line.
391         *
392         * @param line the CSV line to split
393         * @return the split CSV lines
394         * @throws NullPointerException if the given {@code line} is {@code null}
395         */
396        public static String[] split(final CharSequence line) {
397                return new LineSplitter().split(line);
398        }
399
400        /**
401         * Joins the given CSV {@code columns} to one CSV line. The default values
402         * for the separator and quote character are used ({@link Separator#DEFAULT},
403         * {@link Quote#DEFAULT}) for joining the columns.
404         *
405         * @see #join(Object[])
406         *
407         * @param columns the CSV columns to join
408         * @return the CSV line, joined from the given {@code columns}
409         * @throws NullPointerException if the given {@code columns} is {@code null}
410         */
411        public static String join(final Iterable<?> columns) {
412                return ColumnJoiner.DEFAULT.join(columns);
413        }
414
415        /**
416         * Joins the given CSV {@code columns} to one CSV line. The default values
417         * for the separator and quote character are used ({@link Separator#DEFAULT},
418         * {@link Quote#DEFAULT}) for joining the columns.
419         *
420         * @see #join(Iterable)
421         *
422         * @param columns the CSV columns to join
423         * @return the CSV line, joined from the given {@code columns}
424         * @throws NullPointerException if the given {@code columns} is {@code null}
425         */
426        public static String join(final Object[] columns) {
427                return ColumnJoiner.DEFAULT.join(columns);
428        }
429
430        /**
431         * Joins the given CSV {@code columns} to one CSV line. The default values
432         * for the separator and quote character are used ({@link Separator#DEFAULT},
433         * {@link Quote#DEFAULT}) for joining the columns.
434         *
435         * @see #join(Iterable)
436         * @see #join(Object[])
437         *
438         * @param columns the CSV columns to join
439         * @return the CSV line, joined from the given {@code columns}
440         * @throws NullPointerException if the given {@code columns} is {@code null}
441         */
442        public static String join(final String... columns) {
443                return ColumnJoiner.DEFAULT.join(columns);
444        }
445
446        /**
447         * Converts the given {@code record} into its components.
448         *
449         * @param record the record to convert
450         * @return the record components
451         */
452        public static Object[] toComponents(final Record record) {
453                try {
454                        final var components = record.getClass().getRecordComponents();
455                        final var elements = new Object[components.length];
456                        for (int i = 0; i < elements.length; ++i) {
457                                elements[i] = components[i].getAccessor().invoke(record);
458                        }
459
460                        return elements;
461                } catch (ReflectiveOperationException e) {
462                        throw new IllegalArgumentException(e);
463                }
464        }
465
466        /**
467         * Return a collector for joining a list of CSV rows into one CSV string.
468         *
469         * @return a collector for joining a list of CSV rows into one CSV string
470         */
471        public static Collector<CharSequence, ?, String> toCsv() {
472                return toCsv(EOL);
473        }
474
475        /**
476         * Return a collector for joining a list of CSV rows into one CSV string.
477         * For the line breaks, the given {@code eol} sequence is used.
478         *
479         * @param eol the end of line sequence used for line breaks
480         * @return a collector for joining a list of CSV rows into one CSV string
481         */
482        public static Collector<CharSequence, ?, String> toCsv(String eol) {
483                if (eol.isEmpty()) {
484                        throw new IllegalArgumentException("EOL must not be empty.");
485                }
486                for (int i = 0; i < eol.length(); ++i) {
487                        if (!isLineBreak(eol.charAt(i))) {
488                                throw new IllegalArgumentException(
489                                        "EOl contains non-linebreak char: '%s'.".formatted(eol)
490                                );
491                        }
492                }
493
494                return Collectors.joining(eol, "", eol);
495        }
496
497
498        /* *************************************************************************
499         * Base CSV classes.
500         * ************************************************************************/
501
502        /**
503         * This class reads CSV files and splits it into lines. It takes a quote
504         * character as a parameter, which is necessary for not splitting on quoted
505         * line feeds.
506         * {@snippet lang="java":
507         * final var csv = """
508         *     0.0,0.0000
509         *     0.1,0.0740
510         *     0.2,0.1120
511         *     0.3,0.1380
512         *     0.4,0.1760
513         *     0.5,0.2500
514         *     0.6,0.3840
515         *     0.7,0.6020
516         *     0.8,0.9280
517         *     0.9,1.3860
518         *     1.0,2.0000
519         *     """;
520         *
521         * final var reader = new LineReader(new Quote('"'));
522         * try (Stream<String> lines = reader.read(CharBuffer.wrap(csv))) {
523         *     lines.forEach(System.out::println);
524         * }
525         * }
526         *
527         * @apiNote
         * This reader obeys <em>escaped</em> line breaks according to
529         * <a href="https://tools.ietf.org/html/rfc4180">RFC-4180</a>. It is
530         * thread-safe and can be shared between different reading threads.
531         *
532         * @version 8.1
533         * @since 8.1
534         */
535        public static final class LineReader {
536
537                private static final LineReader DEFAULT = new LineReader(Quote.DEFAULT);
538
539                private final Quote quote;
540
541                /**
542                 * Create a new line-reader with the given {@code quote} character,
543                 * which is used in the CSV file which is read.
544                 *
545                 * @param quote the quoting character
546                 * @throws NullPointerException if the {@code quote} character is
547                 *         {@code null}
548                 */
549                public LineReader(final Quote quote) {
550                        this.quote = requireNonNull(quote);
551                }
552
553                /**
554                 * Create a new line reader with default quote character {@code '"'}
555                 * ({@link Quote#DEFAULT}).
556                 */
557                public LineReader() {
558                        this(Quote.DEFAULT);
559                }
560
561                /**
562                 * Reads all CSV lines from the given {@code reader}.
563                 *
564                 * @apiNote
565                 * This method must be used within a try-with-resources statement or
566                 * similar control structure to ensure that the stream's open file is
567                 * closed promptly after the stream's operations have completed.
568                 *
569                 * @param readable the readable from which to read the CSV content
570                 * @return the CSV lines from the file as a {@code Stream}
571                 */
572                public Stream<String> read(final Readable readable) {
573                        requireNonNull(readable);
574
575                        final IOValue<Stream<String>> result = new IOValue<>(resources -> {
576                                final Readable rdr = resources.use(
577                                        readable,
578                                        resource -> {
579                                                if (resource instanceof AutoCloseable closeable) {
580                                                        try {
581                                                                closeable.close();
582                                                        } catch (IOException | RuntimeException | Error e) {
583                                                                throw e;
584                                                        } catch (Exception e) {
585                                                                throw new IOException(e);
586                                                        }
587                                                }
588                                        }
589                                );
590
591                                final var source = CharCursor.of(rdr);
592                                final var line = new CharAppender();
593
594                                final Supplier<String> nextLine = () -> {
595                                        line.reset();
596                                        try {
597                                                return nextLine(source, line) ? line.toString() : null;
598                                        } catch (IOException e) {
599                                                throw new UncheckedIOException(e);
600                                        }
601                                };
602
603                                return Stream.generate(nextLine)
604                                        .takeWhile(Objects::nonNull);
605                        });
606
607                        return result.get().onClose(() ->
608                                result.release(UncheckedIOException::new)
609                        );
610                }
611
612                private boolean nextLine(final CharCursor chars, final CharAppender line)
613                        throws IOException
614                {
615                        boolean quoted = false;
616                        boolean escaped = false;
617                        boolean eol = false;
618
619                        int next = -2;
620                        int i = 0;
621
622                        while (next >= 0 || (i = chars.next()) != -1) {
623                                final char current = next != -2 ? (char)next : (char)i;
624                                next = -2;
625
626                                if (isLineBreak(current)) {
627                                        if (quoted) {
628                                                line.append(current);
629                                        } else {
630                                                eol = true;
631                                        }
632                                } else if (current == quote.value) {
633                                        if (quoted) {
634                                                if (!escaped && (next = chars.next()) == quote.value) {
635                                                        escaped = true;
636                                                } else {
637                                                        if (escaped) {
638                                                                escaped = false;
639                                                        } else {
640                                                                quoted = false;
641                                                        }
642                                                }
643                                        } else {
644                                                quoted = true;
645                                        }
646                                        line.append(current);
647                                } else {
648                                        line.append(current);
649                                }
650
651                                if (eol) {
652                                        eol = false;
653                                        if (line.nonEmpty()) {
654                                                return true;
655                                        }
656                                }
657                        }
658
659                        if (quoted) {
660                                throw new IllegalArgumentException(
661                                        "Unbalanced quote character: '%s'."
662                                                .formatted(toString(line))
663                                );
664                        }
665                        return line.nonEmpty();
666                }
667
668                private static String toString(final Object value) {
669                        final var line = value.toString();
670                        return line.length() > 15 ? line.substring(0, 15) + "..." : line;
671                }
672        }
673
674        /**
675         * Splitting a CSV line into columns (records).
676         * <h2>Examples</h2>
677         * <b>Simple usage</b>
678         * {@snippet class="Snippets" region="LineSplitterSnippets.simpleSplit"}
679         *
680         * <b>Projecting and re-ordering columns</b>
681         * {@snippet class="Snippets" region="LineSplitterSnippets.projectingSplit"}
682         *
683         * @implNote
684         * The split {@code String[]} array will never contain {@code null} values.
685         * Empty columns will be returned as empty strings.
686         *
687         * @apiNote
         * A line splitter is <b>not</b> thread-safe and can't be shared between
689         * different threads.
690         *
691         * @version 8.1
692         * @since 8.1
693         */
694        public static final class LineSplitter {
695                private final Separator separator;
696                private final Quote quote;
697
698                private final ColumnList columns;
699                private final CharAppender column = new CharAppender();
700
701                /**
702                 * Create a new line splitter with the given parameters.
703                 *
704                 * @param separator the separator character used by the CSV line to split
705                 * @param quote the quote character used by the CSV line to split
706                 * @param projection the column indexes which should be part of the split
707                 *        result
708                 * @throws NullPointerException if one of the parameters is {@code null}
709                 */
710                public LineSplitter(
711                        final Separator separator,
712                        final Quote quote,
713                        final ColumnIndexes projection
714                ) {
715                        if (separator.value == quote.value) {
716                                throw new IllegalArgumentException(
717                                        "Separator and quote char must be different: %s == %s."
718                                                .formatted(separator.value, quote.value)
719                                );
720                        }
721
722                        this.separator = separator;
723                        this.quote = quote;
724                        this.columns = new ColumnList(projection);
725                }
726
                /**
                 * Create a new line splitter with the given separator and quote
                 * character. All columns will be part of the split result
                 * ({@link ColumnIndexes#ALL}).
                 *
                 * @param separator the separator character used by the CSV line to split
                 * @param quote the quote character used by the CSV line to split
                 * @throws NullPointerException if one of the parameters is {@code null}
                 * @throws IllegalArgumentException if the separator and the quote
                 *         character are the same
                 */
                public LineSplitter(final Separator separator, final Quote quote) {
                        this(separator, quote, ColumnIndexes.ALL);
                }
737
738                /**
739                 * Create a new line splitter with the given parameters. The default
740                 * quote character, {@link Quote#DEFAULT}, will be used by the created
741                 * splitter.
742                 *
743                 * @param separator the separator character used by the CSV line to split
744                 * @throws NullPointerException if one of the parameters is {@code null}
745                 */
746                public LineSplitter(final Separator separator) {
747                        this(separator, Quote.DEFAULT, ColumnIndexes.ALL);
748                }
749
750                /**
751                 * Create a new line splitter with the given parameters. The default
752                 * separator character, {@link Separator#DEFAULT}, will be used by the
753                 * created splitter.
754                 *
755                 * @param quote the quote character used by the CSV line to split
756                 * @throws NullPointerException if one of the parameters is {@code null}
757                 */
758                public LineSplitter(final Quote quote) {
759                        this(Separator.DEFAULT, quote, ColumnIndexes.ALL);
760                }
761
762                /**
763                 * Create a new line splitter with the given parameters. Only the defined
764                 * columns will be part of the split result and the default separator
765                 * character, {@link Separator#DEFAULT}, and default quote character,
766                 * {@link Quote#DEFAULT}, is used by the created splitter.
767                 *
768                 * @param projection the column indexes which should be part of the split
769                 *        result
770                 * @throws NullPointerException if one of the parameters is {@code null}
771                 */
772                public LineSplitter(final ColumnIndexes projection) {
773                        this(Separator.DEFAULT, Quote.DEFAULT, projection);
774                }
775
776                /**
777                 * Create a new line splitter with default values.
778                 */
779                public LineSplitter() {
780                        this(Separator.DEFAULT, Quote.DEFAULT, ColumnIndexes.ALL);
781                }
782
783                /**
784                 * Splitting the given CSV {@code line} into its columns.
785                 *
786                 * @implNote
787                 * The split {@code String[]} array will never contain {@code null} values.
788                 * Empty columns will be returned as empty strings.
789                 *
790                 * @param line the CSV line to split
791                 * @return the split CSV columns
792                 * @throws NullPointerException if the CSV {@code line} is {@code null}
793                 */
794                public String[] split(final CharSequence line) {
795                        columns.clear();
796                        column.reset();
797
798                        boolean quoted = false;
799                        boolean escaped = false;
800                        boolean full = false;
801
802                        int quoteIndex = 0;
803
804                        for (int i = 0, n = line.length(); i < n && !full; ++i) {
805                                final int previous = i > 0 ? line.charAt(i - 1) : -1;
806                                final char current = line.charAt(i);
807                                final int next = i + 1 < line.length() ? line.charAt(i + 1) : -1;
808
809                                if (current == quote.value) {
810                                        if (quoted) {
811                                                if (!escaped && quote.value == next) {
812                                                        escaped = true;
813                                                } else {
814                                                        if (escaped) {
815                                                                column.append(quote.value);
816                                                                escaped = false;
817                                                        } else {
818                                                                if (next != -1 && separator.value != next) {
819                                                                        throw new IllegalArgumentException("""
820                                                                                Only separator character, '%s', allowed \
821                                                                                after quote, but found '%c':
822                                                                                %s
823                                                                                """.formatted(
824                                                                                        separator.value,
825                                                                                next,
826                                                                                        toErrorDesc(line, i + 1)
827                                                                                )
828                                                                        );
829                                                                }
830
831                                                                add(column);
832                                                                full = columns.isFull();
833                                                                quoted = false;
834                                                        }
835                                                }
836                                        } else {
837                                                if (previous != -1 && separator.value != previous) {
838                                                        throw new IllegalArgumentException("""
839                                                                Only separator character, '%s', allowed before \
840                                                                quote, but found '%c':
841                                                                %s
842                                                                """.formatted(
843                                                                        separator.value,
844                                                                previous,
845                                                                        toErrorDesc(line, Math.max(i - 1, 0))
846                                                                )
847                                                        );
848                                                }
849
850                                                quoted = true;
851                                                quoteIndex = i;
852                                        }
853                                } else if (current == separator.value) {
854                                        if (quoted) {
855                                                column.append(current);
856                                        } else if (separator.value == previous || previous == -1) {
857                                                add(column);
858                                                full = columns.isFull();
859                                        }
860                                } else {
861                                        // Read till the next token separator.
862                                        int j = i;
863                                        char c;
864                                        while (j < line.length() && !isTokenSeparator(c = line.charAt(j))) {
865                                                column.append(c);
866                                                ++j;
867                                        }
868                                        if (j != i - 1) {
869                                                i = j - 1;
870                                        }
871
872                                        if (!quoted) {
873                                                add(column);
874                                                full = columns.isFull();
875                                        }
876                                }
877                        }
878
879                        if (quoted) {
880                                throw new IllegalArgumentException("""
881                                        Unbalanced quote character.
882                                        %s
883                                        """.formatted(toErrorDesc(line, quoteIndex))
884                                );
885                        }
886                        if (line.isEmpty() ||
887                                separator.value == line.charAt(line.length() - 1))
888                        {
889                                add(column);
890                        }
891
892                        return columns.toArray();
893                }
894
895                private void add(final CharAppender column) {
896                        columns.add(column.toString());
897                        column.reset();
898                }
899
900                private boolean isTokenSeparator(final char c) {
901                        return c == separator.value || c == quote.value;
902                }
903
904                private static String toErrorDesc(final CharSequence line, final int pos) {
905                        return """
906                                %s
907                                %s
908                                """.formatted(
909                                        line.toString().stripTrailing(),
910                                        " ".repeat(pos) + "^"
911                                );
912                }
913        }
914
915
916        /**
917         * Column collection, which is backed up by a string list.
918         */
919        static final class ColumnList {
920                private final StringList columns = new StringList();
921                private final ColumnIndexes projection;
922
923                private int index = 0;
924                private int count = 0;
925
926                ColumnList(final ColumnIndexes projection) {
927                        this.projection = requireNonNull(projection);
928                }
929
930                /**
931                 * Appends a {@code column} to the column collection.
932                 *
933                 * @param column the column to add
934                 */
935                void add(String column) {
936                        if (!isFull()) {
937                                count += set(column, index++);
938                        }
939                }
940
941                private int set(String element, int column) {
942                        int updated = 0;
943
944                        if (projection.values.length == 0) {
945                                columns.add(element);
946                                ++updated;
947                        } else {
948                                int pos = -1;
949                                while ((pos = indexOf(projection.values, pos + 1, column)) != -1) {
950                                        for (int i = columns.size(); i <= pos; ++i) {
951                                                columns.add(null);
952                                        }
953                                        columns.set(pos, element);
954                                        ++updated;
955                                }
956                        }
957
958                        return updated;
959                }
960
961                private static int indexOf(int[] array, int start, int value) {
962                        for (int i = start; i < array.length; ++i) {
963                                if (array[i] == value) {
964                                        return i;
965                                }
966                        }
967
968                        return -1;
969                }
970
971                /**
972                 * Checks whether another column can be added.
973                 *
974                 * @return {@code true} if another column can be added to this
975                 *         collection, {@code false} otherwise
976                 */
977                boolean isFull() {
978                        return
979                                projection.values.length > 0 &&
980                                projection.values.length <= count;
981                }
982
983                /**
984                 * Removes all columns.
985                 */
986                public void clear() {
987                        columns.clear();
988                        index = 0;
989                        count = 0;
990                }
991
992                String[] toArray() {
993                        for (int i = columns.size(); i < projection.values.length; ++i) {
994                                columns.add(null);
995                        }
996                        return columns.toArray();
997                }
998
999        }
1000
1001        /**
1002         * This class joins an array of columns into one CSV line.
1003         *
1004         * <h2>Examples</h2>
1005         * <b>Simple usage</b>
1006         * {@snippet class="Snippets" region="ColumnJoinerSnippets.simpleJoin"}
1007         *
1008         * <b>Embedding and re-ordering data</b>
1009         * {@snippet class="Snippets" region="ColumnJoinerSnippets.embedToCsv"}
1010         *
1011         * @apiNote
1012         * The column joiner is <em>thread-safe</em> and can be shared between
1013         * different threads.
1014         *
1015         * @version 8.1
1016         * @since 8.1
1017         */
1018        public static final class ColumnJoiner {
1019
1020                /**
1021                 * Default column joiner, which is using default separator character,
1022                 * {@link Separator#DEFAULT}, and default quote character,
1023                 * {@link Quote#DEFAULT}.
1024                 */
1025                public static final ColumnJoiner DEFAULT = new ColumnJoiner(
1026                        Separator.DEFAULT,
1027                        Quote.DEFAULT,
1028                        ColumnIndexes.ALL
1029                );
1030
1031                /**
1032                 * The CSV line splitter parameter.
1033                 *
1034                 * @param separator the column separator char
1035                 * @param quote the qute char
1036                 * @param embedding the column indices to read. If empty, all split
1037                 *        columns are used.
1038                 */
1039                private record Param(char separator, char quote, int... embedding) {
1040
1041                        private String escape(Object value) {
1042                                final var quoteStr = String.valueOf(quote);
1043
1044                                if (value == null) {
1045                                        return "";
1046                                } else {
1047                                        var stringValue = value.toString();
1048                                        var string = stringValue.replace(quoteStr, quoteStr + quoteStr);
1049
1050                                        if (stringValue.length() != string.length() || mustEscape(string)) {
1051                                                return quoteStr + string + quoteStr;
1052                                        } else {
1053                                                return stringValue;
1054                                        }
1055                                }
1056                        }
1057
1058                        private boolean mustEscape(CharSequence value) {
1059                                for (int i = 0; i < value.length(); ++i) {
1060                                        final char c = value.charAt(i);
1061                                        if (c == separator || isLineBreak(c)) {
1062                                                return true;
1063                                        }
1064                                }
1065                                return false;
1066                        }
1067                }
1068
1069                private final Param param;
1070                private final int columnCount;
1071
1072                /**
1073                 * Create a new column joiner with the given parameters.
1074                 *
1075                 * @param separator the CSV separator character used by the joiner
1076                 * @param quote the CSV quote character used by the joiner
1077                 * @param embedding the column indexes to join
1078                 * @throws NullPointerException if one of the parameters is {@code null}
1079                 */
1080                public ColumnJoiner(
1081                        final Separator separator,
1082                        final Quote quote,
1083                        final ColumnIndexes embedding
1084                ) {
1085                        if (separator.value == quote.value) {
1086                                throw new IllegalArgumentException(
1087                                        "Separator and quote char must be different: %s == %s."
1088                                                .formatted(separator.value, quote.value)
1089                                );
1090                        }
1091
1092                        param = new Param(separator.value, quote.value, embedding.values);
1093                        columnCount = Math.max(max(param.embedding) + 1, 0);
1094                }
1095
1096                /**
1097                 * Create a new column joiner with the given parameters.
1098                 *
1099                 * @param separator the CSV separator character used by the joiner
1100                 * @param quote the CSV quote character used by the joiner
1101                 * @throws NullPointerException if one of the parameters is {@code null}
1102                 */
1103                public ColumnJoiner(final Separator separator, final Quote quote) {
1104                        this(separator, quote, ColumnIndexes.ALL);
1105                }
1106
1107                /**
1108                 * Create a new column joiner with the given parameters.
1109                 *
1110                 * @param separator the CSV separator character used by the joiner
1111                 * @throws NullPointerException if one of the parameters is {@code null}
1112                 */
1113                public ColumnJoiner(final Separator separator) {
1114                        this(separator, Quote.DEFAULT, ColumnIndexes.ALL);
1115                }
1116
1117                /**
1118                 * Create a new column joiner with the given parameters.
1119                 *
1120                 * @param separator the CSV separator character used by the joiner
1121                 * @param embedding the column indexes to join
1122                 * @throws NullPointerException if one of the parameters is {@code null}
1123                 */
1124                public ColumnJoiner(final Separator separator, final ColumnIndexes embedding) {
1125                        this(separator, Quote.DEFAULT, embedding);
1126                }
1127
1128
1129                /**
1130                 * Create a new column joiner with the given parameters.
1131                 *
1132                 * @param quote the CSV quote character used by the joiner
1133                 * @throws NullPointerException if one of the parameters is {@code null}
1134                 */
1135                public ColumnJoiner(final Quote quote) {
1136                        this(Separator.DEFAULT, quote, ColumnIndexes.ALL);
1137                }
1138
1139                /**
1140                 * Create a new column joiner with the given <em>embedding</em> column
1141                 * indexes.
1142                 *
1143                 * @param embedding the embedding column indexes
1144                 */
1145                public ColumnJoiner(final ColumnIndexes embedding) {
1146                        this(Separator.DEFAULT, Quote.DEFAULT, embedding);
1147                }
1148
1149                /**
1150                 * Create a new column joiner with the given parameters.
1151                 *
1152                 * @param quote the CSV quote character used by the joiner
1153                 * @param embedding the column indexes to join
1154                 * @throws NullPointerException if one of the parameters is {@code null}
1155                 */
1156                public ColumnJoiner(final Quote quote, final ColumnIndexes embedding) {
1157                        this(Separator.DEFAULT, quote, embedding);
1158                }
1159
1160                private static int max(int[] array) {
1161                        int max = Integer.MIN_VALUE;
1162                        for (int value : array) {
1163                                if (value > max) {
1164                                        max = value;
1165                                }
1166                        }
1167                        return max;
1168                }
1169
1170                /**
1171                 * Joins the given CSV {@code columns}, using the given separator and
1172                 * quote character.
1173                 *
1174                 * @param columns the CSV columns to join
1175                 * @return the joined CSV columns
1176                 */
1177                public String join(final Iterable<?> columns) {
1178                        if (param.embedding.length == 0) {
1179                                return join0(columns);
1180                        } else {
1181                                final var values = new Object[columnCount];
1182                                final var it = columns.iterator();
1183                                int i = 0;
1184                                while (it.hasNext() && i < param.embedding.length) {
1185                                        final var col = it.next();
1186                                        final var index = param.embedding[i++];
1187                                        if (index >= 0) {
1188                                                values[index] = col;
1189                                        }
1190                                }
1191
1192                                return join0(Arrays.asList(values));
1193                        }
1194                }
1195
1196                private String join0(final Iterable<?> cols) {
1197                        final var row = new StringBuilder();
1198                        final var it = cols.iterator();
1199                        while (it.hasNext()) {
1200                                final var column = it.next();
1201                                row.append(param.escape(column));
1202                                if (it.hasNext()) {
1203                                        row.append(param.separator);
1204                                }
1205                        }
1206
1207                        return row.toString();
1208                }
1209
1210                /**
1211                 * Joins the given CSV {@code columns}, using the given separator and
1212                 * quote character.
1213                 *
1214                 * @param columns the CSV columns to join
1215                 * @return the joined CSV columns
1216                 */
1217                public String join(final Object[] columns) {
1218                        return join(Arrays.asList(columns));
1219                }
1220        }
1221
1222        /**
1223         * Character source interface.
1224         *
1225         * @since 8.2
1226         * @version 8.2
1227         */
1228        sealed interface CharCursor {
1229                /**
1230                 * Return the next character or -1 if there is no one.
1231                 *
1232                 * @return the next character or -1 if there is no one
1233                 * @throws IOException if reading the next character failed
1234                 */
1235                int next() throws IOException;
1236
1237                /**
1238                 * Return the correct kind of {@code CharCursor}, depending on the
1239                 * given {@code readable} type
1240                 *
1241                 * @param readable the character source
1242                 * @return a new character cursor
1243                 */
1244                static CharCursor of(final Readable readable) {
1245                        return readable instanceof CharBuffer cb
1246                                ? new CharBufferCharCursor(cb)
1247                                : new ReadableCharCursor(readable);
1248                }
1249        }
1250
1251        /**
1252         * Cursor <em>view</em> on a readable object.
1253         *
1254         * @since 8.2
1255         * @version 8.2
1256         */
1257        static final class ReadableCharCursor implements CharCursor {
1258                private static final int SIZE = 1024;
1259                private final Readable readable;
1260                private final CharBuffer buffer;
1261
1262                ReadableCharCursor(final Readable readable) {
1263                        this.readable = requireNonNull(readable);
1264                        this.buffer = CharBuffer.allocate(SIZE).flip();
1265                }
1266
1267                @Override
1268                public int next() throws IOException {
1269                        if (!buffer.hasRemaining()) {
1270                                if (!fill()) {
1271                                        return -1;
1272                                }
1273                        }
1274
1275                        return buffer.get();
1276                }
1277
1278                private boolean fill() throws IOException {
1279                        int n;
1280                        int i = 0;
1281                        buffer.clear();
1282                        do {
1283                                n = readable.read(buffer);
1284                        } while (n == 0 && i++ < 1000); // Make sure re-read will terminate.
1285                        buffer.flip();
1286
1287                        return n > 0;
1288                }
1289        }
1290
1291        /**
1292         * Cursor <em>view</em> on a character buffer.
1293         *
1294         * @since 8.2
1295         * @version 8.2
1296         */
1297        static final class CharBufferCharCursor implements CharCursor {
1298                private final CharBuffer buffer;
1299
1300                CharBufferCharCursor(final CharBuffer buffer) {
1301                        this.buffer = requireNonNull(buffer);
1302                }
1303
1304                @Override
1305                public int next() {
1306                        if (!buffer.hasRemaining()) {
1307                                return -1;
1308                        }
1309                        return buffer.get();
1310                }
1311        }
1312
1313        /**
1314         * Allows appending chars in bulks to {@link StringBuilder}.
1315         *
1316         * @since 8.2
1317         * @version 8.2
1318         */
1319        static final class CharAppender {
1320                private static final int SIZE = 32;
1321
1322                private char[] buffer = new char[SIZE];
1323                private int index = 0;
1324
1325                CharAppender() {
1326                }
1327
1328                boolean nonEmpty() {
1329                        return index != 0;
1330                }
1331
1332                void append(final char c) {
1333                        if (index == buffer.length) {
1334                                increaseSize(buffer.length*2);
1335                        }
1336
1337                        buffer[index++] = c;
1338                }
1339
1340                @Override
1341                public String toString() {
1342                        return String.valueOf(buffer, 0, index);
1343                }
1344
1345                void reset() {
1346                        index = 0;
1347                }
1348
1349                private void increaseSize(final int newSize) {
1350                        final char[] newBuffer = new char[newSize];
1351                        System.arraycopy(buffer, 0, newBuffer, 0, index);
1352                        buffer = newBuffer;
1353                }
1354        }
1355
1356        /**
1357         * Simple growing list of strings.
1358         *
1359         * @since 8.2
1360         * @version 8.2
1361         */
1362        static final class StringList {
1363                private static final int SIZE = 16;
1364                private String[] elements;
1365                private int size;
1366
1367                StringList() {
1368                        size = 0;
1369                        elements = new String[SIZE];
1370                }
1371
1372                public int size() {
1373                        return size;
1374                }
1375
1376                public void add(final String value) {
1377                        if (size == elements.length) {
1378                                increaseSize(elements.length*2);
1379                        }
1380                        elements[size++] = value;
1381                }
1382
1383                public void set(final int index, final String value) {
1384                        elements[index] = value;
1385                }
1386
1387                public void clear() {
1388                        size = 0;
1389                }
1390
1391                public String[] toArray() {
1392                        final var result = new String[size];
1393                        System.arraycopy(elements, 0, result, 0, size);
1394                        return result;
1395                }
1396
1397                private void increaseSize(final int newSize) {
1398                        final String[] newElements = new String[newSize];
1399                        System.arraycopy(elements, 0, newElements, 0, size);
1400                        elements = newElements;
1401                }
1402
1403        }
1404
1405}
1406
1407