001/*
002 * Java Genetic Algorithm Library (jenetics-8.2.0).
003 * Copyright (c) 2007-2025 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020package io.jenetics.ext.util;
021
022import static java.util.Objects.requireNonNull;
023
024import java.io.IOException;
025import java.io.UncheckedIOException;
026import java.nio.CharBuffer;
027import java.util.Arrays;
028import java.util.List;
029import java.util.Objects;
030import java.util.function.Function;
031import java.util.function.Supplier;
032import java.util.stream.Collector;
033import java.util.stream.Collectors;
034import java.util.stream.Stream;
035
036import io.jenetics.internal.util.Lifecycle.IOValue;
037
038/**
039 * This class contains helper classes, which are the building blocks for handling
040 * CSV files.
041 * <ul>
 *     <li>{@link LineReader}: This class allows you to read the lines of a
 *     CSV file. The result is a {@link Stream} of CSV lines, which are not
 *     yet split into columns.</li>
045 *     <li>{@link LineSplitter}: This class is responsible for splitting one
046 *     CSV line into column values.</li>
047 *     <li>{@link ColumnIndexes}: Allows to define the projection/embedding of
048 *     the split/joined column values.</li>
049 *     <li>{@link ColumnJoiner}: Joining a column array into a CSV line, which
050 *     can be joined into a whole CSV string.</li>
051 * </ul>
052 * <p>
053 * Additionally, this class contains a set of helper methods for CSV handling
054 * using default configurations.
055 * <p>
056 * <b>Reading and splitting CSV lines</b>
057 * {@snippet class="Snippets" region="readRows"}
058 * <p>
059 * <b>Joining columns and creating CSV string</b>
060 * {@snippet class="Snippets" region="CsvSupportSnippets.collect"}
061 * <p>
062 * <b>Parsing CSV string</b>
063 * {@snippet class="Snippets" region="parseCsv"}
064 * <p>
065 * <b>Parsing double values, given as CSV string</b>
066 * <p>
067 * Another example is to parse double values, which are given as CSV string and
068 * use this data for running a regression analysis.
069 * {@snippet class="Snippets" region="DoublesParsingSnippets.parseDoubles"}
070 *
071 * @see <a href="https://tools.ietf.org/html/rfc4180">RFC-4180</a>
072 *
073 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
074 * @version 8.2
075 * @since 8.1
076 */
077public final class CsvSupport {
078
079        /**
080         * Holds the CSV column <em>separator</em> character.
081         *
082         * @param value the separator character
083         *
084         * @version 8.1
085         * @since 8.1
086         */
087        public record Separator(char value) {
088
089                /**
090                 * The default separator character, '{@code ,}'.
091                 */
092                public static final Separator DEFAULT = new Separator(',');
093
094                /**
095                 * Creates a new Separator char object.
096                 *
097                 * @param value the separator character
098                 * @throws IllegalArgumentException if the given separator character is
099                 *         a line break character
100                 */
101                public Separator {
102                        if (isLineBreak(value)) {
103                                throw new IllegalArgumentException(
104                                        "Given separator char is a line break character."
105                                );
106                        }
107                }
108        }
109
110        /**
111         * Holds the CSV column <em>quote</em> character. The following excerpt from
112         * <a href="https://tools.ietf.org/html/rfc4180">RFC-4180</a> defines when
113         * a quote character has to be used.
114         * <pre>
115         *     5.  Each field may or may not be enclosed in double quotes (however
116         *         some programs, such as Microsoft Excel, do not use double quotes
117         *         at all).  If fields are not enclosed with double quotes, then
118         *         double quotes may not appear inside the fields.  For example:
119         *
120         *         "aaa","bbb","ccc" CRLF
121         *         zzz,yyy,xxx
122         *
123         *     6.  Fields containing line breaks (CRLF), double quotes, and commas
124         *         should be enclosed in double-quotes.  For example:
125         *
126         *         "aaa","b CRLF
127         *         bb","ccc" CRLF
128         *         zzz,yyy,xxx
129         *
130         *     7.  If double-quotes are used to enclose fields, then a double-quote
131         *         appearing inside a field must be escaped by preceding it with
132         *         another double quote.  For example:
133         *
134         *         "aaa","b""bb","ccc"
135         * </pre>
136         *
137         * @param value the quote character
138         *
139         * @version 8.1
140         * @since 8.1
141         */
142        public record Quote(char value) {
143
144                /**
145                 * The default quote character, '{@code "}'.
146                 */
147                public static final Quote DEFAULT = new Quote('"');
148
149                /**
150                 * The zero '\0' character.
151                 */
152                public static final Quote ZERO = new Quote('\0');
153
154                /**
155                 * Creates a new Quote char object.
156                 *
157                 * @param value the quote character
158                 * @throws IllegalArgumentException if the given quote character is
159                 *         a line break character
160                 */
161                public Quote {
162                        if (isLineBreak(value)) {
163                                throw new IllegalArgumentException(
164                                        "Given quote char is a line break character."
165                                );
166                        }
167                }
168        }
169
170        /**
171         * Holds the column indexes, which should be part of the split or join
172         * operation. When used in the {@link LineSplitter}, it lets you filter the
173         * split column and define its order. When used in the {@link ColumnJoiner},
174         * it can be used to define the column index in the resulting CSV for a
175         * given row array.
176         *
177         * @apiNote
178         * The column indexes is <em>thread-safe</em> and can be shared between
179         * different threads.
180         *
181         * @see LineSplitter
182         * @see ColumnJoiner
183         *
184         * @param values the column indexes which are part of the split result
185         *
186         * @version 8.1
187         * @since 8.1
188         */
189        public record ColumnIndexes(int... values) {
190
191                /**
192                 * Indicating that <em>all</em> columns should be part of the split
193                 * result.
194                 */
195                public static final ColumnIndexes ALL = new ColumnIndexes();
196
197                /**
198                 * Create a new column indexes object.
199                 *
200                 * @param values the column indexes
201                 */
202                public ColumnIndexes {
203                        values = values.clone();
204                }
205
206                @Override
207                public int[] values() {
208                        return values.clone();
209                }
210
211                @Override
212                public int hashCode() {
213                        return Arrays.hashCode(values);
214                }
215
216                @Override
217                public boolean equals(final Object obj) {
218                        return obj == this ||
219                                obj instanceof ColumnIndexes ci &&
220                                Arrays.equals(values, ci.values);
221                }
222
223                @Override
224                public String toString() {
225                        return Arrays.toString(values);
226                }
227        }
228
        /**
         * The newline string used for writing the CSV file: {@code \r\n}
         * (carriage return followed by line feed). This is the line separator
         * applied by the no-argument {@link #toCsv()} collector.
         */
        public static final String EOL = "\r\n";
233
234
        // Private constructor: prevents the creation of instances of this
        // helper class from outside.
        private CsvSupport() {
        }
237
238        private static boolean isLineBreak(final char c) {
239                return switch (c) {
240                        case '\n', '\r' -> true;
241                        default -> false;
242                };
243        }
244
245        /**
246         * Splits the CSV file, given by the {@code reader}, into a  {@link Stream}
247         * of CSV lines. The CSV is split at line breaks, as long as they are not
248         * part of a quoted column. For reading the CSV lines, the default quote
249         * character, {@link Quote#DEFAULT}, is used.
250         *
251         * @apiNote
252         * The returned stream must be closed by the caller, which also closes the
253         * CSV {@code reader}.
254         *
255         * @see #readAllLines(Readable)
256         *
257         * @param reader the CSV source reader. The reader is automatically closed
258         *        when the returned line stream is closed.
259         * @return the stream of CSV lines
260         * @throws NullPointerException if the given {@code reader} is {@code null}
261         */
262        public static Stream<String> lines(final Readable reader) {
263                return LineReader.DEFAULT.read(reader);
264        }
265
266        /**
267         * Splits the CSV file, given by the {@code reader}, into a  {@code Stream}
268         * of CSV rows. The CSV is split at line breaks, as long as they are not
269         * part of a quoted column. For reading the CSV lines, the default quote
270         * character, {@link Quote#DEFAULT}, is used. Then each line is split into
271         * its columns using the default separator character.
272         *
273         * @apiNote
274         * The returned stream must be closed by the caller, which also closes the
275         * CSV {@code reader}.
276         *
277         * @see #readAllRows(Readable)
278         *
279         * @param reader the CSV source reader. The reader is automatically closed
280         *        when the returned line stream is closed.
281         * @return the stream of CSV rows
282         * @throws NullPointerException if the given {@code reader} is {@code null}
283         */
284        public static Stream<String[]> rows(final Readable reader) {
285                final var splitter = new LineSplitter();
286                return lines(reader).map(splitter::split);
287        }
288
289        /**
290         * Splits the CSV file, given by the {@code reader}, into a  {@code List}
291         * of CSV lines. The CSV is split at line breaks, as long as they are not
292         * part of a quoted column. For reading the CSV lines, the default quote
293         * character, {@link Quote#DEFAULT}, is used.
294         *
295         * @see #lines(Readable)
296         *
297         * @param reader the reader stream to split into CSV lines
298         * @return the list of CSV lines
299         * @throws NullPointerException if the given {@code reader} is {@code null}
300         * @throws IOException if reading the CSV lines fails
301         */
302        public static List<String> readAllLines(final Readable reader)
303                throws IOException
304        {
305                try (var lines = lines(reader)) {
306                        return lines.toList();
307                } catch (UncheckedIOException e) {
308                        throw e.getCause();
309                }
310        }
311
312        /**
313         * Splits the CSV file, given by the {@code reader}, into a  {@code List}
314         * of CSV lines. The CSV is split at line breaks, as long as they are not
315         * part of a quoted column. For reading the CSV lines, the default quote
316         * character, {@link Quote#DEFAULT}, is used. Then each line is split into
317         * its columns using the default separator character.
318         *
319         * @see #rows(Readable)
320         *
321         * @param reader the reader stream to split into CSV lines
322         * @return the list of CSV rows
323         * @throws NullPointerException if the given {@code reader} is {@code null}
324         * @throws IOException if reading the CSV lines fails
325         */
326        public static List<String[]> readAllRows(final Readable reader)
327                throws IOException
328        {
329                try (var rows = rows(reader)) {
330                        return rows.toList();
331                } catch (UncheckedIOException e) {
332                        throw e.getCause();
333                }
334        }
335
336        /**
337         * Parses the given CSV string into a list of <em>records</em>. The records
338         * are created from a <em>row</em> ({@code String[]} array) by applying the
339         * given {@code mapper}.
340         *
341         * @param csv the CSV string to parse
342         * @param mapper the record mapper
343         * @return the parsed record list
344         * @param <T> the record type
345         */
346        public static <T> List<T> parse(
347                final CharSequence csv,
348                final Function<? super String[], ? extends T> mapper
349        ) {
350                requireNonNull(csv);
351                requireNonNull(mapper);
352
353                try (var rows = rows(CharBuffer.wrap(csv))) {
354                        return rows
355                                .map(mapper)
356                                .collect(Collectors.toUnmodifiableList());
357                }
358        }
359
360        /**
361         * Parses the given CSV string into a list of rows.
362         *
363         * @param csv the CSV string to parse
364         * @return the parsed CSV rows
365         */
366        public static List<String[]> parse(final CharSequence csv) {
367                return parse(csv, Function.identity());
368        }
369
370        /**
371         * Parses the given CSV string into a list of {@code double[]} array rows.
372         *
373         * @param csv the CSV string to parse
374         * @return the parsed double data
375         */
376        public static List<double[]> parseDoubles(final CharSequence csv) {
377                return parse(csv, CsvSupport::toDoubles);
378        }
379
380        private static double[] toDoubles(final String[] values) {
381                final var result = new double[values.length];
382                for (int i = 0; i < result.length; ++i) {
383                        result[i] = Double.parseDouble(values[i].trim());
384                }
385                return result;
386        }
387
388        /**
389         * Splits a given CSV {@code line} into columns. The default values for the
390         * separator and quote character are used ({@link Separator#DEFAULT},
391         * {@link Quote#DEFAULT}) for splitting the line.
392         *
393         * @param line the CSV line to split
394         * @return the split CSV lines
395         * @throws NullPointerException if the given {@code line} is {@code null}
396         */
397        public static String[] split(final CharSequence line) {
398                return new LineSplitter().split(line);
399        }
400
401        /**
402         * Joins the given CSV {@code columns} to one CSV line. The default values
403         * for the separator and quote character are used ({@link Separator#DEFAULT},
404         * {@link Quote#DEFAULT}) for joining the columns.
405         *
406         * @see #join(Object[])
407         *
408         * @param columns the CSV columns to join
409         * @return the CSV line, joined from the given {@code columns}
410         * @throws NullPointerException if the given {@code columns} is {@code null}
411         */
412        public static String join(final Iterable<?> columns) {
413                return ColumnJoiner.DEFAULT.join(columns);
414        }
415
416        /**
417         * Joins the given CSV {@code columns} to one CSV line. The default values
418         * for the separator and quote character are used ({@link Separator#DEFAULT},
419         * {@link Quote#DEFAULT}) for joining the columns.
420         *
421         * @see #join(Iterable)
422         *
423         * @param columns the CSV columns to join
424         * @return the CSV line, joined from the given {@code columns}
425         * @throws NullPointerException if the given {@code columns} is {@code null}
426         */
427        public static String join(final Object[] columns) {
428                return ColumnJoiner.DEFAULT.join(columns);
429        }
430
431        /**
432         * Joins the given CSV {@code columns} to one CSV line. The default values
433         * for the separator and quote character are used ({@link Separator#DEFAULT},
434         * {@link Quote#DEFAULT}) for joining the columns.
435         *
436         * @see #join(Iterable)
437         * @see #join(Object[])
438         *
439         * @param columns the CSV columns to join
440         * @return the CSV line, joined from the given {@code columns}
441         * @throws NullPointerException if the given {@code columns} is {@code null}
442         */
443        public static String join(final String... columns) {
444                return ColumnJoiner.DEFAULT.join(columns);
445        }
446
447        /**
448         * Converts the given {@code record} into its components.
449         *
450         * @param record the record to convert
451         * @return the record components
452         */
453        public static Object[] toComponents(final Record record) {
454                try {
455                        final var components = record.getClass().getRecordComponents();
456                        final var elements = new Object[components.length];
457                        for (int i = 0; i < elements.length; ++i) {
458                                elements[i] = components[i].getAccessor().invoke(record);
459                        }
460
461                        return elements;
462                } catch (ReflectiveOperationException e) {
463                        throw new IllegalArgumentException(e);
464                }
465        }
466
467        /**
468         * Return a collector for joining a list of CSV rows into one CSV string.
469         *
470         * @return a collector for joining a list of CSV rows into one CSV string
471         */
472        public static Collector<CharSequence, ?, String> toCsv() {
473                return toCsv(EOL);
474        }
475
476        /**
477         * Return a collector for joining a list of CSV rows into one CSV string.
478         * For the line breaks, the given {@code eol} sequence is used.
479         *
480         * @param eol the end of line sequence used for line breaks
481         * @return a collector for joining a list of CSV rows into one CSV string
482         */
483        public static Collector<CharSequence, ?, String> toCsv(String eol) {
484                if (eol.isEmpty()) {
485                        throw new IllegalArgumentException("EOL must not be empty.");
486                }
487                for (int i = 0; i < eol.length(); ++i) {
488                        if (!isLineBreak(eol.charAt(i))) {
489                                throw new IllegalArgumentException(
490                                        "EOl contains non-linebreak char: '%s'.".formatted(eol)
491                                );
492                        }
493                }
494
495                return Collectors.joining(eol, "", eol);
496        }
497
498
499        /* *************************************************************************
500         * Base CSV classes.
501         * ************************************************************************/
502
503        /**
504         * This class reads CSV files and splits it into lines. It takes a quote
505         * character as a parameter, which is necessary for not splitting on quoted
506         * line feeds.
507         * {@snippet lang="java":
508         * final var csv = """
509         *     0.0,0.0000
510         *     0.1,0.0740
511         *     0.2,0.1120
512         *     0.3,0.1380
513         *     0.4,0.1760
514         *     0.5,0.2500
515         *     0.6,0.3840
516         *     0.7,0.6020
517         *     0.8,0.9280
518         *     0.9,1.3860
519         *     1.0,2.0000
520         *     """;
521         *
522         * final var reader = new LineReader(new Quote('"'));
523         * try (Stream<String> lines = reader.read(CharBuffer.wrap(csv))) {
524         *     lines.forEach(System.out::println);
525         * }
526         * }
527         *
528         * @apiNote
529         * This reader obeys <em>escaped</em> line breaks according
530         * <a href="https://tools.ietf.org/html/rfc4180">RFC-4180</a>. It is
531         * thread-safe and can be shared between different reading threads.
532         *
533         * @version 8.1
534         * @since 8.1
535         */
536        public static final class LineReader {
537
538                private static final LineReader DEFAULT = new LineReader(Quote.DEFAULT);
539
540                private final Quote quote;
541
542                /**
543                 * Create a new line-reader with the given {@code quote} character,
544                 * which is used in the CSV file which is read.
545                 *
546                 * @param quote the quoting character
547                 * @throws NullPointerException if the {@code quote} character is
548                 *         {@code null}
549                 */
550                public LineReader(final Quote quote) {
551                        this.quote = requireNonNull(quote);
552                }
553
554                /**
555                 * Create a new line reader with default quote character {@code '"'}
556                 * ({@link Quote#DEFAULT}).
557                 */
558                public LineReader() {
559                        this(Quote.DEFAULT);
560                }
561
562                /**
563                 * Reads all CSV lines from the given {@code reader}.
564                 *
565                 * @apiNote
566                 * This method must be used within a try-with-resources statement or
567                 * similar control structure to ensure that the stream's open file is
568                 * closed promptly after the stream's operations have completed.
569                 *
570                 * @param readable the readable from which to read the CSV content
571                 * @return the CSV lines from the file as a {@code Stream}
572                 */
573                public Stream<String> read(final Readable readable) {
574                        requireNonNull(readable);
575
576                        final IOValue<Stream<String>> result = new IOValue<>(resources -> {
577                                final Readable rdr = resources.use(
578                                        readable,
579                                        resource -> {
580                                                if (resource instanceof AutoCloseable closeable) {
581                                                        try {
582                                                                closeable.close();
583                                                        } catch (IOException | RuntimeException | Error e) {
584                                                                throw e;
585                                                        } catch (Exception e) {
586                                                                throw new IOException(e);
587                                                        }
588                                                }
589                                        }
590                                );
591
592                                final var source = CharCursor.of(rdr);
593                                final var line = new CharAppender();
594
595                                final Supplier<String> nextLine = () -> {
596                                        line.reset();
597                                        try {
598                                                return nextLine(source, line) ? line.toString() : null;
599                                        } catch (IOException e) {
600                                                throw new UncheckedIOException(e);
601                                        }
602                                };
603
604                                return Stream.generate(nextLine)
605                                        .takeWhile(Objects::nonNull);
606                        });
607
608                        return result.get().onClose(() ->
609                                result.release(UncheckedIOException::new)
610                        );
611                }
612
613                private boolean nextLine(final CharCursor chars, final CharAppender line)
614                        throws IOException
615                {
616                        boolean quoted = false;
617                        boolean escaped = false;
618                        boolean eol = false;
619
620                        int next = -2;
621                        int i = 0;
622
623                        while (next >= 0 || (i = chars.next()) != -1) {
624                                final char current = next != -2 ? (char)next : (char)i;
625                                next = -2;
626
627                                if (isLineBreak(current)) {
628                                        if (quoted) {
629                                                line.append(current);
630                                        } else {
631                                                eol = true;
632                                        }
633                                } else if (current == quote.value) {
634                                        if (quoted) {
635                                                if (!escaped && (next = chars.next()) == quote.value) {
636                                                        escaped = true;
637                                                } else {
638                                                        if (escaped) {
639                                                                escaped = false;
640                                                        } else {
641                                                                quoted = false;
642                                                        }
643                                                }
644                                        } else {
645                                                quoted = true;
646                                        }
647                                        line.append(current);
648                                } else {
649                                        line.append(current);
650                                }
651
652                                if (eol) {
653                                        eol = false;
654                                        if (line.nonEmpty()) {
655                                                return true;
656                                        }
657                                }
658                        }
659
660                        if (quoted) {
661                                throw new IllegalArgumentException(
662                                        "Unbalanced quote character: '%s'."
663                                                .formatted(toString(line))
664                                );
665                        }
666                        return line.nonEmpty();
667                }
668
669                private static String toString(final Object value) {
670                        final var line = value.toString();
671                        return line.length() > 15 ? line.substring(0, 15) + "..." : line;
672                }
673        }
674
675        /**
676         * Splitting a CSV line into columns (records).
677         * <h2>Examples</h2>
678         * <b>Simple usage</b>
679         * {@snippet class="Snippets" region="LineSplitterSnippets.simpleSplit"}
680         *
681         * <b>Projecting and re-ordering columns</b>
682         * {@snippet class="Snippets" region="LineSplitterSnippets.projectingSplit"}
683         *
684         * @implNote
685         * The split {@code String[]} array will never contain {@code null} values.
686         * Empty columns will be returned as empty strings.
687         *
688         * @apiNote
         * A line splitter is <b>not</b> thread-safe and can't be shared between
         * different threads.
691         *
692         * @version 8.1
693         * @since 8.1
694         */
695        public static final class LineSplitter {
696                private final Separator separator;
697                private final Quote quote;
698
699                private final ColumnList columns;
700                private final CharAppender column = new CharAppender();
701
702                /**
703                 * Create a new line splitter with the given parameters.
704                 *
705                 * @param separator the separator character used by the CSV line to split
706                 * @param quote the quote character used by the CSV line to split
707                 * @param projection the column indexes which should be part of the split
708                 *        result
709                 * @throws NullPointerException if one of the parameters is {@code null}
710                 */
711                public LineSplitter(
712                        final Separator separator,
713                        final Quote quote,
714                        final ColumnIndexes projection
715                ) {
716                        if (separator.value == quote.value) {
717                                throw new IllegalArgumentException(
718                                        "Separator and quote char must be different: %s == %s."
719                                                .formatted(separator.value, quote.value)
720                                );
721                        }
722
723                        this.separator = separator;
724                        this.quote = quote;
725                        this.columns = new ColumnList(projection);
726                }
727
728                /**
729                 * Create a new line splitter with the given parameters.
730                 *
731                 * @param separator the separator character used by the CSV line to split
732                 * @param quote the quote character used by the CSV line to split
733                 * @throws NullPointerException if one of the parameters is {@code null}
734                 */
735                public LineSplitter(final Separator separator, final Quote quote) {
736                        this(separator, quote, ColumnIndexes.ALL);
737                }
738
739                /**
740                 * Create a new line splitter with the given parameters. The default
741                 * quote character, {@link Quote#DEFAULT}, will be used by the created
742                 * splitter.
743                 *
744                 * @param separator the separator character used by the CSV line to split
745                 * @throws NullPointerException if one of the parameters is {@code null}
746                 */
747                public LineSplitter(final Separator separator) {
748                        this(separator, Quote.DEFAULT, ColumnIndexes.ALL);
749                }
750
751                /**
752                 * Create a new line splitter with the given parameters. The default
753                 * separator character, {@link Separator#DEFAULT}, will be used by the
754                 * created splitter.
755                 *
756                 * @param quote the quote character used by the CSV line to split
757                 * @throws NullPointerException if one of the parameters is {@code null}
758                 */
759                public LineSplitter(final Quote quote) {
760                        this(Separator.DEFAULT, quote, ColumnIndexes.ALL);
761                }
762
763                /**
764                 * Create a new line splitter with the given parameters. Only the defined
765                 * columns will be part of the split result and the default separator
766                 * character, {@link Separator#DEFAULT}, and default quote character,
767                 * {@link Quote#DEFAULT}, is used by the created splitter.
768                 *
769                 * @param projection the column indexes which should be part of the split
770                 *        result
771                 * @throws NullPointerException if one of the parameters is {@code null}
772                 */
773                public LineSplitter(final ColumnIndexes projection) {
774                        this(Separator.DEFAULT, Quote.DEFAULT, projection);
775                }
776
777                /**
778                 * Create a new line splitter with default values.
779                 */
780                public LineSplitter() {
781                        this(Separator.DEFAULT, Quote.DEFAULT, ColumnIndexes.ALL);
782                }
783
784                /**
785                 * Splitting the given CSV {@code line} into its columns.
786                 *
787                 * @implNote
788                 * The split {@code String[]} array will never contain {@code null} values.
789                 * Empty columns will be returned as empty strings.
790                 *
791                 * @param line the CSV line to split
792                 * @return the split CSV columns
793                 * @throws NullPointerException if the CSV {@code line} is {@code null}
794                 */
795                public String[] split(final CharSequence line) {
796                        columns.clear();
797                        column.reset();
798
799                        boolean quoted = false;
800                        boolean escaped = false;
801                        boolean full = false;
802
803                        int quoteIndex = 0;
804
805                        for (int i = 0, n = line.length(); i < n && !full; ++i) {
806                                final int previous = i > 0 ? line.charAt(i - 1) : -1;
807                                final char current = line.charAt(i);
808                                final int next = i + 1 < line.length() ? line.charAt(i + 1) : -1;
809
810                                if (current == quote.value) {
811                                        if (quoted) {
812                                                if (!escaped && quote.value == next) {
813                                                        escaped = true;
814                                                } else {
815                                                        if (escaped) {
816                                                                column.append(quote.value);
817                                                                escaped = false;
818                                                        } else {
819                                                                if (next != -1 && separator.value != next) {
820                                                                        throw new IllegalArgumentException("""
821                                                                                Only separator character, '%s', allowed \
822                                                                                after quote, but found '%c':
823                                                                                %s
824                                                                                """.formatted(
825                                                                                        separator.value,
826                                                                                next,
827                                                                                        toErrorDesc(line, i + 1)
828                                                                                )
829                                                                        );
830                                                                }
831
832                                                                add(column);
833                                                                full = columns.isFull();
834                                                                quoted = false;
835                                                        }
836                                                }
837                                        } else {
838                                                if (previous != -1 && separator.value != previous) {
839                                                        throw new IllegalArgumentException("""
840                                                                Only separator character, '%s', allowed before \
841                                                                quote, but found '%c':
842                                                                %s
843                                                                """.formatted(
844                                                                        separator.value,
845                                                                previous,
846                                                                        toErrorDesc(line, Math.max(i - 1, 0))
847                                                                )
848                                                        );
849                                                }
850
851                                                quoted = true;
852                                                quoteIndex = i;
853                                        }
854                                } else if (current == separator.value) {
855                                        if (quoted) {
856                                                column.append(current);
857                                        } else if (separator.value == previous || previous == -1) {
858                                                add(column);
859                                                full = columns.isFull();
860                                        }
861                                } else {
862                                        // Read till the next token separator.
863                                        int j = i;
864                                        char c;
865                                        while (j < line.length() && !isTokenSeparator(c = line.charAt(j))) {
866                                                column.append(c);
867                                                ++j;
868                                        }
869                                        if (j != i - 1) {
870                                                i = j - 1;
871                                        }
872
873                                        if (!quoted) {
874                                                add(column);
875                                                full = columns.isFull();
876                                        }
877                                }
878                        }
879
880                        if (quoted) {
881                                throw new IllegalArgumentException("""
882                                        Unbalanced quote character.
883                                        %s
884                                        """.formatted(toErrorDesc(line, quoteIndex))
885                                );
886                        }
887                        if (line.isEmpty() ||
888                                separator.value == line.charAt(line.length() - 1))
889                        {
890                                add(column);
891                        }
892
893                        return columns.toArray();
894                }
895
896                private void add(final CharAppender column) {
897                        columns.add(column.toString());
898                        column.reset();
899                }
900
901                private boolean isTokenSeparator(final char c) {
902                        return c == separator.value || c == quote.value;
903                }
904
905                private static String toErrorDesc(final CharSequence line, final int pos) {
906                        return """
907                                %s
908                                %s
909                                """.formatted(
910                                        line.toString().stripTrailing(),
911                                        " ".repeat(pos) + "^"
912                                );
913                }
914        }
915
916
917        /**
918         * Column collection, which is backed up by a string list.
919         */
920        static final class ColumnList {
921                private final StringList columns = new StringList();
922                private final ColumnIndexes projection;
923
924                private int index = 0;
925                private int count = 0;
926
927                ColumnList(final ColumnIndexes projection) {
928                        this.projection = requireNonNull(projection);
929                }
930
931                /**
932                 * Appends a {@code column} to the column collection.
933                 *
934                 * @param column the column to add
935                 */
936                void add(String column) {
937                        if (!isFull()) {
938                                count += set(column, index++);
939                        }
940                }
941
942                private int set(String element, int column) {
943                        int updated = 0;
944
945                        if (projection.values.length == 0) {
946                                columns.add(element);
947                                ++updated;
948                        } else {
949                                int pos = -1;
950                                while ((pos = indexOf(projection.values, pos + 1, column)) != -1) {
951                                        for (int i = columns.size(); i <= pos; ++i) {
952                                                columns.add(null);
953                                        }
954                                        columns.set(pos, element);
955                                        ++updated;
956                                }
957                        }
958
959                        return updated;
960                }
961
962                private static int indexOf(int[] array, int start, int value) {
963                        for (int i = start; i < array.length; ++i) {
964                                if (array[i] == value) {
965                                        return i;
966                                }
967                        }
968
969                        return -1;
970                }
971
972                /**
973                 * Checks whether another column can be added.
974                 *
975                 * @return {@code true} if another column can be added to this
976                 *         collection, {@code false} otherwise
977                 */
978                boolean isFull() {
979                        return
980                                projection.values.length > 0 &&
981                                projection.values.length <= count;
982                }
983
984                /**
985                 * Removes all columns.
986                 */
987                public void clear() {
988                        columns.clear();
989                        index = 0;
990                        count = 0;
991                }
992
993                String[] toArray() {
994                        for (int i = columns.size(); i < projection.values.length; ++i) {
995                                columns.add(null);
996                        }
997                        return columns.toArray();
998                }
999
1000        }
1001
1002        /**
1003         * This class joins an array of columns into one CSV line.
1004         *
1005         * <h2>Examples</h2>
1006         * <b>Simple usage</b>
1007         * {@snippet class="Snippets" region="ColumnJoinerSnippets.simpleJoin"}
1008         *
1009         * <b>Embedding and re-ordering data</b>
1010         * {@snippet class="Snippets" region="ColumnJoinerSnippets.embedToCsv"}
1011         *
1012         * @apiNote
1013         * The column joiner is <em>thread-safe</em> and can be shared between
1014         * different threads.
1015         *
1016         * @version 8.1
1017         * @since 8.1
1018         */
1019        public static final class ColumnJoiner {
1020
1021                /**
1022                 * Default column joiner, which is using default separator character,
1023                 * {@link Separator#DEFAULT}, and default quote character,
1024                 * {@link Quote#DEFAULT}.
1025                 */
1026                public static final ColumnJoiner DEFAULT = new ColumnJoiner(
1027                        Separator.DEFAULT,
1028                        Quote.DEFAULT,
1029                        ColumnIndexes.ALL
1030                );
1031
1032                /**
1033                 * The CSV line splitter parameter.
1034                 *
1035                 * @param separator the column separator char
1036                 * @param quote the qute char
1037                 * @param embedding the column indices to read. If empty, all split
1038                 *        columns are used.
1039                 */
1040                private record Param(char separator, char quote, int... embedding) {
1041
1042                        private String escape(Object value) {
1043                                final var quoteStr = String.valueOf(quote);
1044
1045                                if (value == null) {
1046                                        return "";
1047                                } else {
1048                                        var stringValue = value.toString();
1049                                        var string = stringValue.replace(quoteStr, quoteStr + quoteStr);
1050
1051                                        if (stringValue.length() != string.length() || mustEscape(string)) {
1052                                                return quoteStr + string + quoteStr;
1053                                        } else {
1054                                                return stringValue;
1055                                        }
1056                                }
1057                        }
1058
1059                        private boolean mustEscape(CharSequence value) {
1060                                for (int i = 0; i < value.length(); ++i) {
1061                                        final char c = value.charAt(i);
1062                                        if (c == separator || isLineBreak(c)) {
1063                                                return true;
1064                                        }
1065                                }
1066                                return false;
1067                        }
1068                }
1069
1070                private final Param param;
1071                private final int columnCount;
1072
1073                /**
1074                 * Create a new column joiner with the given parameters.
1075                 *
1076                 * @param separator the CSV separator character used by the joiner
1077                 * @param quote the CSV quote character used by the joiner
1078                 * @param embedding the column indexes to join
1079                 * @throws NullPointerException if one of the parameters is {@code null}
1080                 */
1081                public ColumnJoiner(
1082                        final Separator separator,
1083                        final Quote quote,
1084                        final ColumnIndexes embedding
1085                ) {
1086                        if (separator.value == quote.value) {
1087                                throw new IllegalArgumentException(
1088                                        "Separator and quote char must be different: %s == %s."
1089                                                .formatted(separator.value, quote.value)
1090                                );
1091                        }
1092
1093                        param = new Param(separator.value, quote.value, embedding.values);
1094                        columnCount = Math.max(max(param.embedding) + 1, 0);
1095                }
1096
1097                /**
1098                 * Create a new column joiner with the given parameters.
1099                 *
1100                 * @param separator the CSV separator character used by the joiner
1101                 * @param quote the CSV quote character used by the joiner
1102                 * @throws NullPointerException if one of the parameters is {@code null}
1103                 */
1104                public ColumnJoiner(final Separator separator, final Quote quote) {
1105                        this(separator, quote, ColumnIndexes.ALL);
1106                }
1107
1108                /**
1109                 * Create a new column joiner with the given parameters.
1110                 *
1111                 * @param separator the CSV separator character used by the joiner
1112                 * @throws NullPointerException if one of the parameters is {@code null}
1113                 */
1114                public ColumnJoiner(final Separator separator) {
1115                        this(separator, Quote.DEFAULT, ColumnIndexes.ALL);
1116                }
1117
1118                /**
1119                 * Create a new column joiner with the given parameters.
1120                 *
1121                 * @param separator the CSV separator character used by the joiner
1122                 * @param embedding the column indexes to join
1123                 * @throws NullPointerException if one of the parameters is {@code null}
1124                 */
1125                public ColumnJoiner(final Separator separator, final ColumnIndexes embedding) {
1126                        this(separator, Quote.DEFAULT, embedding);
1127                }
1128
1129
1130                /**
1131                 * Create a new column joiner with the given parameters.
1132                 *
1133                 * @param quote the CSV quote character used by the joiner
1134                 * @throws NullPointerException if one of the parameters is {@code null}
1135                 */
1136                public ColumnJoiner(final Quote quote) {
1137                        this(Separator.DEFAULT, quote, ColumnIndexes.ALL);
1138                }
1139
1140                /**
1141                 * Create a new column joiner with the given <em>embedding</em> column
1142                 * indexes.
1143                 *
1144                 * @param embedding the embedding column indexes
1145                 */
1146                public ColumnJoiner(final ColumnIndexes embedding) {
1147                        this(Separator.DEFAULT, Quote.DEFAULT, embedding);
1148                }
1149
1150                /**
1151                 * Create a new column joiner with the given parameters.
1152                 *
1153                 * @param quote the CSV quote character used by the joiner
1154                 * @param embedding the column indexes to join
1155                 * @throws NullPointerException if one of the parameters is {@code null}
1156                 */
1157                public ColumnJoiner(final Quote quote, final ColumnIndexes embedding) {
1158                        this(Separator.DEFAULT, quote, embedding);
1159                }
1160
1161                private static int max(int[] array) {
1162                        int max = Integer.MIN_VALUE;
1163                        for (int value : array) {
1164                                if (value > max) {
1165                                        max = value;
1166                                }
1167                        }
1168                        return max;
1169                }
1170
1171                /**
1172                 * Joins the given CSV {@code columns}, using the given separator and
1173                 * quote character.
1174                 *
1175                 * @param columns the CSV columns to join
1176                 * @return the joined CSV columns
1177                 */
1178                public String join(final Iterable<?> columns) {
1179                        if (param.embedding.length == 0) {
1180                                return join0(columns);
1181                        } else {
1182                                final var values = new Object[columnCount];
1183                                final var it = columns.iterator();
1184                                int i = 0;
1185                                while (it.hasNext() && i < param.embedding.length) {
1186                                        final var col = it.next();
1187                                        final var index = param.embedding[i++];
1188                                        if (index >= 0) {
1189                                                values[index] = col;
1190                                        }
1191                                }
1192
1193                                return join0(Arrays.asList(values));
1194                        }
1195                }
1196
1197                private String join0(final Iterable<?> cols) {
1198                        final var row = new StringBuilder();
1199                        final var it = cols.iterator();
1200                        while (it.hasNext()) {
1201                                final var column = it.next();
1202                                row.append(param.escape(column));
1203                                if (it.hasNext()) {
1204                                        row.append(param.separator);
1205                                }
1206                        }
1207
1208                        return row.toString();
1209                }
1210
1211                /**
1212                 * Joins the given CSV {@code columns}, using the given separator and
1213                 * quote character.
1214                 *
1215                 * @param columns the CSV columns to join
1216                 * @return the joined CSV columns
1217                 */
1218                public String join(final Object[] columns) {
1219                        return join(Arrays.asList(columns));
1220                }
1221        }
1222
1223        static final class CharSequenceCursor {
1224                private final CharSequence chars;
1225
1226                int previous;
1227                char current;
1228                int next;
1229                int index = 0;
1230
1231                private CharSequenceCursor(final CharSequence chars) {
1232                        this.chars = requireNonNull(chars);
1233                }
1234
1235                boolean hasNext() {
1236                        return index < chars.length();
1237                }
1238
1239                void advance() {
1240                        if (index == 0) {
1241                                previous = -1;
1242                                current = chars.charAt(0);
1243                                next = 1 < chars.length() ? chars.charAt(1) : -1;
1244                                ++index;
1245                        } else {
1246                                previous = current;
1247                                current = (char)next;
1248                                ++index;
1249                                next = index < chars.length() ? chars.charAt(index) : -1;
1250                        }
1251                }
1252
1253                void set(int i) {
1254                        previous = i > 0 ? chars.charAt(i - 1) : -1;
1255                        current = chars.charAt(i);
1256                        next = i + 1 < chars.length() ? chars.charAt(i + 1) : -1;
1257                        index = i + 1;
1258                }
1259        }
1260
1261        /**
1262         * Character source interface.
1263         *
1264         * @since 8.2
1265         * @version 8.2
1266         */
1267        sealed interface CharCursor {
1268                /**
1269                 * Return the next character or -1 if there is no one.
1270                 *
1271                 * @return the next character or -1 if there is no one
1272                 * @throws IOException if reading the next character failed
1273                 */
1274                int next() throws IOException;
1275
1276                /**
1277                 * Return the correct kind of {@code CharCursor}, depending on the
1278                 * given {@code readable} type
1279                 *
1280                 * @param readable the character source
1281                 * @return a new character cursor
1282                 */
1283                static CharCursor of(final Readable readable) {
1284                        return readable instanceof CharBuffer cb
1285                                ? new CharBufferCharCursor(cb)
1286                                : new ReadableCharCursor(readable);
1287                }
1288        }
1289
1290        /**
1291         * Cursor <em>view</em> on a readable object.
1292         *
1293         * @since 8.2
1294         * @version 8.2
1295         */
1296        static final class ReadableCharCursor implements CharCursor {
1297                private static final int SIZE = 1024;
1298                private final Readable readable;
1299                private final CharBuffer buffer;
1300
1301                ReadableCharCursor(final Readable readable) {
1302                        this.readable = requireNonNull(readable);
1303                        this.buffer = CharBuffer.allocate(SIZE).flip();
1304                }
1305
1306                @Override
1307                public int next() throws IOException {
1308                        if (!buffer.hasRemaining()) {
1309                                if (!fill()) {
1310                                        return -1;
1311                                }
1312                        }
1313
1314                        return buffer.get();
1315                }
1316
1317                private boolean fill() throws IOException {
1318                        int n;
1319                        int i = 0;
1320                        buffer.clear();
1321                        do {
1322                                n = readable.read(buffer);
1323                        } while (n == 0 && i++ < 1000); // Make sure re-read will terminate.
1324                        buffer.flip();
1325
1326                        return n > 0;
1327                }
1328        }
1329
1330        /**
1331         * Cursor <em>view</em> on a character buffer.
1332         *
1333         * @since 8.2
1334         * @version 8.2
1335         */
1336        static final class CharBufferCharCursor implements CharCursor {
1337                private final CharBuffer buffer;
1338
1339                CharBufferCharCursor(final CharBuffer buffer) {
1340                        this.buffer = requireNonNull(buffer);
1341                }
1342
1343                @Override
1344                public int next() {
1345                        if (!buffer.hasRemaining()) {
1346                                return -1;
1347                        }
1348                        return buffer.get();
1349                }
1350        }
1351
1352        /**
1353         * Allows appending chars in bulks to {@link StringBuilder}.
1354         *
1355         * @since 8.2
1356         * @version 8.2
1357         */
1358        static final class CharAppender {
1359                private static final int SIZE = 32;
1360
1361                private char[] buffer = new char[SIZE];
1362                private int index = 0;
1363
1364                CharAppender() {
1365                }
1366
1367                boolean nonEmpty() {
1368                        return index != 0;
1369                }
1370
1371                void append(final char c) {
1372                        if (index == buffer.length) {
1373                                increaseSize(buffer.length*2);
1374                        }
1375
1376                        buffer[index++] = c;
1377                }
1378
1379                @Override
1380                public String toString() {
1381                        return String.valueOf(buffer, 0, index);
1382                }
1383
1384                void reset() {
1385                        index = 0;
1386                }
1387
1388                private void increaseSize(final int newSize) {
1389                        final char[] newBuffer = new char[newSize];
1390                        System.arraycopy(buffer, 0, newBuffer, 0, index);
1391                        buffer = newBuffer;
1392                }
1393        }
1394
1395        /**
1396         * Simple growing list of strings.
1397         *
1398         * @since 8.2
1399         * @version 8.2
1400         */
1401        static final class StringList {
1402                private static final int SIZE = 16;
1403                private String[] elements;
1404                private int size;
1405
1406                StringList() {
1407                        size = 0;
1408                        elements = new String[SIZE];
1409                }
1410
1411                public int size() {
1412                        return size;
1413                }
1414
1415                public void add(final String value) {
1416                        if (size == elements.length) {
1417                                increaseSize(elements.length*2);
1418                        }
1419                        elements[size++] = value;
1420                }
1421
1422                public void set(final int index, final String value) {
1423                        elements[index] = value;
1424                }
1425
1426                public void clear() {
1427                        size = 0;
1428                }
1429
1430                public String[] toArray() {
1431                        final var result = new String[size];
1432                        System.arraycopy(elements, 0, result, 0, size);
1433                        return result;
1434                }
1435
1436                private void increaseSize(final int newSize) {
1437                        final String[] newElements = new String[newSize];
1438                        System.arraycopy(elements, 0, newElements, 0, size);
1439                        elements = newElements;
1440                }
1441
1442        }
1443
1444}
1445
1446