001/*
002 * Java Genetic Algorithm Library (jenetics-8.1.0).
003 * Copyright (c) 2007-2024 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020package io.jenetics.stat;
021
022import static java.lang.Double.compare;
023import static java.lang.String.format;
024import static java.util.Objects.requireNonNull;
025
026import java.util.Arrays;
027import java.util.Objects;
028import java.util.function.DoubleConsumer;
029import java.util.function.ToDoubleFunction;
030import java.util.stream.Collector;
031
032/**
033 * Implementation of the quantile estimation algorithm published by
034 * <p>
035 * <strong>Raj JAIN and Imrich CHLAMTAC</strong>:
036 * <em>
037 *     The P<sup>2</sup> Algorithm for Dynamic Calculation of Quantiles and
038 *     Histograms Without Storing Observations
039 * </em>
040 * <br>
041 * [<a href="http://www.cse.wustl.edu/~jain/papers/ftp/psqr.pdf">Communications
042 * of the ACM; October 1985, Volume 28, Number 10</a>]
043 * <p>
044 * This class is designed to work with (though does not require) streams. For
045 * example, you can compute the quantile with:
046 * {@snippet lang="java":
047 * final DoubleStream stream = null; // @replace substring='null' replacement="..."
048 * final Quantile quantile = stream.collect(
049 *         () -> new Quantile(0.23),
050 *         Quantile::accept,
051 *         Quantile::combine
052 *     );
053 * }
054 *
055 * @implNote
056 * This implementation is not thread safe. However, it is safe to use on a
057 * parallel stream, because the parallel implementation of
058 * {@link java.util.stream.Stream#collect Stream.collect()} provides the
059 * necessary partitioning, isolation, and merging of results for safe and
060 * efficient parallel execution.
061 * Using this class in the {@code collect} method of a parallel stream can
062 * lead to a reduced accuracy of the quantile value. Since this implementation
063 * is an estimation algorithm, combining the estimations will only work for
064 * large streams ({@code size >> 1000}).
065 *
066 * @see <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia: Quantile</a>
067 *
068 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
069 * @since 1.0
070 * @version 6.0
071 */
072public class Quantile implements DoubleConsumer {
073
074        private long _count = 0;
075
076        // The desired quantile.
077        private final double _quantile;
078
079        // Marker heights.
080        private final double[] _q = {0, 0, 0, 0, 0};
081
082        // Marker positions.
083        private final double[] _n = {0, 0, 0, 0, 0};
084
085        // Desired marker positions.
086        private final double[] _nn = {0, 0, 0};
087
088        // Desired marker position increments.
089        private final double[] _dn = {0, 0, 0};
090
091        private boolean _initialized;
092
093        /**
094         * Create a new quantile accumulator with the given value.
095         *
096         * @param quantile the wished quantile value.
097         * @throws IllegalArgumentException if the {@code quantile} is not in the
098         *         range {@code [0, 1]}.
099         */
100        public Quantile(final double quantile) {
101                _quantile = quantile;
102                init(quantile);
103        }
104
105        private void init(final double quantile) {
106                check(quantile);
107
108                Arrays.fill(_q, 0);
109                Arrays.fill(_n, 0);
110                Arrays.fill(_nn, 0);
111                Arrays.fill(_dn, 0);
112
113                _n[0] = -1.0;
114                _q[2] = 0.0;
115                _initialized =
116                        compare(quantile, 0.0) == 0 ||
117                        compare(quantile, 1.0) == 0;
118
119                _count = 0;
120        }
121
122        private static void check(final double quantile) {
123                if (quantile < 0.0 || quantile > 1) {
124                        throw new IllegalArgumentException(format(
125                                "Quantile (%s) not in the valid range of [0, 1]", quantile
126                        ));
127                }
128        }
129
130        /**
131         * Reset this object to its initial state.
132         */
133        public void reset() {
134                init(_quantile);
135        }
136
137        /**
138         * Return the <em>quantile</em> {@code this} object has been parametrized
139         * with.
140         *
141         * @since 3.1
142         *
143         * @return the <em>quantile</em> {@code this} object has been parametrized
144         *         with
145         */
146        public double quantile() {
147                return _quantile;
148        }
149
150        /**
151         * Return the computed quantile value.
152         *
153         * @return the quantile value.
154         */
155        public double value() {
156                return _q[2];
157        }
158
159        /**
160         * Return the number of samples the quantile value was calculated of.
161         *
162         *
163         * @return the number of samples the quantile value was calculated of
164         */
165        public long count() {
166                return _count;
167        }
168
169        @Override
170        public void accept(final double value) {
171                if (!_initialized) {
172                        initialize(value);
173                } else {
174                        update(value);
175                }
176
177                ++_count;
178        }
179
180        /**
181         * Combine two {@code Quantile} objects.
182         *
183         * @since 3.1
184         *
185         * @param other the other {@code Quantile} object to combine
186         * @return {@code this}
187         * @throws java.lang.NullPointerException if the {@code other} object is
188         *         {@code null}.
189         * @throws java.lang.IllegalArgumentException if the {@link #quantile}
190         *         of the {@code other} object differs from {@code this} one.
191         */
192        public Quantile combine(final Quantile other) {
193                if (Double.compare(_quantile, other._quantile) != 0) {
194                        throw new IllegalArgumentException(format(
195                                "Can't perform combine, the quantile are not equal: %s != %s",
196                                _quantile, other._quantile
197                        ));
198                }
199
200                _count += other._count;
201
202                if (_quantile == 0.0) {
203                        _q[2] = Math.min(_q[2], other._q[2]);
204                } else if (_quantile == 1.0) {
205                        _q[2] = Math.max(_q[2], other._q[2]);
206                } else {
207                        // Combine the marker positions.
208                        _n[1] += other._n[1];
209                        _n[2] += other._n[2];
210                        _n[3] += other._n[3];
211                        _n[4] += other._n[4];
212
213                        // Combine the marker height.
214                        _q[0] = Math.min(_q[0], other._q[0]);
215                        _q[1] = (_q[1] + other._q[1])*0.5;
216                        _q[2] = (_q[2] + other._q[2])*0.5;
217                        _q[3] = (_q[3] + other._q[3])*0.5;
218                        _q[4] = Math.max(_q[4], other._q[4]);
219
220                        // Combine position of markers.
221                        _nn[0] += other._nn[0];
222                        _nn[1] += other._nn[1];
223                        _nn[2] += other._nn[2];
224
225                        adjustMarkerHeights();
226                }
227
228                return this;
229        }
230
231        private void initialize(double value) {
232                if (_n[0] < 0.0) {
233                        _n[0] = 0.0;
234                        _q[0] = value;
235                } else if (_n[1] == 0.0) {
236                        _n[1] = 1.0;
237                        _q[1] = value;
238                } else if (_n[2] == 0.0) {
239                        _n[2] = 2.0;
240                        _q[2] = value;
241                } else if (_n[3] == 0.0) {
242                        _n[3] = 3.0;
243                        _q[3] = value;
244                } else if (_n[4] == 0.0) {
245                        _n[4] = 4.0;
246                        _q[4] = value;
247                }
248
249                if (_n[4] != 0.0) {
250                        Arrays.sort(_q);
251
252                        _nn[0] = 2.0*_quantile;
253                        _nn[1] = 4.0*_quantile;
254                        _nn[2] = 2.0*_quantile + 2.0;
255
256                        _dn[0] = _quantile/2.0;
257                        _dn[1] = _quantile;
258                        _dn[2] = (1.0 + _quantile)/2.0;
259
260                        _initialized = true;
261                }
262        }
263
264        private void update(double value) {
265                assert _initialized;
266
267                // If min or max, handle as a special case; otherwise, ...
268                if (_quantile == 0.0) {
269                        if (value < _q[2]) {
270                                _q[2] = value;
271                        }
272                } else if (_quantile == 1.0) {
273                        if (value > _q[2]) {
274                                _q[2] = value;
275                        }
276                } else {
277                        // Increment marker locations and update min and max.
278                        if (value < _q[0]) {
279                                ++_n[1]; ++_n[2]; ++_n[3]; ++_n[4]; _q[0] = value;
280                        } else if (value < _q[1]) {
281                                ++_n[1]; ++_n[2]; ++_n[3]; ++_n[4];
282                        } else if (value < _q[2]) {
283                                ++_n[2]; ++_n[3]; ++_n[4];
284                        } else if (value < _q[3]) {
285                                ++_n[3]; ++_n[4];
286                        } else if (value < _q[4]) {
287                                ++_n[4];
288                        } else {
289                                ++_n[4]; _q[4] = value;
290                        }
291
292                        // Increment positions of markers k + 1
293                        _nn[0] += _dn[0];
294                        _nn[1] += _dn[1];
295                        _nn[2] += _dn[2];
296
297                        adjustMarkerHeights();
298                }
299        }
300
301        // Adjust heights of markers 0 to 2 if necessary
302        private void adjustMarkerHeights() {
303                double mm = _n[1] - 1.0;
304                double mp = _n[1] + 1.0;
305                if (_nn[0] >= mp && _n[2] > mp) {
306                        _q[1] = qPlus(mp, _n[0], _n[1], _n[2], _q[0], _q[1], _q[2]);
307                        _n[1] = mp;
308                } else if (_nn[0] <= mm && _n[0] < mm) {
309                        _q[1] = qMinus(mm, _n[0], _n[1], _n[2], _q[0], _q[1], _q[2]);
310                        _n[1] = mm;
311                }
312
313                mm = _n[2] - 1.0;
314                mp = _n[2] + 1.0;
315                if (_nn[1] >= mp && _n[3] > mp) {
316                        _q[2] = qPlus(mp, _n[1], _n[2], _n[3], _q[1], _q[2], _q[3]);
317                        _n[2] = mp;
318                } else if (_nn[1] <= mm && _n[1] < mm) {
319                        _q[2] = qMinus(mm, _n[1], _n[2], _n[3], _q[1], _q[2], _q[3]);
320                        _n[2] = mm;
321                }
322
323                mm = _n[3] - 1.0;
324                mp = _n[3] + 1.0;
325                if (_nn[2] >= mp && _n[4] > mp) {
326                        _q[3] = qPlus(mp, _n[2], _n[3], _n[4], _q[2], _q[3], _q[4]);
327                        _n[3] = mp;
328                } else if (_nn[2] <= mm && _n[2] < mm) {
329                        _q[3] = qMinus(mm, _n[2], _n[3], _n[4], _q[2], _q[3], _q[4]);
330                        _n[3] = mm;
331                }
332        }
333
334        private static double qPlus(
335                final double mp,
336                final double m0,
337                final double m1,
338                final double m2,
339                final double q0,
340                final double q1,
341                final double q2
342        ) {
343                double result =
344                        q1 +
345                        ((mp - m0)*(q2 - q1)/(m2 - m1) +
346                        (m2 - mp)*(q1 - q0)/(m1 - m0))/(m2 - m0);
347
348                if (result > q2) {
349                        result = q1 + (q2 - q1)/(m2 - m1);
350                }
351
352                return result;
353        }
354
355        private static double qMinus(
356                final double mm,
357                final double m0,
358                final double m1,
359                final double m2,
360                final double q0,
361                final double q1,
362                final double q2
363        ) {
364                double result =
365                        q1 -
366                        ((mm - m0)*(q2 - q1)/(m2 - m1) +
367                        (m2 - mm)*(q1 - q0)/(m1 - m0))/(m2 - m0);
368
369                if (q0 > result) {
370                        result = q1 + (q0 - q1)/(m0 - m1);
371                }
372
373                return result;
374        }
375
376        /**
377         * Compares the state of two {@code Quantile} objects. This is
378         * a replacement for the {@link #equals(Object)} which is not advisable to
379         * implement for this mutable object. If two objects have the same state, it
380         * has still the same state when updated with the same value.
381         * {@snippet lang="java":
382         * final Quantile q1 = null; // @replace substring='null' replacement="..."
383         * final Quantile q2 = null; // @replace substring='null' replacement="..."
384         *
385         * if (q1.sameState(q2)) {
386         *     final double value = random.nextDouble();
387         *     q1.accept(value);
388         *     q2.accept(value);
389         *
390         *     assert q1.sameState(q2);
391         *     assert q2.sameState(q1);
392         *     assert q1.sameState(q1);
393         * }
394         * }
395         *
396         * @since 3.7
397         *
398         * @param other the other object for the test
399         * @return {@code true} the {@code this} and the {@code other} objects have
400         *         the same state, {@code false} otherwise
401         */
402        public boolean sameState(final Quantile other) {
403                return Objects.equals(_quantile, other._quantile) &&
404                        Arrays.equals(_dn, other._dn) &&
405                        Arrays.equals(_n, other._n) &&
406                        Arrays.equals(_nn, other._nn) &&
407                        Arrays.equals(_q, other._q);
408        }
409
410        @Override
411        public String toString() {
412                return format(
413                        "%s[samples=%d, quantile=%f]",
414                        getClass().getSimpleName(), count(), value()
415                );
416        }
417
418
419        static Quantile median() {
420                return new Quantile(0.5);
421        }
422
423        /**
424         * Return a {@code Collector} which applies a double-producing mapping
425         * function to each input element, and returns quantiles for the resulting
426         * values.
427         * {@snippet lang="java":
428         * final Stream<SomeObject> stream = null; // @replace substring='null' replacement="..."
429         * final Quantile quantile = stream
430         *     .collect(toQuantile(0.25, v -> v.doubleValue()));
431         * }
432         *
433         * @param quantile the wished quantile value.
434         * @param mapper a mapping function to apply to each element
435         * @param <T> the type of the input elements
436         * @return a {@code Collector} implementing the quantile reduction
437         * @throws java.lang.NullPointerException if the given {@code mapper} is
438         *         {@code null}
439         * @throws IllegalArgumentException if the {@code quantile} is not in the
440         *         range {@code [0, 1]}.
441         */
442        public static <T> Collector<T, ?, Quantile> toQuantile(
443                final double quantile,
444                final ToDoubleFunction<? super T> mapper
445        ) {
446                check(quantile);
447                requireNonNull(mapper);
448
449                return Collector.of(
450                        () -> new Quantile(quantile),
451                        (r, t) -> r.accept(mapper.applyAsDouble(t)),
452                        Quantile::combine
453                );
454        }
455
456        /**
457         * Return a {@code Collector} which applies a double-producing mapping
458         * function to each input element, and returns the median for the resulting
459         * values.
460         * {@snippet lang="java":
461         * final Stream<SomeObject> stream = null; // @replace substring='null' replacement="..."
462         * final Quantile median = stream.collect(toMedian(v -> v.doubleValue()));
463         * }
464         *
465         * @param mapper a mapping function to apply to each element
466         * @param <T> the type of the input elements
467         * @return a {@code Collector} implementing the quantile reduction
468         * @throws java.lang.NullPointerException if the given {@code mapper} is
469         *         {@code null}
470         */
471        public static <T> Collector<T, ?, Quantile> toMedian(
472                final ToDoubleFunction<? super T> mapper
473        ) {
474                requireNonNull(mapper);
475
476                return Collector.of(
477                        Quantile::median,
478                        (r, t) -> r.accept(mapper.applyAsDouble(t)),
479                        Quantile::combine
480                );
481        }
482
483}