001 /*
002 * Java Genetic Algorithm Library (jenetics-5.2.0).
003 * Copyright (c) 2007-2020 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020 package io.jenetics.stat;
021
022 import static java.lang.Double.compare;
023 import static java.lang.String.format;
024 import static java.util.Objects.requireNonNull;
025
026 import java.util.Arrays;
027 import java.util.Objects;
028 import java.util.function.DoubleConsumer;
029 import java.util.function.ToDoubleFunction;
030 import java.util.stream.Collector;
031
032 /**
033 * Implementation of the quantile estimation algorithm published by
034 * <p>
035 * <strong>Raj JAIN and Imrich CHLAMTAC</strong>:
036 * <em>
037 * The P<sup>2</sup> Algorithm for Dynamic Calculation of Quantiles and
038 * Histograms Without Storing Observations
039 * </em>
040 * <br>
041 * [<a href="http://www.cse.wustl.edu/~jain/papers/ftp/psqr.pdf">Communications
042 * of the ACM; October 1985, Volume 28, Number 10</a>]
043 * <p>
044 * This class is designed to work with (though does not require) streams. For
045 * example, you can compute the quantile with:
046 * <pre>{@code
047 * final DoubleStream stream = ...
048 * final Quantile quantile = stream.collect(
049 * () -> new Quantile(0.23),
050 * Quantile::accept,
051 * Quantile::combine
052 * );
053 * }</pre>
054 *
055 * @implNote
056 * This implementation is not thread safe. However, it is safe to use on a
057 * parallel stream, because the parallel implementation of
058 * {@link java.util.stream.Stream#collect Stream.collect()} provides the
059 * necessary partitioning, isolation, and merging of results for safe and
060 * efficient parallel execution.
061 * Using this class in the {@code collect} method of an parallel stream can
062 * lead to an reduced accuracy of the quantile value. Since this implementation
063 * is an estimation algorithm, combining the estimations will only work for
064 * large streams ({@code size >> 1000}).
065 *
066 * @see <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia: Quantile</a>
067 *
068 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
069 * @since 1.0
070 * @version 5.2
071 */
072 public class Quantile implements DoubleConsumer {
073
074 private long _count = 0;
075
076 // The desired quantile.
077 private final double _quantile;
078
079 // Marker heights.
080 private final double[] _q = {0, 0, 0, 0, 0};
081
082 // Marker positions.
083 private final double[] _n = {0, 0, 0, 0, 0};
084
085 // Desired marker positions.
086 private final double[] _nn = {0, 0, 0};
087
088 // Desired marker position increments.
089 private final double[] _dn = {0, 0, 0};
090
091 private boolean _initialized;
092
093 /**
094 * Create a new quantile accumulator with the given value.
095 *
096 * @param quantile the wished quantile value.
097 * @throws IllegalArgumentException if the {@code quantile} is not in the
098 * range {@code [0, 1]}.
099 */
100 public Quantile(final double quantile) {
101 _quantile = quantile;
102 init(quantile);
103 }
104
105 private void init(final double quantile) {
106 check(quantile);
107
108 Arrays.fill(_q, 0);
109 Arrays.fill(_n, 0);
110 Arrays.fill(_nn, 0);
111 Arrays.fill(_dn, 0);
112
113 _n[0] = -1.0;
114 _q[2] = 0.0;
115 _initialized =
116 compare(quantile, 0.0) == 0 ||
117 compare(quantile, 1.0) == 0;
118
119 _count = 0;
120 }
121
122 private static void check(final double quantile) {
123 if (quantile < 0.0 || quantile > 1) {
124 throw new IllegalArgumentException(format(
125 "Quantile (%s) not in the valid range of [0, 1]", quantile
126 ));
127 }
128 }
129
130 /**
131 * Reset this object to its initial state.
132 */
133 public void reset() {
134 init(_quantile);
135 }
136
137 /**
138 * Return the <em>quantile</em> {@code this} object has been parametrized
139 * with.
140 *
141 * @since 3.1
142 *
143 * @return the <em>quantile</em> {@code this} object has been parametrized
144 * with
145 */
146 public double quantile() {
147 return _quantile;
148 }
149
150 /**
151 * Return the <em>quantile</em> {@code this} object has been parametrized
152 * with.
153 *
154 * @since 3.1
155 *
156 * @return the <em>quantile</em> {@code this} object has been parametrized
157 * with
158 * @deprecated Use {@link #quantile()} instead
159 */
160 @Deprecated
161 public double getQuantile() {
162 return _quantile;
163 }
164
165 /**
166 * Return the computed quantile value.
167 *
168 * @return the quantile value.
169 */
170 public double value() {
171 return _q[2];
172 }
173
174 /**
175 * Return the computed quantile value.
176 *
177 * @return the quantile value.
178 * @deprecated Use {@link #value()} instead
179 */
180 @Deprecated
181 public double getValue() {
182 return _q[2];
183 }
184
185 /**
186 * Return the number of samples the quantile value was calculated of.
187 *
188 *
189 * @return the number of samples the quantile value was calculated of
190 */
191 public long count() {
192 return _count;
193 }
194
195 /**
196 * Return the number of samples the quantile value was calculated of.
197 *
198 *
199 * @return the number of samples the quantile value was calculated of
200 * @deprecated Use {@link #count()} instead
201 */
202 @Deprecated
203 public long getSamples() {
204 return _count;
205 }
206
207 @Override
208 public void accept(final double value) {
209 if (!_initialized) {
210 initialize(value);
211 } else {
212 update(value);
213 }
214
215 ++_count;
216 }
217
218 /**
219 * Combine two {@code Quantile} objects.
220 *
221 * @since 3.1
222 *
223 * @param other the other {@code Quantile} object to combine
224 * @return {@code this}
225 * @throws java.lang.NullPointerException if the {@code other} object is
226 * {@code null}.
227 * @throws java.lang.IllegalArgumentException if the {@link #quantile}
228 * of the {@code other} object differs from {@code this} one.
229 */
230 public Quantile combine(final Quantile other) {
231 if (Double.compare(_quantile, other._quantile) != 0) {
232 throw new IllegalArgumentException(format(
233 "Can't perform combine, the quantile are not equal: %s != %s",
234 _quantile, other._quantile
235 ));
236 }
237
238 _count += other._count;
239
240 if (_quantile == 0.0) {
241 _q[2] = Math.min(_q[2], other._q[2]);
242 } else if (_quantile == 1.0) {
243 _q[2] = Math.max(_q[2], other._q[2]);
244 } else {
245 // Combine the marker positions.
246 _n[1] += other._n[1];
247 _n[2] += other._n[2];
248 _n[3] += other._n[3];
249 _n[4] += other._n[4];
250
251 // Combine the marker height.
252 _q[0] = Math.min(_q[0], other._q[0]);
253 _q[1] = (_q[1] + other._q[1])*0.5;
254 _q[2] = (_q[2] + other._q[2])*0.5;
255 _q[3] = (_q[3] + other._q[3])*0.5;
256 _q[4] = Math.max(_q[4], other._q[4]);
257
258 // Combine position of markers.
259 _nn[0] += other._nn[0];
260 _nn[1] += other._nn[1];
261 _nn[2] += other._nn[2];
262
263 adjustMarkerHeights();
264 }
265
266 return this;
267 }
268
269 private void initialize(double value) {
270 if (_n[0] < 0.0) {
271 _n[0] = 0.0;
272 _q[0] = value;
273 } else if (_n[1] == 0.0) {
274 _n[1] = 1.0;
275 _q[1] = value;
276 } else if (_n[2] == 0.0) {
277 _n[2] = 2.0;
278 _q[2] = value;
279 } else if (_n[3] == 0.0) {
280 _n[3] = 3.0;
281 _q[3] = value;
282 } else if (_n[4] == 0.0) {
283 _n[4] = 4.0;
284 _q[4] = value;
285 }
286
287 if (_n[4] != 0.0) {
288 Arrays.sort(_q);
289
290 _nn[0] = 2.0*_quantile;
291 _nn[1] = 4.0*_quantile;
292 _nn[2] = 2.0*_quantile + 2.0;
293
294 _dn[0] = _quantile/2.0;
295 _dn[1] = _quantile;
296 _dn[2] = (1.0 + _quantile)/2.0;
297
298 _initialized = true;
299 }
300 }
301
302 private void update(double value) {
303 assert _initialized;
304
305 // If min or max, handle as special case; otherwise, ...
306 if (_quantile == 0.0) {
307 if (value < _q[2]) {
308 _q[2] = value;
309 }
310 } else if (_quantile == 1.0) {
311 if (value > _q[2]) {
312 _q[2] = value;
313 }
314 } else {
315 // Increment marker locations and update min and max.
316 if (value < _q[0]) {
317 ++_n[1]; ++_n[2]; ++_n[3]; ++_n[4]; _q[0] = value;
318 } else if (value < _q[1]) {
319 ++_n[1]; ++_n[2]; ++_n[3]; ++_n[4];
320 } else if (value < _q[2]) {
321 ++_n[2]; ++_n[3]; ++_n[4];
322 } else if (value < _q[3]) {
323 ++_n[3]; ++_n[4];
324 } else if (value < _q[4]) {
325 ++_n[4];
326 } else {
327 ++_n[4]; _q[4] = value;
328 }
329
330 // Increment positions of markers k + 1
331 _nn[0] += _dn[0];
332 _nn[1] += _dn[1];
333 _nn[2] += _dn[2];
334
335 adjustMarkerHeights();
336 }
337 }
338
339 // Adjust heights of markers 0 to 2 if necessary
340 private void adjustMarkerHeights() {
341 double mm = _n[1] - 1.0;
342 double mp = _n[1] + 1.0;
343 if (_nn[0] >= mp && _n[2] > mp) {
344 _q[1] = qPlus(mp, _n[0], _n[1], _n[2], _q[0], _q[1], _q[2]);
345 _n[1] = mp;
346 } else if (_nn[0] <= mm && _n[0] < mm) {
347 _q[1] = qMinus(mm, _n[0], _n[1], _n[2], _q[0], _q[1], _q[2]);
348 _n[1] = mm;
349 }
350
351 mm = _n[2] - 1.0;
352 mp = _n[2] + 1.0;
353 if (_nn[1] >= mp && _n[3] > mp) {
354 _q[2] = qPlus(mp, _n[1], _n[2], _n[3], _q[1], _q[2], _q[3]);
355 _n[2] = mp;
356 } else if (_nn[1] <= mm && _n[1] < mm) {
357 _q[2] = qMinus(mm, _n[1], _n[2], _n[3], _q[1], _q[2], _q[3]);
358 _n[2] = mm;
359 }
360
361 mm = _n[3] - 1.0;
362 mp = _n[3] + 1.0;
363 if (_nn[2] >= mp && _n[4] > mp) {
364 _q[3] = qPlus(mp, _n[2], _n[3], _n[4], _q[2], _q[3], _q[4]);
365 _n[3] = mp;
366 } else if (_nn[2] <= mm && _n[2] < mm) {
367 _q[3] = qMinus(mm, _n[2], _n[3], _n[4], _q[2], _q[3], _q[4]);
368 _n[3] = mm;
369 }
370 }
371
372 private static double qPlus(
373 final double mp,
374 final double m0,
375 final double m1,
376 final double m2,
377 final double q0,
378 final double q1,
379 final double q2
380 ) {
381 double result =
382 q1 +
383 ((mp - m0)*(q2 - q1)/(m2 - m1) +
384 (m2 - mp)*(q1 - q0)/(m1 - m0))/(m2 - m0);
385
386 if (result > q2) {
387 result = q1 + (q2 - q1)/(m2 - m1);
388 }
389
390 return result;
391 }
392
393 private static double qMinus(
394 final double mm,
395 final double m0,
396 final double m1,
397 final double m2,
398 final double q0,
399 final double q1,
400 final double q2
401 ) {
402 double result =
403 q1 -
404 ((mm - m0)*(q2 - q1)/(m2 - m1) +
405 (m2 - mm)*(q1 - q0)/(m1 - m0))/(m2 - m0);
406
407 if (q0 > result) {
408 result = q1 + (q0 - q1)/(m0 - m1);
409 }
410
411 return result;
412 }
413
414 /**
415 * Compares the state of two {@code Quantile} objects. This is
416 * a replacement for the {@link #equals(Object)} which is not advisable to
417 * implement for this mutable object. If two object have the same state, it
418 * has still the same state when updated with the same value.
419 * <pre>{@code
420 * final Quantile q1 = ...;
421 * final Quantile q2 = ...;
422 *
423 * if (q1.sameState(q2)) {
424 * final double value = random.nextDouble();
425 * q1.accept(value);
426 * q2.accept(value);
427 *
428 * assert q1.sameState(q2);
429 * assert q2.sameState(q1);
430 * assert q1.sameState(q1);
431 * }
432 * }</pre>
433 *
434 * @since 3.7
435 *
436 * @param other the other object for the test
437 * @return {@code true} the {@code this} and the {@code other} objects have
438 * the same state, {@code false} otherwise
439 */
440 public boolean sameState(final Quantile other) {
441 return Objects.equals(_quantile, other._quantile) &&
442 Arrays.equals(_dn, other._dn) &&
443 Arrays.equals(_n, other._n) &&
444 Arrays.equals(_nn, other._nn) &&
445 Arrays.equals(_q, other._q);
446 }
447
448 @Override
449 public String toString() {
450 return format(
451 "%s[samples=%d, quantile=%f]",
452 getClass().getSimpleName(), count(), value()
453 );
454 }
455
456
457 static Quantile median() {
458 return new Quantile(0.5);
459 }
460
461 /**
462 * Return a {@code Collector} which applies an double-producing mapping
463 * function to each input element, and returns quantiles for the resulting
464 * values.
465 *
466 * <pre>{@code
467 * final Stream<SomeObject> stream = ...
468 * final Quantile quantile = stream
469 * .collect(toQuantile(0.25, v -> v.doubleValue()));
470 * }</pre>
471 *
472 * @param quantile the wished quantile value.
473 * @param mapper a mapping function to apply to each element
474 * @param <T> the type of the input elements
475 * @return a {@code Collector} implementing the quantiles reduction
476 * @throws java.lang.NullPointerException if the given {@code mapper} is
477 * {@code null}
478 * @throws IllegalArgumentException if the {@code quantile} is not in the
479 * range {@code [0, 1]}.
480 */
481 public static <T> Collector<T, ?, Quantile> toQuantile(
482 final double quantile,
483 final ToDoubleFunction<? super T> mapper
484 ) {
485 check(quantile);
486 requireNonNull(mapper);
487
488 return Collector.of(
489 () -> new Quantile(quantile),
490 (r, t) -> r.accept(mapper.applyAsDouble(t)),
491 Quantile::combine
492 );
493 }
494
495 /**
496 * Return a {@code Collector} which applies an double-producing mapping
497 * function to each input element, and returns the median for the resulting
498 * values.
499 *
500 * <pre>{@code
501 * final Stream<SomeObject> stream = ...
502 * final Quantile median = stream.collect(toMedian(v -> v.doubleValue()));
503 * }</pre>
504 *
505 * @param mapper a mapping function to apply to each element
506 * @param <T> the type of the input elements
507 * @return a {@code Collector} implementing the quantiles reduction
508 * @throws java.lang.NullPointerException if the given {@code mapper} is
509 * {@code null}
510 */
511 public static <T> Collector<T, ?, Quantile> toMedian(
512 final ToDoubleFunction<? super T> mapper
513 ) {
514 requireNonNull(mapper);
515
516 return Collector.of(
517 Quantile::median,
518 (r, t) -> r.accept(mapper.applyAsDouble(t)),
519 Quantile::combine
520 );
521 }
522
523 }
|