001 /*
002 * Java Genetic Algorithm Library (jenetics-5.1.0).
003 * Copyright (c) 2007-2019 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com)
019 */
020 package io.jenetics.stat;
021
022 import static java.lang.Double.compare;
023 import static java.lang.String.format;
024 import static java.util.Objects.requireNonNull;
025
026 import java.util.Arrays;
027 import java.util.Objects;
028 import java.util.function.DoubleConsumer;
029 import java.util.function.ToDoubleFunction;
030 import java.util.stream.Collector;
031
032 /**
033 * Implementation of the quantile estimation algorithm published by
034 * <p>
035 * <strong>Raj JAIN and Imrich CHLAMTAC</strong>:
036 * <em>
037 * The P<sup>2</sup> Algorithm for Dynamic Calculation of Quantiles and
038 * Histograms Without Storing Observations
039 * </em>
040 * <br>
041 * [<a href="http://www.cse.wustl.edu/~jain/papers/ftp/psqr.pdf">Communications
042 * of the ACM; October 1985, Volume 28, Number 10</a>]
043 * <p>
044 * This class is designed to work with (though does not require) streams. For
045 * example, you can compute the quantile with:
046 * <pre>{@code
047 * final DoubleStream stream = ...
048 * final Quantile quantile = stream.collect(
049 * () -> new Quantile(0.23),
050 * Quantile::accept,
051 * Quantile::combine
052 * );
053 * }</pre>
054 *
055 * @implNote
056 * This implementation is not thread safe. However, it is safe to use on a
057 * parallel stream, because the parallel implementation of
058 * {@link java.util.stream.Stream#collect Stream.collect()} provides the
059 * necessary partitioning, isolation, and merging of results for safe and
060 * efficient parallel execution.
061 * Using this class in the {@code collect} method of an parallel stream can
062 * lead to an reduced accuracy of the quantile value. Since this implementation
063 * is an estimation algorithm, combining the estimations will only work for
064 * large streams ({@code size >> 1000}).
065 *
066 * @see <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia: Quantile</a>
067 *
068 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a>
069 * @since 1.0
070 * @version 3.7
071 */
072 public class Quantile implements DoubleConsumer {
073
074 private long _samples = 0;
075
076 // The desired quantile.
077 private final double _quantile;
078
079 // Marker heights.
080 private final double[] _q = {0, 0, 0, 0, 0};
081
082 // Marker positions.
083 private final double[] _n = {0, 0, 0, 0, 0};
084
085 // Desired marker positions.
086 private final double[] _nn = {0, 0, 0};
087
088 // Desired marker position increments.
089 private final double[] _dn = {0, 0, 0};
090
091 private boolean _initialized;
092
093 /**
094 * Create a new quantile accumulator with the given value.
095 *
096 * @param quantile the wished quantile value.
097 * @throws IllegalArgumentException if the {@code quantile} is not in the
098 * range {@code [0, 1]}.
099 */
100 public Quantile(final double quantile) {
101 _quantile = quantile;
102 init(quantile);
103 }
104
105 private void init(final double quantile) {
106 check(quantile);
107
108 Arrays.fill(_q, 0);
109 Arrays.fill(_n, 0);
110 Arrays.fill(_nn, 0);
111 Arrays.fill(_dn, 0);
112
113 _n[0] = -1.0;
114 _q[2] = 0.0;
115 _initialized =
116 compare(quantile, 0.0) == 0 ||
117 compare(quantile, 1.0) == 0;
118
119 _samples = 0;
120 }
121
122 private static void check(final double quantile) {
123 if (quantile < 0.0 || quantile > 1) {
124 throw new IllegalArgumentException(format(
125 "Quantile (%s) not in the valid range of [0, 1]", quantile
126 ));
127 }
128 }
129
130 /**
131 * Reset this object to its initial state.
132 */
133 public void reset() {
134 init(_quantile);
135 }
136
137 /**
138 * Return the <em>quantile</em> {@code this} object has been parametrized
139 * with.
140 *
141 * @since 3.1
142 *
143 * @return the <em>quantile</em> {@code this} object has been parametrized
144 * with
145 */
146 public double getQuantile() {
147 return _quantile;
148 }
149
150 /**
151 * Return the computed quantile value.
152 *
153 * @return the quantile value.
154 */
155 public double getValue() {
156 return _q[2];
157 }
158
159 /**
160 * Return the number of samples the quantile value was calculated of.
161 *
162 *
163 * @return the number of samples the quantile value was calculated of
164 */
165 public long getSamples() {
166 return _samples;
167 }
168
169 @Override
170 public void accept(final double value) {
171 if (!_initialized) {
172 initialize(value);
173 } else {
174 update(value);
175 }
176
177 ++_samples;
178 }
179
180 /**
181 * Combine two {@code Quantile} objects.
182 *
183 * @since 3.1
184 *
185 * @param other the other {@code Quantile} object to combine
186 * @return {@code this}
187 * @throws java.lang.NullPointerException if the {@code other} object is
188 * {@code null}.
189 * @throws java.lang.IllegalArgumentException if the {@link #getQuantile}
190 * of the {@code other} object differs from {@code this} one.
191 */
192 public Quantile combine(final Quantile other) {
193 if (Double.compare(_quantile, other._quantile) != 0) {
194 throw new IllegalArgumentException(format(
195 "Can't perform combine, the quantile are not equal: %s != %s",
196 _quantile, other._quantile
197 ));
198 }
199
200 _samples += other._samples;
201
202 if (_quantile == 0.0) {
203 _q[2] = Math.min(_q[2], other._q[2]);
204 } else if (_quantile == 1.0) {
205 _q[2] = Math.max(_q[2], other._q[2]);
206 } else {
207 // Combine the marker positions.
208 _n[1] += other._n[1];
209 _n[2] += other._n[2];
210 _n[3] += other._n[3];
211 _n[4] += other._n[4];
212
213 // Combine the marker height.
214 _q[0] = Math.min(_q[0], other._q[0]);
215 _q[1] = (_q[1] + other._q[1])*0.5;
216 _q[2] = (_q[2] + other._q[2])*0.5;
217 _q[3] = (_q[3] + other._q[3])*0.5;
218 _q[4] = Math.max(_q[4], other._q[4]);
219
220 // Combine position of markers.
221 _nn[0] += other._nn[0];
222 _nn[1] += other._nn[1];
223 _nn[2] += other._nn[2];
224
225 adjustMarkerHeights();
226 }
227
228 return this;
229 }
230
231 private void initialize(double value) {
232 if (_n[0] < 0.0) {
233 _n[0] = 0.0;
234 _q[0] = value;
235 } else if (_n[1] == 0.0) {
236 _n[1] = 1.0;
237 _q[1] = value;
238 } else if (_n[2] == 0.0) {
239 _n[2] = 2.0;
240 _q[2] = value;
241 } else if (_n[3] == 0.0) {
242 _n[3] = 3.0;
243 _q[3] = value;
244 } else if (_n[4] == 0.0) {
245 _n[4] = 4.0;
246 _q[4] = value;
247 }
248
249 if (_n[4] != 0.0) {
250 Arrays.sort(_q);
251
252 _nn[0] = 2.0*_quantile;
253 _nn[1] = 4.0*_quantile;
254 _nn[2] = 2.0*_quantile + 2.0;
255
256 _dn[0] = _quantile/2.0;
257 _dn[1] = _quantile;
258 _dn[2] = (1.0 + _quantile)/2.0;
259
260 _initialized = true;
261 }
262 }
263
264 private void update(double value) {
265 assert _initialized;
266
267 // If min or max, handle as special case; otherwise, ...
268 if (_quantile == 0.0) {
269 if (value < _q[2]) {
270 _q[2] = value;
271 }
272 } else if (_quantile == 1.0) {
273 if (value > _q[2]) {
274 _q[2] = value;
275 }
276 } else {
277 // Increment marker locations and update min and max.
278 if (value < _q[0]) {
279 ++_n[1]; ++_n[2]; ++_n[3]; ++_n[4]; _q[0] = value;
280 } else if (value < _q[1]) {
281 ++_n[1]; ++_n[2]; ++_n[3]; ++_n[4];
282 } else if (value < _q[2]) {
283 ++_n[2]; ++_n[3]; ++_n[4];
284 } else if (value < _q[3]) {
285 ++_n[3]; ++_n[4];
286 } else if (value < _q[4]) {
287 ++_n[4];
288 } else {
289 ++_n[4]; _q[4] = value;
290 }
291
292 // Increment positions of markers k + 1
293 _nn[0] += _dn[0];
294 _nn[1] += _dn[1];
295 _nn[2] += _dn[2];
296
297 adjustMarkerHeights();
298 }
299 }
300
301 // Adjust heights of markers 0 to 2 if necessary
302 private void adjustMarkerHeights() {
303 double mm = _n[1] - 1.0;
304 double mp = _n[1] + 1.0;
305 if (_nn[0] >= mp && _n[2] > mp) {
306 _q[1] = qPlus(mp, _n[0], _n[1], _n[2], _q[0], _q[1], _q[2]);
307 _n[1] = mp;
308 } else if (_nn[0] <= mm && _n[0] < mm) {
309 _q[1] = qMinus(mm, _n[0], _n[1], _n[2], _q[0], _q[1], _q[2]);
310 _n[1] = mm;
311 }
312
313 mm = _n[2] - 1.0;
314 mp = _n[2] + 1.0;
315 if (_nn[1] >= mp && _n[3] > mp) {
316 _q[2] = qPlus(mp, _n[1], _n[2], _n[3], _q[1], _q[2], _q[3]);
317 _n[2] = mp;
318 } else if (_nn[1] <= mm && _n[1] < mm) {
319 _q[2] = qMinus(mm, _n[1], _n[2], _n[3], _q[1], _q[2], _q[3]);
320 _n[2] = mm;
321 }
322
323 mm = _n[3] - 1.0;
324 mp = _n[3] + 1.0;
325 if (_nn[2] >= mp && _n[4] > mp) {
326 _q[3] = qPlus(mp, _n[2], _n[3], _n[4], _q[2], _q[3], _q[4]);
327 _n[3] = mp;
328 } else if (_nn[2] <= mm && _n[2] < mm) {
329 _q[3] = qMinus(mm, _n[2], _n[3], _n[4], _q[2], _q[3], _q[4]);
330 _n[3] = mm;
331 }
332 }
333
334 private static double qPlus(
335 final double mp,
336 final double m0,
337 final double m1,
338 final double m2,
339 final double q0,
340 final double q1,
341 final double q2
342 ) {
343 double result =
344 q1 +
345 ((mp - m0)*(q2 - q1)/(m2 - m1) +
346 (m2 - mp)*(q1 - q0)/(m1 - m0))/(m2 - m0);
347
348 if (result > q2) {
349 result = q1 + (q2 - q1)/(m2 - m1);
350 }
351
352 return result;
353 }
354
355 private static double qMinus(
356 final double mm,
357 final double m0,
358 final double m1,
359 final double m2,
360 final double q0,
361 final double q1,
362 final double q2
363 ) {
364 double result =
365 q1 -
366 ((mm - m0)*(q2 - q1)/(m2 - m1) +
367 (m2 - mm)*(q1 - q0)/(m1 - m0))/(m2 - m0);
368
369 if (q0 > result) {
370 result = q1 + (q0 - q1)/(m0 - m1);
371 }
372
373 return result;
374 }
375
376 /**
377 * Compares the state of two {@code Quantile} objects. This is
378 * a replacement for the {@link #equals(Object)} which is not advisable to
379 * implement for this mutable object. If two object have the same state, it
380 * has still the same state when updated with the same value.
381 * <pre>{@code
382 * final Quantile q1 = ...;
383 * final Quantile q2 = ...;
384 *
385 * if (q1.sameState(q2)) {
386 * final double value = random.nextDouble();
387 * q1.accept(value);
388 * q2.accept(value);
389 *
390 * assert q1.sameState(q2);
391 * assert q2.sameState(q1);
392 * assert q1.sameState(q1);
393 * }
394 * }</pre>
395 *
396 * @since 3.7
397 *
398 * @param other the other object for the test
399 * @return {@code true} the {@code this} and the {@code other} objects have
400 * the same state, {@code false} otherwise
401 */
402 public boolean sameState(final Quantile other) {
403 return Objects.equals(_quantile, other._quantile) &&
404 Arrays.equals(_dn, other._dn) &&
405 Arrays.equals(_n, other._n) &&
406 Arrays.equals(_nn, other._nn) &&
407 Arrays.equals(_q, other._q);
408 }
409
410 @Override
411 public String toString() {
412 return format(
413 "%s[samples=%d, quantile=%f]",
414 getClass().getSimpleName(), getSamples(), getValue()
415 );
416 }
417
418
419 static Quantile median() {
420 return new Quantile(0.5);
421 }
422
423 /**
424 * Return a {@code Collector} which applies an double-producing mapping
425 * function to each input element, and returns quantiles for the resulting
426 * values.
427 *
428 * <pre>{@code
429 * final Stream<SomeObject> stream = ...
430 * final Quantile quantile = stream
431 * .collect(toQuantile(0.25, v -> v.doubleValue()));
432 * }</pre>
433 *
434 * @param quantile the wished quantile value.
435 * @param mapper a mapping function to apply to each element
436 * @param <T> the type of the input elements
437 * @return a {@code Collector} implementing the quantiles reduction
438 * @throws java.lang.NullPointerException if the given {@code mapper} is
439 * {@code null}
440 * @throws IllegalArgumentException if the {@code quantile} is not in the
441 * range {@code [0, 1]}.
442 */
443 public static <T> Collector<T, ?, Quantile> toQuantile(
444 final double quantile,
445 final ToDoubleFunction<? super T> mapper
446 ) {
447 check(quantile);
448 requireNonNull(mapper);
449
450 return Collector.of(
451 () -> new Quantile(quantile),
452 (r, t) -> r.accept(mapper.applyAsDouble(t)),
453 Quantile::combine
454 );
455 }
456
457 /**
458 * Return a {@code Collector} which applies an double-producing mapping
459 * function to each input element, and returns the median for the resulting
460 * values.
461 *
462 * <pre>{@code
463 * final Stream<SomeObject> stream = ...
464 * final Quantile median = stream.collect(toMedian(v -> v.doubleValue()));
465 * }</pre>
466 *
467 * @param mapper a mapping function to apply to each element
468 * @param <T> the type of the input elements
469 * @return a {@code Collector} implementing the quantiles reduction
470 * @throws java.lang.NullPointerException if the given {@code mapper} is
471 * {@code null}
472 */
473 public static <T> Collector<T, ?, Quantile> toMedian(
474 final ToDoubleFunction<? super T> mapper
475 ) {
476 requireNonNull(mapper);
477
478 return Collector.of(
479 Quantile::median,
480 (r, t) -> r.accept(mapper.applyAsDouble(t)),
481 Quantile::combine
482 );
483 }
484
485 }
|