001 /*
002 * Java Genetic Algorithm Library (jenetics-3.9.0).
003 * Copyright (c) 2007-2017 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmx.at)
019 */
020 package org.jenetics.stat;
021
022 import static java.lang.Double.compare;
023 import static java.lang.String.format;
024 import static java.util.Objects.requireNonNull;
025
026 import java.util.Arrays;
027 import java.util.Objects;
028 import java.util.function.DoubleConsumer;
029 import java.util.function.ToDoubleFunction;
030 import java.util.stream.Collector;
031
032 /**
033 * Implementation of the quantile estimation algorithm published by
034 * <p>
035 * <strong>Raj JAIN and Imrich CHLAMTAC</strong>:
036 * <em>
037 * The P<sup>2</sup> Algorithm for Dynamic Calculation of Quantiles and
038 * Histograms Without Storing Observations
039 * </em>
040 * <br>
041 * [<a href="http://www.cse.wustl.edu/~jain/papers/ftp/psqr.pdf">Communications
042 * of the ACM; October 1985, Volume 28, Number 10</a>]
043 * <p>
044 * This class is designed to work with (though does not require) streams. For
045 * example, you can compute the quantile with:
046 * <pre>{@code
047 * final DoubleStream stream = ...
048 * final Quantile quantile = stream.collect(
049 * () -> new Quantile(0.23),
050 * Quantile::accept,
051 * Quantile::combine
052 * );
053 * }</pre>
054 *
055 * <p>
056 * <b>Implementation note:</b>
057 * <i>This implementation is not thread safe. However, it is safe to use on a
058 * parallel stream, because the parallel implementation of
059 * {@link java.util.stream.Stream#collect Stream.collect()}provides the
060 * necessary partitioning, isolation, and merging of results for safe and
061 * efficient parallel execution.</i>
062 * <br>
063 * <i>Using this class in the {@code collect} method of an parallel stream can
064 * lead to an reduced accuracy of the quantile value. Since this implementation
065 * is an estimation algorithm, combining the estimations will only work for
066 * large streams ({@code size >> 1000}).</i>
067 *
068 * @see <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia: Quantile</a>
069 *
070 * @author <a href="mailto:franz.wilhelmstoetter@gmx.at">Franz Wilhelmstötter</a>
071 * @since 1.0
072 * @version 3.7
073 */
074 public class Quantile implements DoubleConsumer {
075
076 private long _samples = 0;
077
078 // The desired quantile.
079 private final double _quantile;
080
081 // Marker heights.
082 private final double[] _q = {0, 0, 0, 0, 0};
083
084 // Marker positions.
085 private final double[] _n = {0, 0, 0, 0, 0};
086
087 // Desired marker positions.
088 private final double[] _nn = {0, 0, 0};
089
090 // Desired marker position increments.
091 private final double[] _dn = {0, 0, 0};
092
093 private boolean _initialized;
094
095 /**
096 * Create a new quantile accumulator with the given value.
097 *
098 * @param quantile the wished quantile value.
099 * @throws IllegalArgumentException if the {@code quantile} is not in the
100 * range {@code [0, 1]}.
101 */
102 public Quantile(final double quantile) {
103 _quantile = quantile;
104 init(quantile);
105 }
106
107 private void init(final double quantile) {
108 check(quantile);
109
110 Arrays.fill(_q, 0);
111 Arrays.fill(_n, 0);
112 Arrays.fill(_nn, 0);
113 Arrays.fill(_dn, 0);
114
115 _n[0] = -1.0;
116 _q[2] = 0.0;
117 _initialized =
118 compare(quantile, 0.0) == 0 ||
119 compare(quantile, 1.0) == 0;
120
121 _samples = 0;
122 }
123
124 private static void check(final double quantile) {
125 if (quantile < 0.0 || quantile > 1) {
126 throw new IllegalArgumentException(format(
127 "Quantile (%s) not in the valid range of [0, 1]", quantile
128 ));
129 }
130 }
131
132 /**
133 * Reset this object to its initial state.
134 */
135 public void reset() {
136 init(_quantile);
137 }
138
139 /**
140 * Return the <em>quantile</em> {@code this} object has been parametrized
141 * with.
142 *
143 * @since 3.1
144 *
145 * @return the <em>quantile</em> {@code this} object has been parametrized
146 * with
147 */
148 public double getQuantile() {
149 return _quantile;
150 }
151
152 /**
153 * Return the computed quantile value.
154 *
155 * @return the quantile value.
156 */
157 public double getValue() {
158 return _q[2];
159 }
160
161 /**
162 * Return the number of samples the quantile value was calculated of.
163 *
164 *
165 * @return the number of samples the quantile value was calculated of
166 */
167 public long getSamples() {
168 return _samples;
169 }
170
171 @Override
172 public void accept(final double value) {
173 if (!_initialized) {
174 initialize(value);
175 } else {
176 update(value);
177 }
178
179 ++_samples;
180 }
181
182 /**
183 * Combine two {@code Quantile} objects.
184 *
185 * @since 3.1
186 *
187 * @param other the other {@code Quantile} object to combine
188 * @return {@code this}
189 * @throws java.lang.NullPointerException if the {@code other} object is
190 * {@code null}.
191 * @throws java.lang.IllegalArgumentException if the {@link #getQuantile}
192 * of the {@code other} object differs from {@code this} one.
193 */
194 public Quantile combine(final Quantile other) {
195 if (_quantile != other._quantile) {
196 throw new IllegalArgumentException(format(
197 "Can't perform combine, the quantile are not equal: %s != %s",
198 _quantile, other._quantile
199 ));
200 }
201
202 _samples += other._samples;
203
204 if (_quantile == 0.0) {
205 _q[2] = Math.min(_q[2], other._q[2]);
206 } else if (_quantile == 1.0) {
207 _q[2] = Math.max(_q[2], other._q[2]);
208 } else {
209 // Combine the marker positions.
210 _n[1] += other._n[1];
211 _n[2] += other._n[2];
212 _n[3] += other._n[3];
213 _n[4] += other._n[4];
214
215 // Combine the marker height.
216 _q[0] = Math.min(_q[0], other._q[0]);
217 _q[1] = (_q[1] + other._q[1])*0.5;
218 _q[2] = (_q[2] + other._q[2])*0.5;
219 _q[3] = (_q[3] + other._q[3])*0.5;
220 _q[4] = Math.max(_q[4], other._q[4]);
221
222 // Combine position of markers.
223 _nn[0] += other._nn[0];
224 _nn[1] += other._nn[1];
225 _nn[2] += other._nn[2];
226
227 adjustMarkerHeights();
228 }
229
230 return this;
231 }
232
233 private void initialize(double value) {
234 if (_n[0] < 0.0) {
235 _n[0] = 0.0;
236 _q[0] = value;
237 } else if (_n[1] == 0.0) {
238 _n[1] = 1.0;
239 _q[1] = value;
240 } else if (_n[2] == 0.0) {
241 _n[2] = 2.0;
242 _q[2] = value;
243 } else if (_n[3] == 0.0) {
244 _n[3] = 3.0;
245 _q[3] = value;
246 } else if (_n[4] == 0.0) {
247 _n[4] = 4.0;
248 _q[4] = value;
249 }
250
251 if (_n[4] != 0.0) {
252 Arrays.sort(_q);
253
254 _nn[0] = 2.0*_quantile;
255 _nn[1] = 4.0*_quantile;
256 _nn[2] = 2.0*_quantile + 2.0;
257
258 _dn[0] = _quantile/2.0;
259 _dn[1] = _quantile;
260 _dn[2] = (1.0 + _quantile)/2.0;
261
262 _initialized = true;
263 }
264 }
265
266 private void update(double value) {
267 assert _initialized;
268
269 // If min or max, handle as special case; otherwise, ...
270 if (_quantile == 0.0) {
271 if (value < _q[2]) {
272 _q[2] = value;
273 }
274 } else if (_quantile == 1.0) {
275 if (value > _q[2]) {
276 _q[2] = value;
277 }
278 } else {
279 // Increment marker locations and update min and max.
280 if (value < _q[0]) {
281 ++_n[1]; ++_n[2]; ++_n[3]; ++_n[4]; _q[0] = value;
282 } else if (value < _q[1]) {
283 ++_n[1]; ++_n[2]; ++_n[3]; ++_n[4];
284 } else if (value < _q[2]) {
285 ++_n[2]; ++_n[3]; ++_n[4];
286 } else if (value < _q[3]) {
287 ++_n[3]; ++_n[4];
288 } else if (value < _q[4]) {
289 ++_n[4];
290 } else {
291 ++_n[4]; _q[4] = value;
292 }
293
294 // Increment positions of markers k + 1
295 _nn[0] += _dn[0];
296 _nn[1] += _dn[1];
297 _nn[2] += _dn[2];
298
299 adjustMarkerHeights();
300 }
301 }
302
303 // Adjust heights of markers 0 to 2 if necessary
304 private void adjustMarkerHeights() {
305 double mm = _n[1] - 1.0;
306 double mp = _n[1] + 1.0;
307 if (_nn[0] >= mp && _n[2] > mp) {
308 _q[1] = qPlus(mp, _n[0], _n[1], _n[2], _q[0], _q[1], _q[2]);
309 _n[1] = mp;
310 } else if (_nn[0] <= mm && _n[0] < mm) {
311 _q[1] = qMinus(mm, _n[0], _n[1], _n[2], _q[0], _q[1], _q[2]);
312 _n[1] = mm;
313 }
314
315 mm = _n[2] - 1.0;
316 mp = _n[2] + 1.0;
317 if (_nn[1] >= mp && _n[3] > mp) {
318 _q[2] = qPlus(mp, _n[1], _n[2], _n[3], _q[1], _q[2], _q[3]);
319 _n[2] = mp;
320 } else if (_nn[1] <= mm && _n[1] < mm) {
321 _q[2] = qMinus(mm, _n[1], _n[2], _n[3], _q[1], _q[2], _q[3]);
322 _n[2] = mm;
323 }
324
325 mm = _n[3] - 1.0;
326 mp = _n[3] + 1.0;
327 if (_nn[2] >= mp && _n[4] > mp) {
328 _q[3] = qPlus(mp, _n[2], _n[3], _n[4], _q[2], _q[3], _q[4]);
329 _n[3] = mp;
330 } else if (_nn[2] <= mm && _n[2] < mm) {
331 _q[3] = qMinus(mm, _n[2], _n[3], _n[4], _q[2], _q[3], _q[4]);
332 _n[3] = mm;
333 }
334 }
335
336 private static double qPlus(
337 final double mp,
338 final double m0,
339 final double m1,
340 final double m2,
341 final double q0,
342 final double q1,
343 final double q2
344 ) {
345 double result = q1 +
346 ((mp - m0)*(q2 - q1)/(m2 - m1) +
347 (m2 - mp)*(q1 - q0)/(m1 - m0))/(m2 - m0);
348
349 if (result > q2) {
350 result = q1 + (q2 - q1)/(m2 - m1);
351 }
352
353 return result;
354 }
355
356 private static double qMinus(
357 final double mm,
358 final double m0,
359 final double m1,
360 final double m2,
361 final double q0,
362 final double q1,
363 final double q2
364 ) {
365 double result = q1 -
366 ((mm - m0)*(q2 - q1)/(m2 - m1) +
367 (m2 - mm)*(q1 - q0)/(m1 - m0))/(m2 - m0);
368
369 if (q0 > result) {
370 result = q1 + (q0 - q1)/(m0 - m1);
371 }
372
373 return result;
374 }
375
376 /**
377 * Compares the state of two {@code Quantile} objects. This is
378 * a replacement for the {@link #equals(Object)} which is not advisable to
379 * implement for this mutable object. If two object have the same state, it
380 * has still the same state when updated with the same value.
381 * <pre>{@code
382 * final Quantile q1 = ...;
383 * final Quantile q2 = ...;
384 *
385 * if (q1.sameState(q2)) {
386 * final double value = random.nextDouble();
387 * q1.accept(value);
388 * q2.accept(value);
389 *
390 * assert q1.sameState(q2);
391 * assert q2.sameState(q1);
392 * assert q1.sameState(q1);
393 * }
394 * }</pre>
395 *
396 * @since 3.7
397 *
398 * @param other the other object for the test
399 * @return {@code true} the {@code this} and the {@code other} objects have
400 * the same state, {@code false} otherwise
401 */
402 public boolean sameState(final Quantile other) {
403 return Objects.equals(_quantile, other._quantile) &&
404 Arrays.equals(_dn, other._dn) &&
405 Arrays.equals(_n, other._n) &&
406 Arrays.equals(_nn, other._nn) &&
407 Arrays.equals(_q, other._q);
408 }
409
410 @Override
411 public String toString() {
412 return format(
413 "%s[samples=%d, quantile=%f]",
414 getClass().getSimpleName(), getSamples(), getValue()
415 );
416 }
417
418
419 static Quantile median() {
420 return new Quantile(0.5);
421 }
422
423 /**
424 * Return a {@code Collector} which applies an double-producing mapping
425 * function to each input element, and returns quantiles for the resulting
426 * values.
427 *
428 * <pre>{@code
429 * final Stream<SomeObject> stream = ...
430 * final Quantile quantile = stream
431 * .collect(toQuantile(0.25, v -> v.doubleValue()));
432 * }</pre>
433 *
434 * @param quantile the wished quantile value.
435 * @param mapper a mapping function to apply to each element
436 * @param <T> the type of the input elements
437 * @return a {@code Collector} implementing the quantiles reduction
438 * @throws java.lang.NullPointerException if the given {@code mapper} is
439 * {@code null}
440 * @throws IllegalArgumentException if the {@code quantile} is not in the
441 * range {@code [0, 1]}.
442 */
443 public static <T> Collector<T, ?, Quantile> toQuantile(
444 final double quantile,
445 final ToDoubleFunction<? super T> mapper
446 ) {
447 check(quantile);
448 requireNonNull(mapper);
449
450 return Collector.of(
451 () -> new Quantile(quantile),
452 (r, t) -> r.accept(mapper.applyAsDouble(t)),
453 Quantile::combine
454 );
455 }
456
457 /**
458 * Return a {@code Collector} which applies an double-producing mapping
459 * function to each input element, and returns the median for the resulting
460 * values.
461 *
462 * <pre>{@code
463 * final Stream<SomeObject> stream = ...
464 * final Quantile median = stream.collect(toMedian(v -> v.doubleValue()));
465 * }</pre>
466 *
467 * @param mapper a mapping function to apply to each element
468 * @param <T> the type of the input elements
469 * @return a {@code Collector} implementing the quantiles reduction
470 * @throws java.lang.NullPointerException if the given {@code mapper} is
471 * {@code null}
472 */
473 public static <T> Collector<T, ?, Quantile> toMedian(
474 final ToDoubleFunction<? super T> mapper
475 ) {
476 requireNonNull(mapper);
477
478 return Collector.of(
479 Quantile::median,
480 (r, t) -> r.accept(mapper.applyAsDouble(t)),
481 Quantile::combine
482 );
483 }
484
485 }
|