001 /*
002 * Java Genetic Algorithm Library (jenetics-3.5.0).
003 * Copyright (c) 2007-2016 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmx.at)
019 */
020 package org.jenetics.stat;
021
022 import static java.lang.Double.compare;
023 import static java.lang.String.format;
024 import static java.util.Objects.requireNonNull;
025 import static org.jenetics.internal.util.Equality.eq;
026
027 import java.util.Arrays;
028 import java.util.function.DoubleConsumer;
029 import java.util.function.ToDoubleFunction;
030 import java.util.stream.Collector;
031
032 import org.jenetics.internal.util.Hash;
033
034 /**
035 * Implementation of the quantile estimation algorithm published by
036 * <p>
037 * <strong>Raj JAIN and Imrich CHLAMTAC</strong>:
038 * <em>
039 * The P<sup>2</sup> Algorithm for Dynamic Calculation of Quantiles and
040 * Histograms Without Storing Observations
041 * </em>
042 * <br>
043 * [<a href="http://www.cse.wustl.edu/~jain/papers/ftp/psqr.pdf">Communications
044 * of the ACM; October 1985, Volume 28, Number 10</a>]
045 * <p>
046 * This class is designed to work with (though does not require) streams. For
047 * example, you can compute the quantile with:
048 * <pre>{@code
049 * final DoubleStream stream = ...
050 * final Quantile quantile = stream.collect(
051 * () -> new Quantile(0.23),
052 * Quantile::accept,
053 * Quantile::combine
054 * );
055 * }</pre>
056 *
057 * <p>
058 * <b>Implementation note:</b>
059 * <i>This implementation is not thread safe. However, it is safe to use on a
060 * parallel stream, because the parallel implementation of
061 * {@link java.util.stream.Stream#collect Stream.collect()}provides the
062 * necessary partitioning, isolation, and merging of results for safe and
063 * efficient parallel execution.</i>
064 * <br>
065 * <i>Using this class in the {@code collect} method of an parallel stream can
066 * lead to an reduced accuracy of the quantile value. Since this implementation
067 * is an estimation algorithm, combining the estimations will only work for
068 * large streams ({@code size >> 1000}).</i>
069 *
070 * @see <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia: Quantile</a>
071 *
072 * @author <a href="mailto:franz.wilhelmstoetter@gmx.at">Franz Wilhelmstötter</a>
073 * @since 1.0
074 * @version 3.1
075 */
076 public class Quantile implements DoubleConsumer {
077
078 private long _samples = 0;
079
080 // The desired quantile.
081 private final double _quantile;
082
083 // Marker heights.
084 private final double[] _q = {0, 0, 0, 0, 0};
085
086 // Marker positions.
087 private final double[] _n = {0, 0, 0, 0, 0};
088
089 // Desired marker positions.
090 private final double[] _nn = {0, 0, 0};
091
092 // Desired marker position increments.
093 private final double[] _dn = {0, 0, 0};
094
095 private boolean _initialized;
096
097 /**
098 * Create a new quantile accumulator with the given value.
099 *
100 * @param quantile the wished quantile value.
101 * @throws IllegalArgumentException if the {@code quantile} is not in the
102 * range {@code [0, 1]}.
103 */
104 public Quantile(final double quantile) {
105 _quantile = quantile;
106 init(quantile);
107 }
108
109 private void init(final double quantile) {
110 check(quantile);
111
112 Arrays.fill(_q, 0);
113 Arrays.fill(_n, 0);
114 Arrays.fill(_nn, 0);
115 Arrays.fill(_dn, 0);
116
117 _n[0] = -1.0;
118 _q[2] = 0.0;
119 _initialized =
120 compare(quantile, 0.0) == 0 ||
121 compare(quantile, 1.0) == 0;
122
123 _samples = 0;
124 }
125
126 private static void check(final double quantile) {
127 if (quantile < 0.0 || quantile > 1) {
128 throw new IllegalArgumentException(format(
129 "Quantile (%s) not in the valid range of [0, 1]", quantile
130 ));
131 }
132 }
133
134 /**
135 * Reset this object to its initial state.
136 */
137 public void reset() {
138 init(_quantile);
139 }
140
141 /**
142 * Return the <em>quantile</em> {@code this} object has been parametrized
143 * with.
144 *
145 * @since 3.1
146 *
147 * @return the <em>quantile</em> {@code this} object has been parametrized
148 * with
149 */
150 public double getQuantile() {
151 return _quantile;
152 }
153
154 /**
155 * Return the computed quantile value.
156 *
157 * @return the quantile value.
158 */
159 public double getValue() {
160 return _q[2];
161 }
162
163 /**
164 * Return the number of samples the quantile value was calculated of.
165 *
166 *
167 * @return the number of samples the quantile value was calculated of
168 */
169 public long getSamples() {
170 return _samples;
171 }
172
173 @Override
174 public void accept(final double value) {
175 if (!_initialized) {
176 initialize(value);
177 } else {
178 update(value);
179 }
180
181 ++_samples;
182 }
183
184 /**
185 * Combine two {@code Quantile} objects.
186 *
187 * @since 3.1
188 *
189 * @param other the other {@code Quantile} object to combine
190 * @return {@code this}
191 * @throws java.lang.NullPointerException if the {@code other} object is
192 * {@code null}.
193 * @throws java.lang.IllegalArgumentException if the {@link #getQuantile}
194 * of the {@code other} object differs from {@code this} one.
195 */
196 public Quantile combine(final Quantile other) {
197 if (_quantile != other._quantile) {
198 throw new IllegalArgumentException(format(
199 "Can't perform combine, the quantile are not equal: %s != %s",
200 _quantile, other._quantile
201 ));
202 }
203
204 _samples += other._samples;
205
206 if (_quantile == 0.0) {
207 _q[2] = Math.min(_q[2], other._q[2]);
208 } else if (_quantile == 1.0) {
209 _q[2] = Math.max(_q[2], other._q[2]);
210 } else {
211 // Combine the marker positions.
212 _n[1] += other._n[1];
213 _n[2] += other._n[2];
214 _n[3] += other._n[3];
215 _n[4] += other._n[4];
216
217 // Combine the marker height.
218 _q[0] = Math.min(_q[0], other._q[0]);
219 _q[1] = (_q[1] + other._q[1])*0.5;
220 _q[2] = (_q[2] + other._q[2])*0.5;
221 _q[3] = (_q[3] + other._q[3])*0.5;
222 _q[4] = Math.max(_q[4], other._q[4]);
223
224 // Combine position of markers.
225 _nn[0] += other._nn[0];
226 _nn[1] += other._nn[1];
227 _nn[2] += other._nn[2];
228
229 adjustMarkerHeights();
230 }
231
232 return this;
233 }
234
235 private void initialize(double value) {
236 if (_n[0] < 0.0) {
237 _n[0] = 0.0;
238 _q[0] = value;
239 } else if (_n[1] == 0.0) {
240 _n[1] = 1.0;
241 _q[1] = value;
242 } else if (_n[2] == 0.0) {
243 _n[2] = 2.0;
244 _q[2] = value;
245 } else if (_n[3] == 0.0) {
246 _n[3] = 3.0;
247 _q[3] = value;
248 } else if (_n[4] == 0.0) {
249 _n[4] = 4.0;
250 _q[4] = value;
251 }
252
253 if (_n[4] != 0.0) {
254 Arrays.sort(_q);
255
256 _nn[0] = 2.0*_quantile;
257 _nn[1] = 4.0*_quantile;
258 _nn[2] = 2.0*_quantile + 2.0;
259
260 _dn[0] = _quantile/2.0;
261 _dn[1] = _quantile;
262 _dn[2] = (1.0 + _quantile)/2.0;
263
264 _initialized = true;
265 }
266 }
267
268 private void update(double value) {
269 assert _initialized;
270
271 // If min or max, handle as special case; otherwise, ...
272 if (_quantile == 0.0) {
273 if (value < _q[2]) {
274 _q[2] = value;
275 }
276 } else if (_quantile == 1.0) {
277 if (value > _q[2]) {
278 _q[2] = value;
279 }
280 } else {
281 // Increment marker locations and update min and max.
282 if (value < _q[0]) {
283 ++_n[1]; ++_n[2]; ++_n[3]; ++_n[4]; _q[0] = value;
284 } else if (value < _q[1]) {
285 ++_n[1]; ++_n[2]; ++_n[3]; ++_n[4];
286 } else if (value < _q[2]) {
287 ++_n[2]; ++_n[3]; ++_n[4];
288 } else if (value < _q[3]) {
289 ++_n[3]; ++_n[4];
290 } else if (value < _q[4]) {
291 ++_n[4];
292 } else {
293 ++_n[4]; _q[4] = value;
294 }
295
296 // Increment positions of markers k + 1
297 _nn[0] += _dn[0];
298 _nn[1] += _dn[1];
299 _nn[2] += _dn[2];
300
301 adjustMarkerHeights();
302 }
303 }
304
305 // Adjust heights of markers 0 to 2 if necessary
306 private void adjustMarkerHeights() {
307 double mm = _n[1] - 1.0;
308 double mp = _n[1] + 1.0;
309 if (_nn[0] >= mp && _n[2] > mp) {
310 _q[1] = qPlus(mp, _n[0], _n[1], _n[2], _q[0], _q[1], _q[2]);
311 _n[1] = mp;
312 } else if (_nn[0] <= mm && _n[0] < mm) {
313 _q[1] = qMinus(mm, _n[0], _n[1], _n[2], _q[0], _q[1], _q[2]);
314 _n[1] = mm;
315 }
316
317 mm = _n[2] - 1.0;
318 mp = _n[2] + 1.0;
319 if (_nn[1] >= mp && _n[3] > mp) {
320 _q[2] = qPlus(mp, _n[1], _n[2], _n[3], _q[1], _q[2], _q[3]);
321 _n[2] = mp;
322 } else if (_nn[1] <= mm && _n[1] < mm) {
323 _q[2] = qMinus(mm, _n[1], _n[2], _n[3], _q[1], _q[2], _q[3]);
324 _n[2] = mm;
325 }
326
327 mm = _n[3] - 1.0;
328 mp = _n[3] + 1.0;
329 if (_nn[2] >= mp && _n[4] > mp) {
330 _q[3] = qPlus(mp, _n[2], _n[3], _n[4], _q[2], _q[3], _q[4]);
331 _n[3] = mp;
332 } else if (_nn[2] <= mm && _n[2] < mm) {
333 _q[3] = qMinus(mm, _n[2], _n[3], _n[4], _q[2], _q[3], _q[4]);
334 _n[3] = mm;
335 }
336 }
337
338 private static double qPlus(
339 final double mp,
340 final double m0,
341 final double m1,
342 final double m2,
343 final double q0,
344 final double q1,
345 final double q2
346 ) {
347 double result = q1 +
348 ((mp - m0)*(q2 - q1)/(m2 - m1) +
349 (m2 - mp)*(q1 - q0)/(m1 - m0))/(m2 - m0);
350
351 if (result > q2) {
352 result = q1 + (q2 - q1)/(m2 - m1);
353 }
354
355 return result;
356 }
357
358 private static double qMinus(
359 final double mm,
360 final double m0,
361 final double m1,
362 final double m2,
363 final double q0,
364 final double q1,
365 final double q2
366 ) {
367 double result = q1 -
368 ((mm - m0)*(q2 - q1)/(m2 - m1) +
369 (m2 - mm)*(q1 - q0)/(m1 - m0))/(m2 - m0);
370
371 if (q0 > result) {
372 result = q1 + (q0 - q1)/(m0 - m1);
373 }
374
375 return result;
376 }
377
378 @Override
379 public int hashCode() {
380 return Hash.of(getClass()).
381 and(super.hashCode()).
382 and(_quantile).
383 and(_dn).
384 and(_n).
385 and(_nn).
386 and(_q).value();
387 }
388
389 @Override
390 public boolean equals(final Object obj) {
391 return obj instanceof Quantile &&
392 eq(_quantile, ((Quantile)obj)._quantile) &&
393 eq(_dn, ((Quantile)obj)._dn) &&
394 eq(_n, ((Quantile)obj)._n) &&
395 eq(_nn, ((Quantile)obj)._nn) &&
396 eq(_q, ((Quantile)obj)._q) &&
397 super.equals(obj);
398 }
399
400 @Override
401 public String toString() {
402 return format(
403 "%s[samples=%d, quantile=%f]",
404 getClass().getSimpleName(), getSamples(), getValue()
405 );
406 }
407
408
409 static Quantile median() {
410 return new Quantile(0.5);
411 }
412
413 /**
414 * Return a {@code Collector} which applies an double-producing mapping
415 * function to each input element, and returns quantiles for the resulting
416 * values.
417 *
418 * <pre>{@code
419 * final Stream<SomeObject> stream = ...
420 * final Quantile quantile = stream
421 * .collect(toQuantile(0.25, v -> v.doubleValue()));
422 * }</pre>
423 *
424 * @param quantile the wished quantile value.
425 * @param mapper a mapping function to apply to each element
426 * @param <T> the type of the input elements
427 * @return a {@code Collector} implementing the quantiles reduction
428 * @throws java.lang.NullPointerException if the given {@code mapper} is
429 * {@code null}
430 * @throws IllegalArgumentException if the {@code quantile} is not in the
431 * range {@code [0, 1]}.
432 */
433 public static <T> Collector<T, ?, Quantile> toQuantile(
434 final double quantile,
435 final ToDoubleFunction<? super T> mapper
436 ) {
437 check(quantile);
438 requireNonNull(mapper);
439
440 return Collector.of(
441 () -> new Quantile(quantile),
442 (r, t) -> r.accept(mapper.applyAsDouble(t)),
443 Quantile::combine
444 );
445 }
446
447 /**
448 * Return a {@code Collector} which applies an double-producing mapping
449 * function to each input element, and returns the median for the resulting
450 * values.
451 *
452 * <pre>{@code
453 * final Stream<SomeObject> stream = ...
454 * final Quantile median = stream.collect(toMedian(v -> v.doubleValue()));
455 * }</pre>
456 *
457 * @param mapper a mapping function to apply to each element
458 * @param <T> the type of the input elements
459 * @return a {@code Collector} implementing the quantiles reduction
460 * @throws java.lang.NullPointerException if the given {@code mapper} is
461 * {@code null}
462 */
463 public static <T> Collector<T, ?, Quantile> toMedian(
464 final ToDoubleFunction<? super T> mapper
465 ) {
466 requireNonNull(mapper);
467
468 return Collector.of(
469 Quantile::median,
470 (r, t) -> r.accept(mapper.applyAsDouble(t)),
471 Quantile::combine
472 );
473 }
474
475 }
|