001/* 002 * Java Genetic Algorithm Library (jenetics-8.1.0). 003 * Copyright (c) 2007-2024 Franz Wilhelmstötter 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 * Author: 018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmail.com) 019 */ 020package io.jenetics.stat; 021 022import static java.lang.Double.compare; 023import static java.lang.String.format; 024import static java.util.Objects.requireNonNull; 025 026import java.util.Arrays; 027import java.util.Objects; 028import java.util.function.DoubleConsumer; 029import java.util.function.ToDoubleFunction; 030import java.util.stream.Collector; 031 032/** 033 * Implementation of the quantile estimation algorithm published by 034 * <p> 035 * <strong>Raj JAIN and Imrich CHLAMTAC</strong>: 036 * <em> 037 * The P<sup>2</sup> Algorithm for Dynamic Calculation of Quantiles and 038 * Histograms Without Storing Observations 039 * </em> 040 * <br> 041 * [<a href="http://www.cse.wustl.edu/~jain/papers/ftp/psqr.pdf">Communications 042 * of the ACM; October 1985, Volume 28, Number 10</a>] 043 * <p> 044 * This class is designed to work with (though does not require) streams. For 045 * example, you can compute the quantile with: 046 * {@snippet lang="java": 047 * final DoubleStream stream = null; // @replace substring='null' replacement="..." 048 * final Quantile quantile = stream.collect( 049 * () -> new Quantile(0.23), 050 * Quantile::accept, 051 * Quantile::combine 052 * ); 053 * } 054 * 055 * @implNote 056 * This implementation is not thread safe. However, it is safe to use on a 057 * parallel stream, because the parallel implementation of 058 * {@link java.util.stream.Stream#collect Stream.collect()} provides the 059 * necessary partitioning, isolation, and merging of results for safe and 060 * efficient parallel execution. 061 * Using this class in the {@code collect} method of a parallel stream can 062 * lead to a reduced accuracy of the quantile value. Since this implementation 063 * is an estimation algorithm, combining the estimations will only work for 064 * large streams ({@code size >> 1000}). 065 * 066 * @see <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia: Quantile</a> 067 * 068 * @author <a href="mailto:franz.wilhelmstoetter@gmail.com">Franz Wilhelmstötter</a> 069 * @since 1.0 070 * @version 6.0 071 */ 072public class Quantile implements DoubleConsumer { 073 074 private long _count = 0; 075 076 // The desired quantile. 077 private final double _quantile; 078 079 // Marker heights. 080 private final double[] _q = {0, 0, 0, 0, 0}; 081 082 // Marker positions. 083 private final double[] _n = {0, 0, 0, 0, 0}; 084 085 // Desired marker positions. 086 private final double[] _nn = {0, 0, 0}; 087 088 // Desired marker position increments. 089 private final double[] _dn = {0, 0, 0}; 090 091 private boolean _initialized; 092 093 /** 094 * Create a new quantile accumulator with the given value. 095 * 096 * @param quantile the wished quantile value. 097 * @throws IllegalArgumentException if the {@code quantile} is not in the 098 * range {@code [0, 1]}. 099 */ 100 public Quantile(final double quantile) { 101 _quantile = quantile; 102 init(quantile); 103 } 104 105 private void init(final double quantile) { 106 check(quantile); 107 108 Arrays.fill(_q, 0); 109 Arrays.fill(_n, 0); 110 Arrays.fill(_nn, 0); 111 Arrays.fill(_dn, 0); 112 113 _n[0] = -1.0; 114 _q[2] = 0.0; 115 _initialized = 116 compare(quantile, 0.0) == 0 || 117 compare(quantile, 1.0) == 0; 118 119 _count = 0; 120 } 121 122 private static void check(final double quantile) { 123 if (quantile < 0.0 || quantile > 1) { 124 throw new IllegalArgumentException(format( 125 "Quantile (%s) not in the valid range of [0, 1]", quantile 126 )); 127 } 128 } 129 130 /** 131 * Reset this object to its initial state. 132 */ 133 public void reset() { 134 init(_quantile); 135 } 136 137 /** 138 * Return the <em>quantile</em> {@code this} object has been parametrized 139 * with. 140 * 141 * @since 3.1 142 * 143 * @return the <em>quantile</em> {@code this} object has been parametrized 144 * with 145 */ 146 public double quantile() { 147 return _quantile; 148 } 149 150 /** 151 * Return the computed quantile value. 152 * 153 * @return the quantile value. 154 */ 155 public double value() { 156 return _q[2]; 157 } 158 159 /** 160 * Return the number of samples the quantile value was calculated of. 161 * 162 * 163 * @return the number of samples the quantile value was calculated of 164 */ 165 public long count() { 166 return _count; 167 } 168 169 @Override 170 public void accept(final double value) { 171 if (!_initialized) { 172 initialize(value); 173 } else { 174 update(value); 175 } 176 177 ++_count; 178 } 179 180 /** 181 * Combine two {@code Quantile} objects. 182 * 183 * @since 3.1 184 * 185 * @param other the other {@code Quantile} object to combine 186 * @return {@code this} 187 * @throws java.lang.NullPointerException if the {@code other} object is 188 * {@code null}. 189 * @throws java.lang.IllegalArgumentException if the {@link #quantile} 190 * of the {@code other} object differs from {@code this} one. 191 */ 192 public Quantile combine(final Quantile other) { 193 if (Double.compare(_quantile, other._quantile) != 0) { 194 throw new IllegalArgumentException(format( 195 "Can't perform combine, the quantile are not equal: %s != %s", 196 _quantile, other._quantile 197 )); 198 } 199 200 _count += other._count; 201 202 if (_quantile == 0.0) { 203 _q[2] = Math.min(_q[2], other._q[2]); 204 } else if (_quantile == 1.0) { 205 _q[2] = Math.max(_q[2], other._q[2]); 206 } else { 207 // Combine the marker positions. 208 _n[1] += other._n[1]; 209 _n[2] += other._n[2]; 210 _n[3] += other._n[3]; 211 _n[4] += other._n[4]; 212 213 // Combine the marker height. 214 _q[0] = Math.min(_q[0], other._q[0]); 215 _q[1] = (_q[1] + other._q[1])*0.5; 216 _q[2] = (_q[2] + other._q[2])*0.5; 217 _q[3] = (_q[3] + other._q[3])*0.5; 218 _q[4] = Math.max(_q[4], other._q[4]); 219 220 // Combine position of markers. 221 _nn[0] += other._nn[0]; 222 _nn[1] += other._nn[1]; 223 _nn[2] += other._nn[2]; 224 225 adjustMarkerHeights(); 226 } 227 228 return this; 229 } 230 231 private void initialize(double value) { 232 if (_n[0] < 0.0) { 233 _n[0] = 0.0; 234 _q[0] = value; 235 } else if (_n[1] == 0.0) { 236 _n[1] = 1.0; 237 _q[1] = value; 238 } else if (_n[2] == 0.0) { 239 _n[2] = 2.0; 240 _q[2] = value; 241 } else if (_n[3] == 0.0) { 242 _n[3] = 3.0; 243 _q[3] = value; 244 } else if (_n[4] == 0.0) { 245 _n[4] = 4.0; 246 _q[4] = value; 247 } 248 249 if (_n[4] != 0.0) { 250 Arrays.sort(_q); 251 252 _nn[0] = 2.0*_quantile; 253 _nn[1] = 4.0*_quantile; 254 _nn[2] = 2.0*_quantile + 2.0; 255 256 _dn[0] = _quantile/2.0; 257 _dn[1] = _quantile; 258 _dn[2] = (1.0 + _quantile)/2.0; 259 260 _initialized = true; 261 } 262 } 263 264 private void update(double value) { 265 assert _initialized; 266 267 // If min or max, handle as a special case; otherwise, ... 268 if (_quantile == 0.0) { 269 if (value < _q[2]) { 270 _q[2] = value; 271 } 272 } else if (_quantile == 1.0) { 273 if (value > _q[2]) { 274 _q[2] = value; 275 } 276 } else { 277 // Increment marker locations and update min and max. 278 if (value < _q[0]) { 279 ++_n[1]; ++_n[2]; ++_n[3]; ++_n[4]; _q[0] = value; 280 } else if (value < _q[1]) { 281 ++_n[1]; ++_n[2]; ++_n[3]; ++_n[4]; 282 } else if (value < _q[2]) { 283 ++_n[2]; ++_n[3]; ++_n[4]; 284 } else if (value < _q[3]) { 285 ++_n[3]; ++_n[4]; 286 } else if (value < _q[4]) { 287 ++_n[4]; 288 } else { 289 ++_n[4]; _q[4] = value; 290 } 291 292 // Increment positions of markers k + 1 293 _nn[0] += _dn[0]; 294 _nn[1] += _dn[1]; 295 _nn[2] += _dn[2]; 296 297 adjustMarkerHeights(); 298 } 299 } 300 301 // Adjust heights of markers 0 to 2 if necessary 302 private void adjustMarkerHeights() { 303 double mm = _n[1] - 1.0; 304 double mp = _n[1] + 1.0; 305 if (_nn[0] >= mp && _n[2] > mp) { 306 _q[1] = qPlus(mp, _n[0], _n[1], _n[2], _q[0], _q[1], _q[2]); 307 _n[1] = mp; 308 } else if (_nn[0] <= mm && _n[0] < mm) { 309 _q[1] = qMinus(mm, _n[0], _n[1], _n[2], _q[0], _q[1], _q[2]); 310 _n[1] = mm; 311 } 312 313 mm = _n[2] - 1.0; 314 mp = _n[2] + 1.0; 315 if (_nn[1] >= mp && _n[3] > mp) { 316 _q[2] = qPlus(mp, _n[1], _n[2], _n[3], _q[1], _q[2], _q[3]); 317 _n[2] = mp; 318 } else if (_nn[1] <= mm && _n[1] < mm) { 319 _q[2] = qMinus(mm, _n[1], _n[2], _n[3], _q[1], _q[2], _q[3]); 320 _n[2] = mm; 321 } 322 323 mm = _n[3] - 1.0; 324 mp = _n[3] + 1.0; 325 if (_nn[2] >= mp && _n[4] > mp) { 326 _q[3] = qPlus(mp, _n[2], _n[3], _n[4], _q[2], _q[3], _q[4]); 327 _n[3] = mp; 328 } else if (_nn[2] <= mm && _n[2] < mm) { 329 _q[3] = qMinus(mm, _n[2], _n[3], _n[4], _q[2], _q[3], _q[4]); 330 _n[3] = mm; 331 } 332 } 333 334 private static double qPlus( 335 final double mp, 336 final double m0, 337 final double m1, 338 final double m2, 339 final double q0, 340 final double q1, 341 final double q2 342 ) { 343 double result = 344 q1 + 345 ((mp - m0)*(q2 - q1)/(m2 - m1) + 346 (m2 - mp)*(q1 - q0)/(m1 - m0))/(m2 - m0); 347 348 if (result > q2) { 349 result = q1 + (q2 - q1)/(m2 - m1); 350 } 351 352 return result; 353 } 354 355 private static double qMinus( 356 final double mm, 357 final double m0, 358 final double m1, 359 final double m2, 360 final double q0, 361 final double q1, 362 final double q2 363 ) { 364 double result = 365 q1 - 366 ((mm - m0)*(q2 - q1)/(m2 - m1) + 367 (m2 - mm)*(q1 - q0)/(m1 - m0))/(m2 - m0); 368 369 if (q0 > result) { 370 result = q1 + (q0 - q1)/(m0 - m1); 371 } 372 373 return result; 374 } 375 376 /** 377 * Compares the state of two {@code Quantile} objects. This is 378 * a replacement for the {@link #equals(Object)} which is not advisable to 379 * implement for this mutable object. If two objects have the same state, it 380 * has still the same state when updated with the same value. 381 * {@snippet lang="java": 382 * final Quantile q1 = null; // @replace substring='null' replacement="..." 383 * final Quantile q2 = null; // @replace substring='null' replacement="..." 384 * 385 * if (q1.sameState(q2)) { 386 * final double value = random.nextDouble(); 387 * q1.accept(value); 388 * q2.accept(value); 389 * 390 * assert q1.sameState(q2); 391 * assert q2.sameState(q1); 392 * assert q1.sameState(q1); 393 * } 394 * } 395 * 396 * @since 3.7 397 * 398 * @param other the other object for the test 399 * @return {@code true} the {@code this} and the {@code other} objects have 400 * the same state, {@code false} otherwise 401 */ 402 public boolean sameState(final Quantile other) { 403 return Objects.equals(_quantile, other._quantile) && 404 Arrays.equals(_dn, other._dn) && 405 Arrays.equals(_n, other._n) && 406 Arrays.equals(_nn, other._nn) && 407 Arrays.equals(_q, other._q); 408 } 409 410 @Override 411 public String toString() { 412 return format( 413 "%s[samples=%d, quantile=%f]", 414 getClass().getSimpleName(), count(), value() 415 ); 416 } 417 418 419 static Quantile median() { 420 return new Quantile(0.5); 421 } 422 423 /** 424 * Return a {@code Collector} which applies a double-producing mapping 425 * function to each input element, and returns quantiles for the resulting 426 * values. 427 * {@snippet lang="java": 428 * final Stream<SomeObject> stream = null; // @replace substring='null' replacement="..." 429 * final Quantile quantile = stream 430 * .collect(toQuantile(0.25, v -> v.doubleValue())); 431 * } 432 * 433 * @param quantile the wished quantile value. 434 * @param mapper a mapping function to apply to each element 435 * @param <T> the type of the input elements 436 * @return a {@code Collector} implementing the quantile reduction 437 * @throws java.lang.NullPointerException if the given {@code mapper} is 438 * {@code null} 439 * @throws IllegalArgumentException if the {@code quantile} is not in the 440 * range {@code [0, 1]}. 441 */ 442 public static <T> Collector<T, ?, Quantile> toQuantile( 443 final double quantile, 444 final ToDoubleFunction<? super T> mapper 445 ) { 446 check(quantile); 447 requireNonNull(mapper); 448 449 return Collector.of( 450 () -> new Quantile(quantile), 451 (r, t) -> r.accept(mapper.applyAsDouble(t)), 452 Quantile::combine 453 ); 454 } 455 456 /** 457 * Return a {@code Collector} which applies a double-producing mapping 458 * function to each input element, and returns the median for the resulting 459 * values. 460 * {@snippet lang="java": 461 * final Stream<SomeObject> stream = null; // @replace substring='null' replacement="..." 462 * final Quantile median = stream.collect(toMedian(v -> v.doubleValue())); 463 * } 464 * 465 * @param mapper a mapping function to apply to each element 466 * @param <T> the type of the input elements 467 * @return a {@code Collector} implementing the quantile reduction 468 * @throws java.lang.NullPointerException if the given {@code mapper} is 469 * {@code null} 470 */ 471 public static <T> Collector<T, ?, Quantile> toMedian( 472 final ToDoubleFunction<? super T> mapper 473 ) { 474 requireNonNull(mapper); 475 476 return Collector.of( 477 Quantile::median, 478 (r, t) -> r.accept(mapper.applyAsDouble(t)), 479 Quantile::combine 480 ); 481 } 482 483}