blob: e25df50435b0896a96b75db98932bcb366480514 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2006 IBM Corporation and others.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.test.internal.performance.eval;
import org.eclipse.test.internal.performance.db.TimeSeries;
/**
* Utility methods for statistics.
*
* @since 3.2
*/
public final class StatisticsUtil {
/**
* Percentile constants class.
*
* @since 3.2
*/
public static final class Percentile {
final int fColumn;
private final double fInside;
private Percentile(int column, double inside) {
fColumn = column;
fInside = inside;
}
/**
* Returns how much is within the percentile, [0, 1]
*
* @return the inside quotient
*/
public double inside() {
return fInside;
}
}
public static final Percentile T90 = new Percentile(0, 0.9);
public static final Percentile T95 = new Percentile(1, 0.95);
public static final Percentile T97_5 = new Percentile(2, 0.975);
public static final Percentile T99 = new Percentile(3, 0.99);
/**
* Returns the student's t value from the two-tailed t-table. For a degree-of-freedom larger than 100, the value for 100 is
* returned.
*
* @param df
* the degrees of freedom (usually sample size - 1)
* @param percentile
* the percentile
* @return the corresponding student's t value
*/
public static double getStudentsT(int df, Percentile percentile) {
if (df < 0)
df = 0;
else if (df > 100)
df = 100;
return T[df][percentile.fColumn];
}
/**
* Returns <code>true</code> if the mean of two data sets is significantly different, such that the probability that they are
* from the same population is lower than <code>percentile</code>, <code>false</code> otherwise. The data sets are taken from
* <code>series</code> at <code>index1</code> and <code>index2</code>.
* <p>
* Note that no conclusion must be drawn from a <code>false</code> return value: it does not indicate that the two data sets are
* from the same population - there may simply be not enough data to conclude the other way, for example due to a small sample
* size or large standard deviation.
* </p>
* <p>
* Also note that a <code>true</code> return value does not say anything about the relevance of the difference - a statistically
* significant difference may be practically irrelevant if it is small.
* </p>
* <p>
* XXX the current implementation assumes that the standard deviations are sufficiently similar.
* </p>
*
* @param refSeries
* the time series containing the first data set
* @param index1
* the index into <code>series1</code> for the first data set
* @param testSeries
* the time series containing the second data set
* @param index2
* the index into <code>series2</code> for the second data set
* @param percentile
* the percentile level to use
* @return <code>true</code> if the null hypothesis is rejected on the <code>percentile</code> level, <code>false</code> if it
* cannot be rejected based on the given data
*/
public static double[] statisticsForTimeSeries(TimeSeries refSeries, int index1, TimeSeries testSeries, int index2,
Percentile percentile) {
// see http://bmj.bmjjournals.com/collections/statsbk/7.shtml
double[] values = new double[] { refSeries.getValue(index1), testSeries.getValue(index2) };
long[] counts = new long[] { refSeries.getCount(index1), testSeries.getCount(index2) };
double[] stddevs = new double[] { refSeries.getStddev(index1), testSeries.getStddev(index2) };
double ttest = studentTtest(values, stddevs, counts, percentile);
return new double[] { getStudentsT((int) (counts[0] + counts[1] - 2), percentile), ttest,
standardError(values, stddevs, counts), deviation(values), };
}
public static double studentTtest(double[] values, double[] stddevs, long[] counts, Percentile percentile) {
double ref = values[0];
double val = values[1];
double delta = ref - val;
long df1 = counts[0] - 1;
double sd1 = stddevs[0];
long df2 = counts[1];
double sd2 = stddevs[1];
// TODO if the stdev's are not sufficiently similar, we have to take a different approach
if (!Double.isNaN(sd1) && !Double.isNaN(sd2) && df1 > 0 && df2 > 0) {
long df = df1 + df2;
double sp_square = (df1 * sd1 * sd1 + df2 * sd2 * sd2) / df;
double se_diff = Math.sqrt(sp_square * (1.0 / (df1 + 1) + 1.0 / (df2 + 1)));
return Math.abs(delta / se_diff);
}
return -1;
}
public static double deviation(double[] values) {
return (values[1] - values[0]) / values[0];
}
public static double standardError(double[] values, double[] stddevs, long[] counts) {
return Math.sqrt((stddevs[0] * stddevs[0] / counts[0]) + (stddevs[1] * stddevs[1] / counts[1])) / values[0];
}
/**
* The (two-tailed) T-table. [degrees_of_freedom][percentile]
*/
private static final double[][] T = { { Double.NaN, Double.NaN, Double.NaN, Double.NaN },
{ Double.NaN, Double.NaN, Double.NaN, Double.NaN }, { 2.92, 4.3027, 6.2054, 9.925 },
{ 2.3534, 3.1824, 4.1765, 5.8408 }, { 2.1318, 2.7765, 3.4954, 4.6041 }, { 2.015, 2.5706, 3.1634, 4.0321 },
{ 1.9432, 2.4469, 2.9687, 3.7074 }, { 1.8946, 2.3646, 2.8412, 3.4995 }, { 1.8595, 2.306, 2.7515, 3.3554 },
{ 1.8331, 2.2622, 2.685, 3.2498 }, { 1.8125, 2.2281, 2.6338, 3.1693 }, { 1.7959, 2.201, 2.5931, 3.1058 },
{ 1.7823, 2.1788, 2.56, 3.0545 }, { 1.7709, 2.1604, 2.5326, 3.0123 }, { 1.7613, 2.1448, 2.5096, 2.9768 },
{ 1.7531, 2.1315, 2.4899, 2.9467 }, { 1.7459, 2.1199, 2.4729, 2.9208 }, { 1.7396, 2.1098, 2.4581, 2.8982 },
{ 1.7341, 2.1009, 2.445, 2.8784 }, { 1.7291, 2.093, 2.4334, 2.8609 }, { 1.7247, 2.086, 2.4231, 2.8453 },
{ 1.7207, 2.0796, 2.4138, 2.8314 }, { 1.7171, 2.0739, 2.4055, 2.8188 }, { 1.7139, 2.0687, 2.3979, 2.8073 },
{ 1.7109, 2.0639, 2.391, 2.797 }, { 1.7081, 2.0595, 2.3846, 2.7874 }, { 1.7056, 2.0555, 2.3788, 2.7787 },
{ 1.7033, 2.0518, 2.3734, 2.7707 }, { 1.7011, 2.0484, 2.3685, 2.7633 }, { 1.6991, 2.0452, 2.3638, 2.7564 },
{ 1.6973, 2.0423, 2.3596, 2.75 }, { 1.6955, 2.0395, 2.3556, 2.744 }, { 1.6939, 2.0369, 2.3518, 2.7385 },
{ 1.6924, 2.0345, 2.3483, 2.7333 }, { 1.6909, 2.0322, 2.3451, 2.7284 }, { 1.6896, 2.0301, 2.342, 2.7238 },
{ 1.6883, 2.0281, 2.3391, 2.7195 }, { 1.6871, 2.0262, 2.3363, 2.7154 }, { 1.686, 2.0244, 2.3337, 2.7116 },
{ 1.6849, 2.0227, 2.3313, 2.7079 }, { 1.6839, 2.0211, 2.3289, 2.7045 }, { 1.6829, 2.0195, 2.3267, 2.7012 },
{ 1.682, 2.0181, 2.3246, 2.6981 }, { 1.6811, 2.0167, 2.3226, 2.6951 }, { 1.6802, 2.0154, 2.3207, 2.6923 },
{ 1.6794, 2.0141, 2.3189, 2.6896 }, { 1.6787, 2.0129, 2.3172, 2.687 }, { 1.6779, 2.0117, 2.3155, 2.6846 },
{ 1.6772, 2.0106, 2.3139, 2.6822 }, { 1.6766, 2.0096, 2.3124, 2.68 }, { 1.6759, 2.0086, 2.3109, 2.6778 },
{ 1.6753, 2.0076, 2.3095, 2.6757 }, { 1.6747, 2.0066, 2.3082, 2.6737 }, { 1.6741, 2.0057, 2.3069, 2.6718 },
{ 1.6736, 2.0049, 2.3056, 2.67 }, { 1.673, 2.004, 2.3044, 2.6682 }, { 1.6725, 2.0032, 2.3033, 2.6665 },
{ 1.672, 2.0025, 2.3022, 2.6649 }, { 1.6716, 2.0017, 2.3011, 2.6633 }, { 1.6711, 2.001, 2.3, 2.6618 },
{ 1.6706, 2.0003, 2.299, 2.6603 }, { 1.6702, 1.9996, 2.2981, 2.6589 }, { 1.6698, 1.999, 2.2971, 2.6575 },
{ 1.6694, 1.9983, 2.2962, 2.6561 }, { 1.669, 1.9977, 2.2954, 2.6549 }, { 1.6686, 1.9971, 2.2945, 2.6536 },
{ 1.6683, 1.9966, 2.2937, 2.6524 }, { 1.6679, 1.996, 2.2929, 2.6512 }, { 1.6676, 1.9955, 2.2921, 2.6501 },
{ 1.6672, 1.9949, 2.2914, 2.649 }, { 1.6669, 1.9944, 2.2906, 2.6479 }, { 1.6666, 1.9939, 2.2899, 2.6469 },
{ 1.6663, 1.9935, 2.2892, 2.6458 }, { 1.666, 1.993, 2.2886, 2.6449 }, { 1.6657, 1.9925, 2.2879, 2.6439 },
{ 1.6654, 1.9921, 2.2873, 2.643 }, { 1.6652, 1.9917, 2.2867, 2.6421 }, { 1.6649, 1.9913, 2.2861, 2.6412 },
{ 1.6646, 1.9908, 2.2855, 2.6403 }, { 1.6644, 1.9905, 2.2849, 2.6395 }, { 1.6641, 1.9901, 2.2844, 2.6387 },
{ 1.6639, 1.9897, 2.2838, 2.6379 }, { 1.6636, 1.9893, 2.2833, 2.6371 }, { 1.6634, 1.989, 2.2828, 2.6364 },
{ 1.6632, 1.9886, 2.2823, 2.6356 }, { 1.663, 1.9883, 2.2818, 2.6349 }, { 1.6628, 1.9879, 2.2813, 2.6342 },
{ 1.6626, 1.9876, 2.2809, 2.6335 }, { 1.6624, 1.9873, 2.2804, 2.6329 }, { 1.6622, 1.987, 2.28, 2.6322 },
{ 1.662, 1.9867, 2.2795, 2.6316 }, { 1.6618, 1.9864, 2.2791, 2.6309 }, { 1.6616, 1.9861, 2.2787, 2.6303 },
{ 1.6614, 1.9858, 2.2783, 2.6297 }, { 1.6612, 1.9855, 2.2779, 2.6291 }, { 1.6611, 1.9852, 2.2775, 2.6286 },
{ 1.6609, 1.985, 2.2771, 2.628 }, { 1.6607, 1.9847, 2.2767, 2.6275 }, { 1.6606, 1.9845, 2.2764, 2.6269 },
{ 1.6604, 1.9842, 2.276, 2.6264 }, { 1.6602, 1.984, 2.2757, 2.6259 }, };
private StatisticsUtil() {
// don't instantiate
}
}