blob: deffd92c0753e01466cc3d4befef9f0c2d575f25 [file] [log] [blame]
 Stats (The Eclipse January API Documentation)
org.eclipse.january.dataset

Class Stats

• public class Stats
extends Object
Statistics class. TODO: Where is mode? See http://en.wikipedia.org/wiki/Mode_(statistics)
• Constructor Summary

Constructors
Constructor and Description
Stats()
• Method Summary

Methods
Modifier and Type Method and Description
static Object averageDeviation(Dataset a)
static Dataset covariance(Dataset a)
See covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof) with b = null, rowvar = true, bias = false and ddof = null.
static Dataset covariance(Dataset a, boolean rowvar, boolean bias, Integer ddof)
static Dataset covariance(Dataset a, Dataset b)
See covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof) with rowvar = true, bias = false and ddof = null.
static Dataset covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)
Calculate the covariance matrix (array) of a concatenated with b.
static Dataset cumulativeProduct(Dataset a, boolean... ignoreInvalids)
static Dataset cumulativeProduct(Dataset a, int axis, boolean... ignoreInvalids)
static Dataset cumulativeSum(Dataset a, boolean... ignoreInvalids)
static Dataset cumulativeSum(Dataset a, int axis, boolean... ignoreInvalids)
static Object iqr(Dataset a)
Interquartile range: Q3 - Q1
static Dataset iqr(Dataset a, int axis)
Interquartile range: Q3 - Q1
static Object kurtosis(Dataset a, boolean... ignoreInvalids)
static Dataset kurtosis(Dataset a, int axis, boolean... ignoreInvalids)
static Object median(Dataset a)
static Dataset median(Dataset a, int axis)
static double[] outlierValues(Dataset a, double lo, double hi, int length)
Calculate approximate outlier values.
static Object product(Dataset a, boolean... ignoreInvalids)
static Dataset product(Dataset a, int axis, boolean... ignoreInvalids)
static double[] quantile(Dataset a, double... values)
Calculate quantiles of a dataset; the quantile function is defined as the inverse of the cumulative distribution function (CDF)
static double quantile(Dataset a, double q)
Calculate a quantile of a dataset; the quantile function is defined as the inverse of the cumulative distribution function (CDF)
static Dataset[] quantile(Dataset a, int axis, double... values)
Calculate quantiles of a dataset; the quantile function is defined as the inverse of the cumulative distribution function (CDF)
static double residual(Dataset a, Dataset b)
The residual is the sum of squared differences
static Object skewness(Dataset a, boolean... ignoreInvalids)
static Dataset skewness(Dataset a, int axis, boolean... ignoreInvalids)
static Object sum(Dataset a, boolean... ignoreInvalids)
static Object typedProduct(Dataset a, int dtype, boolean... ignoreInvalids)
static Dataset typedProduct(Dataset a, int dtype, int axis, boolean... ignoreInvalids)
static Object typedSum(Dataset a, int dtype, boolean... ignoreInvalids)
static Dataset typedSum(Dataset a, int dtype, int axis, boolean... ignoreInvalids)
static double weightedResidual(Dataset a, Dataset b, Dataset w)
The weighted residual is the weighted sum of squared differences
• Constructor Detail

• Method Detail

• quantile

public static double quantile(Dataset a,
double q)
Calculate a quantile of a dataset; the quantile function is defined as the inverse of the cumulative distribution function (CDF)
Parameters:
a - dataset
q - value of the CDF at which to evaluate the quantile
Returns:
point at which CDF has value q
• quantile

public static double[] quantile(Dataset a,
double... values)
Calculate quantiles of a dataset; the quantile function is defined as the inverse of the cumulative distribution function (CDF)
Parameters:
a - dataset
values - values of the CDF at which to evaluate the quantiles
Returns:
points at which CDF has given values
• quantile

public static Dataset[] quantile(Dataset a,
int axis,
double... values)
Calculate quantiles of a dataset; the quantile function is defined as the inverse of the cumulative distribution function (CDF)
Parameters:
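a - dataset
axis - axis along which to calculate the quantiles
values - values of the CDF at which to evaluate the quantiles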
Returns:
points at which CDF has given values
• median

public static Dataset median(Dataset a,
int axis)
Parameters:
a - dataset
axis -
Returns:
median
• iqr

public static Object iqr(Dataset a)
Interquartile range: Q3 - Q1
Parameters:
a -
Returns:
range
• iqr

public static Dataset iqr(Dataset a,
int axis)
Interquartile range: Q3 - Q1
Parameters:
a -
axis -
Returns:
range
• typedSum

public static Dataset typedSum(Dataset a,
int dtype,
int axis,
boolean... ignoreInvalids)
Parameters:
a - dataset
dtype -
axis -
ignoreInvalids - see Dataset.max(int, boolean...)
Returns:
typed sum of items along axis in dataset
Since:
2.0
• typedProduct

public static Dataset typedProduct(Dataset a,
int dtype,
int axis,
boolean... ignoreInvalids)
Parameters:
a - dataset
dtype -
axis -
ignoreInvalids - see Dataset.max(int, boolean...)
Returns:
typed product of items along axis in dataset
Since:
2.0
• residual

public static double residual(Dataset a,
Dataset b)
The residual is the sum of squared differences
Parameters:
a -
b -
Returns:
residual value
• weightedResidual

public static double weightedResidual(Dataset a,
Dataset b,
Dataset w)
The weighted residual is the weighted sum of squared differences
Parameters:
a -
b -
w -
Returns:
residual value
• outlierValues

public static double[] outlierValues(Dataset a,
double lo,
double hi,
int length)
Calculate approximate outlier values. These are defined as the values in the dataset that are approximately below and above the given thresholds - in terms of percentages of dataset size.

It approximates by limiting the number of items (given by length) used internally by data structures - the larger this is, the more accurate those outlier values become. The actual thresholds used are returned in the array.

Also, the low and high values will be made distinct, if possible, by adjusting the thresholds.

Parameters:
a -
lo - percentage threshold for lower limit
hi - percentage threshold for higher limit
length - maximum number of items used internally; if negative, then unlimited
Returns:
double array with low and high values, and low and high percentage thresholds
• covariance

public static Dataset covariance(Dataset a,
Dataset b,
boolean rowvar,
boolean bias,
Integer ddof)
Calculate the covariance matrix (array) of a concatenated with b. This method is directly based on the implementation in numpy (cov).
Parameters:
a - Array containing multiple variables and observations. Each row represents a variable, each column an observation.
b - An extra set of variables and observations. Must be of same type as a and have a compatible shape.
rowvar - When true (default), each row is a variable; when false each column is a variable.
bias - Default normalisation is (N - 1), where N is the number of observations. If set to true, normalisation is (N).
ddof - Default normalisation is (N - 1). If ddof is set, then normalisation is (N - ddof).
Returns:
covariance array of a concatenated with b
Since:
2.0