From 1249cf8f386828ea6590920da345a334bf226041 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 28 Jul 2006 18:33:04 +0000 Subject: SQL2003-standard statistical aggregates, by Sergey Koposov. I've added only the float8 versions of the aggregates, which is all that the standard requires. Sergey's original patch also provided versions using numeric arithmetic, but given the size and slowness of the code, I doubt we ought to include those in core. --- doc/src/sgml/func.sgml | 405 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 336 insertions(+), 69 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index c9b474e0893..678b1cbeca8 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ - + Functions and Operators @@ -7785,16 +7785,18 @@ SELECT NULLIF(value, '(none)') ... Aggregate functions compute a single result - value from a set of input values. shows the built-in aggregate - functions. The special syntax considerations for aggregate + value from a set of input values. The built-in aggregate functions + are listed in + and + . + The special syntax considerations for aggregate functions are explained in . Consult for additional introductory information. - Aggregate Functions + General-Purpose Aggregate Functions @@ -7897,7 +7899,7 @@ SELECT NULLIF(value, '(none)') ... count(*)bigint - number of input values + number of input rows @@ -7905,7 +7907,7 @@ SELECT NULLIF(value, '(none)') ... any bigint - number of input values for which the value of expression is not null @@ -7948,6 +7950,333 @@ SELECT NULLIF(value, '(none)') ... + + sum(expression) + + smallint, int, + bigint, real, double + precision, numeric, or + interval + + + bigint for smallint or + int arguments, numeric for + bigint arguments, double precision + for floating-point arguments, otherwise the same as the + argument data type + + sum of expression across all input values + + + +
+ + + It should be noted that except for count, + these functions return a null value when no rows are selected. In + particular, sum of no rows returns null, not + zero as one might expect. The coalesce function may be + used to substitute zero for null when necessary. + + + + + ANY + + + SOME + + + Boolean aggregates bool_and and + bool_or correspond to standard SQL aggregates + every and any or + some. + As for any and some, + it seems that there is an ambiguity built into the standard syntax: + +SELECT b1 = ANY((SELECT b2 FROM t2 ...)) FROM t1 ...; + + Here ANY can be considered both as leading + to a subquery or as an aggregate if the select expression returns 1 row. + Thus the standard name cannot be given to these aggregates. + + + + + + Users accustomed to working with other SQL database management + systems may be surprised by the performance of the + count aggregate when it is applied to the + entire table. A query like: + +SELECT count(*) FROM sometable; + + will be executed by PostgreSQL using a + sequential scan of the entire table. + + + + + + shows + aggregate functions typically used in statistical analysis. + (These are separated out merely to avoid cluttering the listing + of more-commonly-used aggregates.) Where the description mentions + N, it means the + number of input rows for which all the input expressions are non-null. + In all cases, null is returned if the computation is meaningless, + for example when N is zero. + + + + statistics + + + linear regression + + + + Aggregate Functions for Statistics + + + + + Function + Argument Type + Return Type + Description + + + + + + + + + correlation + + corr(Y, X) + + + double precision + + + double precision + + sqrt((N * + sum(X*Y) - sum(X) * sum(Y))^2 / ((N * sum(X^2) - sum(X)^2) * (N * sum(Y^2) - sum(Y)^2))) + + + + + + covariance + population + + covar_pop(Y, X) + + + double precision + + + double precision + + (sum(X*Y) - sum(X) * sum(Y) / N) / N + + + + + + covariance + sample + + covar_samp(Y, X) + + + double precision + + + double precision + + (sum(X*Y) - sum(X) * sum(Y) / N) / (N - 1) + + + + + regr_avgx(Y, X) + + + double precision + + + double precision + + sum(X) / + N + + + + + regr_avgy(Y, X) + + + double precision + + + double precision + + sum(Y) / + N + + + + + regr_count(Y, X) + + + double precision + + + bigint + + number of input rows in which both expressions are non-null + + + + + + regression intercept + + regr_intercept(Y, X) + + + double precision + + + double precision + + (sum(Y) * + sum(X^2) - sum(X) * sum(X*Y)) / (N * sum(X^2) - sum(X)^2) + + + + + regr_r2(Y, X) + + + double precision + + + double precision + + (N * + sum(X*Y) - sum(X) * sum(Y))^2 / ((N * sum(X^2) - sum(X)^2) * (N * sum(Y^2) - sum(Y)^2)) + + + + + + regression slope + + regr_slope(Y, X) + + + double precision + + + double precision + + (N * + sum(X*Y) - sum(X) * sum(Y)) / (N * sum(X^2) - sum(X)^2) + + + + + regr_sxx(Y, X) + + + double precision + + + double precision + + sum(X^2) - + sum(X)^2 / N + + + + + regr_sxy(Y, X) + + + double precision + + + double precision + + sum(X*Y) - sum(X) * sum(Y) / N + + + + + regr_syy(Y, X) + + + double precision + + + double precision + + sum(Y^2) - + sum(Y)^2 / N + + @@ -8007,24 +8336,6 @@ SELECT NULLIF(value, '(none)') ... sample standard deviation of the input values - - sum(expression) - - smallint, int, - bigint, real, double - precision, numeric, or - interval - - - bigint for smallint or - int arguments, numeric for - bigint arguments, double precision - for floating-point arguments, otherwise the same as the - argument data type - - sum of expression across all input values - - @@ -8087,50 +8398,6 @@ SELECT NULLIF(value, '(none)') ...
- - It should be noted that except for count, - these functions return a null value when no rows are selected. In - particular, sum of no rows returns null, not - zero as one might expect. The coalesce function may be - used to substitute zero for null when necessary. - - - - - ANY - - - SOME - - - Boolean aggregates bool_and and - bool_or correspond to standard SQL aggregates - every and any or - some. - As for any and some, - it seems that there is an ambiguity built into the standard syntax: - -SELECT b1 = ANY((SELECT b2 FROM t2 ...)) FROM t1 ...; - - Here ANY can be considered both as leading - to a subquery or as an aggregate if the select expression returns 1 row. - Thus the standard name cannot be given to these aggregates. - - - - - - Users accustomed to working with other SQL database management - systems may be surprised by the performance of the - count aggregate when it is applied to the - entire table. A query like: - -SELECT count(*) FROM sometable; - - will be executed by PostgreSQL using a - sequential scan of the entire table. - - -- cgit v1.2.3