mirror of
https://github.com/apache/impala.git
synced 2025-12-25 02:03:09 -05:00
For this change to land in master, the audience="hidden" code review needs to be completed first. Otherwise, the doc build would still work but the audience="hidden" content would be visible rather than hidden as desired. Some work happening in parallel might introduce additional instances of audience="Cloudera". I suggest addressing those in a followup CR so this global change can land quickly. Since the changes apply across so many different files, but are so narrow in scope, I suggest that the way to validate (check that no extraneous changes were introduced accidentally) is to diff just the changed lines: git diff -U0 HEAD^ HEAD In patch set 2, I updated other topics marked audience="Cloudera" by CRs that were pushed in the meantime. Change-Id: Ic93d89da77e1f51bbf548a522d98d0c4e2fb31c8 Reviewed-on: http://gerrit.cloudera.org:8080/5613 Reviewed-by: John Russell <jrussell@cloudera.com> Tested-by: Impala Public Jenkins
148 lines
6.2 KiB
XML
148 lines
6.2 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!--
|
|
Licensed to the Apache Software Foundation (ASF) under one
|
|
or more contributor license agreements. See the NOTICE file
|
|
distributed with this work for additional information
|
|
regarding copyright ownership. The ASF licenses this file
|
|
to you under the Apache License, Version 2.0 (the
|
|
"License"); you may not use this file except in compliance
|
|
with the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing,
|
|
software distributed under the License is distributed on an
|
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
KIND, either express or implied. See the License for the
|
|
specific language governing permissions and limitations
|
|
under the License.
|
|
-->
|
|
<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
|
|
<concept rev="1.4" id="variance">
|
|
|
|
<title>VARIANCE, VARIANCE_SAMP, VARIANCE_POP, VAR_SAMP, VAR_POP Functions</title>
|
|
<titlealts audience="PDF"><navtitle>VARIANCE, VARIANCE_SAMP, VARIANCE_POP, VAR_SAMP, VAR_POP</navtitle></titlealts>
|
|
<prolog>
|
|
<metadata>
|
|
<data name="Category" value="Impala"/>
|
|
<data name="Category" value="SQL"/>
|
|
<data name="Category" value="Impala Functions"/>
|
|
<data name="Category" value="Aggregate Functions"/>
|
|
<data name="Category" value="Querying"/>
|
|
<data name="Category" value="Developers"/>
|
|
<data name="Category" value="Data Analysts"/>
|
|
</metadata>
|
|
</prolog>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
<indexterm audience="hidden">variance() function</indexterm>
|
|
<indexterm audience="hidden">variance_samp() function</indexterm>
|
|
<indexterm audience="hidden">variance_pop() function</indexterm>
|
|
<indexterm audience="hidden">var_samp() function</indexterm>
|
|
<indexterm audience="hidden">var_pop() function</indexterm>
|
|
An aggregate function that returns the
|
|
<xref href="http://en.wikipedia.org/wiki/Variance" scope="external" format="html">variance</xref> of a set of
|
|
numbers. This is a mathematical property that signifies how far the values spread apart from the mean. The
|
|
return value can be zero (if the input is a single value, or a set of identical values), or a positive number
|
|
otherwise.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/syntax_blurb"/>
|
|
|
|
<codeblock>{ VARIANCE | VAR[IANCE]_SAMP | VAR[IANCE]_POP } ([DISTINCT | ALL] <varname>expression</varname>)</codeblock>
|
|
|
|
<p>
|
|
This function works with any numeric data type.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/former_odd_return_type_string"/>
|
|
|
|
<p>
|
|
This function is typically used in mathematical formulas related to probability distributions.
|
|
</p>
|
|
|
|
<p>
|
|
The <codeph>VARIANCE_SAMP()</codeph> and <codeph>VARIANCE_POP()</codeph> functions compute the sample
|
|
variance and population variance, respectively, of the input values. (<codeph>VARIANCE()</codeph> is an alias
|
|
for <codeph>VARIANCE_SAMP()</codeph>.) Both functions evaluate all input rows matched by the query. The
|
|
difference is that <codeph>STDDEV_SAMP()</codeph> is scaled by <codeph>1/(N-1)</codeph> while
|
|
<codeph>STDDEV_POP()</codeph> is scaled by <codeph>1/N</codeph>.
|
|
</p>
|
|
|
|
<p rev="2.0.0">
|
|
The functions <codeph>VAR_SAMP()</codeph> and <codeph>VAR_POP()</codeph> are the same as
|
|
<codeph>VARIANCE_SAMP()</codeph> and <codeph>VARIANCE_POP()</codeph>, respectively. These aliases are
|
|
available in Impala 2.0 and later.
|
|
</p>
|
|
|
|
<p>
|
|
If no input rows match the query, the result of any of these functions is <codeph>NULL</codeph>. If a single
|
|
input row matches the query, the result of any of these functions is <codeph>"0.0"</codeph>.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
|
|
<p>
|
|
This example demonstrates how <codeph>VARIANCE()</codeph> and <codeph>VARIANCE_SAMP()</codeph> return the
|
|
same result, while <codeph>VARIANCE_POP()</codeph> uses a slightly different calculation to reflect that the
|
|
input data is considered part of a larger <q>population</q>.
|
|
</p>
|
|
|
|
<codeblock>[localhost:21000] > select variance(score) from test_scores;
|
|
+-----------------+
|
|
| variance(score) |
|
|
+-----------------+
|
|
| 812.25 |
|
|
+-----------------+
|
|
[localhost:21000] > select variance_samp(score) from test_scores;
|
|
+----------------------+
|
|
| variance_samp(score) |
|
|
+----------------------+
|
|
| 812.25 |
|
|
+----------------------+
|
|
[localhost:21000] > select variance_pop(score) from test_scores;
|
|
+---------------------+
|
|
| variance_pop(score) |
|
|
+---------------------+
|
|
| 811.438 |
|
|
+---------------------+
|
|
</codeblock>
|
|
|
|
<p>
|
|
This example demonstrates that, because the return value of these aggregate functions is a
|
|
<codeph>STRING</codeph>, you convert the result with <codeph>CAST</codeph> if you need to do further
|
|
calculations as a numeric value.
|
|
</p>
|
|
|
|
<codeblock>[localhost:21000] > create table score_stats as select cast(stddev(score) as decimal(7,4)) `standard_deviation`, cast(variance(score) as decimal(7,4)) `variance` from test_scores;
|
|
+-------------------+
|
|
| summary |
|
|
+-------------------+
|
|
| Inserted 1 row(s) |
|
|
+-------------------+
|
|
[localhost:21000] > desc score_stats;
|
|
+--------------------+--------------+---------+
|
|
| name | type | comment |
|
|
+--------------------+--------------+---------+
|
|
| standard_deviation | decimal(7,4) | |
|
|
| variance | decimal(7,4) | |
|
|
+--------------------+--------------+---------+
|
|
</codeblock>
|
|
|
|
<p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/analytic_not_allowed_caveat"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/related_info"/>
|
|
|
|
<p>
|
|
The <codeph>STDDEV()</codeph>, <codeph>STDDEV_POP()</codeph>, and <codeph>STDDEV_SAMP()</codeph> functions
|
|
compute the standard deviation (square root of the variance) based on the results of
|
|
<codeph>VARIANCE()</codeph>, <codeph>VARIANCE_POP()</codeph>, and <codeph>VARIANCE_SAMP()</codeph>
|
|
respectively. See <xref href="impala_stddev.xml#stddev"/> for details about the standard deviation property.
|
|
</p>
|
|
</conbody>
|
|
</concept>
|