Update DESCRIBE FORMATTED results to match the Hive HS2 output

Lenni Kuff
2013-06-18 16:33:26 -07:00
committed by Henry Robinson
parent 4d89735ec5
commit abdfae5b24
8 changed files with 64 additions and 29 deletions

View File

@@ -29,6 +29,9 @@ import com.google.common.collect.Lists;
* TDescribeTableResult object.
*/
public class DescribeResultFactory {
// Number of columns in each row of the DESCRIBE FORMATTED result set.
private final static int NUM_DESC_FORMATTED_RESULT_COLS = 3;
public static TDescribeTableResult buildDescribeTableResult(Table table,
TDescribeTableOutputStyle outputFormat) {
switch (outputFormat) {
@@ -64,8 +67,8 @@ public class DescribeResultFactory {
/*
* Builds a TDescribeTableResult that contains the result of a DESCRIBE FORMATTED
* <table> command. For the formatted describe output the goal is to be exactly the
* same as what Hive outputs, for compatibility reasons. To do this, Hive's
* MetadataFormatUtils class is used to build the results.
* same as what Hive (via HiveServer2) outputs, for compatibility reasons. To do this,
* Hive's MetadataFormatUtils class is used to build the results.
*/
private static TDescribeTableResult describeTableFormatted(Table table) {
TDescribeTableResult descResult = new TDescribeTableResult();
@@ -82,10 +85,21 @@ public class DescribeResultFactory {
sb.append(MetaDataFormatUtils.getTableInformation(hiveTable));
for (String line: sb.toString().split("\n")) {
TColumnValue descFormattedEntry = new TColumnValue();
descFormattedEntry.setStringVal(line);
descResult.results.add(new TResultRow(Lists.newArrayList(descFormattedEntry)));
// To match Hive's HiveServer2 output, split each line into multiple column
// values based on the field delimiter.
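// (In Hive's MetaDataFormatUtils, FIELD_DELIM is a tab character.)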
String[] columns = line.split(MetaDataFormatUtils.FIELD_DELIM);
TResultRow resultRow = new TResultRow();
for (int i = 0; i < NUM_DESC_FORMATTED_RESULT_COLS; ++i) {
TColumnValue colVal = new TColumnValue();
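// Default to a NULL value so short rows are padded out to the full column count.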
colVal.setStringVal(null);
if (columns.length > i) {
// Add the column value.
colVal.setStringVal(columns[i]);
}
resultRow.addToColVals(colVal);
}
descResult.results.add(resultRow);
}
return descResult;
}
}
}
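
For illustration, a minimal standalone sketch (not part of the commit) of the split-and-pad behavior above. The sample line is an assumption, as is the delimiter; Hive's MetaDataFormatUtils defines FIELD_DELIM as a tab:

public class DescribeFormattedExample {
  private static final int NUM_COLS = 3;
  public static void main(String[] args) {
    // One line of Hive's formatted output: name, type and comment separated by tabs.
    String line = "id                  \tint                 \tfrom deserializer";
    String[] columns = line.split("\t");
    String[] row = new String[NUM_COLS];
    for (int i = 0; i < NUM_COLS; ++i) {
      // Pad missing trailing fields with null, mirroring describeTableFormatted().
      row[i] = i < columns.length ? columns[i] : null;
    }
    // Prints: [id                  , int                 , from deserializer]
    System.out.println(java.util.Arrays.toString(row));
  }
}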

View File

@@ -170,17 +170,10 @@ public class Frontend {
} else if (analysis.isDescribeStmt()) {
ddl.ddl_type = TDdlType.DESCRIBE;
ddl.setDescribe_table_params(analysis.getDescribeStmt().toThrift());
// DESCRIBE FORMATTED commands return all results in a single column.
if (analysis.getDescribeStmt().getOutputStyle() ==
TDescribeTableOutputStyle.FORMATTED) {
metadata.setColumnDescs(Arrays.asList(
new TColumnDesc("describe_formatted", TPrimitiveType.STRING)));
} else {
metadata.setColumnDescs(Arrays.asList(
new TColumnDesc("name", TPrimitiveType.STRING),
new TColumnDesc("type", TPrimitiveType.STRING),
new TColumnDesc("comment", TPrimitiveType.STRING)));
}
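// DESCRIBE and DESCRIBE FORMATTED results now share the same three-column
// schema, matching Hive's HS2 output.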
metadata.setColumnDescs(Arrays.asList(
new TColumnDesc("name", TPrimitiveType.STRING),
new TColumnDesc("type", TPrimitiveType.STRING),
new TColumnDesc("comment", TPrimitiveType.STRING)));
} else if (analysis.isAlterTableStmt()) {
ddl.ddl_type = TDdlType.ALTER_TABLE;
ddl.setAlter_table_params(analysis.getAlterTableStmt().toThrift());

View File

@@ -104,4 +104,8 @@
<name>dfs.replication</name>
<value>3</value>
</property>
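<!-- NOSASL disables SASL negotiation so plain (unauthenticated) Thrift clients can connect to HiveServer2. -->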
<property>
<name>hive.server2.authentication</name>
<value>NOSASL</value>
</property>
</configuration>

View File

@@ -100,4 +100,8 @@
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NOSASL</value>
</property>
</configuration>

View File

@@ -1,11 +1,16 @@
#!/bin/bash
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
HIVE_SERVER_PORT=10000
export HIVE_SERVER2_THRIFT_PORT=11050
set -u
# Kill for a clean start.
$IMPALA_HOME/testdata/bin/kill-hive-server.sh
# Starts hive-server on the specified port
# Starts a HiveServer2 instance on the port specified by the HIVE_SERVER2_THRIFT_PORT
# environment variable.
hive --service hiveserver2 &
# Starts hive-server (1) on the specified port.
hive --service hiveserver -p $HIVE_SERVER_PORT &
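# Give both servers a moment to start up before dependent scripts connect.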
sleep 5

View File

@@ -29,6 +29,7 @@ from tests.util.shell_util import exec_shell_cmd
from tests.util.test_file_parser import *
from tests.util.thrift_util import create_transport
from tests.common.base_test_suite import BaseTestSuite
from tests.common.query_executor import JdbcQueryExecOptions, execute_using_jdbc
# Imports required for Hive Metastore Client
from hive_metastore import ThriftHiveMetastore
@@ -38,6 +39,9 @@ from thrift.protocol import TBinaryProtocol
logging.basicConfig(level=logging.INFO, format='%(threadName)s: %(message)s')
LOG = logging.getLogger('impala_test_suite')
IMPALAD = pytest.config.option.impalad
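# Impala's HS2 endpoint: the impalad host combined with the configured HS2 port.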
IMPALAD_HS2_HOST_PORT = pytest.config.option.impalad.split(':')[0] + ":" + \
pytest.config.option.impalad_hs2_port
HIVE_HS2_HOST_PORT = pytest.config.option.hive_server2
WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR']
# Base class for Impala tests. All impala test cases should inherit from this class
@@ -224,17 +228,20 @@ class ImpalaTestSuite(BaseTestSuite):
assert len(result.data) <= 1, 'Multiple values returned from scalar'
return result.data[0] if len(result.data) == 1 else None
def exec_and_compare_hive_and_impala_hs2(self, stmt):
"""Compare Hive and Impala results when executing the same statment over HS2"""
# Run the statement targeting Hive
exec_opts = JdbcQueryExecOptions(iterations=1, impalad=HIVE_HS2_HOST_PORT)
hive_results = execute_using_jdbc(stmt, exec_opts).data
def exec_and_compare_hive_and_impala(self, exec_stmt):
"""Executes the same statement in Hive and Impala and compares the results"""
rc, stdout, stderr =\
exec_shell_cmd("hive -e \"%s\"" % exec_stmt)
assert rc == 0, "stdout: %s\nstderr: %s" % (stdout, stderr)
result = self.client.execute(exec_stmt)
# Run the statement targeting Impala
exec_opts = JdbcQueryExecOptions(iterations=1, impalad=IMPALAD_HS2_HOST_PORT)
impala_results = execute_using_jdbc(stmt, exec_opts).data
# Compare line-by-line (hive results go to stdout).
for impala, hive in zip(result.data, stdout.split('\n')):
assert impala.rstrip() == hive.rstrip()
# Compare the results
assert (impala_results is not None) and (hive_results is not None)
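# Note: zip() truncates to the shorter result set, so equal row counts are assumed.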
for impala, hive in zip(impala_results, hive_results):
assert impala == hive
def __drop_partitions(self, db_name, table_name):
"""Drops all partitions in the given table"""

View File

@@ -20,9 +20,17 @@ def pytest_addoption(parser):
parser.addoption("--impalad", default="localhost:21000", help=\
"The impalad host:port to run tests against.")
parser.addoption("--impalad_hs2_port", default="21050", help=\
"The impalad HiveServer2 port.")
# TODO: Migrate test infrastructure to HiveServer2 and remove the need for this
# parameter.
parser.addoption("--hive_server", default="localhost:10000", help=\
"The hive server host:port to connect to.")
parser.addoption("--hive_server2", default="localhost:11050", help=\
"Hive's HiveServer2 host:port to connect to.")
parser.addoption("--update_results", action="store_true", default=False, help=\
"If set, will generate new results for all tests run instead of "\
"verifying the results.")

View File

@@ -38,9 +38,9 @@ class TestMetadataQueryStatements(ImpalaTestSuite):
def test_describe_formatted(self, vector):
# Describe a partitioned table.
self.exec_and_compare_hive_and_impala("describe formatted functional.alltypes")
self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.alltypes")
# Describe an unpartitioned table.
self.exec_and_compare_hive_and_impala("describe formatted tpch.lineitem")
self.exec_and_compare_hive_and_impala_hs2("describe formatted tpch.lineitem")
def test_use_table(self, vector):
self.run_test_case('QueryTest/use', vector)