mirror of
https://github.com/apache/impala.git
synced 2026-01-05 21:00:54 -05:00
Update DESCRIBE FORMATTED results to match the Hive HS2 output
This commit is contained in:
committed by
Henry Robinson
parent
4d89735ec5
commit
abdfae5b24
@@ -29,6 +29,9 @@ import com.google.common.collect.Lists;
|
||||
* TDescribeTableResult object.
|
||||
*/
|
||||
public class DescribeResultFactory {
|
||||
// Number of columns in each row of the DESCRIBE FORMATTED result set.
|
||||
private final static int NUM_DESC_FORMATTED_RESULT_COLS = 3;
|
||||
|
||||
public static TDescribeTableResult buildDescribeTableResult(Table table,
|
||||
TDescribeTableOutputStyle outputFormat) {
|
||||
switch (outputFormat) {
|
||||
@@ -64,8 +67,8 @@ public class DescribeResultFactory {
|
||||
/*
|
||||
* Builds a TDescribeTableResult that contains the result of a DESCRIBE FORMATTED
|
||||
* <table> command. For the formatted describe output the goal is to be exactly the
|
||||
* same as what Hive outputs, for compatibility reasons. To do this, Hive's
|
||||
* MetadataFormatUtils class is used to build the results.
|
||||
* same as what Hive (via HiveServer2) outputs, for compatibility reasons. To do this,
|
||||
* Hive's MetadataFormatUtils class is used to build the results.
|
||||
*/
|
||||
private static TDescribeTableResult describeTableFormatted(Table table) {
|
||||
TDescribeTableResult descResult = new TDescribeTableResult();
|
||||
@@ -82,10 +85,21 @@ public class DescribeResultFactory {
|
||||
sb.append(MetaDataFormatUtils.getTableInformation(hiveTable));
|
||||
|
||||
for (String line: sb.toString().split("\n")) {
|
||||
TColumnValue descFormattedEntry = new TColumnValue();
|
||||
descFormattedEntry.setStringVal(line);
|
||||
descResult.results.add(new TResultRow(Lists.newArrayList(descFormattedEntry)));
|
||||
// To match Hive's HiveServer2 output, split each line into multiple column
|
||||
// values based on the field delimiter.
|
||||
String[] columns = line.split(MetaDataFormatUtils.FIELD_DELIM);
|
||||
TResultRow resultRow = new TResultRow();
|
||||
for (int i = 0; i < NUM_DESC_FORMATTED_RESULT_COLS; ++i) {
|
||||
TColumnValue colVal = new TColumnValue();
|
||||
colVal.setStringVal(null);
|
||||
if (columns.length > i) {
|
||||
// Add the column value.
|
||||
colVal.setStringVal(columns[i]);
|
||||
}
|
||||
resultRow.addToColVals(colVal);
|
||||
}
|
||||
descResult.results.add(resultRow);
|
||||
}
|
||||
return descResult;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -170,17 +170,10 @@ public class Frontend {
|
||||
} else if (analysis.isDescribeStmt()) {
|
||||
ddl.ddl_type = TDdlType.DESCRIBE;
|
||||
ddl.setDescribe_table_params(analysis.getDescribeStmt().toThrift());
|
||||
// DESCRIBE FORMATTED commands return all results in a single column.
|
||||
if (analysis.getDescribeStmt().getOutputStyle() ==
|
||||
TDescribeTableOutputStyle.FORMATTED) {
|
||||
metadata.setColumnDescs(Arrays.asList(
|
||||
new TColumnDesc("describe_formatted", TPrimitiveType.STRING)));
|
||||
} else {
|
||||
metadata.setColumnDescs(Arrays.asList(
|
||||
new TColumnDesc("name", TPrimitiveType.STRING),
|
||||
new TColumnDesc("type", TPrimitiveType.STRING),
|
||||
new TColumnDesc("comment", TPrimitiveType.STRING)));
|
||||
}
|
||||
metadata.setColumnDescs(Arrays.asList(
|
||||
new TColumnDesc("name", TPrimitiveType.STRING),
|
||||
new TColumnDesc("type", TPrimitiveType.STRING),
|
||||
new TColumnDesc("comment", TPrimitiveType.STRING)));
|
||||
} else if (analysis.isAlterTableStmt()) {
|
||||
ddl.ddl_type = TDdlType.ALTER_TABLE;
|
||||
ddl.setAlter_table_params(analysis.getAlterTableStmt().toThrift());
|
||||
|
||||
@@ -104,4 +104,8 @@
|
||||
<name>dfs.replication</name>
|
||||
<value>3</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hive.server2.authentication</name>
|
||||
<value>NOSASL</value>
|
||||
</property>
|
||||
</configuration>
|
||||
|
||||
@@ -100,4 +100,8 @@
|
||||
<name>dfs.replication</name>
|
||||
<value>3</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hive.server2.authentication</name>
|
||||
<value>NOSASL</value>
|
||||
</property>
|
||||
</configuration>
|
||||
|
||||
7
testdata/bin/run-hive-server.sh
vendored
7
testdata/bin/run-hive-server.sh
vendored
@@ -1,11 +1,16 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
|
||||
HIVE_SERVER_PORT=10000
|
||||
export HIVE_SERVER2_THRIFT_PORT=11050
|
||||
set -u
|
||||
|
||||
# Kill for a clean start.
|
||||
$IMPALA_HOME/testdata/bin/kill-hive-server.sh
|
||||
|
||||
# Starts hive-server on the specified port
|
||||
# Starts a HiveServer2 instance on the port specified by the HIVE_SERVER2_THRIFT_PORT
|
||||
# environment variable.
|
||||
hive --service hiveserver2 &
|
||||
|
||||
# Starts hive-server (1) on the specified port.
|
||||
hive --service hiveserver -p $HIVE_SERVER_PORT &
|
||||
sleep 5
|
||||
|
||||
@@ -29,6 +29,7 @@ from tests.util.shell_util import exec_shell_cmd
|
||||
from tests.util.test_file_parser import *
|
||||
from tests.util.thrift_util import create_transport
|
||||
from tests.common.base_test_suite import BaseTestSuite
|
||||
from tests.common.query_executor import JdbcQueryExecOptions, execute_using_jdbc
|
||||
|
||||
# Imports required for Hive Metastore Client
|
||||
from hive_metastore import ThriftHiveMetastore
|
||||
@@ -38,6 +39,9 @@ from thrift.protocol import TBinaryProtocol
|
||||
logging.basicConfig(level=logging.INFO, format='%(threadName)s: %(message)s')
|
||||
LOG = logging.getLogger('impala_test_suite')
|
||||
IMPALAD = pytest.config.option.impalad
|
||||
IMPALAD_HS2_HOST_PORT = pytest.config.option.impalad.split(':')[0] + ":" + \
|
||||
pytest.config.option.impalad_hs2_port
|
||||
HIVE_HS2_HOST_PORT = pytest.config.option.hive_server2
|
||||
WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR']
|
||||
|
||||
# Base class for Impala tests. All impala test cases should inherit from this class
|
||||
@@ -224,17 +228,20 @@ class ImpalaTestSuite(BaseTestSuite):
|
||||
assert len(result.data) <= 1, 'Multiple values returned from scalar'
|
||||
return result.data[0] if len(result.data) == 1 else None
|
||||
|
||||
def exec_and_compare_hive_and_impala_hs2(self, stmt):
|
||||
"""Compare Hive and Impala results when executing the same statement over HS2"""
|
||||
# Run the statement targeting Hive
|
||||
exec_opts = JdbcQueryExecOptions(iterations=1, impalad=HIVE_HS2_HOST_PORT)
|
||||
hive_results = execute_using_jdbc(stmt, exec_opts).data
|
||||
|
||||
def exec_and_compare_hive_and_impala(self, exec_stmt):
|
||||
"""Executes the same statement in Hive and Impala and compares the results"""
|
||||
rc, stdout, stderr =\
|
||||
exec_shell_cmd("hive -e \"%s\"" % exec_stmt)
|
||||
assert rc == 0, "stdout: %s\nstderr: %s" % (stdout, stderr)
|
||||
result = self.client.execute(exec_stmt)
|
||||
# Run the statement targeting Impala
|
||||
exec_opts = JdbcQueryExecOptions(iterations=1, impalad=IMPALAD_HS2_HOST_PORT)
|
||||
impala_results = execute_using_jdbc(stmt, exec_opts).data
|
||||
|
||||
# Compare line-by-line (hive results go to stdout).
|
||||
for impala, hive in zip(result.data, stdout.split('\n')):
|
||||
assert impala.rstrip() == hive.rstrip()
|
||||
# Compare the results
|
||||
assert (impala_results is not None) and (hive_results is not None)
|
||||
for impala, hive in zip(impala_results, hive_results):
|
||||
assert impala == hive
|
||||
|
||||
def __drop_partitions(self, db_name, table_name):
|
||||
"""Drops all partitions in the given table"""
|
||||
|
||||
@@ -20,9 +20,17 @@ def pytest_addoption(parser):
|
||||
parser.addoption("--impalad", default="localhost:21000", help=\
|
||||
"The impalad host:port to run tests against.")
|
||||
|
||||
parser.addoption("--impalad_hs2_port", default="21050", help=\
|
||||
"The impalad HiveServer2 port.")
|
||||
|
||||
# TODO: Migrate test infrastructure to HiveServer2 and remove the need for this
|
||||
# parameter.
|
||||
parser.addoption("--hive_server", default="localhost:10000", help=\
|
||||
"The hive server host:port to connect to.")
|
||||
|
||||
parser.addoption("--hive_server2", default="localhost:11050", help=\
|
||||
"Hive's HiveServer2 host:port to connect to.")
|
||||
|
||||
parser.addoption("--update_results", action="store_true", default=False, help=\
|
||||
"If set, will generate new results for all tests run instead of "\
|
||||
"verifying the results.")
|
||||
|
||||
@@ -38,9 +38,9 @@ class TestMetadataQueryStatements(ImpalaTestSuite):
|
||||
|
||||
def test_describe_formatted(self, vector):
|
||||
# Describe a partitioned table.
|
||||
self.exec_and_compare_hive_and_impala("describe formatted functional.alltypes")
|
||||
self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.alltypes")
|
||||
# Describe an unpartitioned table.
|
||||
self.exec_and_compare_hive_and_impala("describe formatted tpch.lineitem")
|
||||
self.exec_and_compare_hive_and_impala_hs2("describe formatted tpch.lineitem")
|
||||
|
||||
def test_use_table(self, vector):
|
||||
self.run_test_case('QueryTest/use', vector)
|
||||
|
||||
Reference in New Issue
Block a user