diff --git a/fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java b/fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java
index a9b5832a2..cb70e565f 100644
--- a/fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java
+++ b/fe/src/main/java/com/cloudera/impala/service/DescribeResultFactory.java
@@ -29,6 +29,9 @@ import com.google.common.collect.Lists;
  * TDescribeTableResult object.
  */
 public class DescribeResultFactory {
+  // Number of columns in each row of the DESCRIBE FORMATTED result set.
+  private final static int NUM_DESC_FORMATTED_RESULT_COLS = 3;
+
   public static TDescribeTableResult buildDescribeTableResult(Table table,
       TDescribeTableOutputStyle outputFormat) {
     switch (outputFormat) {
@@ -64,8 +67,8 @@ public class DescribeResultFactory {
   /*
    * Builds a TDescribeTableResult that contains the result of a DESCRIBE FORMATTED
    * command. For the formatted describe output the goal is to be exactly the
-   * same as what Hive outputs, for compatibility reasons. To do this, Hive's
-   * MetadataFormatUtils class is used to build the results.
+   * same as what Hive (via HiveServer2) outputs, for compatibility reasons. To do this,
+   * Hive's MetadataFormatUtils class is used to build the results.
    */
   private static TDescribeTableResult describeTableFormatted(Table table) {
     TDescribeTableResult descResult = new TDescribeTableResult();
@@ -82,10 +85,21 @@ public class DescribeResultFactory {
     sb.append(MetaDataFormatUtils.getTableInformation(hiveTable));
 
     for (String line: sb.toString().split("\n")) {
-      TColumnValue descFormattedEntry = new TColumnValue();
-      descFormattedEntry.setStringVal(line);
-      descResult.results.add(new TResultRow(Lists.newArrayList(descFormattedEntry)));
+      // To match Hive's HiveServer2 output, split each line into multiple column
+      // values based on the field delimiter.
+      String[] columns = line.split(MetaDataFormatUtils.FIELD_DELIM);
+      TResultRow resultRow = new TResultRow();
+      for (int i = 0; i < NUM_DESC_FORMATTED_RESULT_COLS; ++i) {
+        TColumnValue colVal = new TColumnValue();
+        colVal.setStringVal(null);
+        if (columns.length > i) {
+          // Add the column value.
+          colVal.setStringVal(columns[i]);
+        }
+        resultRow.addToColVals(colVal);
+      }
+      descResult.results.add(resultRow);
     }
     return descResult;
   }
-}
\ No newline at end of file
+}
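For reference, a minimal standalone sketch (not part of the patch) of the row-building logic DescribeResultFactory now uses: each line of Hive's formatted output is split on the field delimiter and null-padded so that every row carries exactly NUM_DESC_FORMATTED_RESULT_COLS values. The class name and sample input are invented for illustration, plain String arrays stand in for TResultRow/TColumnValue, and the tab delimiter is an assumption about MetaDataFormatUtils.FIELD_DELIM.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class DescribeFormattedRowSketch {
  // Assumption: Hive's MetaDataFormatUtils.FIELD_DELIM is a tab character.
  private static final String FIELD_DELIM = "\t";
  private static final int NUM_DESC_FORMATTED_RESULT_COLS = 3;

  // Splits each line of the formatted describe output into exactly three values,
  // padding short lines (e.g. section headers) with nulls.
  public static List<String[]> buildRows(String formattedOutput) {
    List<String[]> rows = new ArrayList<String[]>();
    for (String line : formattedOutput.split("\n")) {
      String[] columns = line.split(FIELD_DELIM);
      String[] row = new String[NUM_DESC_FORMATTED_RESULT_COLS];
      for (int i = 0; i < NUM_DESC_FORMATTED_RESULT_COLS; ++i) {
        row[i] = (i < columns.length) ? columns[i] : null;
      }
      rows.add(row);
    }
    return rows;
  }

  public static void main(String[] args) {
    // Hypothetical sample resembling the first lines of DESCRIBE FORMATTED output.
    String sample = "# col_name\tdata_type\tcomment\n\nid\tint\tnull\n# Detailed Table Information";
    for (String[] row : buildRows(sample)) {
      System.out.println(Arrays.toString(row));
    }
  }
}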
diff --git a/fe/src/main/java/com/cloudera/impala/service/Frontend.java b/fe/src/main/java/com/cloudera/impala/service/Frontend.java
index b87a7613b..47fdac7b7 100644
--- a/fe/src/main/java/com/cloudera/impala/service/Frontend.java
+++ b/fe/src/main/java/com/cloudera/impala/service/Frontend.java
@@ -170,17 +170,10 @@ public class Frontend {
     } else if (analysis.isDescribeStmt()) {
       ddl.ddl_type = TDdlType.DESCRIBE;
       ddl.setDescribe_table_params(analysis.getDescribeStmt().toThrift());
-      // DESCRIBE FORMATTED commands return all all results in a single column.
-      if (analysis.getDescribeStmt().getOutputStyle() ==
-          TDescribeTableOutputStyle.FORMATTED) {
-        metadata.setColumnDescs(Arrays.asList(
-            new TColumnDesc("describe_formatted", TPrimitiveType.STRING)));
-      } else {
-        metadata.setColumnDescs(Arrays.asList(
-            new TColumnDesc("name", TPrimitiveType.STRING),
-            new TColumnDesc("type", TPrimitiveType.STRING),
-            new TColumnDesc("comment", TPrimitiveType.STRING)));
-      }
+      metadata.setColumnDescs(Arrays.asList(
+          new TColumnDesc("name", TPrimitiveType.STRING),
+          new TColumnDesc("type", TPrimitiveType.STRING),
+          new TColumnDesc("comment", TPrimitiveType.STRING)));
     } else if (analysis.isAlterTableStmt()) {
       ddl.ddl_type = TDdlType.ALTER_TABLE;
       ddl.setAlter_table_params(analysis.getAlterTableStmt().toThrift());
diff --git a/fe/src/test/resources/mysql-hive-site.xml.template b/fe/src/test/resources/mysql-hive-site.xml.template
index f00b88df3..cf8ab2e28 100644
--- a/fe/src/test/resources/mysql-hive-site.xml.template
+++ b/fe/src/test/resources/mysql-hive-site.xml.template
@@ -104,4 +104,8 @@
     <name>dfs.replication</name>
     <value>3</value>
   </property>
+  <property>
+    <name>hive.server2.authentication</name>
+    <value>NOSASL</value>
+  </property>
 </configuration>
diff --git a/fe/src/test/resources/postgresql-hive-site.xml.template b/fe/src/test/resources/postgresql-hive-site.xml.template
index f2033383c..119e2fa34 100644
--- a/fe/src/test/resources/postgresql-hive-site.xml.template
+++ b/fe/src/test/resources/postgresql-hive-site.xml.template
@@ -100,4 +100,8 @@
     <name>dfs.replication</name>
     <value>3</value>
   </property>
+  <property>
+    <name>hive.server2.authentication</name>
+    <value>NOSASL</value>
+  </property>
 </configuration>
diff --git a/testdata/bin/run-hive-server.sh b/testdata/bin/run-hive-server.sh
index 340eea7a8..973e30093 100755
--- a/testdata/bin/run-hive-server.sh
+++ b/testdata/bin/run-hive-server.sh
@@ -1,11 +1,16 @@
 #!/bin/bash
 # Copyright (c) 2012 Cloudera, Inc. All rights reserved.
 HIVE_SERVER_PORT=10000
+export HIVE_SERVER2_THRIFT_PORT=11050
 set -u
 
 # Kill for a clean start.
 $IMPALA_HOME/testdata/bin/kill-hive-server.sh
 
-# Starts hive-server on the specified port
+# Starts a HiveServer2 instance on the port specified by the HIVE_SERVER2_THRIFT_PORT
+# environment variable.
+hive --service hiveserver2 &
+
+# Starts hive-server (1) on the specified port.
 hive --service hiveserver -p $HIVE_SERVER_PORT &
 sleep 5
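The hive-site templates above switch HiveServer2 to NOSASL authentication and run-hive-server.sh now brings one up on port 11050, so a plain JDBC client can reach it without SASL negotiation. A rough sketch of such a client follows (not part of the patch); the host, port, database name, and table are taken from the test defaults in this change or invented for illustration, and the standard Hive JDBC driver is assumed to be on the classpath.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class HiveServer2SmokeTest {
  public static void main(String[] args) throws Exception {
    // Assumes the Hive JDBC driver jar is on the classpath. The auth=noSasl URL
    // option matches the hive.server2.authentication=NOSASL setting added above.
    Class.forName("org.apache.hive.jdbc.HiveDriver");
    Connection conn = DriverManager.getConnection(
        "jdbc:hive2://localhost:11050/default;auth=noSasl");
    try {
      Statement stmt = conn.createStatement();
      ResultSet rs = stmt.executeQuery("describe formatted functional.alltypes");
      // With the Frontend.java change, Impala's DESCRIBE FORMATTED also comes back
      // as three string columns (name, type, comment), matching what Hive returns here.
      while (rs.next()) {
        System.out.println(rs.getString(1) + "\t" + rs.getString(2) + "\t" + rs.getString(3));
      }
    } finally {
      conn.close();
    }
  }
}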
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index 2373f579f..120c6d56d 100755
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -29,6 +29,7 @@ from tests.util.shell_util import exec_shell_cmd
 from tests.util.test_file_parser import *
 from tests.util.thrift_util import create_transport
 from tests.common.base_test_suite import BaseTestSuite
+from tests.common.query_executor import JdbcQueryExecOptions, execute_using_jdbc
 
 # Imports required for Hive Metastore Client
 from hive_metastore import ThriftHiveMetastore
@@ -38,6 +39,9 @@ from thrift.protocol import TBinaryProtocol
 logging.basicConfig(level=logging.INFO, format='%(threadName)s: %(message)s')
 LOG = logging.getLogger('impala_test_suite')
 IMPALAD = pytest.config.option.impalad
+IMPALAD_HS2_HOST_PORT = pytest.config.option.impalad.split(':')[0] + ":" + \
+    pytest.config.option.impalad_hs2_port
+HIVE_HS2_HOST_PORT = pytest.config.option.hive_server2
 WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR']
 
 # Base class for Impala tests. All impala test cases should inherit from this class
@@ -224,17 +228,20 @@ class ImpalaTestSuite(BaseTestSuite):
     assert len(result.data) <= 1, 'Multiple values returned from scalar'
     return result.data[0] if len(result.data) == 1 else None
 
+  def exec_and_compare_hive_and_impala_hs2(self, stmt):
+    """Compare Hive and Impala results when executing the same statement over HS2"""
+    # Run the statement targeting Hive
+    exec_opts = JdbcQueryExecOptions(iterations=1, impalad=HIVE_HS2_HOST_PORT)
+    hive_results = execute_using_jdbc(stmt, exec_opts).data
 
-  def exec_and_compare_hive_and_impala(self, exec_stmt):
-    """Executes the same statement in Hive and Impala and compares the results"""
-    rc, stdout, stderr =\
-      exec_shell_cmd("hive -e \"%s\"" % exec_stmt)
-    assert rc == 0, "stdout: %s\nstderr: %s" % (stdout, stderr)
-    result = self.client.execute(exec_stmt)
+    # Run the statement targeting Impala
+    exec_opts = JdbcQueryExecOptions(iterations=1, impalad=IMPALAD_HS2_HOST_PORT)
+    impala_results = execute_using_jdbc(stmt, exec_opts).data
 
-    # Compare line-by-line (hive results go to stdout).
-    for impala, hive in zip(result.data, stdout.split('\n')):
-      assert impala.rstrip() == hive.rstrip()
+    # Compare the results
+    assert (impala_results is not None) and (hive_results is not None)
+    for impala, hive in zip(impala_results, hive_results):
+      assert impala == hive
 
   def __drop_partitions(self, db_name, table_name):
     """Drops all partitions in the given table"""
diff --git a/tests/conftest.py b/tests/conftest.py
index ef08bd6a8..a9548f8eb 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -20,9 +20,17 @@ def pytest_addoption(parser):
   parser.addoption("--impalad", default="localhost:21000", help=\
       "The impalad host:port to run tests against.")
 
+  parser.addoption("--impalad_hs2_port", default="21050", help=\
+      "The impalad HiveServer2 port.")
+
+  # TODO: Migrate test infrastructure to HiveServer2 and remove the need for this
+  # parameter.
   parser.addoption("--hive_server", default="localhost:10000", help=\
       "The hive server host:port to connect to.")
 
+  parser.addoption("--hive_server2", default="localhost:11050", help=\
+      "Hive's HiveServer2 host:port to connect to.")
+
   parser.addoption("--update_results", action="store_true", default=False, help=\
       "If set, will generate new results for all tests run instead of "\
       "verifying the results.")
diff --git a/tests/query_test/test_metadata_query_statements.py b/tests/query_test/test_metadata_query_statements.py
index 439cd9817..b4979027b 100644
--- a/tests/query_test/test_metadata_query_statements.py
+++ b/tests/query_test/test_metadata_query_statements.py
@@ -38,9 +38,9 @@ class TestMetadataQueryStatements(ImpalaTestSuite):
 
   def test_describe_formatted(self, vector):
     # Describe a partitioned table.
-    self.exec_and_compare_hive_and_impala("describe formatted functional.alltypes")
+    self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.alltypes")
     # Describe an unpartitioned table.
-    self.exec_and_compare_hive_and_impala("describe formatted tpch.lineitem")
+    self.exec_and_compare_hive_and_impala_hs2("describe formatted tpch.lineitem")
 
   def test_use_table(self, vector):
     self.run_test_case('QueryTest/use', vector)
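The new exec_and_compare_hive_and_impala_hs2 helper issues the same statement to Hive's HiveServer2 and to Impala's HS2 endpoint over JDBC and asserts the rows match. A rough Java equivalent of that comparison, under the same assumptions as the previous sketch (Hive JDBC driver on the classpath, NOSASL endpoints on the default test ports 11050 and 21050), might look like this:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

public class CompareHiveAndImpalaDescribe {
  // Fetches all rows from one HiveServer2-protocol endpoint, flattening each row
  // into a single tab-separated string so rows can be compared directly.
  private static List<String> fetchRows(String url, String stmtText) throws SQLException {
    List<String> rows = new ArrayList<String>();
    Connection conn = DriverManager.getConnection(url);
    try {
      ResultSet rs = conn.createStatement().executeQuery(stmtText);
      int numCols = rs.getMetaData().getColumnCount();
      while (rs.next()) {
        StringBuilder row = new StringBuilder();
        for (int i = 1; i <= numCols; ++i) {
          if (i > 1) row.append('\t');
          row.append(rs.getString(i));
        }
        rows.add(row.toString());
      }
    } finally {
      conn.close();
    }
    return rows;
  }

  public static void main(String[] args) throws Exception {
    Class.forName("org.apache.hive.jdbc.HiveDriver");
    String stmt = "describe formatted functional.alltypes";
    // Ports follow the defaults added in this change: 11050 for Hive's HiveServer2,
    // 21050 for Impala's HS2 endpoint (conftest.py's --impalad_hs2_port default).
    List<String> hiveRows =
        fetchRows("jdbc:hive2://localhost:11050/default;auth=noSasl", stmt);
    List<String> impalaRows =
        fetchRows("jdbc:hive2://localhost:21050/default;auth=noSasl", stmt);
    int common = Math.min(hiveRows.size(), impalaRows.size());
    for (int i = 0; i < common; ++i) {
      if (!hiveRows.get(i).equals(impalaRows.get(i))) {
        System.out.println("Mismatch at row " + i + ": ["
            + hiveRows.get(i) + "] vs [" + impalaRows.get(i) + "]");
      }
    }
    System.out.println("Compared " + common + " rows.");
  }
}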