Update DESCRIBE FORMATTED results to match the Hive HS2 output

Lenni Kuff
2013-06-18 16:33:26 -07:00
committed by Henry Robinson
parent 4d89735ec5
commit abdfae5b24
8 changed files with 64 additions and 29 deletions

View File

@@ -29,6 +29,9 @@ import com.google.common.collect.Lists;
* TDescribeTableResult object.
*/
public class DescribeResultFactory {
// Number of columns in each row of the DESCRIBE FORMATTED result set.
private final static int NUM_DESC_FORMATTED_RESULT_COLS = 3;
public static TDescribeTableResult buildDescribeTableResult(Table table,
TDescribeTableOutputStyle outputFormat) {
switch (outputFormat) {
@@ -64,8 +67,8 @@ public class DescribeResultFactory {
/*
* Builds a TDescribeTableResult that contains the result of a DESCRIBE FORMATTED
* <table> command. For the formatted describe output the goal is to be exactly the
* same as what Hive outputs, for compatibility reasons. To do this, Hive's
* MetadataFormatUtils class is used to build the results.
* same as what Hive (via HiveServer2) outputs, for compatibility reasons. To do this,
* Hive's MetadataFormatUtils class is used to build the results.
*/
private static TDescribeTableResult describeTableFormatted(Table table) {
TDescribeTableResult descResult = new TDescribeTableResult();
@@ -82,10 +85,21 @@ public class DescribeResultFactory {
sb.append(MetaDataFormatUtils.getTableInformation(hiveTable));
for (String line: sb.toString().split("\n")) {
TColumnValue descFormattedEntry = new TColumnValue();
descFormattedEntry.setStringVal(line);
descResult.results.add(new TResultRow(Lists.newArrayList(descFormattedEntry)));
// To match Hive's HiveServer2 output, split each line into multiple column
// values based on the field delimiter.
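// (In Hive's MetaDataFormatUtils, FIELD_DELIM is a tab character.)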
String[] columns = line.split(MetaDataFormatUtils.FIELD_DELIM);
TResultRow resultRow = new TResultRow();
for (int i = 0; i < NUM_DESC_FORMATTED_RESULT_COLS; ++i) {
TColumnValue colVal = new TColumnValue();
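// Default to a NULL value so short rows are padded out to the full column count.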
colVal.setStringVal(null);
if (columns.length > i) {
// Add the column value.
colVal.setStringVal(columns[i]);
}
resultRow.addToColVals(colVal);
}
descResult.results.add(resultRow);
}
return descResult;
}
}
}
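
For illustration, a minimal standalone sketch (not part of the commit) of the split-and-pad behavior above. The sample line is an assumption, as is the delimiter; Hive's MetaDataFormatUtils defines FIELD_DELIM as a tab:

public class DescribeFormattedExample {
  private static final int NUM_COLS = 3;
  public static void main(String[] args) {
    // One line of Hive's formatted output: name, type and comment separated by tabs.
    String line = "id                  \tint                 \tfrom deserializer";
    String[] columns = line.split("\t");
    String[] row = new String[NUM_COLS];
    for (int i = 0; i < NUM_COLS; ++i) {
      // Pad missing trailing fields with null, mirroring describeTableFormatted().
      row[i] = i < columns.length ? columns[i] : null;
    }
    // Prints: [id                  , int                 , from deserializer]
    System.out.println(java.util.Arrays.toString(row));
  }
}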

View File

@@ -170,17 +170,10 @@ public class Frontend {
} else if (analysis.isDescribeStmt()) {
ddl.ddl_type = TDdlType.DESCRIBE;
ddl.setDescribe_table_params(analysis.getDescribeStmt().toThrift());
// DESCRIBE FORMATTED commands return all results in a single column.
if (analysis.getDescribeStmt().getOutputStyle() ==
TDescribeTableOutputStyle.FORMATTED) {
metadata.setColumnDescs(Arrays.asList(
new TColumnDesc("describe_formatted", TPrimitiveType.STRING)));
} else {
metadata.setColumnDescs(Arrays.asList(
new TColumnDesc("name", TPrimitiveType.STRING),
new TColumnDesc("type", TPrimitiveType.STRING),
new TColumnDesc("comment", TPrimitiveType.STRING)));
}
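// DESCRIBE and DESCRIBE FORMATTED results now share the same three-column
// schema, matching Hive's HS2 output.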
metadata.setColumnDescs(Arrays.asList(
new TColumnDesc("name", TPrimitiveType.STRING),
new TColumnDesc("type", TPrimitiveType.STRING),
new TColumnDesc("comment", TPrimitiveType.STRING)));
} else if (analysis.isAlterTableStmt()) {
ddl.ddl_type = TDdlType.ALTER_TABLE;
ddl.setAlter_table_params(analysis.getAlterTableStmt().toThrift());

View File

@@ -104,4 +104,8 @@
<name>dfs.replication</name>
<value>3</value>
</property>
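<!-- NOSASL disables SASL negotiation so plain (unauthenticated) Thrift clients can connect to HiveServer2. -->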
<property>
<name>hive.server2.authentication</name>
<value>NOSASL</value>
</property>
</configuration>

View File

@@ -100,4 +100,8 @@
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NOSASL</value>
</property>
</configuration>

View File

@@ -1,11 +1,16 @@
#!/bin/bash
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
HIVE_SERVER_PORT=10000
export HIVE_SERVER2_THRIFT_PORT=11050
set -u
# Kill for a clean start.
$IMPALA_HOME/testdata/bin/kill-hive-server.sh
# Starts hive-server on the specified port
# Starts a HiveServer2 instance on the port specified by the HIVE_SERVER2_THRIFT_PORT
# environment variable.
hive --service hiveserver2 &
# Starts hive-server (1) on the specified port.
hive --service hiveserver -p $HIVE_SERVER_PORT &
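# Give both servers a moment to start up before dependent scripts connect.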
sleep 5

View File

@@ -29,6 +29,7 @@ from tests.util.shell_util import exec_shell_cmd
from tests.util.test_file_parser import *
from tests.util.thrift_util import create_transport
from tests.common.base_test_suite import BaseTestSuite
from tests.common.query_executor import JdbcQueryExecOptions, execute_using_jdbc
# Imports required for Hive Metastore Client
from hive_metastore import ThriftHiveMetastore
@@ -38,6 +39,9 @@ from thrift.protocol import TBinaryProtocol
logging.basicConfig(level=logging.INFO, format='%(threadName)s: %(message)s')
LOG = logging.getLogger('impala_test_suite')
IMPALAD = pytest.config.option.impalad
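# Impala's HS2 endpoint: the impalad host combined with the configured HS2 port.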
IMPALAD_HS2_HOST_PORT = pytest.config.option.impalad.split(':')[0] + ":" + \
pytest.config.option.impalad_hs2_port
HIVE_HS2_HOST_PORT = pytest.config.option.hive_server2
WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR']
# Base class for Impala tests. All impala test cases should inherit from this class
@@ -224,17 +228,20 @@ class ImpalaTestSuite(BaseTestSuite):
assert len(result.data) <= 1, 'Multiple values returned from scalar'
return result.data[0] if len(result.data) == 1 else None
def exec_and_compare_hive_and_impala_hs2(self, stmt):
"""Compare Hive and Impala results when executing the same statment over HS2"""
# Run the statement targeting Hive
exec_opts = JdbcQueryExecOptions(iterations=1, impalad=HIVE_HS2_HOST_PORT)
hive_results = execute_using_jdbc(stmt, exec_opts).data
def exec_and_compare_hive_and_impala(self, exec_stmt):
"""Executes the same statement in Hive and Impala and compares the results"""
rc, stdout, stderr =\
exec_shell_cmd("hive -e \"%s\"" % exec_stmt)
assert rc == 0, "stdout: %s\nstderr: %s" % (stdout, stderr)
result = self.client.execute(exec_stmt)
# Run the statement targeting Impala
exec_opts = JdbcQueryExecOptions(iterations=1, impalad=IMPALAD_HS2_HOST_PORT)
impala_results = execute_using_jdbc(stmt, exec_opts).data
# Compare line-by-line (hive results go to stdout).
for impala, hive in zip(result.data, stdout.split('\n')):
assert impala.rstrip() == hive.rstrip()
# Compare the results
assert (impala_results is not None) and (hive_results is not None)
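# Note: zip() truncates to the shorter result set, so equal row counts are assumed.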
for impala, hive in zip(impala_results, hive_results):
assert impala == hive
def __drop_partitions(self, db_name, table_name):
"""Drops all partitions in the given table"""

View File

@@ -20,9 +20,17 @@ def pytest_addoption(parser):
parser.addoption("--impalad", default="localhost:21000", help=\
"The impalad host:port to run tests against.")
parser.addoption("--impalad_hs2_port", default="21050", help=\
"The impalad HiveServer2 port.")
# TODO: Migrate test infrastructure to HiveServer2 and remove the need for this
# parameter.
parser.addoption("--hive_server", default="localhost:10000", help=\
"The hive server host:port to connect to.")
parser.addoption("--hive_server2", default="localhost:11050", help=\
"Hive's HiveServer2 host:port to connect to.")
parser.addoption("--update_results", action="store_true", default=False, help=\
"If set, will generate new results for all tests run instead of "\
"verifying the results.")

View File

@@ -38,9 +38,9 @@ class TestMetadataQueryStatements(ImpalaTestSuite):
def test_describe_formatted(self, vector):
# Describe a partitioned table.
self.exec_and_compare_hive_and_impala("describe formatted functional.alltypes")
self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.alltypes")
# Describe an unpartitioned table.
self.exec_and_compare_hive_and_impala("describe formatted tpch.lineitem")
self.exec_and_compare_hive_and_impala_hs2("describe formatted tpch.lineitem")
def test_use_table(self, vector):
self.run_test_case('QueryTest/use', vector)