From 461a48df2bfcfc324fac27c1f937993fd108bbc0 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Wed, 27 Feb 2013 18:26:31 -0800 Subject: [PATCH] Refactor testing framework to generate Avro tables. --- bin/load-data.py | 32 +- testdata/bin/generate-schema-statements.py | 219 ++++++-- testdata/bin/generate-test-vectors.py | 4 +- .../functional/functional_schema_template.sql | 526 ++++++++---------- .../datasets/tpcds/tpcds_schema_template.sql | 452 +++++++-------- .../datasets/tpch/tpch_schema_template.sql | 340 +++++------ .../functional-query_core.csv | 2 + .../functional-query_dimensions.csv | 2 +- .../functional-query_exhaustive.csv | 2 + .../functional-query_pairwise.csv | 7 +- .../hive-benchmark_dimensions.csv | 2 +- .../hive-benchmark_exhaustive.csv | 2 + .../hive-benchmark_pairwise.csv | 7 +- .../targeted-perf/targeted-perf_core.csv | 2 + .../targeted-stress/targeted-stress_core.csv | 2 + testdata/workloads/tpcds/tpcds_dimensions.csv | 2 +- testdata/workloads/tpcds/tpcds_exhaustive.csv | 2 + testdata/workloads/tpcds/tpcds_pairwise.csv | 7 +- testdata/workloads/tpch/tpch_core.csv | 2 + tests/common/impala_test_suite.py | 3 +- tests/common/test_result_verifier.py | 25 +- tests/query_test/test_aggregation.py | 4 + tests/query_test/test_queries.py | 5 + 23 files changed, 850 insertions(+), 801 deletions(-) diff --git a/bin/load-data.py b/bin/load-data.py index d7efa513a..b658a2b98 100755 --- a/bin/load-data.py +++ b/bin/load-data.py @@ -19,6 +19,7 @@ parser.add_option("-e", "--exploration_strategy", dest="exploration_strategy", help="The exploration strategy for schema gen: 'core', "\ "'pairwise', or 'exhaustive'") parser.add_option("--hive_warehouse_dir", dest="hive_warehouse_dir", + default="/test-warehouse", help="The HDFS path to the base Hive test warehouse directory") parser.add_option("-w", "--workloads", dest="workloads", help="Comma-separated list of workloads to load data for. If 'all' is "\ @@ -38,19 +39,21 @@ parser.add_option("--table_names", dest="table_names", default=None, parser.add_option("--table_formats", dest="table_formats", default=None, help="Override the test vectors and load using the specified table "\ "formats. Ex. 
--table_formats=seq/snap/block,text/none") - +parser.add_option("--hdfs_namenode", dest="hdfs_namenode", default="localhost:20500", + help="HDFS name node for Avro schema URLs, default localhost:20500") (options, args) = parser.parse_args() WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR'] DATASET_DIR = os.environ['IMPALA_DATASET_DIR'] TESTDATA_BIN_DIR = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin') +AVRO_SCHEMA_DIR = "avro_schemas" GENERATE_SCHEMA_CMD = "generate-schema-statements.py --exploration_strategy=%s "\ "--workload=%s --scale_factor=%s --verbose" HIVE_CMD = os.path.join(os.environ['HIVE_HOME'], 'bin/hive') HIVE_ARGS = "-hiveconf hive.root.logger=WARN,console -v" - IMPALA_SHELL_CMD = os.path.join(os.environ['IMPALA_HOME'], 'bin/impala-shell.sh') +HADOOP_CMD = os.path.join(os.environ['HADOOP_HOME'], 'bin/hadoop') def available_workloads(workload_dir): return [subdir for subdir in os.listdir(workload_dir) @@ -99,6 +102,8 @@ def generate_schema_statements(workload): generate_cmd += " --table_formats=%s" % options.table_formats if options.hive_warehouse_dir is not None: generate_cmd += " --hive_warehouse_dir=%s" % options.hive_warehouse_dir + if options.hdfs_namenode is not None: + generate_cmd += " --hdfs_namenode=%s" % options.hdfs_namenode print 'Executing Generate Schema Command: ' + generate_cmd ret_val = subprocess.call(os.path.join(TESTDATA_BIN_DIR, generate_cmd), shell = True) if ret_val != 0: @@ -119,6 +124,15 @@ def get_dataset_for_workload(workload): print 'Dimension file does not contain dataset for workload \'%s\'' % (workload) sys.exit(1) +def copy_avro_schemas_to_hdfs(schemas_dir): + """Recursively copies all of schemas_dir to the test warehouse.""" + cmd = "%s fs -put -f %s /" % (HADOOP_CMD, schemas_dir) + print "Executing HDFS copy command: " + cmd + ret_val = subprocess.call(cmd, shell=True) + if ret_val != 0: + print "Error copying Avro schemas to HDFS, exiting" + sys.exit(ret_val) + if __name__ == "__main__": all_workloads = available_workloads(WORKLOAD_DIR) workloads = [] @@ -142,8 +156,18 @@ if __name__ == "__main__": dataset_dir = os.path.join(DATASET_DIR, dataset) os.chdir(dataset_dir) generate_schema_statements(workload) - exec_hive_query_from_file(os.path.join(dataset_dir, - 'load-%s-%s-generated.sql' % (workload, options.exploration_strategy))) + # We load Avro tables separately due to bugs in the Avro SerDe. + # generate-schema-statements.py separates the avro statements into a + # separate file to get around this. + # See https://issues.apache.org/jira/browse/HIVE-4195. 
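+  # For example, with workload 'tpch' and exploration strategy 'core', the
+  # generated files are load-tpch-core-generated.sql and
+  # load-tpch-core-avro-generated.sql (the Avro file is only written when
+  # Avro table formats were generated).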
+ generated_file = 'load-%s-%s-generated.sql' % (workload, options.exploration_strategy) + if os.path.exists(generated_file): + exec_hive_query_from_file(os.path.join(dataset_dir, generated_file)) + generated_avro_file = \ + 'load-%s-%s-avro-generated.sql' % (workload, options.exploration_strategy) + if os.path.exists(generated_avro_file): + copy_avro_schemas_to_hdfs(AVRO_SCHEMA_DIR) + exec_hive_query_from_file(os.path.join(dataset_dir, generated_avro_file)) loading_time_map[workload] = time.time() - start_time total_time = 0.0 diff --git a/testdata/bin/generate-schema-statements.py b/testdata/bin/generate-schema-statements.py index 993a5ed5b..6be097e72 100755 --- a/testdata/bin/generate-schema-statements.py +++ b/testdata/bin/generate-schema-statements.py @@ -25,6 +25,7 @@ import collections import csv import math +import json import os import random import subprocess @@ -58,6 +59,8 @@ parser.add_option("--table_names", dest="table_names", default=None, parser.add_option("--table_formats", dest="table_formats", default=None, help="Override the test vectors and load using the specified table "\ "formats. Ex. --table_formats=seq/snap/block,text/none") +parser.add_option("--hdfs_namenode", dest="hdfs_namenode", default="localhost:20500", + help="HDFS name node for Avro schema URLs, default localhost:20500") (options, args) = parser.parse_args() if options.workload is None: @@ -67,10 +70,12 @@ if options.workload is None: WORKLOAD_DIR = os.environ['IMPALA_HOME'] + '/testdata/workloads' DATASET_DIR = os.environ['IMPALA_HOME'] + '/testdata/datasets' +AVRO_SCHEMA_DIR = "avro_schemas" COMPRESSION_TYPE = "SET mapred.output.compression.type=%s;" COMPRESSION_ENABLED = "SET hive.exec.compress.output=%s;" COMPRESSION_CODEC = "SET mapred.output.compression.codec=%s;" +AVRO_COMPRESSION_CODEC = "SET avro.output.codec=%s;" SET_DYNAMIC_PARTITION_STATEMENT = "SET hive.exec.dynamic.partition=true;" SET_PARTITION_MODE_NONSTRICT_STATEMENT = "SET hive.exec.dynamic.partition.mode=nonstrict;" SET_HIVE_INPUT_FORMAT = "SET mapred.max.split.size=256000000;\n"\ @@ -88,17 +93,42 @@ COMPRESSION_MAP = {'def': 'org.apache.hadoop.io.compress.DefaultCodec', 'none': '' } -FILE_FORMAT_MAP = {'text': 'TEXTFILE', - 'seq': 'SEQUENCEFILE', - 'rc': 'RCFILE', - 'parquet': '\n' + - 'INPUTFORMAT \'com.cloudera.impala.hive.serde.ParquetInputFormat\'\n' + - 'OUTPUTFORMAT \'com.cloudera.impala.hive.serde.ParquetOutputFormat\'', - 'text_lzo': '\n' + - 'INPUTFORMAT \'com.hadoop.mapred.DeprecatedLzoTextInputFormat\'\n' + - 'OUTPUTFORMAT ' + - '\'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\'\n' - } +AVRO_COMPRESSION_MAP = { + 'def': 'deflate', + 'snap': 'snappy', + 'none': '', + } + +FILE_FORMAT_MAP = { + 'text': 'TEXTFILE', + 'seq': 'SEQUENCEFILE', + 'rc': 'RCFILE', + 'parquet': + "\nINPUTFORMAT 'com.cloudera.impala.hive.serde.ParquetInputFormat'" + + "\nOUTPUTFORMAT 'com.cloudera.impala.hive.serde.ParquetOutputFormat'", + 'text_lzo': + "\nINPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'" + + "\nOUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'", + 'avro': + "\nINPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'" + + "\nOUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'" + } + +HIVE_TO_AVRO_TYPE_MAP = { + 'STRING': 'string', + 'INT': 'int', + 'TINYINT': 'int', + 'SMALLINT': 'int', + 'BIGINT': 'long', + 'BOOLEAN': 'boolean', + 'FLOAT': 'float', + 'DOUBLE': 'double', + # Avro has no timestamp type, so convert to string + # TODO: this allows us to 
create our Avro test tables, but any tests that use + # a timestamp column will fail. We probably want to convert back to timestamps + # in our tests. + 'TIMESTAMP': 'string', + } PARQUET_ALTER_STATEMENT = "ALTER TABLE %(table_name)s SET\n\ SERDEPROPERTIES ('blocksize' = '1073741824', 'compression' = '%(compression)s');" @@ -117,12 +147,66 @@ def build_create_statement(table_template, table_name, db_name, db_suffix, hdfs_location=hdfs_location) return create_statement -def build_compression_codec_statement(codec, compression_type): - compression_codec = COMPRESSION_MAP[codec] - if compression_codec: - return COMPRESSION_TYPE % compression_type.upper() + '\n' +\ - COMPRESSION_CODEC % compression_codec - return '' +def build_table_template(file_format, columns, partition_columns, row_format, + avro_schema_dir): + partitioned_by = str() + if partition_columns: + partitioned_by = 'PARTITIONED BY (%s)' % \ + ', '.join(partition_columns.split('\n')) + + row_format_stmt = str() + if row_format: + row_format_stmt = 'ROW FORMAT ' + row_format + + tblproperties = str() + if file_format == 'avro': + tblproperties = "TBLPROPERTIES ('avro.schema.url'=" \ + "'hdfs://%s/%s/{table_name}.json')" \ + % (options.hdfs_namenode, avro_schema_dir) + # Override specified row format + row_format_stmt = "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'" + + # Note: columns are ignored but allowed if a custom serde is specified + # (e.g. Avro) + return """ +CREATE EXTERNAL TABLE {{db_name}}{{db_suffix}}.{{table_name}} ( +{columns}) +{partitioned_by} +{row_format} +STORED AS {{file_format}} +LOCATION '{hive_warehouse_dir}/{{hdfs_location}}' +{tblproperties}; +""".format( + row_format=row_format_stmt, + columns=',\n'.join(columns.split('\n')), + partitioned_by=partitioned_by, + hive_warehouse_dir=options.hive_warehouse_dir, + tblproperties=tblproperties + ).strip() + +def avro_schema(columns): + record = { + "name": "a", # doesn't matter + "type": "record", + "fields": list() + } + for column_spec in columns.strip().split('\n'): + # column_spec looks something like "col_name col_type COMMENT comment" + # (comment may be omitted, we don't use it) + name = column_spec.split()[0] + type = column_spec.split()[1] + assert type.upper() in HIVE_TO_AVRO_TYPE_MAP, "Cannot convert to Avro type: %s" % type + record["fields"].append( + {'name': name, + 'type': [HIVE_TO_AVRO_TYPE_MAP[type.upper()], "null"]}) # all columns nullable + return json.dumps(record) + +def build_compression_codec_statement(codec, compression_type, file_format): + codec = AVRO_COMPRESSION_MAP[codec] if file_format == 'avro' else COMPRESSION_MAP[codec] + if not codec: + return str() + return (AVRO_COMPRESSION_CODEC % codec) if file_format == 'avro' else ( + COMPRESSION_TYPE % compression_type.upper() + '\n' + COMPRESSION_CODEC % codec) def build_codec_enabled_statement(codec): compression_enabled = 'false' if codec == 'none' else 'true' @@ -138,9 +222,10 @@ def build_insert_into_statement(insert, db_name, db_suffix, table_name, file_for statement = SET_PARTITION_MODE_NONSTRICT_STATEMENT + "\n" statement += SET_DYNAMIC_PARTITION_STATEMENT + "\n" - # For some reason (hive bug?) we need to have the CombineHiveInputFormat set for cases - # where we are compressing in bzip on certain tables that have multiple files. - if 'bzip' in db_suffix and 'multi' in table_name: + # For some reason (hive bug?) we need to have the CombineHiveInputFormat set + # for cases where we are compressing in bzip or lzo on certain tables that + # have multiple files. 
+ if 'multi' in table_name and ('bzip' in db_suffix or 'lzo' in db_suffix): statement += SET_HIVE_INPUT_FORMAT % "CombineHiveInputFormat" else: statement += SET_HIVE_INPUT_FORMAT % "HiveInputFormat" @@ -149,7 +234,7 @@ def build_insert_into_statement(insert, db_name, db_suffix, table_name, file_for def build_insert(insert, db_name, db_suffix, file_format, codec, compression_type, table_name): output = build_codec_enabled_statement(codec) + "\n" - output += build_compression_codec_statement(codec, compression_type) + "\n" + output += build_compression_codec_statement(codec, compression_type, file_format) + "\n" output += build_insert_into_statement(insert, db_name, db_suffix, table_name, file_format) + "\n" return output @@ -180,15 +265,6 @@ def build_db_suffix(file_format, codec, compression_type): else: return '_%s_%s' % (file_format, codec) -def write_parquet_to_file(file_name, array): - # Strip out all the hive SET statements - array.insert(0, 'refresh;\n') - write_array_to_file(file_name, 'w', array) - -def write_array_to_file(file_name, mode, array): - with open(file_name, mode) as f: - f.write('\n\n'.join(array)) - # Does a hdfs directory listing and returns array with all the subdir names. def get_hdfs_subdirs_with_data(path): tmp_file = tempfile.TemporaryFile("w+") @@ -201,13 +277,34 @@ def get_hdfs_subdirs_with_data(path): # So to get subdirectory names just return everything after the last '/' return [line[line.rfind('/') + 1:].strip() for line in tmp_file.readlines()] +class Statements(object): + """Simple container object for storing SQL statements to be output to a + file. Useful for ordering the statements correctly.""" + def __init__(self): + self.create = list() + self.load = list() + self.load_base = list() + + def write_to_file(self, filename): + # Only write to file if there's something to actually write + if self.create or self.load_base or self.load: + # Make sure we create the base tables first + output = self.create + self.load_base + self.load + with open(filename, 'w') as f: + f.write('\n\n'.join(output)) + def generate_statements(output_name, test_vectors, sections, schema_include_constraints, schema_exclude_constraints): - output_stats = [SET_HIVE_INPUT_FORMAT % "HiveInputFormat"] - output_create = [] - output_load = [] - output_load_base = [] - output_parquet = [] + # The Avro SerDe causes strange problems with other unrelated tables (e.g., + # Avro files will be written to LZO-compressed text tables). We generate + # separate schema statement files for Avro tables so we can invoke Hive + # completely separately for them. + # See https://issues.apache.org/jira/browse/HIVE-4195. 
+ avro_output = Statements() + # Parquet statements to be executed separately by Impala + parquet_output = Statements() + default_output = Statements() + table_names = None if options.table_names: table_names = [name.lower() for name in options.table_names.split(',')] @@ -216,6 +313,7 @@ def generate_statements(output_name, test_vectors, sections, file_format, data_set, codec, compression_type =\ [row.file_format, row.dataset, row.compression_codec, row.compression_type] table_format = '%s/%s/%s' % (file_format, codec, compression_type) + output = default_output if 'avro' not in table_format else avro_output for section in sections: alter = section.get('ALTER') @@ -223,6 +321,9 @@ def generate_statements(output_name, test_vectors, sections, insert = section['DEPENDENT_LOAD'] load_local = section['LOAD'] base_table_name = section['BASE_TABLE_NAME'] + columns = section['COLUMNS'] + partition_columns = section['PARTITION_COLUMNS'] + row_format = section['ROW_FORMAT'] table_name = base_table_name db_suffix = build_db_suffix(file_format, codec, compression_type) db_name = '{0}{1}'.format(data_set, options.scale_factor) @@ -253,14 +354,36 @@ def generate_statements(output_name, test_vectors, sections, print 'Skipping \'%s\' due to exclude constraint match' % table_name continue - output_create.append(build_create_statement(create, table_name, db_name, db_suffix, - file_format, codec, hdfs_location)) + # If a CREATE section is provided, use that. Otherwise a COLUMNS section + # must be provided (and optionally PARTITION_COLUMNS and ROW_FORMAT + # sections), which is used to generate the create table statement. + if create: + table_template = create + if file_format == 'avro': + # We don't know how to generalize CREATE sections to Avro. + print "CREATE section not supported with Avro, skipping: '%s'" % table_name + continue + else: + assert columns, "No CREATE or COLUMNS section defined for table " + table_name + avro_schema_dir = "%s/%s" % (AVRO_SCHEMA_DIR, data_set) + table_template = build_table_template( + file_format, columns, partition_columns, row_format, avro_schema_dir) + # Write Avro schema to local file + if not os.path.exists(avro_schema_dir): + os.makedirs(avro_schema_dir) + with open("%s/%s.json" % (avro_schema_dir, table_name),"w") as f: + f.write(avro_schema(columns)) + + output.create.append( + build_create_statement(table_template, table_name, db_name, db_suffix, + file_format, codec, hdfs_location)) + # The ALTER statement in hive does not accept fully qualified table names. # We need the use statement. if alter: use_table = 'USE {db_name}{db_suffix};\n'.format(db_name=db_name, db_suffix=db_suffix) - output_create.append(use_table + alter.format(table_name=table_name)) + output.create.append(use_table + alter.format(table_name=table_name)) # If the directory already exists in HDFS, assume that data files already exist # and skip loading the data. Otherwise, the data is generated using either an @@ -271,35 +394,35 @@ def generate_statements(output_name, test_vectors, sections, print 'HDFS path:', data_path, 'does not exists or is empty. Data will be loaded.' if not db_suffix: if load_local: - output_load_base.append(build_load_statement(load_local, db_name, + output.load_base.append(build_load_statement(load_local, db_name, db_suffix, table_name)) else: print 'Empty base table load for %s. 
Skipping load generation' % table_name elif file_format == 'parquet': if insert: - # In most cases the same load logic can be used for the parquet and + # In most cases the same load logic can be used for the parquet and # non-parquet case, but sometimes it needs to be special cased. insert = insert if 'LOAD_PARQUET' not in section else section['LOAD_PARQUET'] - output_parquet.append(build_insert_into_statement( + parquet_output.load.append(build_insert_into_statement( insert, db_name, db_suffix, table_name, 'parquet', for_impala=True)) else: print \ 'Empty parquet load for table %s. Skipping insert generation' % table_name else: if insert: - output_load.append(build_insert(insert, db_name, db_suffix, file_format, + output.load.append(build_insert(insert, db_name, db_suffix, file_format, codec, compression_type, table_name)) else: print 'Empty insert for table %s. Skipping insert generation' % table_name - # Make sure we create the base tables first - output_load = output_create + output_load_base + output_load - write_array_to_file('load-' + output_name + '-generated.sql', 'w', output_load) - write_parquet_to_file('load-parquet-' + output_name + '-generated.sql', output_parquet); + avro_output.write_to_file('load-' + output_name + '-avro-generated.sql') + parquet_output.write_to_file('load-' + output_name + '-parquet-generated.sql') + default_output.write_to_file('load-' + output_name + '-generated.sql') def parse_schema_template_file(file_name): - VALID_SECTION_NAMES = ['DATASET', 'BASE_TABLE_NAME', 'CREATE', 'DEPENDENT_LOAD', - 'LOAD', 'ALTER', 'LOAD_PARQUET'] + VALID_SECTION_NAMES = ['DATASET', 'BASE_TABLE_NAME', 'COLUMNS', 'PARTITION_COLUMNS', + 'ROW_FORMAT', 'CREATE', 'DEPENDENT_LOAD', 'LOAD', 'ALTER', + 'LOAD_PARQUET'] return parse_test_file(file_name, VALID_SECTION_NAMES, skip_unknown_sections=False) if __name__ == "__main__": diff --git a/testdata/bin/generate-test-vectors.py b/testdata/bin/generate-test-vectors.py index a82acb9b7..945d3b3c1 100755 --- a/testdata/bin/generate-test-vectors.py +++ b/testdata/bin/generate-test-vectors.py @@ -82,7 +82,9 @@ def is_valid_combination(vector): (vector[COMPRESSION_IDX] != 'none' and vector[COMPRESSION_TYPE_IDX] == 'none') or (vector[FILE_FORMAT_IDX] != 'seq' and vector[COMPRESSION_TYPE_IDX] == 'record') or (vector[FILE_FORMAT_IDX] == 'parquet' and - (vector[COMPRESSION_IDX] == 'gzip' or vector[COMPRESSION_IDX] == 'bzip'))) + (vector[COMPRESSION_IDX] == 'gzip' or vector[COMPRESSION_IDX] == 'bzip')) or + (vector[FILE_FORMAT_IDX] == 'avro' and + vector[COMPRESSION_IDX] not in ['none', 'snap', 'def'])) # The pairwise generator may call this with different vector lengths. In that case this # should always return true. 
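Note: the avro_schema() helper added in generate-schema-statements.py above maps each
line of a COLUMNS section to a nullable Avro field and serializes the record with
json.dumps. A minimal sketch of its expected output for a hypothetical two-column
table (the column names are illustrative, not taken from this patch; json.dumps
emits a single line, reformatted here for readability):

    # COLUMNS section:
    #   id int
    #   name string
    # avro_schema(columns) returns:
    {"name": "a",
     "type": "record",
     "fields": [{"name": "id", "type": ["int", "null"]},
                {"name": "name", "type": ["string", "null"]}]}
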
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index bfc7781c3..7a478e5e7 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -3,23 +3,23 @@ functional ---- BASE_TABLE_NAME alltypes ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - id int COMMENT 'Add a comment', - bool_col boolean, - tinyint_col tinyint, - smallint_col smallint, - int_col int, - bigint_col bigint, - float_col float, - double_col double, - date_string_col string, - string_col string, - timestamp_col timestamp) -partitioned by (year int, month int) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- PARTITION_COLUMNS +year int +month int +---- COLUMNS +id int COMMENT 'Add a comment' +bool_col boolean +tinyint_col tinyint +smallint_col smallint +int_col int +bigint_col bigint +float_col float +double_col double +date_string_col string +string_col string +timestamp_col timestamp +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ---- ALTER ALTER TABLE {table_name} ADD PARTITION(year=2009, month=1); ALTER TABLE {table_name} ADD PARTITION(year=2009, month=2); @@ -81,44 +81,42 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/target/AllTypes/101201.txt' OVERW functional ---- BASE_TABLE_NAME alltypesnopart ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - id int, - bool_col boolean, - tinyint_col tinyint, - smallint_col smallint, - int_col int, - bigint_col bigint, - float_col float, - double_col double, - date_string_col string, - string_col string, - timestamp_col timestamp) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +id int +bool_col boolean +tinyint_col tinyint +smallint_col smallint +int_col int +bigint_col bigint +float_col float +double_col double +date_string_col string +string_col string +timestamp_col timestamp +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ==== ---- DATASET functional ---- BASE_TABLE_NAME alltypessmall ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - id int, - bool_col boolean, - tinyint_col tinyint, - smallint_col smallint, - int_col int, - bigint_col bigint, - float_col float, - double_col double, - date_string_col string, - string_col string, - timestamp_col timestamp) -partitioned by (year int, month int) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- PARTITION_COLUMNS +year int +month int +---- COLUMNS +id int +bool_col boolean +tinyint_col tinyint +smallint_col smallint +int_col int +bigint_col bigint +float_col float +double_col double +date_string_col string +string_col string +timestamp_col timestamp +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ---- ALTER ALTER TABLE {table_name} ADD PARTITION(year=2009, month=1); ALTER TABLE {table_name} ADD PARTITION(year=2009, month=2); @@ -138,23 +136,23 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/target/AllTypesSmall/090401.txt' functional ---- BASE_TABLE_NAME alltypestiny ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - id int, - bool_col boolean, - 
tinyint_col tinyint, - smallint_col smallint, - int_col int, - bigint_col bigint, - float_col float, - double_col double, - date_string_col string, - string_col string, - timestamp_col timestamp) -partitioned by (year int, month int) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- PARTITION_COLUMNS +year int +month int +---- COLUMNS +id int +bool_col boolean +tinyint_col tinyint +smallint_col smallint +int_col int +bigint_col bigint +float_col float +double_col double +date_string_col string +string_col string +timestamp_col timestamp +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ---- ALTER ALTER TABLE {table_name} ADD PARTITION(year=2009, month=1); ALTER TABLE {table_name} ADD PARTITION(year=2009, month=2); @@ -387,23 +385,24 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/AllTypesErrorNoNulls/0903.txt' OV functional ---- BASE_TABLE_NAME alltypesagg ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - id int, - bool_col boolean, - tinyint_col tinyint, - smallint_col smallint, - int_col int, - bigint_col bigint, - float_col float, - double_col double, - date_string_col string, - string_col string, - timestamp_col timestamp) -partitioned by (year int, month int, day int) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- PARTITION_COLUMNS +year int +month int +day int +---- COLUMNS +id int +bool_col boolean +tinyint_col tinyint +smallint_col smallint +int_col int +bigint_col bigint +float_col float +double_col double +date_string_col string +string_col string +timestamp_col timestamp +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ---- ALTER ALTER TABLE {table_name} ADD PARTITION(year=2010, month=1, day=1); ALTER TABLE {table_name} ADD PARTITION(year=2010, month=1, day=2); @@ -435,23 +434,24 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/target/AllTypesAgg/100110.txt' OV functional ---- BASE_TABLE_NAME alltypesaggnonulls ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - id int, - bool_col boolean, - tinyint_col tinyint, - smallint_col smallint, - int_col int, - bigint_col bigint, - float_col float, - double_col double, - date_string_col string, - string_col string, - timestamp_col timestamp) -partitioned by (year int, month int, day int) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- PARTITION_COLUMNS +year int +month int +day int +---- COLUMNS +id int +bool_col boolean +tinyint_col tinyint +smallint_col smallint +int_col int +bigint_col bigint +float_col float +double_col double +date_string_col string +string_col string +timestamp_col timestamp +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ---- ALTER ALTER TABLE {table_name} ADD PARTITION(year=2010, month=1, day=1); ALTER TABLE {table_name} ADD PARTITION(year=2010, month=1, day=2); @@ -483,28 +483,23 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/target/AllTypesAggNoNulls/100110. 
functional ---- BASE_TABLE_NAME testtbl ----- CREATE --- testtbl is empty -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - id bigint, - name string, - zip int) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +id bigint +name string +zip int +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ==== ---- DATASET functional ---- BASE_TABLE_NAME dimtbl ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - id bigint, - name string, - zip int) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +id bigint +name string +zip int +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -514,15 +509,13 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/DimTbl/data.csv' OVERWRITE INTO T functional ---- BASE_TABLE_NAME jointbl ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - test_id bigint, - test_name string, - test_zip int, - alltypes_id int) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +test_id bigint +test_name string +test_zip int +alltypes_id int +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -532,16 +525,14 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/JoinTbl/data.csv' OVERWRITE INTO functional ---- BASE_TABLE_NAME liketbl ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - str_col string, - match_like_col string, - no_match_like_col string, - match_regex_col string, - no_match_regex_col string) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +str_col string +match_like_col string +no_match_like_col string +match_regex_col string +no_match_regex_col string +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -679,15 +670,13 @@ TBLPROPERTIES("hbase.table.name" = "hbasealltypesagg"); functional ---- BASE_TABLE_NAME escapenoquotes ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - col1 string, - col2 string, - col3 int, - col4 int) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +col1 string +col2 string +col3 int +col4 int +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -697,17 +686,15 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/data/escape-no-quotes.txt' OVERWR functional ---- BASE_TABLE_NAME overflow ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - tinyint_col tinyint, - smallint_col smallint, - int_col int, - bigint_col bigint, - float_col float, - 
double_col double) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +tinyint_col tinyint +smallint_col smallint +int_col int +bigint_col bigint +float_col float +double_col double +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -717,11 +704,8 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/data/overflow.txt' OVERWRITE INTO functional ---- BASE_TABLE_NAME greptiny ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - field string) -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +field string ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -731,14 +715,12 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/hive_benchmark/grepTiny/part-0000 functional ---- BASE_TABLE_NAME rankingssmall ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - pageRank int, - pageURL string, - avgDuration int) -row format delimited fields terminated by '|' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +pageRank int +pageURL string +avgDuration int +---- ROW_FORMAT +delimited fields terminated by '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -748,20 +730,18 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/hive_benchmark/htmlTiny/Rankings. functional ---- BASE_TABLE_NAME uservisitssmall ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - sourceIP string, - destURL string, - visitDate string, - adRevenue float, - userAgent string, - cCode string, - lCode string, - sKeyword string, - avgTimeOnSite int) -row format delimited fields terminated by '|' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +sourceIP string +destURL string +visitDate string +adRevenue float +userAgent string +cCode string +lCode string +sKeyword string +avgTimeOnSite int +---- ROW_FORMAT +delimited fields terminated by '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -771,31 +751,31 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/hive_benchmark/htmlTiny/UserVisit functional ---- BASE_TABLE_NAME emptytable ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - field string) -partitioned by (f2 int); +---- PARTITION_COLUMNS +f2 int +---- COLUMNS +field string ==== ---- DATASET functional ---- BASE_TABLE_NAME alltypesaggmultifiles ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - id int, - bool_col boolean, - tinyint_col tinyint, - smallint_col smallint, - int_col int, - bigint_col bigint, - float_col float, - double_col double, - date_string_col string, - string_col string, - timestamp_col timestamp) -partitioned by (year int, month int, day int) -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- PARTITION_COLUMNS +year int +month int +day int +---- COLUMNS +id int +bool_col boolean +tinyint_col tinyint +smallint_col smallint +int_col int +bigint_col bigint +float_col float +double_col 
double +date_string_col string +string_col string +timestamp_col timestamp ---- ALTER ALTER TABLE {table_name} ADD PARTITION(year=2010, month=1, day=1); ALTER TABLE {table_name} ADD PARTITION(year=2010, month=1, day=2); @@ -824,22 +804,18 @@ insert into table {db_name}{db_suffix}.{table_name} partition (year, month, day) functional ---- BASE_TABLE_NAME alltypesaggmultifilesnopart ----- CREATE -DROP TABLE if EXISTS {db_name}{db_suffix}.{table_name}; -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - id int, - bool_col boolean, - tinyint_col tinyint, - smallint_col smallint, - int_col int, - bigint_col bigint, - float_col float, - double_col double, - date_string_col string, - string_col string, - timestamp_col timestamp) -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +id int +bool_col boolean +tinyint_col tinyint +smallint_col smallint +int_col int +bigint_col bigint +float_col float +double_col double +date_string_col string +string_col string +timestamp_col timestamp ---- DEPENDENT_LOAD insert overwrite table {db_name}{db_suffix}.{table_name} SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col FROM {db_name}.{table_name} where id % 4 = 0; insert into table {db_name}{db_suffix}.{table_name} SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col FROM {db_name}.{table_name} where id % 4 = 1; @@ -858,13 +834,10 @@ insert into table {db_name}{db_suffix}.{table_name} SELECT id, bool_col, tinyint functional ---- BASE_TABLE_NAME stringpartitionkey ----- CREATE --- Regression for IMP-163, failure to load tables partitioned by string column -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - id int) -PARTITIONED BY (string_col string) -STORED AS {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- PARTITION_COLUMNS +string_col string +---- COLUMNS +id int ---- ALTER ALTER TABLE {table_name} ADD PARTITION (string_col = "partition1"); ==== @@ -872,13 +845,11 @@ ALTER TABLE {table_name} ADD PARTITION (string_col = "partition1"); functional ---- BASE_TABLE_NAME tinytable ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - a string, - b string) -row format delimited fields terminated by ',' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +a string +b string +---- ROW_FORMAT +delimited fields terminated by ',' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -888,12 +859,10 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/TinyTable/data.csv' OVERWRITE INT functional ---- BASE_TABLE_NAME tinyinttable ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - int_col int) -row format delimited fields terminated by ',' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +int_col int +---- ROW_FORMAT +delimited fields terminated by ',' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -903,15 +872,14 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/TinyIntTable/data.csv' OVERWRITE functional ---- BASE_TABLE_NAME nulltable ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - a string, - b string, 
- c string, - d int, - e double) -row format delimited fields terminated by ',' -stored as {file_format}; +---- COLUMNS +a string +b string +c string +d int +e double +---- ROW_FORMAT +delimited fields terminated by ',' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} select 'a', '', NULL, NULL, NULL from {db_name}.alltypes limit 1; ---- LOAD @@ -921,15 +889,14 @@ INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} select 'a', '', NULL, N functional ---- BASE_TABLE_NAME nullescapedtable ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - a string, - b string, - c string, - d int, - e double) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format}; +---- COLUMNS +a string +b string +c string +d int +e double +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} select 'a', '', NULL, NULL, NULL from {db_name}.alltypes limit 1; ---- LOAD @@ -939,13 +906,12 @@ INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} select 'a', '', NULL, N functional ---- BASE_TABLE_NAME escapechartesttable ----- CREATE --- Create a test data with the escape character as the same as the tuple delimiter -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (bool_col boolean) -partitioned by (id int) -row format delimited fields terminated by ',' escaped by '\n' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- PARTITION_COLUMNS +id int +---- COLUMNS +bool_col boolean +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\n' ---- ALTER ALTER TABLE {table_name} ADD PARTITION(id=0); ALTER TABLE {table_name} ADD PARTITION(id=1); @@ -971,13 +937,11 @@ select bool_col,id FROM {db_name}.alltypesagg where id < 10; functional ---- BASE_TABLE_NAME TblWithRaggedColumns ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - str_col string, - int_col int) -row format delimited fields terminated by ',' escaped by '\\' -stored as {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +str_col string +int_col int +---- ROW_FORMAT +delimited fields terminated by ',' escaped by '\\' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -1011,16 +975,14 @@ LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; functional ---- BASE_TABLE_NAME zipcode_incomes ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( - id STRING, - zip STRING, - description1 STRING, - description2 STRING, - income int) -ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' -STORED AS {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +id STRING +zip STRING +description1 STRING +description2 STRING +income int +---- ROW_FORMAT +DELIMITED FIELDS TERMINATED BY ',' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD diff --git a/testdata/datasets/tpcds/tpcds_schema_template.sql b/testdata/datasets/tpcds/tpcds_schema_template.sql index c714ab2fc..84545269f 100644 --- a/testdata/datasets/tpcds/tpcds_schema_template.sql +++ b/testdata/datasets/tpcds/tpcds_schema_template.sql @@ -5,36 +5,32 @@ tpcds ---- BASE_TABLE_NAME store_sales ----- CREATE -create external table {db_name}{db_suffix}.{table_name} -( - ss_sold_date_sk int, - 
ss_sold_time_sk int, - ss_item_sk int, - ss_customer_sk int, - ss_cdemo_sk int, - ss_hdemo_sk int, - ss_addr_sk int, - ss_store_sk int, - ss_promo_sk int, - ss_ticket_number int, - ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float -) -row format delimited fields terminated by '|' -STORED AS {file_format} -location '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +ss_sold_date_sk int +ss_sold_time_sk int +ss_item_sk int +ss_customer_sk int +ss_cdemo_sk int +ss_hdemo_sk int +ss_addr_sk int +ss_store_sk int +ss_promo_sk int +ss_ticket_number int +ss_quantity int +ss_wholesale_cost float +ss_list_price float +ss_sales_price float +ss_ext_discount_amt float +ss_ext_sales_price float +ss_ext_wholesale_cost float +ss_ext_list_price float +ss_ext_tax float +ss_coupon_amt float +ss_net_paid float +ss_net_paid_inc_tax float +ss_net_profit float +---- ROW_FORMAT +delimited fields terminated by '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -45,22 +41,18 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpcds ---- BASE_TABLE_NAME customer_demographics ----- CREATE -create external table {db_name}{db_suffix}.{table_name} -( - cd_demo_sk int, - cd_gender string, - cd_marital_status string, - cd_education_status string, - cd_purchase_estimate int, - cd_credit_rating string, - cd_dep_count int, - cd_dep_employed_count int, - cd_dep_college_count int -) -row format delimited fields terminated by '|' -STORED AS {file_format} -location '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +cd_demo_sk int +cd_gender string +cd_marital_status string +cd_education_status string +cd_purchase_estimate int +cd_credit_rating string +cd_dep_count int +cd_dep_employed_count int +cd_dep_college_count int +---- ROW_FORMAT +delimited fields terminated by '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -71,41 +63,37 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpcds ---- BASE_TABLE_NAME date_dim ----- CREATE -create external table {db_name}{db_suffix}.{table_name} -( - d_date_sk int, - d_date_id string, - d_date string, - d_month_seq int, - d_week_seq int, - d_quarter_seq int, - d_year int, - d_dow int, - d_moy int, - d_dom int, - d_qoy int, - d_fy_year int, - d_fy_quarter_seq int, - d_fy_week_seq int, - d_day_name string, - d_quarter_name string, - d_holiday string, - d_weekend string, - d_following_holiday string, - d_first_dom int, - d_last_dom int, - d_same_day_ly int, - d_same_day_lq int, - d_current_day string, - d_current_week string, - d_current_month string, - d_current_quarter string, - d_current_year string -) -row format delimited fields terminated by '|' -STORED AS {file_format} -location '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +d_date_sk int +d_date_id string +d_date string +d_month_seq int +d_week_seq int +d_quarter_seq int +d_year int +d_dow int +d_moy int +d_dom int +d_qoy int +d_fy_year int +d_fy_quarter_seq int +d_fy_week_seq int +d_day_name string +d_quarter_name string +d_holiday string +d_weekend string +d_following_holiday string +d_first_dom int +d_last_dom 
int +d_same_day_ly int +d_same_day_lq int +d_current_day string +d_current_week string +d_current_month string +d_current_quarter string +d_current_year string +---- ROW_FORMAT +delimited fields terminated by '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -116,23 +104,19 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpcds ---- BASE_TABLE_NAME time_dim ----- CREATE -create external table {db_name}{db_suffix}.{table_name} -( - t_time_sk int, - t_time_id string, - t_time int, - t_hour int, - t_minute int, - t_second int, - t_am_pm string, - t_shift string, - t_sub_shift string, - t_meal_time string -) -row format delimited fields terminated by '|' -STORED AS {file_format} -location '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +t_time_sk int +t_time_id string +t_time int +t_hour int +t_minute int +t_second int +t_am_pm string +t_shift string +t_sub_shift string +t_meal_time string +---- ROW_FORMAT +delimited fields terminated by '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -143,35 +127,31 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpcds ---- BASE_TABLE_NAME item ----- CREATE -create external table {db_name}{db_suffix}.{table_name} -( - i_item_sk int, - i_item_id string, - i_rec_start_date string, - i_rec_end_date string, - i_item_desc string, - i_current_price float, - i_wholesale_cost float, - i_brand_id int, - i_brand string, - i_class_id int, - i_class string, - i_category_id int, - i_category string, - i_manufact_id int, - i_manufact string, - i_size string, - i_formulation string, - i_color string, - i_units string, - i_container string, - i_manager_id int, - i_product_name string -) -row format delimited fields terminated by '|' -STORED AS {file_format} -location '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +i_item_sk int +i_item_id string +i_rec_start_date string +i_rec_end_date string +i_item_desc string +i_current_price float +i_wholesale_cost float +i_brand_id int +i_brand string +i_class_id int +i_class string +i_category_id int +i_category string +i_manufact_id int +i_manufact string +i_size string +i_formulation string +i_color string +i_units string +i_container string +i_manager_id int +i_product_name string +---- ROW_FORMAT +delimited fields terminated by '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -182,42 +162,38 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpcds ---- BASE_TABLE_NAME store ----- CREATE -create external table {db_name}{db_suffix}.{table_name} -( - s_store_sk int, - s_store_id string, - s_rec_start_date string, - s_rec_end_date string, - s_closed_date_sk int, - s_store_name string, - s_number_employees int, - s_floor_space int, - s_hours string, - s_manager string, - s_market_id int, - s_geography_class string, - s_market_desc string, - s_market_manager string, - s_division_id int, - s_division_name string, - s_company_id int, - s_company_name string, - s_street_number string, - s_street_name string, - s_street_type string, - s_suite_number string, - s_city string, - s_county string, - s_state string, - s_zip string, - s_country string, - s_gmt_offset float, - s_tax_precentage float -) -row format delimited fields terminated by '|' -STORED AS {file_format} -location 
'${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +s_store_sk int +s_store_id string +s_rec_start_date string +s_rec_end_date string +s_closed_date_sk int +s_store_name string +s_number_employees int +s_floor_space int +s_hours string +s_manager string +s_market_id int +s_geography_class string +s_market_desc string +s_market_manager string +s_division_id int +s_division_name string +s_company_id int +s_company_name string +s_street_number string +s_street_name string +s_street_type string +s_suite_number string +s_city string +s_county string +s_state string +s_zip string +s_country string +s_gmt_offset float +s_tax_precentage float +---- ROW_FORMAT +delimited fields terminated by '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -228,31 +204,27 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpcds ---- BASE_TABLE_NAME customer ----- CREATE -create external table {db_name}{db_suffix}.{table_name} -( - c_customer_sk int, - c_customer_id string, - c_current_cdemo_sk int, - c_current_hdemo_sk int, - c_current_addr_sk int, - c_first_shipto_date_sk int, - c_first_sales_date_sk int, - c_salutation string, - c_first_name string, - c_last_name string, - c_preferred_cust_flag string, - c_birth_day int, - c_birth_month int, - c_birth_year int, - c_birth_country string, - c_login string, - c_email_address string, - c_last_review_date string -) -row format delimited fields terminated by '|' -STORED AS {file_format} -location '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +c_customer_sk int +c_customer_id string +c_current_cdemo_sk int +c_current_hdemo_sk int +c_current_addr_sk int +c_first_shipto_date_sk int +c_first_sales_date_sk int +c_salutation string +c_first_name string +c_last_name string +c_preferred_cust_flag string +c_birth_day int +c_birth_month int +c_birth_year int +c_birth_country string +c_login string +c_email_address string +c_last_review_date string +---- ROW_FORMAT +delimited fields terminated by '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -263,32 +235,28 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpcds ---- BASE_TABLE_NAME promotion ----- CREATE -create external table {db_name}{db_suffix}.{table_name} -( - p_promo_sk int, - p_promo_id string, - p_start_date_sk int, - p_end_date_sk int, - p_item_sk int, - p_cost float, - p_response_target int, - p_promo_name string, - p_channel_dmail string, - p_channel_email string, - p_channel_catalog string, - p_channel_tv string, - p_channel_radio string, - p_channel_press string, - p_channel_event string, - p_channel_demo string, - p_channel_details string, - p_purpose string, - p_discount_active string -) -row format delimited fields terminated by '|' -STORED AS {file_format} -location '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +p_promo_sk int +p_promo_id string +p_start_date_sk int +p_end_date_sk int +p_item_sk int +p_cost float +p_response_target int +p_promo_name string +p_channel_dmail string +p_channel_email string +p_channel_catalog string +p_channel_tv string +p_channel_radio string +p_channel_press string +p_channel_event string +p_channel_demo string +p_channel_details string +p_purpose string +p_discount_active string +---- ROW_FORMAT +delimited fields terminated by '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT 
* FROM {db_name}.{table_name}; ---- LOAD @@ -299,18 +267,14 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpcds ---- BASE_TABLE_NAME household_demographics ----- CREATE -create external table {db_name}{db_suffix}.{table_name} -( - hd_demo_sk int, - hd_income_band_sk int, - hd_buy_potential string, - hd_dep_count int, - hd_vehicle_count int -) -row format delimited fields terminated by '|' -STORED AS {file_format} -location '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +hd_demo_sk int +hd_income_band_sk int +hd_buy_potential string +hd_dep_count int +hd_vehicle_count int +---- ROW_FORMAT +delimited fields terminated by '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -321,26 +285,22 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpcds ---- BASE_TABLE_NAME customer_address ----- CREATE -create external table {db_name}{db_suffix}.{table_name} -( - ca_address_sk int, - ca_address_id string, - ca_street_number string, - ca_street_name string, - ca_street_type string, - ca_suite_number string, - ca_city string, - ca_county string, - ca_state string, - ca_zip string, - ca_country string, - ca_gmt_offset float, - ca_location_type string -) -row format delimited fields terminated by '|' -STORED AS {file_format} -location '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +ca_address_sk int +ca_address_id string +ca_street_number string +ca_street_name string +ca_street_type string +ca_suite_number string +ca_city string +ca_county string +ca_state string +ca_zip string +ca_country string +ca_gmt_offset float +ca_location_type string +---- ROW_FORMAT +delimited fields terminated by '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD diff --git a/testdata/datasets/tpch/tpch_schema_template.sql b/testdata/datasets/tpch/tpch_schema_template.sql index 81b6a486e..e8a703f54 100644 --- a/testdata/datasets/tpch/tpch_schema_template.sql +++ b/testdata/datasets/tpch/tpch_schema_template.sql @@ -5,27 +5,25 @@ tpch ---- BASE_TABLE_NAME lineitem ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( -L_ORDERKEY BIGINT, -L_PARTKEY BIGINT, -L_SUPPKEY BIGINT, -L_LINENUMBER INT, -L_QUANTITY DOUBLE, -L_EXTENDEDPRICE DOUBLE, -L_DISCOUNT DOUBLE, -L_TAX DOUBLE, -L_RETURNFLAG STRING, -L_LINESTATUS STRING, -L_SHIPDATE STRING, -L_COMMITDATE STRING, -L_RECEIPTDATE STRING, -L_SHIPINSTRUCT STRING, -L_SHIPMODE STRING, -L_COMMENT STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +L_ORDERKEY BIGINT +L_PARTKEY BIGINT +L_SUPPKEY BIGINT +L_LINENUMBER INT +L_QUANTITY DOUBLE +L_EXTENDEDPRICE DOUBLE +L_DISCOUNT DOUBLE +L_TAX DOUBLE +L_RETURNFLAG STRING +L_LINESTATUS STRING +L_SHIPDATE STRING +L_COMMITDATE STRING +L_RECEIPTDATE STRING +L_SHIPINSTRUCT STRING +L_SHIPMODE STRING +L_COMMENT STRING +---- ROW_FORMAT +DELIMITED FIELDS TERMINATED BY '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -36,21 +34,18 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpch ---- BASE_TABLE_NAME part ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( -P_PARTKEY BIGINT, -P_NAME STRING, -P_MFGR STRING, -P_BRAND STRING, -P_TYPE -STRING, -P_SIZE INT, -P_CONTAINER STRING, 
-P_RETAILPRICE DOUBLE, -P_COMMENT STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +P_PARTKEY BIGINT +P_NAME STRING +P_MFGR STRING +P_BRAND STRING +P_TYPE STRING +P_SIZE INT +P_CONTAINER STRING +P_RETAILPRICE DOUBLE +P_COMMENT STRING +---- ROW_FORMAT +DELIMITED FIELDS TERMINATED BY '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -61,16 +56,14 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpch ---- BASE_TABLE_NAME partsupp ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( -PS_PARTKEY BIGINT, -PS_SUPPKEY BIGINT, -PS_AVAILQTY INT, -PS_SUPPLYCOST DOUBLE, -PS_COMMENT STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +PS_PARTKEY BIGINT +PS_SUPPKEY BIGINT +PS_AVAILQTY INT +PS_SUPPLYCOST DOUBLE +PS_COMMENT STRING +---- ROW_FORMAT +DELIMITED FIELDS TERMINATED BY '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -81,18 +74,16 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpch ---- BASE_TABLE_NAME supplier ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( -S_SUPPKEY BIGINT, -S_NAME STRING, -S_ADDRESS STRING, -S_NATIONKEY SMALLINT, -S_PHONE STRING, -S_ACCTBAL DOUBLE, -S_COMMENT STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +S_SUPPKEY BIGINT +S_NAME STRING +S_ADDRESS STRING +S_NATIONKEY SMALLINT +S_PHONE STRING +S_ACCTBAL DOUBLE +S_COMMENT STRING +---- ROW_FORMAT +DELIMITED FIELDS TERMINATED BY '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -103,15 +94,13 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpch ---- BASE_TABLE_NAME nation ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( -N_NATIONKEY SMALLINT, -N_NAME STRING, -N_REGIONKEY SMALLINT, -N_COMMENT STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +N_NATIONKEY SMALLINT +N_NAME STRING +N_REGIONKEY SMALLINT +N_COMMENT STRING +---- ROW_FORMAT +DELIMITED FIELDS TERMINATED BY '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -122,14 +111,12 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpch ---- BASE_TABLE_NAME region ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( -R_REGIONKEY SMALLINT, -R_NAME STRING, -R_COMMENT STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS {file_format} -LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}'; +---- COLUMNS +R_REGIONKEY SMALLINT +R_NAME STRING +R_COMMENT STRING +---- ROW_FORMAT +DELIMITED FIELDS TERMINATED BY '|' ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -140,20 +127,18 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name}; tpch ---- BASE_TABLE_NAME orders ----- CREATE -CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} ( -O_ORDERKEY BIGINT, -O_CUSTKEY BIGINT, 
-O_ORDERSTATUS STRING,
-O_TOTALPRICE DOUBLE,
-O_ORDERDATE STRING,
-O_ORDERPRIORITY STRING,
-O_CLERK STRING,
-O_SHIPPRIORITY INT,
-O_COMMENT STRING)
-ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+O_ORDERKEY BIGINT
+O_CUSTKEY BIGINT
+O_ORDERSTATUS STRING
+O_TOTALPRICE DOUBLE
+O_ORDERDATE STRING
+O_ORDERPRIORITY STRING
+O_CLERK STRING
+O_SHIPPRIORITY INT
+O_COMMENT STRING
+---- ROW_FORMAT
+DELIMITED FIELDS TERMINATED BY '|'
 ---- DEPENDENT_LOAD
 INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
 ---- LOAD
@@ -164,19 +149,17 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
 tpch
 ---- BASE_TABLE_NAME
 customer
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (
-C_CUSTKEY BIGINT,
-C_NAME STRING,
-C_ADDRESS STRING,
-C_NATIONKEY SMALLINT,
-C_PHONE STRING,
-C_ACCTBAL DOUBLE,
-C_MKTSEGMENT STRING,
-C_COMMENT STRING)
-ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+C_CUSTKEY BIGINT
+C_NAME STRING
+C_ADDRESS STRING
+C_NATIONKEY SMALLINT
+C_PHONE STRING
+C_ACCTBAL DOUBLE
+C_MKTSEGMENT STRING
+C_COMMENT STRING
+---- ROW_FORMAT
+DELIMITED FIELDS TERMINATED BY '|'
 ---- DEPENDENT_LOAD
 INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
 ---- LOAD
@@ -187,176 +170,135 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
 tpch
 ---- BASE_TABLE_NAME
 q2_minimum_cost_supplier_tmp1
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (
-s_acctbal double,
-s_name string,
-n_name string,
-p_partkey bigint,
-ps_supplycost double,
-p_mfgr string,
-s_address string,
-s_phone string,
-s_comment string)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+s_acctbal double
+s_name string
+n_name string
+p_partkey bigint
+ps_supplycost double
+p_mfgr string
+s_address string
+s_phone string
+s_comment string
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 q2_minimum_cost_supplier_tmp2
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (
-p_partkey bigint,
-ps_min_supplycost double)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+p_partkey bigint
+ps_min_supplycost double
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 q7_volume_shipping_tmp
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (
-supp_nation string,
-cust_nation string,
-s_nationkey smallint,
-c_nationkey smallint)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+supp_nation string
+cust_nation string
+s_nationkey smallint
+c_nationkey smallint
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 q11_part_tmp
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (
-ps_partkey bigint,
-part_value double)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+ps_partkey bigint
+part_value double
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 q11_sum_tmp
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (total_value double)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+total_value double
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 revenue
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (
-supplier_no bigint,
-total_revenue double)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+supplier_no bigint
+total_revenue double
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 max_revenue
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (max_revenue double)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+max_revenue double
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 supplier_tmp
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (s_suppkey bigint)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+s_suppkey bigint
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 q16_tmp
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (
-p_brand string,
-p_type string,
-p_size int,
-ps_suppkey bigint)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+p_brand string
+p_type string
+p_size int
+ps_suppkey bigint
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 lineitem_tmp
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (
-t_partkey bigint,
-t_avg_quantity double)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+t_partkey bigint
+t_avg_quantity double
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 q18_tmp
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (
-l_orderkey bigint,
-t_sum_quantity double)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+l_orderkey bigint
+t_sum_quantity double
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 q20_tmp1
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (p_partkey bigint)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+p_partkey bigint
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 q20_tmp2
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (
-l_partkey bigint,
-l_suppkey bigint,
-sum_quantity double)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+l_partkey bigint
+l_suppkey bigint
+sum_quantity double
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 q20_tmp3
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (
-ps_suppkey bigint,
-ps_availqty int,
-sum_quantity double)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+ps_suppkey bigint
+ps_availqty int
+sum_quantity double
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 q20_tmp4
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (ps_suppkey bigint)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+ps_suppkey bigint
 ====
 ---- DATASET
 tpch
 ---- BASE_TABLE_NAME
 q22_customer_tmp1
----- CREATE
-CREATE EXTERNAL TABLE {db_name}{db_suffix}.{table_name} (avg_acctbal double, cust_name_char string)
-STORED AS {file_format}
-LOCATION '${{hiveconf:hive.metastore.warehouse.dir}}/{hdfs_location}';
+---- COLUMNS
+avg_acctbal double
+cust_name_char string
 ====
diff --git a/testdata/workloads/functional-query/functional-query_core.csv b/testdata/workloads/functional-query/functional-query_core.csv
index d03a4e154..1e8ed780c 100644
--- a/testdata/workloads/functional-query/functional-query_core.csv
+++ b/testdata/workloads/functional-query/functional-query_core.csv
@@ -3,3 +3,5 @@ file_format:text, dataset:functional, compression_codec:none, compression_type:n
 file_format:seq, dataset:functional, compression_codec:none, compression_type:none
 file_format:seq, dataset:functional, compression_codec:snap, compression_type:block
 file_format:rc, dataset: functional, compression_codec: snap, compression_type: block
+file_format:avro, dataset: functional, compression_codec: none, compression_type: none
+file_format:avro, dataset: functional, compression_codec: snap, compression_type: block
diff --git a/testdata/workloads/functional-query/functional-query_dimensions.csv b/testdata/workloads/functional-query/functional-query_dimensions.csv
index 8ec8a8225..62fd498f8 100644
--- a/testdata/workloads/functional-query/functional-query_dimensions.csv
+++ b/testdata/workloads/functional-query/functional-query_dimensions.csv
@@ -1,4 +1,4 @@
-file_format: text,seq,rc
+file_format: text,seq,rc,avro
 dataset: functional
 compression_codec: none,def,gzip,bzip,snap,lzo
 compression_type: none,block,record
diff --git a/testdata/workloads/functional-query/functional-query_exhaustive.csv b/testdata/workloads/functional-query/functional-query_exhaustive.csv
index b520ed279..ed6166013 100644
--- a/testdata/workloads/functional-query/functional-query_exhaustive.csv
+++ b/testdata/workloads/functional-query/functional-query_exhaustive.csv
@@ -15,3 +15,5 @@ file_format: rc, dataset: functional, compression_codec: def, compression_type:
 file_format: rc, dataset: functional, compression_codec: gzip, compression_type: block
 file_format: rc, dataset: functional, compression_codec: bzip, compression_type: block
 file_format: rc, dataset: functional, compression_codec: snap, compression_type: block
+file_format: avro, dataset: functional, compression_codec: none, compression_type: none
+file_format: avro, dataset: functional, compression_codec: snap, compression_type: block
diff --git a/testdata/workloads/functional-query/functional-query_pairwise.csv b/testdata/workloads/functional-query/functional-query_pairwise.csv
index 27efc876d..f0aef5a6b 100644
--- a/testdata/workloads/functional-query/functional-query_pairwise.csv
+++ b/testdata/workloads/functional-query/functional-query_pairwise.csv
@@ -2,8 +2,9 @@
 file_format: text, dataset: functional, compression_codec: none, compression_type: none
 file_format: seq, dataset: functional, compression_codec: def, compression_type: block
 file_format: rc, dataset: functional, compression_codec: gzip, compression_type: block
+file_format: avro, dataset: functional, compression_codec: snap, compression_type: block
+file_format: avro, dataset: functional, compression_codec: none, compression_type: none
 file_format: rc, dataset: functional, compression_codec: bzip, compression_type: block
-file_format: seq, dataset: functional, compression_codec: snap, compression_type: record
+file_format: seq, dataset: functional, compression_codec: none, compression_type: none
 file_format: text, dataset: functional, compression_codec: lzo, compression_type: block
-file_format: rc, dataset: functional, compression_codec: snap, compression_type: block
-file_format: rc, dataset: functional, compression_codec: def, compression_type: block
+file_format: rc, dataset: functional, compression_codec: none, compression_type: none
diff --git a/testdata/workloads/hive-benchmark/hive-benchmark_dimensions.csv b/testdata/workloads/hive-benchmark/hive-benchmark_dimensions.csv
index 368dc90cc..7a32ba2eb 100644
--- a/testdata/workloads/hive-benchmark/hive-benchmark_dimensions.csv
+++ b/testdata/workloads/hive-benchmark/hive-benchmark_dimensions.csv
@@ -1,4 +1,4 @@
-file_format: text,seq,rc
+file_format: text,seq,rc,avro
 dataset: hive-benchmark
 compression_codec: none,def,gzip,bzip,snap,lzo
 compression_type: none,block,record
diff --git a/testdata/workloads/hive-benchmark/hive-benchmark_exhaustive.csv b/testdata/workloads/hive-benchmark/hive-benchmark_exhaustive.csv
index 4a6f02322..48836e6bb 100644
--- a/testdata/workloads/hive-benchmark/hive-benchmark_exhaustive.csv
+++ b/testdata/workloads/hive-benchmark/hive-benchmark_exhaustive.csv
@@ -15,3 +15,5 @@ file_format: rc, dataset: hive-benchmark, compression_codec: def, compression_ty
 file_format: rc, dataset: hive-benchmark, compression_codec: gzip, compression_type: block
 file_format: rc, dataset: hive-benchmark, compression_codec: bzip, compression_type: block
 file_format: rc, dataset: hive-benchmark, compression_codec: snap, compression_type: block
+file_format: avro, dataset: hive-benchmark, compression_codec: none, compression_type: none
+file_format: avro, dataset: hive-benchmark, compression_codec: snap, compression_type: block
diff --git a/testdata/workloads/hive-benchmark/hive-benchmark_pairwise.csv b/testdata/workloads/hive-benchmark/hive-benchmark_pairwise.csv
index e3946a4e7..edeaf8e98 100644
--- a/testdata/workloads/hive-benchmark/hive-benchmark_pairwise.csv
+++ b/testdata/workloads/hive-benchmark/hive-benchmark_pairwise.csv
@@ -2,8 +2,9 @@
 file_format: text, dataset: hive-benchmark, compression_codec: none, compression_type: none
 file_format: seq, dataset: hive-benchmark, compression_codec: def, compression_type: block
 file_format: rc, dataset: hive-benchmark, compression_codec: gzip, compression_type: block
+file_format: avro, dataset: hive-benchmark, compression_codec: snap, compression_type: block
+file_format: avro, dataset: hive-benchmark, compression_codec: none, compression_type: none
 file_format: rc, dataset: hive-benchmark, compression_codec: bzip, compression_type: block
-file_format: seq, dataset: hive-benchmark, compression_codec: snap, compression_type: record
+file_format: seq, dataset: hive-benchmark, compression_codec: none, compression_type: none
 file_format: text, dataset: hive-benchmark, compression_codec: lzo, compression_type: block
-file_format: rc, dataset: hive-benchmark, compression_codec: snap, compression_type: block
-file_format: rc, dataset: hive-benchmark, compression_codec: def, compression_type: block
+file_format: rc, dataset: hive-benchmark, compression_codec: none, compression_type: none
diff --git a/testdata/workloads/targeted-perf/targeted-perf_core.csv b/testdata/workloads/targeted-perf/targeted-perf_core.csv
index d2060e5f3..dcf4bc6fb 100644
--- a/testdata/workloads/targeted-perf/targeted-perf_core.csv
+++ b/testdata/workloads/targeted-perf/targeted-perf_core.csv
@@ -3,3 +3,5 @@ file_format:text, dataset:tpch, compression_codec:none, compression_type:none
 file_format:seq, dataset:tpch, compression_codec:gzip, compression_type:block
 file_format:seq, dataset:tpch, compression_codec:snap, compression_type:block
 file_format:rc, dataset:tpch, compression_codec:none, compression_type:none
+file_format:avro, dataset:tpch, compression_codec: none, compression_type: none
+file_format:avro, dataset:tpch, compression_codec: snap, compression_type: block
diff --git a/testdata/workloads/targeted-stress/targeted-stress_core.csv b/testdata/workloads/targeted-stress/targeted-stress_core.csv
index d2060e5f3..dcf4bc6fb 100644
--- a/testdata/workloads/targeted-stress/targeted-stress_core.csv
+++ b/testdata/workloads/targeted-stress/targeted-stress_core.csv
@@ -3,3 +3,5 @@ file_format:text, dataset:tpch, compression_codec:none, compression_type:none
 file_format:seq, dataset:tpch, compression_codec:gzip, compression_type:block
 file_format:seq, dataset:tpch, compression_codec:snap, compression_type:block
 file_format:rc, dataset:tpch, compression_codec:none, compression_type:none
+file_format:avro, dataset:tpch, compression_codec: none, compression_type: none
+file_format:avro, dataset:tpch, compression_codec: snap, compression_type: block
diff --git a/testdata/workloads/tpcds/tpcds_dimensions.csv b/testdata/workloads/tpcds/tpcds_dimensions.csv
index f90082679..a01357f60 100644
--- a/testdata/workloads/tpcds/tpcds_dimensions.csv
+++ b/testdata/workloads/tpcds/tpcds_dimensions.csv
@@ -1,4 +1,4 @@
-file_format: text,seq,rc
+file_format: text,seq,rc,avro
 dataset: tpcds
 compression_codec: none,def,gzip,bzip,snap,lzo
 compression_type: none,block,record
diff --git a/testdata/workloads/tpcds/tpcds_exhaustive.csv b/testdata/workloads/tpcds/tpcds_exhaustive.csv
index e79552a62..e060811b0 100644
--- a/testdata/workloads/tpcds/tpcds_exhaustive.csv
+++ b/testdata/workloads/tpcds/tpcds_exhaustive.csv
@@ -15,3 +15,5 @@ file_format: rc, dataset: tpcds, compression_codec: def, compression_type: block
 file_format: rc, dataset: tpcds, compression_codec: gzip, compression_type: block
 file_format: rc, dataset: tpcds, compression_codec: bzip, compression_type: block
 file_format: rc, dataset: tpcds, compression_codec: snap, compression_type: block
+file_format: avro, dataset: tpcds, compression_codec: none, compression_type: none
+file_format: avro, dataset: tpcds, compression_codec: snap, compression_type: block
diff --git a/testdata/workloads/tpcds/tpcds_pairwise.csv b/testdata/workloads/tpcds/tpcds_pairwise.csv
index d3feca315..aea395180 100644
--- a/testdata/workloads/tpcds/tpcds_pairwise.csv
+++ b/testdata/workloads/tpcds/tpcds_pairwise.csv
@@ -2,8 +2,9 @@
 file_format: text, dataset: tpcds, compression_codec: none, compression_type: none
 file_format: seq, dataset: tpcds, compression_codec: def, compression_type: block
 file_format: rc, dataset: tpcds, compression_codec: gzip, compression_type: block
+file_format: avro, dataset: tpcds, compression_codec: snap, compression_type: block
+file_format: avro, dataset: tpcds, compression_codec: none, compression_type: none
 file_format: rc, dataset: tpcds, compression_codec: bzip, compression_type: block
-file_format: seq, dataset: tpcds, compression_codec: snap, compression_type: record
+file_format: seq, dataset: tpcds, compression_codec: none, compression_type: none
 file_format: text, dataset: tpcds, compression_codec: lzo, compression_type: block
-file_format: rc, dataset: tpcds, compression_codec: snap, compression_type: block
-file_format: rc, dataset: tpcds, compression_codec: def, compression_type: block
+file_format: rc, dataset: tpcds, compression_codec: none, compression_type: none
diff --git a/testdata/workloads/tpch/tpch_core.csv b/testdata/workloads/tpch/tpch_core.csv
index d2060e5f3..dcf4bc6fb 100644
--- a/testdata/workloads/tpch/tpch_core.csv
+++ b/testdata/workloads/tpch/tpch_core.csv
@@ -3,3 +3,5 @@ file_format:text, dataset:tpch, compression_codec:none, compression_type:none
 file_format:seq, dataset:tpch, compression_codec:gzip, compression_type:block
 file_format:seq, dataset:tpch, compression_codec:snap, compression_type:block
 file_format:rc, dataset:tpch, compression_codec:none, compression_type:none
+file_format:avro, dataset:tpch, compression_codec: none, compression_type: none
+file_format:avro, dataset:tpch, compression_codec: snap, compression_type: block
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index b8bcb02ff..49a2b9543 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -117,7 +117,8 @@ class ImpalaTestSuite(BaseTestSuite):
         updated_sections.append(
           self.__update_results(test_file_name, test_section, result))
       else:
-        verify_raw_results(test_section, result)
+        verify_raw_results(test_section, result,
+            vector.get_value('table_format').file_format)
 
       if pytest.config.option.update_results:
         output_file = os.path.join('/tmp', test_file_name.replace('/','_') + ".test")
diff --git a/tests/common/test_result_verifier.py b/tests/common/test_result_verifier.py
index 9200b1680..82dba1c90 100644
--- a/tests/common/test_result_verifier.py
+++ b/tests/common/test_result_verifier.py
@@ -148,12 +148,7 @@ def verify_results(expected_results, actual_results, order_matters):
   assert expected_results == actual_results, failure_str
 
-def verify_column_types(actual_col_types, exec_result_schema):
-  actual_col_types = [c.strip().upper() for c in actual_col_types.split(',')]
-  expected_col_types = parse_column_types(exec_result_schema)
-  verify_results(actual_col_types, expected_col_types, order_matters=True)
-
-def verify_raw_results(test_section, exec_result):
+def verify_raw_results(test_section, exec_result, file_format):
   """
   Accepts a raw exec_result object and verifies it matches the expected results
@@ -169,9 +164,23 @@ def verify_raw_results(test_section, exec_result):
     return
 
   if 'TYPES' in test_section:
-    verify_column_types(test_section['TYPES'], exec_result.schema)
     expected_types = [c.strip().upper() for c in test_section['TYPES'].split(',')]
-    actual_types = parse_column_types(exec_result.schema)
+
+    # Avro does not support as many types as Hive, so the Avro test tables may
+    # have different column types than we expect (e.g., INT instead of
+    # TINYINT). We represent TIMESTAMP columns as strings in Avro, so we bail in
+    # this case since the results will be wrong. Otherwise we bypass the type
+    # checking by ignoring the actual types of the Avro table.
+    if file_format == 'avro':
+      if 'TIMESTAMP' in expected_types:
+        LOG.info("TIMESTAMP columns unsupported in Avro, skipping verification.")
+        return
+      LOG.info("Skipping type verification of Avro-format table.")
+      actual_types = expected_types
+    else:
+      actual_types = parse_column_types(exec_result.schema)
+
+    verify_results(expected_types, actual_types, order_matters=True)
   else:
     # This is an insert, so we are comparing the number of rows inserted
     expected_types = ['BIGINT']
diff --git a/tests/query_test/test_aggregation.py b/tests/query_test/test_aggregation.py
index 0ccdac87a..d2cda0caf 100644
--- a/tests/query_test/test_aggregation.py
+++ b/tests/query_test/test_aggregation.py
@@ -40,6 +40,10 @@ class TestAggregation(ImpalaTestSuite):
     cls.TestMatrix.add_constraint(lambda v: v.get_value('exec_option')['batch_size'] == 0)
     cls.TestMatrix.add_constraint(lambda v: v.get_value('agg_func') in ['min', 'max'] if\
       v.get_value('data_type') == 'bool' else True)
+    # Avro doesn't have timestamp type
+    cls.TestMatrix.add_constraint(
+        lambda v: not (v.get_value('table_format').file_format == 'avro' and
+                       v.get_value('data_type') == 'timestamp'))
 
   def test_aggregation(self, vector):
     data_type, agg_func = (vector.get_value('data_type'), vector.get_value('agg_func'))
diff --git a/tests/query_test/test_queries.py b/tests/query_test/test_queries.py
index ea98b685c..4185e070f 100644
--- a/tests/query_test/test_queries.py
+++ b/tests/query_test/test_queries.py
@@ -19,6 +19,11 @@ class TestQueries(ImpalaTestSuite):
     self.run_test_case('QueryTest/aggregation', vector)
 
   def test_exprs(self, vector):
+    # Don't attempt to evaluate timestamp expressions with Avro tables (which
+    # don't support a timestamp type)
+    # TODO: Enable some of these tests for Avro if possible
+    if vector.get_value('table_format').file_format == 'avro':
+      pytest.skip()
     self.run_test_case('QueryTest/exprs', vector)
 
   def test_hdfs_scan_node(self, vector):
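
Editor's note on the schema templates: the COLUMNS/ROW_FORMAT sections introduced above replace hard-coded CREATE statements so that the generator can derive per-format DDL, including a JSON record schema for the new Avro tables, from a single column list. Below is a minimal sketch of what that derivation could look like; it is not the patch's actual implementation. The helper name columns_to_avro_schema is hypothetical, and the type mapping is an assumption inferred from the test_result_verifier.py comments above (narrow integer types surface as INT, and TIMESTAMP is represented as a string in Avro).

    # Hypothetical sketch: turn a "---- COLUMNS" section into an Avro schema.
    # Names and the exact type mapping here are illustrative assumptions, not
    # the code from generate-schema-statements.py.
    import json

    # Assumed Hive-to-Avro mapping: Avro lacks TINYINT/SMALLINT and TIMESTAMP,
    # so narrow integers widen to "int" and timestamps become "string".
    HIVE_TO_AVRO = {
      'TINYINT': 'int', 'SMALLINT': 'int', 'INT': 'int', 'BIGINT': 'long',
      'FLOAT': 'float', 'DOUBLE': 'double', 'BOOLEAN': 'boolean',
      'STRING': 'string', 'TIMESTAMP': 'string',
    }

    def columns_to_avro_schema(table_name, columns_section):
      # Each line of a COLUMNS section is "<name> <hive_type>".
      fields = []
      for line in columns_section.strip().splitlines():
        name, hive_type = line.split(None, 1)
        fields.append({'name': name, 'type': HIVE_TO_AVRO[hive_type.strip().upper()]})
      return json.dumps({'type': 'record', 'name': table_name, 'fields': fields})

    # Example: the nation table's COLUMNS section from the template above.
    print(columns_to_avro_schema('nation',
        "N_NATIONKEY SMALLINT\nN_NAME STRING\nN_REGIONKEY SMALLINT\nN_COMMENT STRING"))

A schema generated this way is what makes the widened column types visible to the tests, which is why verify_raw_results skips strict type checking for Avro tables and bails entirely when TIMESTAMP columns are expected.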