IMPALA-11952 (part 2): Fix print function syntax

Python 3 treats print as a function, so invocations now
require parentheses.

print "Hello World!"
is now:
print("Hello World!")

This fixes all locations to use the function invocation. The
conversion is more involved when output is redirected to a file
or when the usual trailing newline is suppressed; both cases are
shown below.

print >> sys.stderr, "Hello World!"
is now:
print("Hello World!", file=sys.stderr)

To support this properly and guarantee equivalent behavior
between Python 2 and Python 3, every file that uses print
now adds this import:
from __future__ import print_function
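
For illustration (not part of the change itself): without this
import, Python 2 parses a parenthesized call with multiple
arguments as printing a tuple, so the import is what guarantees
identical output:

from __future__ import print_function
print("Hello", "World!")
# prints: Hello World!   (same on Python 2 and Python 3)
# without the import, Python 2 would print: ('Hello', 'World!')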

This also fixes miscellaneous flake8 issues that intersect with
these changes.

Testing:
 - check-python-syntax.sh shows no errors related to print

Change-Id: Ib634958369ad777a41e72d80c8053b74384ac351
Reviewed-on: http://gerrit.cloudera.org:8080/19552
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Tested-by: Michael Smith <michael.smith@cloudera.com>
Joe McDonnell
2023-02-26 13:54:52 -08:00
parent c71de994b0
commit 2b550634d2
75 changed files with 275 additions and 196 deletions

View File

@@ -19,6 +19,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
from string import Template
import os
import shutil
@@ -338,7 +339,7 @@ def move_if_different(src_file, dest_file):
if not os.path.isfile(dest_file) or not filecmp.cmp(src_file, dest_file):
shutil.move(src_file, dest_file)
else:
print 'Retaining existing file: %s' % (dest_file)
print('Retaining existing file: %s' % (dest_file))
BE_PATH = os.path.join(os.environ['IMPALA_HOME'], 'be/generated-sources/impala-ir/')
IR_FUNCTIONS_FILE = 'impala-ir-functions.h'
@@ -352,7 +353,7 @@ if not os.path.exists(BE_PATH):
os.makedirs(BE_PATH)
if __name__ == "__main__":
print "Generating IR description files"
print("Generating IR description files")
enums_file = open(TMP_IR_FUNCTIONS_PATH, 'w')
enums_file.write(enums_preamble)

View File

@@ -33,6 +33,7 @@
# time, and the RAT JAR is not included in the Impala repo; it must be downloaded
# separately.
from __future__ import print_function
import fnmatch
import re
import sys
@@ -74,5 +75,5 @@ for r in resources:
if not all_ok:
sys.exit(1)
print 'OK'
print('OK')
sys.exit(0)

View File

@@ -24,6 +24,7 @@
# --role_name=statestored --max_output_size=50000000 --end_time=1463033495000 \
# --output_file_path=/tmp/minidump_package.tar.gz
from __future__ import print_function
import os
import re
import sys
@@ -162,7 +163,7 @@ def get_config_parameter_value(conf_dir, role_name, config_parameter_name):
if m:
config_parameter_value = m.group(1)
except IOError as e:
print >> sys.stderr, 'Error: Unable to open "{0}".'.format(file_path)
print('Error: Unable to open "{0}".'.format(file_path), file=sys.stderr)
sys.exit(1)
return config_parameter_value
@@ -179,7 +180,7 @@ def get_minidump_dir(conf_dir, role_name):
result = os.path.join(minidump_path, role_name)
if not os.path.isdir(result):
msg = 'Error: minidump directory does not exist.'
print >> sys.stderr, msg
print(msg, file=sys.stderr)
sys.exit(1)
return result
@@ -202,11 +203,11 @@ def main():
options, args = parser.parse_args()
if not options.conf_dir:
msg = 'Error: conf_dir is not specified.'
print >> sys.stderr, msg
print(msg, file=sys.stderr)
sys.exit(1)
if not options.output_file_path:
msg = 'Error: output_file_path is not specified.'
print >> sys.stderr, msg
print(msg, file=sys.stderr)
sys.exit(1)
minidump_dir = get_minidump_dir(options.conf_dir, options.role_name)
@@ -216,7 +217,7 @@ def main():
end_time=options.end_time,
output_file_path=options.output_file_path)
status, msg = file_archiver.make_tarball()
print >> sys.stderr, msg
print(msg, file=sys.stderr)
sys.exit(status)
if __name__ == '__main__':

View File

@@ -12,6 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Future imports must happen at the beginning of the file
from __future__ import print_function
HELP = '''
Compares two specified branches, using the Gerrit Change-Id as the
primary identifier. Ignored commits can be added via a JSON
@@ -171,7 +174,7 @@ def cherrypick(cherry_pick_hashes, full_target_branch_name, partial_ok):
Note that this function does not push to the remote.
"""
print "Cherrypicking %d changes." % (len(cherry_pick_hashes),)
print("Cherrypicking %d changes." % (len(cherry_pick_hashes),))
if len(cherry_pick_hashes) == 0:
return
@@ -181,8 +184,8 @@ def cherrypick(cherry_pick_hashes, full_target_branch_name, partial_ok):
target_branch_sha = subprocess.check_output(
['git', 'rev-parse', full_target_branch_name]).strip()
if head_sha != target_branch_sha:
print "Cannot cherrypick because %s (%s) and HEAD (%s) are divergent." % (
full_target_branch_name, target_branch_sha, head_sha)
print("Cannot cherrypick because %s (%s) and HEAD (%s) are divergent." % (
full_target_branch_name, target_branch_sha, head_sha))
sys.exit(1)
cherry_pick_hashes.reverse()
@@ -192,7 +195,7 @@ def cherrypick(cherry_pick_hashes, full_target_branch_name, partial_ok):
if ret != 0:
if partial_ok and i > 0:
subprocess.check_call(['git', 'cherry-pick', '--abort'])
print "Failed to cherry-pick %s; stopping picks." % (cherry_pick_hash,)
print("Failed to cherry-pick %s; stopping picks." % (cherry_pick_hash,))
return
else:
raise Exception("Failed to cherry-pick: %s" % (cherry_pick_hash,))
@@ -238,10 +241,10 @@ def main():
commits_ignored = [] # Track commits actually ignored for debug logging
cherry_pick_hashes = []
print '-' * 80
print 'Commits in {0} but not in {1}:'.format(
full_source_branch_name, full_target_branch_name)
print '-' * 80
print('-' * 80)
print('Commits in {0} but not in {1}:'.format(
full_source_branch_name, full_target_branch_name))
print('-' * 80)
jira_keys = []
jira_key_pat = re.compile(r'(IMPALA-\d+)')
skip_commits_matching = options.skip_commits_matching.format(
@@ -267,17 +270,17 @@ def main():
logging.debug("NOT ignoring commit {0} since not in ignored commits ({1},{2})"
.format(commit_hash, options.source_branch, options.target_branch))
if not change_in_target and not ignore_by_config and not ignore_by_commit_message:
print u'{0} {1} ({2}) - {3}'\
.format(commit_hash, msg.decode('utf8'), date, author.decode('utf8'))\
.encode('utf8')
print(u'{0} {1} ({2}) - {3}'
.format(commit_hash, msg.decode('utf8'), date, author.decode('utf8'))
.encode('utf8'))
cherry_pick_hashes.append(commit_hash)
jira_keys += jira_key_pat.findall(msg)
print '-' * 80
print('-' * 80)
print "Jira keys referenced (Note: not all commit messages will reference a jira key):"
print ','.join(jira_keys)
print '-' * 80
print("Jira keys referenced (Note: not all commit messages will reference a jira key):")
print(','.join(jira_keys))
print('-' * 80)
logging.debug("Commits actually ignored (change was not in target): {0}"
.format(pformat(commits_ignored)))

View File

@@ -17,6 +17,7 @@
# specific language governing permissions and limitations
# under the License
from __future__ import print_function
import argparse
import math
import re

View File

@@ -17,6 +17,7 @@
# specific language governing permissions and limitations
# under the License
from __future__ import print_function
import argparse
import csv
import math

View File

@@ -22,6 +22,7 @@
# This script generates be/src/common/version.h which contains the build version based
# on the git hash.
from __future__ import print_function
import os
from subprocess import call
@@ -46,15 +47,15 @@ version_file_exists = os.path.isfile(VERSION_FILE_NAME)
# If we have a version file and cannot obtain a git hash, skip generating a new
# version file.
if version_file_exists and not can_obtain_git_hash:
print "Cannot obtain git hash, using existing version file."
print("Cannot obtain git hash, using existing version file.")
else:
# Remove existing version files only if they exist.
# TODO: Might be useful to make a common utility function remove_if_clean.
if version_file_exists:
print 'Removing existing file: %s' % (VERSION_FILE_NAME)
print('Removing existing file: %s' % (VERSION_FILE_NAME))
os.remove(VERSION_FILE_NAME)
if os.path.isfile(VERSION_CC_FILE_NAME):
print 'Removing existing file: %s' % (VERSION_CC_FILE_NAME)
print('Removing existing file: %s' % (VERSION_CC_FILE_NAME))
os.remove(VERSION_CC_FILE_NAME)
# SAVE_VERSION_SCRIPT will generate a dummy version.info file if we cannot obtain the
@@ -74,7 +75,7 @@ try:
finally:
version_file.close()
print '\n'.join([version, git_hash, build_time])
print('\n'.join([version, git_hash, build_time]))
file_contents = """
//

View File

@@ -40,6 +40,7 @@ REPL:
"""
from __future__ import with_statement
from __future__ import print_function
import imp
import os
import re
@@ -77,7 +78,7 @@ def dump_config(d, source_path, out):
-->
<configuration>""".format(source_path=os.path.abspath(source_path))
print >>out, dedent(header)
print(dedent(header), file=out)
for k, v in sorted(d.iteritems()):
try:
k_new = _substitute_env_vars(k)
@@ -87,24 +88,24 @@ def dump_config(d, source_path, out):
except KeyError as e:
raise Exception("failed environment variable substitution for value {k}: {e}"
.format(k=k, e=e))
print >>out, """\
print("""\
<property>
<name>{name}</name>
<value>{value}</value>
</property>""".format(name=xmlescape(k_new), value=xmlescape(v_new))
print >>out, "</configuration>"
</property>""".format(name=xmlescape(k_new), value=xmlescape(v_new)), file=out)
print("</configuration>", file=out)
def main():
if len(sys.argv) != 3:
print >>sys.stderr, "usage: {prog} <template> <out>".format(prog=sys.argv[0])
print("usage: {prog} <template> <out>".format(prog=sys.argv[0]), file=sys.stderr)
sys.exit(1)
_, in_path, out_path = sys.argv
try:
mod = imp.load_source('template', in_path)
except: # noqa
print >>sys.stderr, "Unable to load template: %s" % in_path
print("Unable to load template: %s" % in_path, file=sys.stderr)
raise
conf = mod.__dict__.get('CONFIG')
if not isinstance(conf, dict):

View File

@@ -19,6 +19,7 @@
# This tool walks the build directory (release by default) and will print the text, data,
# and bss section sizes of the archives.
from __future__ import print_function
import fnmatch
import os
import re

View File

@@ -21,6 +21,7 @@
#
# Usage: inline_pom.py <pom.xml>...
from __future__ import print_function
import re
import sys
from tempfile import mkstemp

View File

@@ -36,6 +36,7 @@
# TODO: generalise to other warnings
# * clang-tidy
from __future__ import print_function
from argparse import ArgumentParser
from collections import defaultdict
import json
@@ -229,6 +230,6 @@ if __name__ == "__main__":
comments = get_flake8_comments(revision)
merge_comments(comments, get_misc_comments(revision))
review_input = {"comments": comments}
print json.dumps(review_input, indent=True)
print(json.dumps(review_input, indent=True))
if not args.dryrun:
post_review_to_gerrit(review_input)

View File

@@ -28,6 +28,7 @@
# If an environment variable is not defined in the current environment,
# it is omitted with a warning.
from __future__ import print_function
import sys
import os

View File

@@ -16,6 +16,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import json
import subprocess
import os

View File

@@ -20,6 +20,7 @@
# This script is used to load the proper datasets for the specified workloads. It loads
# all data via Hive except for parquet data which needs to be loaded via Impala.
# Most ddl commands are executed by Impala.
from __future__ import print_function
import collections
import getpass
import logging
@@ -102,7 +103,7 @@ HIVE_CMD = os.path.join(os.environ['HIVE_HOME'], 'bin/beeline')
hive_auth = "auth=none"
if options.use_kerberos:
if not options.principal:
print "--principal is required when --use_kerberos is specified"
print("--principal is required when --use_kerberos is specified")
exit(1)
hive_auth = "principal=" + options.principal

View File

@@ -39,6 +39,7 @@
# 2018-04-13T15:06:34.144000 e44af7f93edb8cd6:1b1f801600000000 TRuntimeProfileTree(nodes=[TRuntimeProf...
from __future__ import print_function
from impala_py_lib import profiles
import sys
@@ -47,9 +48,9 @@ if len(sys.argv) == 1 or sys.argv[1] == "-":
elif len(sys.argv) == 2:
input_data = file(sys.argv[1])
else:
print >> sys.stderr, "Usage: %s [file]" % (sys.argv[0],)
print("Usage: %s [file]" % (sys.argv[0],), file=sys.stderr)
sys.exit(1)
for line in input_data:
tree = profiles.decode_profile_line(line)
print tree
print(tree)

View File

@@ -35,6 +35,7 @@
# TODO: Improve console output: replace 'print' with format strings
# and use sys.stderr/sys.stdout.
from __future__ import print_function
import logging
import optparse
import os
@@ -87,10 +88,10 @@ def confirm_prompt(prompt):
if the user confirms.
"""
while True:
print prompt, "[Y/n]:",
print(prompt, "[Y/n]:", end=' ')
if not os.isatty(sys.stdout.fileno()):
print "Not running interactively. Assuming 'N'."
print("Not running interactively. Assuming 'N'.")
return False
r = raw_input().strip().lower()
@@ -115,14 +116,14 @@ def check_apache_remote():
['git', 'config', '--local', '--get',
'remote.' + OPTIONS.apache_remote + '.url']).strip()
except subprocess.CalledProcessError:
print >>sys.stderr, "No remote named " + OPTIONS.apache_remote + \
". Please set one up, for example with: "
print >>sys.stderr, " git remote add apache", APACHE_REPO
print("No remote named " + OPTIONS.apache_remote
+ ". Please set one up, for example with: ", file=sys.stderr)
print(" git remote add apache", APACHE_REPO, file=sys.stderr)
sys.exit(1)
if url != APACHE_REPO:
print >>sys.stderr, "Unexpected URL for remote " + OPTIONS.apache_remote + "."
print >>sys.stderr, " Got: ", url
print >>sys.stderr, " Expected:", APACHE_REPO
print("Unexpected URL for remote " + OPTIONS.apache_remote + ".", file=sys.stderr)
print(" Got: ", url, file=sys.stderr)
print(" Expected:", APACHE_REPO, file=sys.stderr)
sys.exit(1)
@@ -135,15 +136,15 @@ def check_gerrit_remote():
url = check_output(['git', 'config', '--local', '--get',
'remote.' + OPTIONS.gerrit_remote + '.url']).strip()
except subprocess.CalledProcessError:
print >>sys.stderr, "No remote named " + OPTIONS.gerrit_remote + \
". Please set one up following "
print >>sys.stderr, "the contributor guide."
print("No remote named " + OPTIONS.gerrit_remote
+ ". Please set one up following ", file=sys.stderr)
print("the contributor guide.", file=sys.stderr)
sys.exit(1)
if not GERRIT_URL_RE.match(url):
print >>sys.stderr, "Unexpected URL for remote " + OPTIONS.gerrit_remote
print >>sys.stderr, " Got: ", url
print >>sys.stderr, " Expected to find host '%s' in the URL" % GERRIT_HOST
print("Unexpected URL for remote " + OPTIONS.gerrit_remote, file=sys.stderr)
print(" Got: ", url, file=sys.stderr)
print(" Expected URL to match '%s'" % GERRIT_URL_RE, file=sys.stderr)
sys.exit(1)
@@ -211,40 +212,40 @@ def do_update(branch, gerrit_sha, apache_sha):
# must have gotten committed to Apache outside of gerrit, and we'd need some
# manual intervention.
if not is_fast_forward(apache_sha, gerrit_sha):
print >>sys.stderr, "Cannot update branch '%s' from gerrit:" % branch
print >>sys.stderr, "Apache revision %s is not an ancestor of gerrit revision %s" % (
apache_sha[:8], gerrit_sha[:8])
print >>sys.stderr,\
"Something must have been committed to Apache and bypassed gerrit."
print >>sys.stderr, "Manual intervention is required."
print("Cannot update branch '%s' from gerrit:" % branch, file=sys.stderr)
print("Apache revision %s is not an ancestor of gerrit revision %s" % (
apache_sha[:8], gerrit_sha[:8]), file=sys.stderr)
print("Something must have been committed to Apache and bypassed gerrit.",
file=sys.stderr)
print("Manual intervention is required.", file=sys.stderr)
sys.exit(1)
# List the commits that are going to be pushed to the ASF, so that the committer
# can verify and "sign off".
commits = rev_list("%s..%s" % (apache_sha, gerrit_sha))
commits.reverse() # Display from oldest to newest.
print "-" * 60
print Colors.GREEN + ("%d commit(s) need to be pushed from Gerrit to ASF:" %\
len(commits)) + Colors.RESET
print("-" * 60)
print(Colors.GREEN + ("%d commit(s) need to be pushed from Gerrit to ASF:" %
len(commits)) + Colors.RESET)
push_sha = None
for sha in commits:
oneline = describe_commit(sha)
print " ", oneline
print(" ", oneline)
committer = get_committer_email(sha)
if committer != get_my_email():
print Colors.RED + " !!! Committed by someone else (%s) !!!" %\
committer, Colors.RESET
print(Colors.RED + " !!! Committed by someone else (%s) !!!" %
committer, Colors.RESET)
if not confirm_prompt(Colors.RED +\
" !!! Are you sure you want to push on behalf of another committer?" +\
Colors.RESET):
# Even if they don't want to push this commit, we could still push any
# earlier commits that the user _did_ author.
if push_sha is not None:
print "... will still update to prior commit %s..." % push_sha
print("... will still update to prior commit %s..." % push_sha)
break
push_sha = sha
if push_sha is None:
print "Nothing to push"
print("Nothing to push")
return
# Everything has been confirmed. Do the actual push
@@ -252,11 +253,11 @@ def do_update(branch, gerrit_sha, apache_sha):
if OPTIONS.dry_run:
cmd.append('--dry-run')
cmd.append('%s:refs/heads/%s' % (push_sha, branch))
print Colors.GREEN + "Running: " + Colors.RESET + " ".join(cmd)
print(Colors.GREEN + "Running: " + Colors.RESET + " ".join(cmd))
subprocess.check_call(cmd)
print Colors.GREEN + "Successfully updated %s to %s" % (branch, gerrit_sha) +\
Colors.RESET
print
print(Colors.GREEN + "Successfully updated %s to %s" % (branch, gerrit_sha)
+ Colors.RESET)
print()
def main():
@@ -295,14 +296,14 @@ def main():
apache_branches = get_branches(OPTIONS.apache_remote)
for branch, apache_sha in sorted(apache_branches.iteritems()):
gerrit_sha = rev_parse("remotes/" + OPTIONS.gerrit_remote + "/" + branch)
print "Branch '%s':\t" % branch,
print("Branch '%s':\t" % branch, end='')
if gerrit_sha is None:
print Colors.YELLOW, "found on Apache but not in gerrit", Colors.RESET
print(Colors.YELLOW, "found on Apache but not in gerrit", Colors.RESET)
continue
if gerrit_sha == apache_sha:
print Colors.GREEN, "up to date", Colors.RESET
print(Colors.GREEN, "up to date", Colors.RESET)
continue
print Colors.YELLOW, "needs update", Colors.RESET
print(Colors.YELLOW, "needs update", Colors.RESET)
do_update(branch, gerrit_sha, apache_sha)

View File

@@ -27,6 +27,7 @@
# - Stores the execution details in JSON format.
#
from __future__ import print_function
import getpass
import json
import logging
@@ -158,16 +159,16 @@ def prettytable_print(results, failed=False):
table.float_format = '.2'
# Group the results by table format.
for table_format_str, gr in groupby(results, lambda x: x.query.table_format_str):
print "Table Format: %s" % table_format_str
print("Table Format: %s" % table_format_str)
for result in gr:
start_time = result.start_time.strftime("%Y-%m-%d %H:%M:%S") if result.start_time \
is not None else '-'
row = [result.query.name, start_time, result.time_taken, result.client_name]
if failed: row.append(result.query_error)
table.add_row(row)
print table.get_string(sortby='Client ID')
print(table.get_string(sortby='Client ID'))
table.clear_rows()
print str()
print(str())
def print_result_summary(results):
"""Print failed and successfull queries for a given result list"""
@@ -271,8 +272,8 @@ if __name__ == "__main__":
if not all(result.success for result in workload_runner.results): exit_code = 1
# Print the results
print "\nWorkload: {0}, Scale Factor: {1}\n".format(
workload_runner.workload.name.upper(), workload_runner.scale_factor)
print("\nWorkload: {0}, Scale Factor: {1}\n".format(
workload_runner.workload.name.upper(), workload_runner.scale_factor))
print_result_summary(workload_runner.results)
# Store the results

View File

@@ -69,6 +69,7 @@
# --start_minicluster start a new Hadoop minicluster
# --ninja use ninja, rather than Make, as the build tool
from __future__ import print_function
from optparse import OptionParser
from tempfile import mkdtemp
@@ -208,7 +209,7 @@ def backup_workloads():
temp_dir = mkdtemp()
sh.cp(os.path.join(IMPALA_HOME, "testdata", "workloads"),
temp_dir, R=True, _out=sys.stdout, _err=sys.stderr)
print "Backed up workloads to {0}".format(temp_dir)
print("Backed up workloads to {0}".format(temp_dir))
return temp_dir

View File

@@ -17,6 +17,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import os
import subprocess
import sys

View File

@@ -25,6 +25,7 @@
#
# TODO Add support for SQL Error Codes
# https://msdn.microsoft.com/en-us/library/ms714687%28v=vs.85%29.aspx
from __future__ import print_function
error_codes = (
("OK", 0, ""),

View File

@@ -17,6 +17,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import sys
import os
import re
@@ -129,13 +130,13 @@ def metric_to_mdl(m):
"""Returns the metric in the mdl format, or None if the metric isn't supported."""
# TODO: Stamp out metrics with arguments, e.g. output each rpc call_duration metric.
if '$0' in m['key']:
print >>sys.stderr, "Skipping metrics with unbound argument, key=%s" % m['key']
print("Skipping metrics with unbound argument, key=%s" % m['key'], file=sys.stderr)
return None
# TODO: Stamp out individual metrics for other metric types.
SUPPORTED_METRIC_KINDS = ['COUNTER', 'GAUGE']
if m['kind'] not in SUPPORTED_METRIC_KINDS:
print >>sys.stderr, "Skipping %s metric %s" % (m['kind'], m['key'])
print("Skipping %s metric %s" % (m['kind'], m['key']), file=sys.stderr)
return None
return dict(

View File

@@ -32,6 +32,7 @@
# Creates a new Impala_Kudu service called "new_service" using /data/impala/
# for its scratch directories.
from __future__ import print_function
import argparse
import hashlib
import os
@@ -134,11 +135,11 @@ def find_dependencies(args, cluster):
if not found:
raise Exception("Could not find dependency service (type %s, name %s)" %
(service_type, arg))
print "Found explicit dependency service %s" % (found.name)
print("Found explicit dependency service %s" % (found.name))
deps.append(found)
else:
if not required:
print "Skipping optional dependency of type %s" % (service_type,)
print("Skipping optional dependency of type %s" % (service_type,))
continue
if len(candidates) > 1:
raise Exception("Found %d possible implicit dependency services of type %s" %
@@ -148,7 +149,7 @@ def find_dependencies(args, cluster):
(service_type,))
else:
found = candidates.values()[0]
print "Found implicit dependency service %s" % (found.name,)
print("Found implicit dependency service %s" % (found.name,))
deps.append(found)
return deps
@@ -157,7 +158,7 @@ def check_new_service_does_not_exist(api, cluster, new_name):
if service.displayName == new_name:
raise Exception("New service name %s already in use" % (new_name,))
print "New service name %s is not in use" % (new_name,)
print("New service name %s is not in use" % (new_name,))
def find_template_service(api, cluster, based_on):
template_service = None
@@ -166,7 +167,7 @@ def find_template_service(api, cluster, based_on):
if service.type != "IMPALA":
raise Exception("Based-on service %s is of wrong type %s" %
(based_on, service.type))
print "Found based-on service: %s" % (based_on,)
print("Found based-on service: %s" % (based_on,))
template_service = service
if based_on and not template_service:
@@ -177,10 +178,10 @@ def find_template_service(api, cluster, based_on):
def find_master_host(api, cm_hostname, master_hostname):
for h in api.get_all_hosts():
if master_hostname and h.hostname == master_hostname:
print "Found master host %s" % (master_hostname,)
print("Found master host %s" % (master_hostname,))
return h
elif not master_hostname and h.hostname == cm_hostname:
print "Found implicit master host on CM host %s" % (cm_hostname,)
print("Found implicit master host on CM host %s" % (cm_hostname,))
return h
if master_hostname:
@@ -225,11 +226,11 @@ def get_best_parcel(api, cluster):
parcel = None
if parcel:
print "Chose best parcel %s-%s (stage %s)" % (parcel.product,
print("Chose best parcel %s-%s (stage %s)" % (parcel.product,
parcel.version,
parcel.stage)
parcel.stage))
else:
print "Found no candidate parcels"
print("Found no candidate parcels")
return parcel
@@ -238,9 +239,9 @@ def ensure_parcel_repo_added(api):
config = cm.get_config(view='summary')
parcel_urls = config.get("REMOTE_PARCEL_REPO_URLS", "").split(",")
if IMPALA_KUDU_PARCEL_URL in parcel_urls:
print "Impala_Kudu parcel URL already present"
print("Impala_Kudu parcel URL already present")
else:
print "Adding Impala_Kudu parcel URL"
print("Adding Impala_Kudu parcel URL")
parcel_urls.append(IMPALA_KUDU_PARCEL_URL)
config["REMOTE_PARCEL_REPO_URLS"] = ",".join(parcel_urls)
cm.update_config(config)
@@ -252,8 +253,8 @@ def wait_for_parcel_stage(cluster, parcel, stage):
return
if new_parcel.state.errors:
raise Exception(str(new_parcel.state.errors))
print "progress: %s / %s" % (new_parcel.state.progress,
new_parcel.state.totalProgress)
print("progress: %s / %s" % (new_parcel.state.progress,
new_parcel.state.totalProgress))
time.sleep(1)
else:
raise Exception("Parcel %s-%s did not reach stage %s in %d seconds" %
@@ -262,33 +263,33 @@ def wait_for_parcel_stage(cluster, parcel, stage):
def ensure_parcel_activated(cluster, parcel):
parcel_stage = parcel.stage
if parcel_stage == "AVAILABLE_REMOTELY":
print "Downloading parcel: %s-%s " % (parcel.product, parcel.version)
print("Downloading parcel: %s-%s " % (parcel.product, parcel.version))
parcel.start_download()
wait_for_parcel_stage(cluster, parcel, "DOWNLOADED")
print "Downloaded parcel: %s-%s " % (parcel.product, parcel.version)
print("Downloaded parcel: %s-%s " % (parcel.product, parcel.version))
parcel_stage = "DOWNLOADED"
if parcel_stage == "DOWNLOADED":
print "Distributing parcel: %s-%s " % (parcel.product, parcel.version)
print("Distributing parcel: %s-%s " % (parcel.product, parcel.version))
parcel.start_distribution()
wait_for_parcel_stage(cluster, parcel, "DISTRIBUTED")
print "Distributed parcel: %s-%s " % (parcel.product, parcel.version)
print("Distributed parcel: %s-%s " % (parcel.product, parcel.version))
parcel_stage = "DISTRIBUTED"
if parcel_stage == "DISTRIBUTED":
print "Activating parcel: %s-%s " % (parcel.product, parcel.version)
print("Activating parcel: %s-%s " % (parcel.product, parcel.version))
parcel.activate()
wait_for_parcel_stage(cluster, parcel, "ACTIVATED")
print "Activated parcel: %s-%s " % (parcel.product, parcel.version)
print("Activated parcel: %s-%s " % (parcel.product, parcel.version))
parcel_stage = "ACTIVATED"
print "Parcel %s-%s is activated" % (parcel.product, parcel.version)
print("Parcel %s-%s is activated" % (parcel.product, parcel.version))
def print_configs(entity_name, config_dict):
for attr, value in config_dict.iteritems():
print "Set %s config %s=\'%s\'" % (entity_name, attr, value)
print("Set %s config %s=\'%s\'" % (entity_name, attr, value))
def create_new_service(api, cluster, new_name, deps, scratch_dirs, master_host):
new_service = cluster.create_service(new_name, "IMPALA")
print "Created new service %s" % (new_name,)
print("Created new service %s" % (new_name,))
service_config = {}
for d in deps:
@@ -312,13 +313,13 @@ def create_new_service(api, cluster, new_name, deps, scratch_dirs, master_host):
md5.update(h.hostId)
new_role_name = "%s-%s-%s" % (new_name, rcg.roleType, md5.hexdigest())
new_service.create_role(new_role_name, rcg.roleType, h.hostId)
print "Created new role %s" % (new_role_name,)
print("Created new role %s" % (new_role_name,))
else:
md5 = hashlib.md5()
md5.update(master_host.hostId)
new_role_name = "%s-%s-%s" % (new_name, rcg.roleType, md5.hexdigest())
new_service.create_role(new_role_name, rcg.roleType, master_host.hostId)
print "Created new role %s" % (new_role_name,)
print("Created new role %s" % (new_role_name,))
def transform_path(rcg_name, rcg_config_dict, rcg_config_name):
# TODO: Do a better job with paths where the role type is embedded.
@@ -379,7 +380,7 @@ def transform_rcg_config(rcg):
def clone_existing_service(cluster, new_name, template_service):
new_service = cluster.create_service(new_name, "IMPALA")
print "Created new service %s" % (new_name,)
print("Created new service %s" % (new_name,))
service_config, _ = template_service.get_config()
service_config["impala_service_env_safety_valve"] = "IMPALA_KUDU=1"
@@ -395,7 +396,7 @@ def clone_existing_service(cluster, new_name, template_service):
new_rcg = new_service.create_role_config_group(new_rcg_name,
new_rcg_name,
old_rcg.roleType)
print "Created new rcg %s" % (new_rcg_name,)
print("Created new rcg %s" % (new_rcg_name,))
else:
new_rcg = new_service.get_role_config_group("%s-%s-BASE" % (new_name,
old_rcg.roleType))
@@ -414,7 +415,7 @@ def clone_existing_service(cluster, new_name, template_service):
new_role = new_service.create_role(new_role_name,
new_rcg.roleType,
old_role.hostRef.hostId)
print "Created new role %s" % (new_role_name,)
print("Created new role %s" % (new_role_name,))
new_role_names.append(new_role.name)
new_rcg.move_roles(new_role_names)
@@ -448,7 +449,8 @@ def main():
parcel = get_best_parcel(api, cluster)
if parcel:
break
print "Could not find parcel in attempt %d, will sleep and retry" % (attempt,)
print("Could not find parcel in attempt %d, will sleep and retry" %
(attempt,))
time.sleep(1)
else:
raise Exception("No parcel showed up in %d seconds" % (MAX_PARCEL_REPO_WAIT_SECS,))

View File

@@ -19,6 +19,7 @@
# A collection of useful Python GDB modules and commands for
# debugging Impala core dumps.
#
from __future__ import print_function
import gdb
from collections import defaultdict

View File

@@ -21,6 +21,7 @@ A script for generating arbitrary junit XML reports while building Impala.
These files will be consumed by jenkins.impala.io to generate reports for
easier triaging of build and setup errors.
"""
from __future__ import print_function
import argparse
import codecs
import errno

View File

@@ -18,6 +18,7 @@
# This file contains library functions to decode and access Impala query profiles.
from __future__ import print_function
import base64
import datetime
import zlib
@@ -30,7 +31,7 @@ def decode_profile_line(line):
space_separated = line.split(" ")
if len(space_separated) == 3:
ts = int(space_separated[0])
print datetime.datetime.fromtimestamp(ts / 1000.0).isoformat(), space_separated[1]
print(datetime.datetime.fromtimestamp(ts / 1000.0).isoformat(), space_separated[1])
base64_encoded = space_separated[2]
elif len(space_separated) == 1:
base64_encoded = space_separated[0]

View File

@@ -94,6 +94,7 @@
# This should be used sparingly, because these commands are executed
# serially.
#
from __future__ import print_function
import collections
import csv
import glob
@@ -140,7 +141,7 @@ parser.add_option("--hdfs_namenode", dest="hdfs_namenode", default="localhost:20
(options, args) = parser.parse_args()
if options.workload is None:
print "A workload name must be specified."
print("A workload name must be specified.")
parser.print_help()
sys.exit(1)
@@ -604,7 +605,7 @@ def eval_section(section_str):
# features (e.g. "for i in {1..n}")
p = subprocess.Popen(['/bin/bash', '-c', cmd], stdout=subprocess.PIPE)
stdout, stderr = p.communicate()
if stderr: print stderr
if stderr: print(stderr)
assert p.returncode == 0
return stdout.strip()
@@ -638,30 +639,31 @@ def generate_statements(output_name, test_vectors, sections,
table_name = section['BASE_TABLE_NAME'].strip()
if table_names and (table_name.lower() not in table_names):
print 'Skipping table: %s.%s, table is not in specified table list' % (db, table_name)
print('Skipping table: %s.%s, table is not in specified table list' %
(db, table_name))
continue
# Check Hive version requirement, if present.
if section['HIVE_MAJOR_VERSION'] and \
section['HIVE_MAJOR_VERSION'].strip() != \
os.environ['IMPALA_HIVE_MAJOR_VERSION'].strip():
print "Skipping table '{0}.{1}': wrong Hive major version".format(db, table_name)
print("Skipping table '{0}.{1}': wrong Hive major version".format(db, table_name))
continue
if table_format in schema_only_constraints and \
table_name.lower() not in schema_only_constraints[table_format]:
print ('Skipping table: %s.%s, \'only\' constraint for format did not '
'include this table.') % (db, table_name)
print(('Skipping table: %s.%s, \'only\' constraint for format did not '
'include this table.') % (db, table_name))
continue
if schema_include_constraints[table_name.lower()] and \
table_format not in schema_include_constraints[table_name.lower()]:
print 'Skipping \'%s.%s\' due to include constraint match.' % (db, table_name)
print('Skipping \'%s.%s\' due to include constraint match.' % (db, table_name))
continue
if schema_exclude_constraints[table_name.lower()] and\
table_format in schema_exclude_constraints[table_name.lower()]:
print 'Skipping \'%s.%s\' due to exclude constraint match.' % (db, table_name)
print('Skipping \'%s.%s\' due to exclude constraint match.' % (db, table_name))
continue
alter = section.get('ALTER')
@@ -752,7 +754,7 @@ def generate_statements(output_name, test_vectors, sections,
# TODO: Currently, Kudu does not support partitioned tables via Impala.
# If a CREATE_KUDU section was provided, assume it handles the partition columns
if file_format == 'kudu' and partition_columns != '' and not create_kudu:
print "Ignore partitions on Kudu table: %s.%s" % (db_name, table_name)
print("Ignore partitions on Kudu table: %s.%s" % (db_name, table_name))
continue
# If a CREATE section is provided, use that. Otherwise a COLUMNS section
@@ -817,15 +819,15 @@ def generate_statements(output_name, test_vectors, sections,
# and skip loading the data. Otherwise, the data is generated using either an
# INSERT INTO statement or a LOAD statement.
if not force_reload and hdfs_location in existing_tables:
print 'HDFS path:', data_path, 'contains data. Data loading can be skipped.'
print('HDFS path:', data_path, 'contains data. Data loading can be skipped.')
else:
print 'HDFS path:', data_path, 'does not exist or is empty. Data will be loaded.'
print('HDFS path:', data_path, 'does not exist or is empty. Data will be loaded.')
if not db_suffix:
if load:
hive_output.load_base.append(build_load_statement(load, db_name,
db_suffix, table_name))
else:
print 'Empty base table load for %s. Skipping load generation' % table_name
print('Empty base table load for %s. Skipping load generation' % table_name)
elif file_format in ['kudu', 'parquet', 'iceberg']:
if insert_hive:
hive_output.load.append(build_insert(insert_hive, db_name, db_suffix,
@@ -835,8 +837,8 @@ def generate_statements(output_name, test_vectors, sections,
file_format, codec, compression_type, table_name, data_path,
for_impala=True))
else:
print 'Empty parquet/kudu load for table %s. Skipping insert generation' \
% table_name
print('Empty parquet/kudu load for table %s. Skipping insert generation'
% table_name)
else:
if insert_hive:
insert = insert_hive
@@ -844,7 +846,7 @@ def generate_statements(output_name, test_vectors, sections,
hive_output.load.append(build_insert(insert, db_name, db_suffix, file_format,
codec, compression_type, table_name, data_path, create_hive=create_hive))
else:
print 'Empty insert for table %s. Skipping insert generation' % table_name
print('Empty insert for table %s. Skipping insert generation' % table_name)
impala_create.write_to_file("create-%s-impala-generated-%s-%s-%s.sql" %
(output_name, file_format, codec, compression_type))
@@ -879,8 +881,8 @@ def parse_schema_template_file(file_name):
if __name__ == "__main__":
if options.table_formats is None:
if options.exploration_strategy not in KNOWN_EXPLORATION_STRATEGIES:
print 'Invalid exploration strategy:', options.exploration_strategy
print 'Valid values:', ', '.join(KNOWN_EXPLORATION_STRATEGIES)
print('Invalid exploration strategy:', options.exploration_strategy)
print('Valid values:', ', '.join(KNOWN_EXPLORATION_STRATEGIES))
sys.exit(1)
test_vectors = [vector.value for vector in\
@@ -896,7 +898,7 @@ if __name__ == "__main__":
convert_orc_to_full_acid = options.workload == 'functional-query'
target_dataset = test_vectors[0].dataset
print 'Target Dataset: ' + target_dataset
print('Target Dataset: ' + target_dataset)
dataset_load_dir = os.path.join(SQL_OUTPUT_DIR, target_dataset)
# If the directory containing the sql files does not exist, create it. Else nuke all the
# files corresponding to the current workload.
@@ -918,7 +920,7 @@ if __name__ == "__main__":
'%s_schema_template.sql' % target_dataset)
if not os.path.isfile(schema_template_file):
print 'Schema file not found: ' + schema_template_file
print('Schema file not found: ' + schema_template_file)
sys.exit(1)
constraints_file = os.path.join(DATASET_DIR, target_dataset, 'schema_constraints.csv')

View File

@@ -40,6 +40,7 @@
# The pairwise generation is done using the Python 'AllPairs' module. This module can be
# downloaded from http://pypi.python.org/pypi/AllPairs/2.0.1
#
from __future__ import print_function
import collections
import csv
import math
@@ -56,7 +57,7 @@ parser.add_option("-w", "--workload", dest="workload",
(options, args) = parser.parse_args()
if options.workload is None:
print "A workload name must be specified."
print("A workload name must be specified.")
parser.print_help()
sys.exit(1)
@@ -115,12 +116,12 @@ def read_dimension_file(file_name):
continue
values = line.split(':')
if len(values) != 2:
print 'Invalid dimension file format. Expected format is <dimension name>: val1,'\
' val2, ... Found: ' + line
print('Invalid dimension file format. Expected format is <dimension name>: val1,'
' val2, ... Found: ' + line)
sys.exit(1)
if not values[0] in KNOWN_DIMENSION_NAMES:
print 'Unknown dimension name: ' + values[0]
print 'Valid dimension names: ' + ', '.join(KNOWN_DIMENSION_NAMES)
print('Unknown dimension name: ' + values[0])
print('Valid dimension names: ' + ', '.join(KNOWN_DIMENSION_NAMES))
sys.exit(1)
dimension_map[values[0]] = [val.strip() for val in values[1].split(',')]
return dimension_map
@@ -132,7 +133,7 @@ def write_vectors_to_csv(output_dir, output_file, matrix):
output_text += '\n' + ', '.join(row)
output_path = os.path.join(output_dir, output_file)
print 'Writing test vectors to: ' + output_path
print('Writing test vectors to: ' + output_path)
with open(output_path, 'wb') as output_file:
output_file.write(output_text)
output_file.write('\n')
@@ -140,10 +141,10 @@ def write_vectors_to_csv(output_dir, output_file, matrix):
dimension_file = os.path.join(WORKLOAD_DIR, options.workload,
'%s_dimensions.csv' % options.workload)
if not os.path.isfile(dimension_file):
print 'Dimension file not found: ' + dimension_file
print('Dimension file not found: ' + dimension_file)
sys.exit(1)
print 'Reading dimension file: ' + dimension_file
print('Reading dimension file: ' + dimension_file)
vector_map = read_dimension_file(dimension_file)
vectors = []

View File

@@ -22,6 +22,7 @@
# Kudu tables are created in the specified 'target-db' using the existing HDFS tables
# from 'source-db'.
from __future__ import print_function
import logging
import os
import sqlparse
@@ -72,7 +73,7 @@ def load_data():
query = sqlparse.format(query.rstrip(';'), strip_comments=True)
query_str = query.format(**sql_params)
if (len(query_str)) == 0: continue
if verbose: print query_str
if verbose: print(query_str)
impala.execute(query_str)
def get_test_file_path(workload):

View File

@@ -17,6 +17,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import glob
import json
import os

View File

@@ -22,6 +22,7 @@
# TODO: Consider combining this with wait-for-metastore.py. A TCLIService client
# can perhaps also talk to the metastore.
from __future__ import print_function
import os
import time
import getpass
@@ -71,20 +72,20 @@ while time.time() - now < TIMEOUT_SECONDS:
close_session_req = TCLIService.TCloseSessionReq()
close_session_req.sessionHandle = resp.sessionHandle
hs2_client.CloseSession(close_session_req)
print "HiveServer2 service is up at %s." % options.hs2_hostport
print("HiveServer2 service is up at %s." % options.hs2_hostport)
exit(0)
except Exception as e:
if "SASL" in e.message: # Bail out on SASL failures
print "SASL failure when attempting connection:"
print("SASL failure when attempting connection:")
raise
if "GSS" in e.message: # Other GSSAPI failures
print "GSS failure when attempting connection:"
print("GSS failure when attempting connection:")
raise
print "Waiting for HiveServer2 at %s..." % options.hs2_hostport
print e
print("Waiting for HiveServer2 at %s..." % options.hs2_hostport)
print(e)
finally:
hs2_transport.close()
time.sleep(0.5)
print "HiveServer2 service failed to start within %s seconds." % TIMEOUT_SECONDS
print("HiveServer2 service failed to start within %s seconds." % TIMEOUT_SECONDS)
exit(1)

View File

@@ -21,6 +21,7 @@
# to execute the get_database("default") Thrift RPC until the call succeeds,
# or a timeout is reached.
from __future__ import print_function
import os
import time
from optparse import OptionParser
@@ -56,19 +57,19 @@ while time.time() - now < TIMEOUT_SECONDS:
hive_transport.open()
resp = hive_client.get_database("default")
if resp is not None:
print "Metastore service is up at %s." % options.metastore_hostport
print("Metastore service is up at %s." % options.metastore_hostport)
exit(0)
except Exception as e:
if "SASL" in e.message: # Bail out on SASL failures
print "SASL failure when attempting connection:"
print("SASL failure when attempting connection:")
raise
if "GSS" in e.message: # Other GSSAPI failures
print "GSS failure when attempting connection:"
print("GSS failure when attempting connection:")
raise
print "Waiting for the Metastore at %s..." % options.metastore_hostport
print("Waiting for the Metastore at %s..." % options.metastore_hostport)
finally:
hive_transport.close()
time.sleep(0.5)
print "Metastore service failed to start within %s seconds." % TIMEOUT_SECONDS
print("Metastore service failed to start within %s seconds." % TIMEOUT_SECONDS)
exit(1)

View File

@@ -17,6 +17,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import os
import sys
@@ -39,7 +40,7 @@ def _get_yarn_nm_ram_mb():
# - leave at least 20G for other services
# - don't need more than 48G
ret = min(max(available_ram_gb * 1024 - 20 * 1024, 4096), 48 * 1024)
print >>sys.stderr, "Configuring Yarn NM to use {0}MB RAM".format(ret)
print("Configuring Yarn NM to use {0}MB RAM".format(ret), file=sys.stderr)
return ret

View File

@@ -19,6 +19,7 @@
# Utility code for creating cgroups for the Impala development environment.
# May be used as a library or as a command-line utility for manual testing.
from __future__ import print_function
import os
import sys
import errno
@@ -81,7 +82,7 @@ def create_impala_cgroup_path(instance_num):
if __name__ == "__main__":
if options.cluster_size < 0:
print 'Please specify a cluster size >= 0'
print('Please specify a cluster size >= 0')
sys.exit(1)
for i in range(options.cluster_size):
create_impala_cgroup_path(i)

View File

@@ -22,6 +22,7 @@
# command line, will generate data files in the specified directory and a
# print a SQL load statement to incorporate into dataload SQL script generation.
from __future__ import print_function
from shutil import rmtree
from optparse import OptionParser
from contextlib import contextmanager

View File

@@ -22,6 +22,7 @@
# generate a CSV data file and prints a SQL load statement to incorporate
# into dataload SQL script generation.
from __future__ import print_function
from datetime import datetime, timedelta
import itertools
import optparse
@@ -119,7 +120,7 @@ if __name__ == "__main__":
if options.get_columns:
# Output column descriptors
print '\n'.join(get_columns(options.num_columns))
print('\n'.join(get_columns(options.num_columns)))
if options.create_data:
# Generate data locally, and output the SQL load command for use in dataload

View File

@@ -29,6 +29,7 @@
# if necessary (2.5).
from __future__ import division
from __future__ import print_function
import difflib
import json
import logging
@@ -1150,5 +1151,5 @@ if __name__ == "__main__":
if ref_grouped:
ref_impala_version = get_impala_version(ref_grouped)
print build_summary_header(current_impala_version, ref_impala_version)
print report
print(build_summary_header(current_impala_version, ref_impala_version))
print(report)

View File

@@ -17,6 +17,7 @@
#
# Basic object model of an Impala cluster (set of Impala processes).
from __future__ import print_function
import json
import logging
import os
@@ -158,7 +159,7 @@ class ImpalaCluster(object):
result = client.execute("select 1")
assert result.success
++n
except Exception as e: print e
except Exception as e: print(e)
finally:
client.close()
return n

View File

@@ -17,6 +17,7 @@
#
# The base class that should be used for almost all Impala tests
from __future__ import print_function
import glob
import grp
import json
@@ -1202,7 +1203,7 @@ class ImpalaTestSuite(BaseTestSuite):
db_name=db_name, table_name=table_name))
return
except Exception as ex:
print str(ex)
print(str(ex))
time.sleep(0.2)
continue
raise Exception("Table {0}.{1} didn't show up after {2}s", db_name, table_name,

View File

@@ -20,6 +20,7 @@
# the tests it is used for. However, it is generic enough that it can be extended if
# more functionality is required for adding tests.
from __future__ import print_function
import os
from time import sleep, time
import xml.etree.ElementTree as ET
@@ -98,7 +99,7 @@ class ResourcePoolConfig(object):
if pool_name == name.split('.')[-1] and pool_attribute in name:
return property
except Exception as e:
print "Current DOM element being inspected: \n{0}".format(ET.dump(property))
print("Current DOM element being inspected: \n{0}".format(ET.dump(property)))
raise e
assert False, "{0} attribute not found for pool {1} in the config XML:\n{2}".format(
pool_attribute, pool_name, ET.dump(xml_root))

View File

@@ -16,6 +16,7 @@
# under the License.
"""This module provides utilities for interacting with a cluster."""
from __future__ import print_function
# This should be moved into the test/util folder eventually. The problem is this
# module depends on db_connection which use some query generator classes.

View File

@@ -20,6 +20,7 @@
'''This is a mapper for use with hadoop streaming. See data_generator.DatabasePopulator
for more information on how this file is used.
'''
from __future__ import print_function
import os
import random

View File

@@ -21,6 +21,7 @@
connection.
'''
from __future__ import print_function
import hashlib
import impala.dbapi
import re

View File

@@ -22,6 +22,7 @@
results.
'''
from __future__ import print_function
# TODO: IMPALA-4600: refactor this module
from copy import deepcopy

View File

@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
from collections import defaultdict
from copy import deepcopy
from itertools import ifilter

View File

@@ -31,6 +31,7 @@
# Importing the whole module instead of doing selective import seems to help find linker
# errors.
from __future__ import print_function
import cx_Oracle
# Host on which Oracle Database lies.
@@ -56,7 +57,7 @@ def main():
finally:
cursor.close()
assert rows == EXPECTED_RESULT
print 'success'
print('success')
if '__main__' == __name__:

View File

@@ -17,6 +17,7 @@
# py.test configuration module
#
from __future__ import print_function
from impala.dbapi import connect as impala_connect
from kudu import connect as kudu_connect
from random import choice, sample

View File

@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
import pytest
@@ -226,7 +227,7 @@ class TestBlacklistFaultyDisk(CustomClusterTestSuite):
dir_path = tempfile.mkdtemp()
self.created_dirs.append(dir_path)
result.append(dir_path)
print "Generated dir" + dir_path
print("Generated dir" + dir_path)
return result
def setup_method(self, method):

View File

@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import glob
import os
import psutil
@@ -89,7 +90,7 @@ class TestBreakpadBase(CustomClusterTestSuite):
# For every process in the list we might see the original Impala process plus a
# forked off child that is writing the minidump. We need to catch both.
for pid in process.get_pids():
print "Checking pid %s" % pid
print("Checking pid %s" % pid)
psutil_process = psutil.Process(pid)
psutil_process.wait(timeout)
except psutil.NoSuchProcess:

View File

@@ -14,6 +14,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import pytest
import os
import time
@@ -75,7 +76,7 @@ class TestHiveMetaStoreFailure(CustomClusterTestSuite):
try:
self.client.execute("describe %s" % tbl_name)
except ImpalaBeeswaxException as e:
print str(e)
print(str(e))
assert "Failed to load metadata for table: %s. Running 'invalidate metadata %s' "\
"may resolve this problem." % (tbl_name, tbl_name) in str(e)
self.run_hive_server()

View File

@@ -16,6 +16,7 @@
# under the License.
#
from __future__ import print_function
import json
import logging
import os
@@ -128,7 +129,7 @@ class TestClientSsl(CustomClusterTestSuite):
p.send_cmd("profile")
result = p.get_result()
print result.stderr
print(result.stderr)
assert "Query Status: Cancelled" in result.stdout
assert impalad.wait_for_num_in_flight_queries(0)

View File

@@ -14,6 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import logging
import pytest

View File

@@ -17,6 +17,7 @@
# Test behaviors specific to --use_local_catalog being enabled.
from __future__ import print_function
import pytest
import Queue
import random
@@ -272,7 +273,7 @@ class TestLocalCatalogRetries(CustomClusterTestSuite):
q = random.choice(queries)
attempt += 1
try:
print 'Attempt', attempt, 'client', str(client)
print('Attempt', attempt, 'client', str(client))
ret = self.execute_query_unchecked(client, q)
except Exception as e:
if 'InconsistentMetadataFetchException' in str(e):

View File

@@ -21,6 +21,7 @@
# TODO: Re-factor tests into multiple classes.
# TODO: Add a test that cancels queries while a retry is running
from __future__ import print_function
import pytest
import re
import shutil
@@ -1183,7 +1184,7 @@ class TestQueryRetriesFaultyDisk(CustomClusterTestSuite):
dir_path = tempfile.mkdtemp()
self.created_dirs.append(dir_path)
result.append(dir_path)
print "Generated dir" + dir_path
print("Generated dir" + dir_path)
return result
def __validate_web_ui_state(self):

View File

@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import logging
import os
import pytest
@@ -515,7 +516,7 @@ class TestGracefulShutdown(CustomClusterTestSuite, HS2TestSuite):
def test_shutdown_executor_with_delay(self):
"""Regression test for IMPALA-7931 that adds delays to status reporting and
to fetching of results to trigger races that previously resulted in query failures."""
print self.exploration_strategy
print(self.exploration_strategy)
if self.exploration_strategy() != 'exhaustive':
pytest.skip()
self.do_test_shutdown_executor(fetch_delay_s=5)

View File

@@ -17,6 +17,7 @@
#
# Tests for query expiration.
from __future__ import print_function
import os
import pytest
import re
@@ -82,7 +83,7 @@ class TestScratchDir(CustomClusterTestSuite):
if not non_existing:
self.created_dirs.append(dir_path)
result.append(dir_path)
print "Generated dir" + dir_path
print("Generated dir" + dir_path)
return result
def setup_method(self, method):

View File

@@ -9,6 +9,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from multiprocessing.pool import ThreadPool
import pytest

View File

@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import os
import pytest
import random
@@ -82,7 +83,7 @@ class TestUdfConcurrency(CustomClusterTestSuite):
try:
setup_client.execute(setup_query)
except Exception as e:
print "Unable to create initial function: {0}".format(setup_query)
print("Unable to create initial function: {0}".format(setup_query))
raise
errors = []
@@ -126,7 +127,7 @@ class TestUdfConcurrency(CustomClusterTestSuite):
# join all threads.
for t in runner_threads: t.join()
for e in errors: print e
for e in errors: print(e)
# Checks that no impalad has crashed.
assert cluster.num_responsive_coordinators() == exp_num_coordinators
@@ -167,13 +168,13 @@ class TestUdfConcurrency(CustomClusterTestSuite):
s = create_fn_to_use.format(unique_database, udf_tgt_path)
setup_client.execute(s)
except Exception as e:
print e
print(e)
assert False
for i in range(0, num_drops):
try:
setup_client.execute(create_fn_to_drop.format(unique_database, i, udf_tgt_path))
except Exception as e:
print e
print(e)
assert False
errors = []
@@ -208,5 +209,5 @@ class TestUdfConcurrency(CustomClusterTestSuite):
for t in runner_threads: t.join()
# Check for any errors.
for e in errors: print e
for e in errors: print(e)
assert len(errors) == 0

View File

@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import json
import random
import re

View File

@@ -23,7 +23,7 @@
# TODO: For each test, verify all the metadata available via Hive and
# Impala, in all the possible ways of validating that metadata.
from __future__ import print_function
import pytest
import random
import string
@@ -735,7 +735,7 @@ class TestHmsIntegration(ImpalaTestSuite):
'(x int, y int) stored as parquet') as table_name:
self.client.execute('invalidate metadata')
self.client.execute('invalidate metadata %s' % table_name)
print self.impala_table_stats(table_name)
print(self.impala_table_stats(table_name))
assert 'PARQUET' == self.impala_table_stats(table_name)[()]['format']
self.run_stmt_in_hive(
'alter table %s set fileformat avro' % table_name)

View File

@@ -17,6 +17,7 @@
# Validates all aggregate functions across all datatypes
#
from __future__ import print_function
import pytest
from testdata.common import widetable
@@ -150,7 +151,8 @@ class TestAggregation(ImpalaTestSuite):
# NDV is inherently approximate. Compare with some tolerance.
err = abs(result_lut[key] - int(actual_string))
rel_err = err / float(result_lut[key])
print key, result_lut[key], actual_string,abs(result_lut[key] - int(actual_string))
print(key, result_lut[key], actual_string,
abs(result_lut[key] - int(actual_string)))
assert err <= 1 or rel_err < 0.05
elif data_type in ('float', 'double') and agg_func != 'count':
# Compare with a margin of error.

View File

@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import math
import os
import pytest
@@ -96,7 +97,7 @@ class TestCompressedFormatsBase(ImpalaTestSuite):
assert result and int(result) > 0
except Exception as e:
error_msg = str(e)
print error_msg
print(error_msg)
if expected_error is None or expected_error not in error_msg:
print("Unexpected error:\n{0}".format(error_msg))
raise

View File

@@ -17,6 +17,7 @@
# Validates limit on scan nodes
from __future__ import print_function
import pytest
import re
import time
@@ -87,7 +88,7 @@ class TestHdfsCaching(ImpalaTestSuite):
elif num_metrics_increased != 1:
# Test failed, print the metrics
for i in range(0, len(cached_bytes_before)):
print "%d %d" % (cached_bytes_before[i], cached_bytes_after[i])
print("%d %d" % (cached_bytes_before[i], cached_bytes_after[i]))
assert(False)
def test_cache_cancellation(self, vector):

View File

@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
from kudu.schema import (
BOOL,
DOUBLE,
@@ -481,7 +482,7 @@ class TestKuduOperations(KuduTestSuite):
date(2010, 1, 1), '')]
i += 1
cursor.execute("select count(*) from %s" % table_name)
print cursor.fetchall() == [(i, )]
print(cursor.fetchall() == [(i, )])
def test_concurrent_schema_change(self, cursor, unique_database):
"""Tests that an insert into a Kudu table with a concurrent schema change either

View File

@@ -17,6 +17,7 @@
# Validates limit on scan nodes
from __future__ import print_function
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_vector import ImpalaTestDimension
@@ -79,5 +80,5 @@ class TestLimitBase(ImpalaTestSuite):
except ImpalaBeeswaxException as e:
assert not should_succeed, 'Query was not expected to fail: %s' % e
if (expected_error not in str(e)):
print str(e)
print(str(e))
assert expected_error in str(e)

View File

@@ -21,6 +21,7 @@
# tests can run with the normal exploration strategy and the overall test runtime doesn't
# explode.
from __future__ import print_function
import os
import pytest
import random
@@ -410,10 +411,10 @@ class TestHdfsScannerSkew(ImpalaTestSuite):
ratio = float(a) / float(b)
else:
ratio = float(b) / float(a)
print "Intra-node bytes read ratio:", ratio
print("Intra-node bytes read ratio:", ratio)
if ratio < SKEW_THRESHOLD:
count_skew += 1
print "Profile of skewed execution: ", profile
print("Profile of skewed execution: ", profile)
return count_skew
tbl_name = unique_database + ".lineitem_skew"

View File

@@ -22,6 +22,7 @@
# executing the remaining tests in parallel. To run only some of
# these, use --skip-serial, --skip-stress, or --skip-parallel.
# All additional command line options are passed to py.test.
from __future__ import print_function
from tests.common.impala_cluster import ImpalaCluster
from tests.common.impala_service import ImpaladService
from tests.conftest import configure_logging
@@ -226,16 +227,16 @@ def build_ignore_dir_arg_list(valid_dirs):
def print_metrics(substring):
"""Prints metrics with the given substring in the name"""
for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
print ">" * 80
print(">" * 80)
port = impalad.get_webserver_port()
cert = impalad._get_webserver_certificate_file()
print "connections metrics for impalad at port {0}:".format(port)
print("connections metrics for impalad at port {0}:".format(port))
debug_info = json.loads(ImpaladService(impalad.hostname, webserver_port=port,
webserver_certificate_file=cert).read_debug_webpage('metrics?json'))
for metric in debug_info['metric_group']['metrics']:
if substring in metric['name']:
print json.dumps(metric, indent=1)
print "<" * 80
print(json.dumps(metric, indent=1))
print("<" * 80)
def detect_and_remove_flag(flag):
@@ -265,7 +266,7 @@ if __name__ == "__main__":
def run(args):
"""Helper to print out arguments of test_executor before invoking."""
print "Running TestExecutor with args: %s" % (args,)
print("Running TestExecutor with args: %s" % (args,))
test_executor.run_tests(args)
os.chdir(TEST_DIR)

View File

@@ -18,6 +18,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import httplib
import logging
import os
@@ -896,14 +897,14 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
os.chdir("%s/tests/shell/" % os.environ['IMPALA_HOME'])
result = run_impala_shell_interactive(vector,
"sOuRcE shell_case_sensitive.cmds; SeLeCt 'second command'")
print result.stderr
print(result.stderr)
assert "Query: uSe FUNCTIONAL" in result.stderr
assert "Query: ShOw TABLES" in result.stderr
assert "alltypes" in result.stdout
# This is from shell_case_sensitive2.cmds, the result of sourcing a file
# from a sourced file.
print result.stderr
print(result.stderr)
assert "SeLeCt 'second command'" in result.stderr
finally:
os.chdir(cwd)

View File

@@ -18,6 +18,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import logging
import os
import socket

View File

@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
from collections import defaultdict
import json
import logging
@@ -155,7 +156,7 @@ class KillableThreadedServer(TServer):
except TTransport.TTransportException as tx:
pass
except Exception as x:
print x
print(x)
itrans.close()
otrans.close()

View File

@@ -33,6 +33,7 @@
#
# ./tests/stress/extract_min_mem.py mem_usage_scaling_runtime_info.json
#
from __future__ import print_function
import json
import sys
@@ -46,4 +47,4 @@ with open(sys.argv[1]) as f:
runtime_info['required_mem_mb_with_spilling']))
results.sort()
print ', '.join(["'Q{0}': {1}".format(num, mem) for num, mem in results])
print(', '.join(["'Q{0}': {1}".format(num, mem) for num, mem in results]))

View File

@@ -20,6 +20,7 @@
# Utility functions used by the stress test to save and load runtime info
# about queries to and from JSON files.
from __future__ import print_function
from collections import defaultdict
import json
import logging

View File

@@ -17,6 +17,7 @@
# Unit tests for the test file parser
from __future__ import print_function
from tests.common.base_test_suite import BaseTestSuite
from tests.util.test_file_parser import parse_test_file_text
@@ -66,7 +67,7 @@ class TestTestFileParser(BaseTestSuite):
def test_valid_parse(self):
results = parse_test_file_text(test_text, VALID_SECTIONS)
assert len(results) == 3
print results[0]
print(results[0])
expected_results = {'QUERY': '# comment\nSELECT blah from Foo\ns\n',
'TYPES': 'string\n', 'RESULTS': "'Hi'\n",
'LINEAGE': "test_lineage_str > 'foo' AND 'bar'\nmulti_line\n"}
@@ -106,6 +107,6 @@ class TestTestFileParser(BaseTestSuite):
"#---- QUERY: TEST_WORKLOAD_Q2\n"
"#SELECT int_col from Bar\n"
"#---- RESULTS\n#231\n#---- TYPES\n#int\n"}
print expected_results
print results[1]
print(expected_results)
print(results[1])
assert results[1] == expected_results

View File

@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
from tests.util.thrift_util import create_transport
from hive_metastore import ThriftHiveMetastore
from hive_metastore.ttypes import (AbortTxnRequest, AllocateTableWriteIdsRequest,
@@ -148,4 +149,4 @@ class AcidTxn(object):
try:
self.commit_txn(txn)
except Exception as e:
print str(e)
print(str(e))

View File

@@ -20,6 +20,7 @@
# It can be used by other tests (e.g. LdapImpylaHttpTest.java) that start a cluster with
# an LDAP server to validate Impyla's functionality.
from __future__ import print_function
import argparse
import logging

View File

@@ -17,6 +17,7 @@
# This module is used for common utilities related to parsing test files
from __future__ import print_function
import codecs
import collections
import logging
@@ -210,8 +211,8 @@ def parse_test_file_text(text, valid_section_names, skip_unknown_sections=True):
if subsection_name not in valid_section_names:
if skip_unknown_sections or not subsection_name:
print sub_section
print 'Unknown section \'%s\'' % subsection_name
print(sub_section)
print('Unknown section \'%s\'' % subsection_name)
continue
else:
raise RuntimeError, 'Unknown subsection: %s' % subsection_name