diff --git a/bin/build_thirdparty.sh b/bin/build_thirdparty.sh
index 77fa2834f..ea26d96bd 100755
--- a/bin/build_thirdparty.sh
+++ b/bin/build_thirdparty.sh
@@ -162,7 +162,7 @@ if [ $BUILD_ALL -eq 1 ] || [ $BUILD_GFLAGS -eq 1 ]; then
   build_preamble $IMPALA_HOME/thirdparty/gflags-${IMPALA_GFLAGS_VERSION} GFlags
   GFLAGS_INSTALL=`pwd`/third-party-install
   ./configure --with-pic --prefix=${GFLAGS_INSTALL}
-  make -j4 install
+  make -j${IMPALA_BUILD_THREADS:-4} install
 fi
 
 # Build pprof
@@ -173,7 +173,7 @@ if [ $BUILD_ALL -eq 1 ] || [ $BUILD_PPROF -eq 1 ]; then
   # we're not compiling the rest of our code to not omit frame pointers but it
   # still seems to generate useful profiling data.
   ./configure --enable-frame-pointers --with-pic
-  make -j4
+  make -j${IMPALA_BUILD_THREADS:-4}
 fi
 
 # Build glog
@@ -189,14 +189,14 @@ logging_unittest-logging_unittest.o : CXXFLAGS= -gstabs -O2
 EOF
   cat Makefile >> Makefile.gcc45sles_workaround
   mv Makefile.gcc45sles_workaround Makefile
-  make -j4
+  make -j${IMPALA_BUILD_THREADS:-4}
 fi
 
 # Build gtest
 if [ $BUILD_ALL -eq 1 ] || [ $BUILD_GTEST -eq 1 ]; then
   build_preamble $IMPALA_HOME/thirdparty/gtest-${IMPALA_GTEST_VERSION} GTest
   cmake .
-  make -j4
+  make -j${IMPALA_BUILD_THREADS:-4}
 fi
 
 # Build Snappy
@@ -217,15 +217,15 @@ fi
 # Build re2
 if [ $BUILD_ALL -eq 1 ] || [ $BUILD_RE2 -eq 1 ]; then
   build_preamble $IMPALA_HOME/thirdparty/re2 RE2
-  make -j4
+  make -j${IMPALA_BUILD_THREADS:-4}
 fi
 
 # Build Ldap
 if [ $BUILD_ALL -eq 1 ] || [ $BUILD_LDAP -eq 1 ]; then
     build_preamble $IMPALA_HOME/thirdparty/openldap-${IMPALA_OPENLDAP_VERSION} Openldap
     ./configure --enable-slapd=no --prefix=`pwd`/impala_install --enable-static --with-pic
-    make -j4
-    make -j4 depend
+    make -j${IMPALA_BUILD_THREADS:-4}
+    make -j${IMPALA_BUILD_THREADS:-4} depend
     make install
 fi
 
@@ -233,5 +233,5 @@ fi
 if [ $BUILD_ALL -eq 1 ] || [ $BUILD_AVRO -eq 1 ]; then
   build_preamble $IMPALA_HOME/thirdparty/avro-c-${IMPALA_AVRO_VERSION} Avro
   cmake .
-  make -j4
+  make -j${IMPALA_BUILD_THREADS:-4}
 fi
diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh
index 2368b88bd..bc82f242f 100755
--- a/bin/run-all-tests.sh
+++ b/bin/run-all-tests.sh
@@ -22,12 +22,12 @@ set -e
 
 . $IMPALA_HOME/bin/set-pythonpath.sh
 
-# Allow picking up strateg from environment
+# Allow picking up strategy from environment
 : ${EXPLORATION_STRATEGY:=core}
 NUM_ITERATIONS=1
 KERB_ARGS=""
 
-. ${IMPALA_HOME}/bin/impala-config.sh
+. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
 if ${CLUSTER_DIR}/admin is_kerberized; then
   KERB_ARGS="--use_kerberos"
 fi
@@ -75,7 +75,7 @@ echo "Split and assign HBase regions"
 # To properly test HBase integeration, HBase regions are split and assigned by this
 # script. Restarting HBase will change the region server assignment. Run split-hbase.sh
 # before running any test.
-${IMPALA_HOME}/testdata/bin/split-hbase.sh
+${IMPALA_HOME}/testdata/bin/split-hbase.sh > /dev/null 2>&1
 
 for i in $(seq 1 $NUM_ITERATIONS)
 do
diff --git a/buildall.sh b/buildall.sh
index 7a4db0df1..0e898b23a 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -27,7 +27,7 @@ if [ ! -z "${MINIKDC_REALM}" ]; then
 fi
 
 export IMPALA_HOME=$ROOT
-. "$ROOT"/bin/impala-config.sh
+. "$ROOT"/bin/impala-config.sh > /dev/null 2>&1
 
 CLEAN_ACTION=1
 TESTDATA_ACTION=0
@@ -205,7 +205,7 @@ fi
 # Stop any running Impala services.
 ${IMPALA_HOME}/bin/start-impala-cluster.py --kill --force
 
-if [ $CLEAN_ACTION -eq 1 ] || [ $FORMAT_METASTORE -eq 1 ] || [ $FORMAT_CLUSTER -eq 1 ]
+if [[ $CLEAN_ACTION -eq 1 || $FORMAT_METASTORE -eq 1 || $FORMAT_CLUSTER -eq 1 ]]
 then
   # Kill any processes that may be accessing postgres metastore. To be safe, this is done
   # before we make any changes to the config files.
@@ -338,23 +338,25 @@ if [ ${TESTS_ACTION} -eq 1 -a \
   exit 1
 fi
 
-if [ $TESTDATA_ACTION -eq 1 ]
-then
-  # create and load test data
+if [ $TESTDATA_ACTION -eq 1 ]; then
+  # Create testdata.
   $IMPALA_HOME/bin/create_testdata.sh
-
   cd $ROOT
-  if [ "$SNAPSHOT_FILE" != "" ]
-  then
-    yes | ${IMPALA_HOME}/testdata/bin/create-load-data.sh $SNAPSHOT_FILE
-  else
-    ${IMPALA_HOME}/testdata/bin/create-load-data.sh
+  # We have three conditions.
+  # - A testdata and metastore snapshot exists.
+  # - Only the testdata snapshot exists.
+  # - Neither of them exist (CREATE_LOAD_DATA_ARGS stays empty).
+  CREATE_LOAD_DATA_ARGS=""
+  if [ -n "$SNAPSHOT_FILE" ] && [ -n "$METASTORE_SNAPSHOT_FILE" ]; then
+    CREATE_LOAD_DATA_ARGS="-snapshot_file ${SNAPSHOT_FILE} -skip_metadata_load"
+  elif [ -n "$SNAPSHOT_FILE" ] && [ -z "$METASTORE_SNAPSHOT_FILE" ]; then
+    CREATE_LOAD_DATA_ARGS="-snapshot_file ${SNAPSHOT_FILE}"
   fi
+  yes | ${IMPALA_HOME}/testdata/bin/create-load-data.sh ${CREATE_LOAD_DATA_ARGS}
 fi
 
-if [ $TESTS_ACTION -eq 1 ]
-then
-    ${IMPALA_HOME}/bin/run-all-tests.sh -e $EXPLORATION_STRATEGY
+if [ $TESTS_ACTION -eq 1 ]; then
+  ${IMPALA_HOME}/bin/run-all-tests.sh -e $EXPLORATION_STRATEGY
 fi
 
 # Generate list of files for Cscope to index
diff --git a/testdata/avro_schema_resolution/create_table.sql b/testdata/avro_schema_resolution/create_table.sql
index f9527f188..e75d389d1 100644
--- a/testdata/avro_schema_resolution/create_table.sql
+++ b/testdata/avro_schema_resolution/create_table.sql
@@ -25,8 +25,8 @@ INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
 OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
 LOCATION '${hiveconf:hive.metastore.warehouse.dir}/avro_schema_resolution_test/';
 
-LOAD DATA LOCAL INPATH 'records1.avro' OVERWRITE INTO TABLE schema_resolution_test;
-LOAD DATA LOCAL INPATH 'records2.avro' INTO TABLE schema_resolution_test;
+LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/avro_schema_resolution/records1.avro' OVERWRITE INTO TABLE schema_resolution_test;
+LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/avro_schema_resolution/records2.avro' INTO TABLE schema_resolution_test;
 
 -- The following tables are used to test Impala's handling of HIVE-6308 which causes
 -- COMPUTE STATS and Hive's ANALYZE TABLE to fail for Avro tables with mismatched
@@ -105,4 +105,4 @@ STORED AS
 INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
 OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
 LOCATION '/test-warehouse/alltypes_avro_snap'
-TBLPROPERTIES ('avro.schema.url'='hdfs://${hiveconf:hive.metastore.warehouse.dir}/avro_schemas/functional/alltypes.json');
\ No newline at end of file
+TBLPROPERTIES ('avro.schema.url'='hdfs://${hiveconf:hive.metastore.warehouse.dir}/avro_schemas/functional/alltypes.json');
diff --git a/testdata/bin/check-schema-diff.sh b/testdata/bin/check-schema-diff.sh
index 5d1fde947..e003a6816 100755
--- a/testdata/bin/check-schema-diff.sh
+++ b/testdata/bin/check-schema-diff.sh
@@ -19,12 +19,11 @@
 #  - 0 implies that the schema diff is emppty, or that a reference githash was not found.
 #  - 1 implies that the schemas have changed.
 
-. ${IMPALA_HOME}/bin/impala-config.sh
+. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
 set -ex
 
-# If /test-warehouse/githash.txt does not exist, exit with a 0
+DATASET=${1-}
 hdfs dfs -test -e  /test-warehouse/githash.txt || { exit 0; }
 GIT_HASH=$(echo $(hdfs dfs -cat /test-warehouse/githash.txt))
 # Check whether a non-empty diff exists.
-# TODO: Make this more granular (on the level of a dataset)
-git diff --exit-code ${GIT_HASH}..HEAD ${IMPALA_HOME}/testdata/datasets
+git diff --exit-code ${GIT_HASH}..HEAD ${IMPALA_HOME}/testdata/datasets/$DATASET
diff --git a/testdata/bin/compute-table-stats.sh b/testdata/bin/compute-table-stats.sh
index 42604dbb5..26d4c0907 100755
--- a/testdata/bin/compute-table-stats.sh
+++ b/testdata/bin/compute-table-stats.sh
@@ -4,7 +4,7 @@
 #
 set -e
 set -u
-. ${IMPALA_HOME}/bin/impala-config.sh
+. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
 
 # Run compute stats over as many of the tables used in the Planner tests as possible.
 python ${IMPALA_HOME}/tests/util/compute_table_stats.py --db_names=functional\
diff --git a/testdata/bin/copy-data-sources.sh b/testdata/bin/copy-data-sources.sh
index ee561e3a0..69295d4e1 100755
--- a/testdata/bin/copy-data-sources.sh
+++ b/testdata/bin/copy-data-sources.sh
@@ -3,7 +3,7 @@
 #
 # This script copies the test data source library into hdfs.
 
-. ${IMPALA_HOME}/bin/impala-config.sh
+. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
 set -e
 
 hadoop fs -mkdir -p /test-warehouse/data-sources/
diff --git a/testdata/bin/copy-udfs-udas.sh b/testdata/bin/copy-udfs-udas.sh
index 9ae80b395..58baa7cf8 100755
--- a/testdata/bin/copy-udfs-udas.sh
+++ b/testdata/bin/copy-udfs-udas.sh
@@ -7,7 +7,7 @@ if [ x${JAVA_HOME} == x ]; then
   echo JAVA_HOME not set
   exit 1
 fi
-. ${IMPALA_HOME}/bin/impala-config.sh
+. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
 set -e
 
 BUILD=0
diff --git a/testdata/bin/create-hbase.sh b/testdata/bin/create-hbase.sh
index f9a696b75..73cc0249b 100755
--- a/testdata/bin/create-hbase.sh
+++ b/testdata/bin/create-hbase.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # Copyright (c) 2012 Cloudera, Inc. All rights reserved.
 
-. ${IMPALA_HOME}/bin/impala-config.sh
+. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
 
 # To work around the HBase bug (HBASE-4467), unset $HADOOP_HOME before calling hbase
 HADOOP_HOME=
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index b35d12702..aee23df61 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -24,219 +24,309 @@
 # For more information look at testdata/bin/load-test-warehouse-snapshot.sh and
 # bin/load-data.py
 
-. ${IMPALA_HOME}/bin/impala-config.sh
-set -ex
-
-# Setup for HDFS caching
-${IMPALA_HOME}/testdata/bin/setup-hdfs-caching.sh
-
-# If the user has specified a command line argument, treat it as the test-warehouse
-# snapshot file and pass it to the load-test-warehouse-snapshot.sh script for processing.
-if [[ $1 ]]; then
-  ${IMPALA_HOME}/testdata/bin/load-test-warehouse-snapshot.sh "$1"
-else
-  echo "Loading hive builtins"
-  ${IMPALA_HOME}/testdata/bin/load-hive-builtins.sh
-
-  echo "Generating HBase data"
-  ${IMPALA_HOME}/testdata/bin/create-hbase.sh
-fi
-set -u
+# Exit on error.
+set -e
 
+. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
+SKIP_METADATA_LOAD=0
+SKIP_SNAPSHOT_LOAD=0
+SNAPSHOT_FILE=""
+LOAD_DATA_ARGS=""
+JDBC_URL="jdbc:hive2://localhost:11050/default;"
 DATA_LOADING_LOG_DIR=${IMPALA_TEST_CLUSTER_LOG_DIR}/data_loading
 mkdir -p ${DATA_LOADING_LOG_DIR}
 
-# Copy the test data source library into HDFS
-${IMPALA_HOME}/testdata/bin/copy-data-sources.sh
+while [ -n "$*" ]; do
+  case $1 in
+    -skip_metadata_load)
+      SKIP_METADATA_LOAD=1
+      ;;
+    -skip_snapshot_load)
+      SKIP_SNAPSHOT_LOAD=1
+      ;;
+    -snapshot_file)
+      # Quoted test: a missing or empty argument must be rejected, not slip past '[ ! -f ]'.
+      SNAPSHOT_FILE=${2-}
+      if [ ! -f "$SNAPSHOT_FILE" ]; then
+        echo "-snapshot_file does not exist: $SNAPSHOT_FILE"
+        exit 1;
+      fi
+      shift;
+      ;;
+    -help|-h|*)
+      echo "create-load-data.sh : Creates data and loads from scratch"
+      echo "[-skip_metadata_load] : Skips loading of metadata"
+      echo "[-skip_snapshot_load] : Assumes that the snapshot is already loaded"
+      echo "[-snapshot_file] : Loads the test warehouse snapshot into hdfs"
+      exit 1;
+      ;;
+    esac
+  shift;
+done
 
-# If a schema change is detected, force load the data.
-set +e
-LOAD_DATA_ARGS=""
-${IMPALA_HOME}/testdata/bin/check-schema-diff.sh
-if [[ $? -eq 1 ]]; then
-  LOAD_DATA_ARGS="--force"
+if [[ $SKIP_METADATA_LOAD -eq 0  && "$SNAPSHOT_FILE" = "" ]]; then
+  echo "Loading Hive Builtins"
+  ${IMPALA_HOME}/testdata/bin/load-hive-builtins.sh
+  echo "Generating HBase data"
+  ${IMPALA_HOME}/testdata/bin/create-hbase.sh &> ${DATA_LOADING_LOG_DIR}/create-hbase.log
+elif [ $SKIP_SNAPSHOT_LOAD -eq 0 ]; then
+  echo Loading hdfs data from snapshot: $SNAPSHOT_FILE
+  ${IMPALA_HOME}/testdata/bin/load-test-warehouse-snapshot.sh "$SNAPSHOT_FILE"
+  # Don't skip the metadata load if a schema change is detected.
+  if ! ${IMPALA_HOME}/testdata/bin/check-schema-diff.sh; then
+    echo "Schema change detected, metadata will be loaded."
+    SKIP_METADATA_LOAD=0
+  fi
+else
+  # hdfs data already exists, don't load it.
+  echo Skipping loading data to hdfs.
 fi
 
+function load-custom-schemas {
+  echo LOADING CUSTOM SCHEMAS
+  SCHEMA_SRC_DIR=${IMPALA_HOME}/testdata/data/schemas
+  SCHEMA_DEST_DIR=/test-warehouse/schemas
+  # clean the old schemas directory.
+  hadoop fs -rm -r -f ${SCHEMA_DEST_DIR}
+  hadoop fs -mkdir ${SCHEMA_DEST_DIR}
+  hadoop fs -put $SCHEMA_SRC_DIR/zipcode_incomes.parquet ${SCHEMA_DEST_DIR}/
+  hadoop fs -put $SCHEMA_SRC_DIR/unsupported.parquet ${SCHEMA_DEST_DIR}/
+  hadoop fs -put $SCHEMA_SRC_DIR/map.parquet ${SCHEMA_DEST_DIR}/
+  hadoop fs -put $SCHEMA_SRC_DIR/array.parquet ${SCHEMA_DEST_DIR}/
+  hadoop fs -put $SCHEMA_SRC_DIR/struct.parquet ${SCHEMA_DEST_DIR}/
+  hadoop fs -put $SCHEMA_SRC_DIR/alltypestiny.parquet ${SCHEMA_DEST_DIR}/
+  hadoop fs -put $SCHEMA_SRC_DIR/malformed_decimal_tiny.parquet ${SCHEMA_DEST_DIR}/
+  hadoop fs -put $SCHEMA_SRC_DIR/decimal.parquet ${SCHEMA_DEST_DIR}/
+
+  # CHAR and VARCHAR tables written by Hive
+  hadoop fs -mkdir -p /test-warehouse/chars_formats_avro_snap/
+  hadoop fs -put -f ${IMPALA_HOME}/testdata/data/chars-formats.avro \
+    /test-warehouse/chars_formats_avro_snap
+  hadoop fs -mkdir -p /test-warehouse/chars_formats_parquet/
+  hadoop fs -put -f ${IMPALA_HOME}/testdata/data/chars-formats.parquet \
+    /test-warehouse/chars_formats_parquet
+  hadoop fs -mkdir -p /test-warehouse/chars_formats_text/
+  hadoop fs -put -f ${IMPALA_HOME}/testdata/data/chars-formats.txt \
+    /test-warehouse/chars_formats_text
+}
+
+# Loads one workload through bin/load-data.py, logging to DATA_LOADING_LOG_DIR.
+# Args: $1 workload, $2 exploration strategy (default "core"), $3 table formats.
+function load-data {
+  WORKLOAD=${1}
+  EXPLORATION_STRATEGY=${2:-"core"}
+  TABLE_FORMATS=${3:-}
+  MSG="Loading workload '$WORKLOAD' Using exploration strategy '$EXPLORATION_STRATEGY'"
+  ARGS=("--workloads $WORKLOAD" "-e $EXPLORATION_STRATEGY")
+  if [ -n "$TABLE_FORMATS" ]; then
+    MSG+=" in table formats '$TABLE_FORMATS'"
+    ARGS+=("--table_formats $TABLE_FORMATS")
+  fi
+  # Quoted test: LOAD_DATA_ARGS may hold several words (e.g. the kerberos flags).
+  if [ -n "$LOAD_DATA_ARGS" ]; then
+    ARGS+=("$LOAD_DATA_ARGS")
+  fi
+  # functional-query is unique. The dataset name is not the same as the workload name.
+  if [ "${WORKLOAD}" = "functional-query" ]; then
+    WORKLOAD="functional"
+  fi
+  # Force load the dataset if we detect a schema change.
+  if ! ${IMPALA_HOME}/testdata/bin/check-schema-diff.sh $WORKLOAD; then
+    ARGS+=("--force")
+    echo "Force loading $WORKLOAD because a schema change was detected"
+  fi
+  LOG_FILE=${DATA_LOADING_LOG_DIR}/data-load-${WORKLOAD}-${EXPLORATION_STRATEGY}.log
+  echo "$MSG. Logging to ${LOG_FILE}"
+  # Unbuffered logging via 'python -u'. ARGS stays unquoted so "flag value" items split.
+  python -u ${IMPALA_HOME}/bin/load-data.py ${ARGS[@]} &> ${LOG_FILE}
+}
+
+function cache-test-tables {
+  echo CACHING  tpch.nation AND functional.alltypestiny
+  # uncaching the tables first makes this operation idempotent.
+  ${IMPALA_HOME}/bin/impala-shell.sh -q "alter table functional.alltypestiny set uncached"
+  ${IMPALA_HOME}/bin/impala-shell.sh -q "alter table tpch.nation set uncached"
+  ${IMPALA_HOME}/bin/impala-shell.sh -q "alter table tpch.nation set cached in 'testPool'"
+  ${IMPALA_HOME}/bin/impala-shell.sh -q\
+    "alter table functional.alltypestiny set cached in 'testPool'"
+}
+
+function load-aux-workloads {
+  echo LOADING AUXILIARY WORKLOADS
+  LOG_FILE=${DATA_LOADING_LOG_DIR}/data-load-auxiliary-workloads-core.log
+  rm -f $LOG_FILE
+  # Load all the auxiliary workloads (if any exist). The paths are quoted so that an
+  # unset or empty variable fails the -d test instead of collapsing to a true '[ -d ]'.
+  if [ -d "${IMPALA_AUX_WORKLOAD_DIR}" ] && [ -d "${IMPALA_AUX_DATASET_DIR}" ]; then
+    python -u ${IMPALA_HOME}/bin/load-data.py --workloads all\
+        --workload_dir=${IMPALA_AUX_WORKLOAD_DIR} --dataset_dir=${IMPALA_AUX_DATASET_DIR}\
+        --exploration_strategy=core ${LOAD_DATA_ARGS} &>> $LOG_FILE
+  else
+    echo "Skipping load of auxilary workloads because directories do not exist"
+  fi
+}
+
+function copy-auth-policy {
+  echo COPYING AUTHORIZATION POLICY FILE
+  hadoop fs -rm -f /test-warehouse/authz-policy.ini
+  hadoop fs -put ${IMPALA_HOME}/fe/src/test/resources/authz-policy.ini /test-warehouse/
+}
+
+function copy-and-load-dependent-tables {
+  # COPY
+  # TODO: The multi-format table will move these files. So we need to copy them to a
+  # temporary location for that table to use. Should find a better way to handle this.
+  echo COPYING AND LOADING DATA FOR DEPENDENT TABLES
+  hadoop fs -rm -r -f /test-warehouse/alltypesmixedformat
+  hadoop fs -rm -r -f /tmp/alltypes_rc
+  hadoop fs -rm -r -f /tmp/alltypes_seq
+  hadoop fs -mkdir -p /tmp/alltypes_seq/year=2009
+  hadoop fs -mkdir -p /tmp/alltypes_rc/year=2009
+  hadoop fs -cp  /test-warehouse/alltypes_seq/year=2009/month=2/ /tmp/alltypes_seq/year=2009
+  hadoop fs -cp  /test-warehouse/alltypes_rc/year=2009/month=3/ /tmp/alltypes_rc/year=2009
+
+  # Create a hidden file in AllTypesSmall
+  hadoop fs -rm -f /test-warehouse/alltypessmall/year=2009/month=1/_hidden
+  hadoop fs -rm -f /test-warehouse/alltypessmall/year=2009/month=1/.hidden
+  hadoop fs -cp  /test-warehouse/zipcode_incomes/DEC_00_SF3_P077_with_ann_noheader.csv \
+   /test-warehouse/alltypessmall/year=2009/month=1/_hidden
+  hadoop fs -cp  /test-warehouse/zipcode_incomes/DEC_00_SF3_P077_with_ann_noheader.csv \
+   /test-warehouse/alltypessmall/year=2009/month=1/.hidden
+
+  # For tables that rely on loading data from local fs test-warehouse
+  # TODO: Find a good way to integrate this with the normal data loading scripts
+  beeline -n $USER -u "${JDBC_URL}" -f\
+    ${IMPALA_HOME}/testdata/bin/load-dependent-tables.sql
+}
+
+function create-internal-hbase-table {
+  echo CREATING INTERNAL HBASE TABLE
+  # TODO: For some reason DROP TABLE IF EXISTS sometimes fails on HBase if the table does
+  # not exist. To work around this, disable exit on error before executing this command.
+  # Need to investigate this more, but this works around the problem to unblock automation.
+  set +o errexit
+  beeline -n $USER -u "${JDBC_URL}" -e\
+    "DROP TABLE IF EXISTS functional_hbase.internal_hbase_table"
+  echo "disable 'functional_hbase.internal_hbase_table'" | hbase shell
+  echo "drop 'functional_hbase.internal_hbase_table'" | hbase shell
+  set -e
+  # Used by CatalogTest to confirm that non-external HBase tables are identified
+  # correctly (IMP-581)
+  # Note that the usual 'hbase.table.name' property is not specified to avoid
+  # creating tables in HBase as a side-effect.
+  cat > /tmp/create-hbase-internal.sql << EOF
+    CREATE TABLE functional_hbase.internal_hbase_table(key int, value string)
+    STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+    WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val");
+EOF
+  beeline -n $USER -u "${JDBC_URL}" -f /tmp/create-hbase-internal.sql
+  rm -f /tmp/create-hbase-internal.sql
+}
+
+function load-custom-data {
+  echo LOADING CUSTOM DATA
+  # Load the index files for corrupted lzo data.
+  hadoop fs -rm -f /test-warehouse/bad_text_lzo_text_lzo/bad_text.lzo.index
+  hadoop fs -put ${IMPALA_HOME}/testdata/bad_text_lzo/bad_text.lzo.index \
+      /test-warehouse/bad_text_lzo_text_lzo/
+
+  hadoop fs -rm -r -f /bad_text_lzo_text_lzo/
+  hadoop fs -mv /test-warehouse/bad_text_lzo_text_lzo/ /
+  # Cleanup the old bad_text_lzo files, if they exist.
+  hadoop fs -rm -r -f /test-warehouse/bad_text_lzo/
+
+  # Index all lzo files in HDFS under /test-warehouse
+  ${IMPALA_HOME}/testdata/bin/lzo_indexer.sh /test-warehouse
+
+  hadoop fs -mv /bad_text_lzo_text_lzo/ /test-warehouse/
+
+  # IMPALA-694: data file produced by parquet-mr version 1.2.5-cdh4.5.0
+  hadoop fs -put -f ${IMPALA_HOME}/testdata/data/bad_parquet_data.parquet \
+                    /test-warehouse/bad_parquet_parquet
+
+  # Data file produced by parquet-mr with repeated values (produces 0 bit width dictionary)
+  hadoop fs -put -f ${IMPALA_HOME}/testdata/data/repeated_values.parquet \
+                    /test-warehouse/bad_parquet_parquet
+
+  # IMPALA-720: data file produced by parquet-mr with multiple row groups
+  hadoop fs -put -f ${IMPALA_HOME}/testdata/data/multiple_rowgroups.parquet \
+                    /test-warehouse/bad_parquet_parquet
+
+  # IMPALA-1401: data file produced by Hive 13 containing page statistics with long min/max
+  # string values
+  hadoop fs -put -f ${IMPALA_HOME}/testdata/data/long_page_header.parquet \
+                    /test-warehouse/bad_parquet_parquet
+
+  # Remove an index file so we test an un-indexed LZO file
+  hadoop fs -rm /test-warehouse/alltypes_text_lzo/year=2009/month=1/000000_0.lzo.index
+
+  # Add a sequence file that only contains a header (see IMPALA-362)
+  hadoop fs -put -f ${IMPALA_HOME}/testdata/tinytable_seq_snap/tinytable_seq_snap_header_only \
+                    /test-warehouse/tinytable_seq_snap
+
+  beeline -n $USER -u "${JDBC_URL}" -f\
+    ${IMPALA_HOME}/testdata/avro_schema_resolution/create_table.sql
+}
+
+function build-and-copy-hive-udfs {
+  # Build the test Hive UDFs
+  pushd ${IMPALA_HOME}/tests/test-hive-udfs
+  mvn clean package
+  popd
+  # Copy the test UDF/UDA libraries into HDFS
+  ${IMPALA_HOME}/testdata/bin/copy-udfs-udas.sh
+}
+
+function copy-and-load-ext-data-source {
+  # Copy the test data source library into HDFS
+  ${IMPALA_HOME}/testdata/bin/copy-data-sources.sh
+  # Create data sources table.
+  ${IMPALA_HOME}/bin/impala-shell.sh -f\
+    ${IMPALA_HOME}/testdata/bin/create-data-source-table.sql
+}
+
+
+# Enable debug logging.
+set -x
+
+
 # For kerberized clusters, use kerberos
 if ${CLUSTER_DIR}/admin is_kerberized; then
   LOAD_DATA_ARGS="${LOAD_DATA_ARGS} --use_kerberos --principal=${MINIKDC_PRINC_HIVE}"
 fi
 
-set -e
+# Start Impala
+${IMPALA_HOME}/bin/start-impala-cluster.py -s 3 --log_dir=${DATA_LOADING_LOG_DIR}
+${IMPALA_HOME}/testdata/bin/setup-hdfs-caching.sh
 
-# Load schemas
-hadoop fs -rm -r -f /test-warehouse/schemas
-hadoop fs -mkdir /test-warehouse/schemas
-hadoop fs -put ${IMPALA_HOME}/testdata/data/schemas/zipcode_incomes.parquet \
-  /test-warehouse/schemas/
-hadoop fs -put ${IMPALA_HOME}/testdata/data/schemas/unsupported.parquet \
-  /test-warehouse/schemas/
-hadoop fs -put ${IMPALA_HOME}/testdata/data/schemas/map.parquet \
-  /test-warehouse/schemas/
-hadoop fs -put ${IMPALA_HOME}/testdata/data/schemas/array.parquet \
-  /test-warehouse/schemas/
-hadoop fs -put ${IMPALA_HOME}/testdata/data/schemas/struct.parquet \
-  /test-warehouse/schemas/
-hadoop fs -put ${IMPALA_HOME}/testdata/data/schemas/alltypestiny.parquet \
-  /test-warehouse/schemas/
-hadoop fs -put ${IMPALA_HOME}/testdata/data/schemas/malformed_decimal_tiny.parquet \
-  /test-warehouse/schemas/
-hadoop fs -put ${IMPALA_HOME}/testdata/data/schemas/decimal.parquet \
-  /test-warehouse/schemas/
-
-# CHAR and VARCHAR tables written by Hive
-hadoop fs -mkdir -p /test-warehouse/chars_formats_avro_snap/
-hadoop fs -put -f ${IMPALA_HOME}/testdata/data/chars-formats.avro \
-  /test-warehouse/chars_formats_avro_snap
-hadoop fs -mkdir -p /test-warehouse/chars_formats_parquet/
-hadoop fs -put -f ${IMPALA_HOME}/testdata/data/chars-formats.parquet \
-  /test-warehouse/chars_formats_parquet
-hadoop fs -mkdir -p /test-warehouse/chars_formats_text/
-hadoop fs -put -f ${IMPALA_HOME}/testdata/data/chars-formats.txt \
-  /test-warehouse/chars_formats_text
-
-# Load the data set
-pushd ${IMPALA_HOME}/bin
-./start-impala-cluster.py -s 3 --wait_for_cluster --log_dir=${DATA_LOADING_LOG_DIR}
-
-function load-data {
-  WORKLOAD=$1
-  EXPLORATION_STRATEGY=$2
-  LOG_FILE=${DATA_LOADING_LOG_DIR}/data-load-${WORKLOAD}-${EXPLORATION_STRATEGY}.log
-  echo "Loading workload: ${WORKLOAD} (${EXPLORATION_STRATEGY}). Logging to: "\
-      "${LOG_FILE}"
-  # Use unbuffered logging by executing with 'python -u'
-  python -u ./load-data.py --workloads ${WORKLOAD} \
-      --exploration_strategy ${EXPLORATION_STRATEGY} ${LOAD_DATA_ARGS} &> ${LOG_FILE}
-}
-
-load-data "functional-query" "exhaustive"
-load-data "tpch" "core"
-load-data "tpcds" "core"
-
-# Cache test tables
-./impala-shell.sh -q "alter table tpch.nation set cached in 'testPool'"
-./impala-shell.sh -q "alter table functional.alltypestiny set cached in 'testPool'"
-
-# Load the test data source and table
-./impala-shell.sh -f ${IMPALA_HOME}/testdata/bin/create-data-source-table.sql
-# Load all the auxiliary workloads (if any exist)
-if [ -d ${IMPALA_AUX_WORKLOAD_DIR} ] && [ -d ${IMPALA_AUX_DATASET_DIR} ]; then
-  python -u ./load-data.py --workloads all --workload_dir=${IMPALA_AUX_WORKLOAD_DIR}\
-      --dataset_dir=${IMPALA_AUX_DATASET_DIR} --exploration_strategy core \
-      ${LOAD_DATA_ARGS}
+if [ $SKIP_METADATA_LOAD -eq 0 ]; then
+  # load custom schemas
+  load-custom-schemas
+  # load functional/tpcds/tpch
+  load-data "functional-query" "exhaustive"
+  load-data "tpch" "core"
+  load-data "tpcds" "core"
+  load-aux-workloads
+  copy-and-load-dependent-tables
+  load-custom-data
+  ${IMPALA_HOME}/testdata/bin/create-table-many-blocks.sh -p 1234 -b 1
+  # Hive UDFs are built and copied once by the unconditional call further down.
 else
-  echo "Skipping load of auxilary workloads because directories do not exist"
+  echo "Skipped loading the metadata. Loading HBase."
+  load-data "functional-query" "core" "hbase/none"
 fi
-popd
 
-# Create a table w/ 1234 partitions. Used to validate fetching/updating partitions in
-# batches.
-${IMPALA_HOME}/testdata/bin/create-table-many-blocks.sh -p 1234 -b 1
 
-# Split HBase table
-echo "Splitting HBase table"
-${IMPALA_HOME}/testdata/bin/split-hbase.sh
-
-echo COPYING AUTHORIZATION POLICY FILE
-hadoop fs -rm -f /test-warehouse/authz-policy.ini
-hadoop fs -put ${IMPALA_HOME}/fe/src/test/resources/authz-policy.ini /test-warehouse/
-
-# TODO: The multi-format table will move these files. So we need to copy them to a
-# temporary location for that table to use. Should find a better way to handle this.
-echo COPYING DATA FOR DEPENDENT TABLES
-hadoop fs -rm -r -f /test-warehouse/alltypesmixedformat
-hadoop fs -rm -r -f /tmp/alltypes_rc
-hadoop fs -rm -r -f /tmp/alltypes_seq
-hadoop fs -mkdir -p /tmp/alltypes_seq/year=2009
-hadoop fs -mkdir -p /tmp/alltypes_rc/year=2009
-hadoop fs -cp  /test-warehouse/alltypes_seq/year=2009/month=2/ /tmp/alltypes_seq/year=2009
-hadoop fs -cp  /test-warehouse/alltypes_rc/year=2009/month=3/ /tmp/alltypes_rc/year=2009
-
-# Create a hidden file in AllTypesSmall
-hadoop fs -rm -f /test-warehouse/alltypessmall/year=2009/month=1/_hidden
-hadoop fs -rm -f /test-warehouse/alltypessmall/year=2009/month=1/.hidden
-hadoop fs -cp  /test-warehouse/zipcode_incomes/DEC_00_SF3_P077_with_ann_noheader.csv \
- /test-warehouse/alltypessmall/year=2009/month=1/_hidden
-hadoop fs -cp  /test-warehouse/zipcode_incomes/DEC_00_SF3_P077_with_ann_noheader.csv \
- /test-warehouse/alltypessmall/year=2009/month=1/.hidden
-
-# Configure alltypes_seq as a read-only table
+# Configure alltypes_seq as a read-only table. This is required for fe tests.
 hadoop fs -chmod -R 444 /test-warehouse/alltypes_seq/year=2009/month=1
 hadoop fs -chmod -R 444 /test-warehouse/alltypes_seq/year=2009/month=3
-
-# TODO: For some reason DROP TABLE IF EXISTS sometimes fails on HBase if the table does
-# not exist. To work around this, disable exit on error before executing this command.
-# Need to investigate this more, but this works around the problem to unblock automation.
-set +o errexit
-${HIVE_HOME}/bin/hive -hiveconf hive.root.logger=WARN,console -v \
-    -e "DROP TABLE IF EXISTS functional_hbase.internal_hbase_table"
-echo "disable 'functional_hbase.internal_hbase_table'" | hbase shell
-echo "drop 'functional_hbase.internal_hbase_table'" | hbase shell
-set -e
-
-# For tables that rely on loading data from local fs test-warehouse
-# TODO: Find a good way to integrate this with the normal data loading scripts
-${HIVE_HOME}/bin/hive -hiveconf hive.root.logger=WARN,console -v \
-  -f ${IMPALA_HOME}/testdata/bin/load-dependent-tables.sql
-if [ $? != 0 ]; then
-  echo DEPENDENT LOAD FAILED
-  exit 1
-fi
-
-# Load the index files for corrupted lzo data.
-hadoop fs -rm -f /test-warehouse/bad_text_lzo_text_lzo/bad_text.lzo.index
-hadoop fs -put ${IMPALA_HOME}/testdata/bad_text_lzo/bad_text.lzo.index \
-    /test-warehouse/bad_text_lzo_text_lzo/
-
-hadoop fs -rm -r -f /bad_text_lzo_text_lzo/
-hadoop fs -mv /test-warehouse/bad_text_lzo_text_lzo/ /
-# Cleanup the old bad_text_lzo files, if they exist.
-hadoop fs -rm -r -f /test-warehouse/bad_text_lzo/
-
-# Index all lzo files in HDFS under /test-warehouse
-${IMPALA_HOME}/testdata/bin/lzo_indexer.sh /test-warehouse
-
-hadoop fs -mv /bad_text_lzo_text_lzo/ /test-warehouse/
-
-# IMPALA-694: data file produced by parquet-mr version 1.2.5-cdh4.5.0
-hadoop fs -put -f ${IMPALA_HOME}/testdata/data/bad_parquet_data.parquet \
-                  /test-warehouse/bad_parquet_parquet
-
-# Data file produced by parquet-mr with repeated values (produces 0 bit width dictionary)
-hadoop fs -put -f ${IMPALA_HOME}/testdata/data/repeated_values.parquet \
-                  /test-warehouse/bad_parquet_parquet
-
-# IMPALA-720: data file produced by parquet-mr with multiple row groups
-hadoop fs -put -f ${IMPALA_HOME}/testdata/data/multiple_rowgroups.parquet \
-                  /test-warehouse/bad_parquet_parquet
-
-# IMPALA-1401: data file produced by Hive 13 containing page statistics with long min/max
-# string values
-hadoop fs -put -f ${IMPALA_HOME}/testdata/data/long_page_header.parquet \
-                  /test-warehouse/bad_parquet_parquet
-
-# Remove an index file so we test an un-indexed LZO file
-hadoop fs -rm /test-warehouse/alltypes_text_lzo/year=2009/month=1/000000_0.lzo.index
-
-# Add a sequence file that only contains a header (see IMPALA-362)
-hadoop fs -put -f ${IMPALA_HOME}/testdata/tinytable_seq_snap/tinytable_seq_snap_header_only \
-                  /test-warehouse/tinytable_seq_snap
-
-# Create special table for testing Avro schema resolution
-# (see testdata/avro_schema_resolution/README)
-pushd ${IMPALA_HOME}/testdata/avro_schema_resolution
-hive -f create_table.sql
-popd
-
+cache-test-tables
+copy-and-load-ext-data-source
+# The tests need the built hive-udfs jar on the local fs
+build-and-copy-hive-udfs
+${IMPALA_HOME}/testdata/bin/split-hbase.sh > /dev/null 2>&1
+create-internal-hbase-table
+# TODO: Investigate why all stats are not preserved. Theoretically, we only need to
+# recompute stats for HBase.
 ${IMPALA_HOME}/testdata/bin/compute-table-stats.sh
-
-# Build the test Hive UDFs
-pushd ${IMPALA_HOME}/tests/test-hive-udfs
-mvn clean package
-popd
-
-# Copy the test UDF/UDA libraries into HDFS
-${IMPALA_HOME}/testdata/bin/copy-udfs-udas.sh
-
-${IMPALA_HOME}/bin/start-impala-cluster.py --kill_only
+copy-auth-policy
diff --git a/testdata/bin/create-table-many-blocks.sh b/testdata/bin/create-table-many-blocks.sh
index a834bed5a..613f96f0e 100755
--- a/testdata/bin/create-table-many-blocks.sh
+++ b/testdata/bin/create-table-many-blocks.sh
@@ -20,7 +20,7 @@
 # way a table with 100K blocks can be created by using 100 partitions x 1000
 # blocks/files.
 
-. ${IMPALA_HOME}/bin/impala-config.sh
+. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
 set -e
 set -u
 
diff --git a/testdata/bin/load-dependent-tables.sql b/testdata/bin/load-dependent-tables.sql
index b6b752e52..7bb5e1fd6 100644
--- a/testdata/bin/load-dependent-tables.sql
+++ b/testdata/bin/load-dependent-tables.sql
@@ -49,15 +49,6 @@ ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
 ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=3)
   SET FILEFORMAT RCFILE;
 
-----
--- Used by CatalogTest to confirm that non-external HBase tables are identified
--- correctly (IMP-581) 
--- Note that the usual 'hbase.table.name' property is not specified to avoid
--- creating tables in HBase as a side-effect.
-CREATE TABLE functional_hbase.internal_hbase_table(key int, value string)
-STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
-WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val");
-
 ---- Unsupported Impala table types
 USE functional;
 CREATE VIEW IF NOT EXISTS hive_view AS SELECT 1 AS int_col FROM alltypes limit 1;
diff --git a/testdata/bin/load-hive-builtins.sh b/testdata/bin/load-hive-builtins.sh
index 639c97a63..a3e647f19 100755
--- a/testdata/bin/load-hive-builtins.sh
+++ b/testdata/bin/load-hive-builtins.sh
@@ -1,14 +1,14 @@
 #!/bin/bash
 
-. ${IMPALA_HOME}/bin/impala-config.sh
+. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
 
 # TODO: remove this once we understand why Hive looks in HDFS for many of its jars
-${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HIVE_HOME}/lib/ 
-${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HIVE_HOME}/lib/ 
+${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HIVE_HOME}/lib/
+${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HIVE_HOME}/lib/
 ${HADOOP_HOME}/bin/hadoop fs -put ${HIVE_HOME}/lib/*.jar ${HIVE_HOME}/lib/
 
-${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HBASE_HOME}/lib/ 
-${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HBASE_HOME}/lib/ 
+${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HBASE_HOME}/lib/
+${HADOOP_HOME}/bin/hadoop fs -mkdir -p ${HBASE_HOME}/lib/
 ${HADOOP_HOME}/bin/hadoop fs -put ${HBASE_HOME}/lib/*.jar ${HBASE_HOME}/lib/
 
 ${HADOOP_HOME}/bin/hadoop fs -rm -r -f ${HADOOP_HOME}/share/hadoop/common/
diff --git a/testdata/bin/load-metastore-snapshot.sh b/testdata/bin/load-metastore-snapshot.sh
index ecc187dfe..89a2c4135 100755
--- a/testdata/bin/load-metastore-snapshot.sh
+++ b/testdata/bin/load-metastore-snapshot.sh
@@ -53,6 +53,10 @@ dropdb -U hiveuser hive_impala
 createdb -U hiveuser hive_impala
 # Copy the contents of the SNAPSHOT_FILE
 psql -U hiveuser hive_impala < ${SNAPSHOT_FILE} > /dev/null 2>&1
-
-
-
+# Two tables (tpch.nation and functional.alltypestiny) have cache_directive_id set in
+# their metadata. These directives are now stale, and will cause any query that attempts
+# to cache the data in the tables to fail.
+psql -U hiveuser -d hive_impala -c \
+  "delete from \"TABLE_PARAMS\" where \"PARAM_KEY\"='cache_directive_id'"
+psql -U hiveuser -d hive_impala -c \
+  "delete from \"PARTITION_PARAMS\" where \"PARAM_KEY\"='cache_directive_id'"
diff --git a/testdata/bin/load-test-warehouse-snapshot.sh b/testdata/bin/load-test-warehouse-snapshot.sh
index af94237a4..415241e6e 100755
--- a/testdata/bin/load-test-warehouse-snapshot.sh
+++ b/testdata/bin/load-test-warehouse-snapshot.sh
@@ -20,7 +20,7 @@
 # NOTE: Running this script will remove your existing test-warehouse directory. Be sure
 # to backup any data you need before running this script.
 
-. ${IMPALA_HOME}/bin/impala-config.sh
+. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
 TEST_WAREHOUSE_HDFS_DIR=/test-warehouse
 
 if [[ ! $1 ]]; then
@@ -64,6 +64,11 @@ mkdir ${SNAPSHOT_STAGING_DIR}
 echo "Extracting tarball"
 tar -C ${SNAPSHOT_STAGING_DIR} -xzf ${SNAPSHOT_FILE}
 
+if [ ! -f ${SNAPSHOT_STAGING_DIR}/test-warehouse/githash.txt ]; then
+  echo "The test-warehouse snapshot does not contain a githash, aborting load"
+  exit 1
+fi
+
 echo "Copying data to HDFS"
 hadoop fs -put ${SNAPSHOT_STAGING_DIR}/test-warehouse/* ${TEST_WAREHOUSE_HDFS_DIR}
 
diff --git a/testdata/workloads/tpch/queries/tpch-q15.test b/testdata/workloads/tpch/queries/tpch-q15.test
index b586607f2..73634044b 100644
--- a/testdata/workloads/tpch/queries/tpch-q15.test
+++ b/testdata/workloads/tpch/queries/tpch-q15.test
@@ -35,4 +35,4 @@ order by
 8449,'Supplier#000008449','Wp34zim9qYFbVctdW','20-469-856-8873',1772627.2087
 ---- TYPES
 BIGINT, STRING, STRING, STRING, DECIMAL
-====
\ No newline at end of file
+====
diff --git a/tests/util/compute_table_stats.py b/tests/util/compute_table_stats.py
index 14bddf9a2..e4b2e2ab8 100755
--- a/tests/util/compute_table_stats.py
+++ b/tests/util/compute_table_stats.py
@@ -29,17 +29,10 @@ def compute_stats(impala_client, db_names=None, table_names=None,
 
   all_dbs = set(name.lower() for name in impala_client.execute("show databases").data)
   selected_dbs = all_dbs if db_names is None else set(db_names)
-  if db_names is not None:
-    print 'Skipping compute stats on databases:\n%s' % '\n'.join(all_dbs - selected_dbs)
-
   for db in all_dbs.intersection(selected_dbs):
     all_tables =\
         set([t.lower() for t in impala_client.execute("show tables in %s" % db).data])
     selected_tables = all_tables if table_names is None else set(table_names)
-    if table_names:
-      print 'Skipping compute stats on tables:\n%s' %\
-          '\n'.join(['%s.%s' % (db, tbl)  for tbl in all_tables - selected_tables])
-
     for table in all_tables.intersection(selected_tables):
       statement = "compute stats %s.%s" % (db, table)
       print 'Executing: %s' % statement