mirror of
https://github.com/apache/impala.git
synced 2026-01-07 18:02:33 -05:00
This change moves (almost) all the functional data loading to the new data loading framework. This removes the need for the create.sql, load.sql, and load-raw-data.sql files. Instead we just have a single schema template file: testdata/datasets/functional/functional_schema_template.sql. This template can be used to generate the schema for all file formats and compression variations. It should also help make loading data easier. Now you can run: bin/load-impala-data.sh "query-test" "exhaustive" and get all the data needed for running the query tests. This change also includes the initial changes for the new dataset/workload directory structure. The new structure looks like: testdata/workload <- Will contain query files and test vectors/dimensions; testdata/datasets <- Will contain the data files and schema templates. Note: This is the first part of the change to this directory structure - it's not yet complete. # Please enter the commit message for your changes. Lines starting
45 lines
1.3 KiB
Bash
Executable File
45 lines
1.3 KiB
Bash
Executable File
#!/bin/bash
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#
# Loads the "functional" (exhaustive) and "tpch" (core) data sets through
# ${IMPALA_HOME}/bin/load-data.sh, stages copies of two alltypes partitions
# under /tmp in the Hadoop filesystem, and finally runs
# ${IMPALA_HOME}/testdata/bin/load-dependent-tables.sql through Hive.
#
# Environment variables read: JAVA_HOME, IMPALA_HOME, HIVE_HOME.
# Exits with status 1 (after printing a message) when a load step fails.

if [ "x${JAVA_HOME}" == "x" ]; then
  echo JAVA_HOME not set
  exit 1
fi

# Without IMPALA_HOME the pushd below would silently land in /bin.
if [ "x${IMPALA_HOME}" == "x" ]; then
  echo IMPALA_HOME not set
  exit 1
fi

# Abort on any unchecked failure (pushd, hadoop fs, ...). Commands whose
# failure needs a dedicated message are tested directly in an `if`, because
# with `set -e` active a separate `$?` check afterwards would never be reached.
set -e

# Load the data set
pushd "${IMPALA_HOME}/bin"

if ! ./load-data.sh functional exhaustive; then
  echo LOAD OF FUNCTIONAL DATA FAILED
  exit 1
fi

if ! ./load-data.sh tpch core; then
  echo LOAD OF TPCH DATA FAILED
  exit 1
fi
popd

# TODO: The multi-format table will move these files. So we need to copy them
# to a temporary location for that table to use. Should find a better way to
# handle this.
echo COPYING DATA FOR DEPENDENT TABLES
# Start from a clean slate: -f keeps the removal quiet when the paths are absent.
hadoop fs -rm -r -f /tmp/alltypes_rc
hadoop fs -rm -r -f /tmp/alltypes_seq
hadoop fs -mkdir -p /tmp/alltypes_seq/year=2009
hadoop fs -mkdir -p /tmp/alltypes_rc/year=2009
hadoop fs -cp /test-warehouse/alltypes_seq/year=2009/month=2/ /tmp/alltypes_seq/year=2009
hadoop fs -cp /test-warehouse/alltypes_rc/year=2009/month=3/ /tmp/alltypes_rc/year=2009

# For tables that rely on loading data from local fs test-warehouse
# TODO: Find a good way to integrate this with the normal data loading scripts
if ! "${HIVE_HOME}/bin/hive" -hiveconf hive.root.logger=WARN,console -v \
    -f "${IMPALA_HOME}/testdata/bin/load-dependent-tables.sql"; then
  echo DEPENDENT LOAD FAILED
  exit 1
fi