mirror of
https://github.com/apache/impala.git
synced 2026-01-06 15:01:43 -05:00
Fixed data loading bugs, moved most tables out of load-dependent-tables
This commit is contained in:
committed by
Henry Robinson
parent
7584312540
commit
831ee529be
12
testdata/bin/create-load-data.sh
vendored
12
testdata/bin/create-load-data.sh
vendored
@@ -81,17 +81,19 @@ if [ $? != 0 ]; then
|
||||
fi
|
||||
|
||||
# Load the index files for corrupted lzo data.
|
||||
hadoop fs -rm -f /test-warehouse/bad_text_lzo/bad_text.lzo.index
|
||||
hadoop fs -rm -f /test-warehouse/bad_text_lzo_text_lzo/bad_text.lzo.index
|
||||
hadoop fs -put ${IMPALA_HOME}/testdata/bad_text_lzo/bad_text.lzo.index \
|
||||
/test-warehouse/bad_text_lzo/
|
||||
/test-warehouse/bad_text_lzo_text_lzo/
|
||||
|
||||
hadoop fs -rm -r -f /bad_text_lzo/
|
||||
hadoop fs -mv /test-warehouse/bad_text_lzo/ /
|
||||
hadoop fs -rm -r -f /bad_text_lzo_text_lzo/
|
||||
hadoop fs -mv /test-warehouse/bad_text_lzo_text_lzo/ /
|
||||
# Clean up the old bad_text_lzo files, if they exist.
|
||||
hadoop fs -rm -r -f /test-warehouse/bad_text_lzo/
|
||||
|
||||
# Index all lzo files in HDFS under /test-warehouse
|
||||
${IMPALA_HOME}/testdata/bin/lzo_indexer.sh /test-warehouse
|
||||
|
||||
hadoop fs -mv /bad_text_lzo/ /test-warehouse/
|
||||
hadoop fs -mv /bad_text_lzo_text_lzo/ /test-warehouse/
|
||||
|
||||
# Run compute stats over as many of the tables used in the Planner tests as possible.
|
||||
# Due to Hive bugs HIVE-4119 and HIVE-4122, these tables need to be chosen carefully or
|
||||
|
||||
23
testdata/bin/generate-schema-statements.py
vendored
23
testdata/bin/generate-schema-statements.py
vendored
@@ -291,12 +291,12 @@ class Statements(object):
|
||||
self.load_base = list()
|
||||
|
||||
def write_to_file(self, filename):
|
||||
# Only write to file if there's something to actually write
|
||||
if self.create or self.load_base or self.load:
|
||||
# Make sure we create the base tables first
|
||||
output = self.create + self.load_base + self.load
|
||||
with open(filename, 'w') as f:
|
||||
f.write('\n\n'.join(output))
|
||||
# Make sure we create the base tables first. It is important that we always write
|
||||
# to the output file, even if there are no statements to generate. This makes sure
|
||||
# the output file is empty and the user doesn't unexpectedly load some stale data.
|
||||
output = self.create + self.load_base + self.load
|
||||
with open(filename, 'w') as f:
|
||||
f.write('\n\n'.join(output))
|
||||
|
||||
def generate_statements(output_name, test_vectors, sections,
|
||||
schema_include_constraints, schema_exclude_constraints):
|
||||
@@ -346,21 +346,21 @@ def generate_statements(output_name, test_vectors, sections,
|
||||
# hive does not allow hyphenated table names.
|
||||
if data_set == 'hive-benchmark':
|
||||
db_name = '{0}{1}'.format('hivebenchmark', options.scale_factor)
|
||||
|
||||
db = '{0}{1}'.format(db_name, db_suffix)
|
||||
data_path = os.path.join(options.hive_warehouse_dir, hdfs_location)
|
||||
|
||||
if table_names and (table_name.lower() not in table_names):
|
||||
print 'Skipping table: %s' % table_name
|
||||
print 'Skipping table: %s.%s' % (db, table_name)
|
||||
continue
|
||||
|
||||
if schema_include_constraints[table_name.lower()] and \
|
||||
table_format not in schema_include_constraints[table_name.lower()]:
|
||||
print 'Skipping \'%s\' due to include constraint match' % table_name
|
||||
print 'Skipping \'%s.%s\' due to include constraint match' % (db, table_name)
|
||||
continue
|
||||
|
||||
if schema_exclude_constraints[base_table_name.lower()] and\
|
||||
table_format in schema_exclude_constraints[base_table_name.lower()]:
|
||||
print 'Skipping \'%s\' due to exclude constraint match' % table_name
|
||||
print 'Skipping \'%s.%s\' due to exclude constraint match' % (db, table_name)
|
||||
continue
|
||||
|
||||
# If a CREATE section is provided, use that. Otherwise a COLUMNS section
|
||||
@@ -390,8 +390,7 @@ def generate_statements(output_name, test_vectors, sections,
|
||||
# The ALTER statement in hive does not accept fully qualified table names.
|
||||
# We need the use statement.
|
||||
if alter:
|
||||
use_table = 'USE {db_name}{db_suffix};\n'.format(db_name=db_name,
|
||||
db_suffix=db_suffix)
|
||||
use_table = 'USE {db_name};\n'.format(db_name=db)
|
||||
output.create.append(use_table + alter.format(table_name=table_name))
|
||||
|
||||
# If the directory already exists in HDFS, assume that data files already exist
|
||||
|
||||
37
testdata/bin/load-dependent-tables.sql
vendored
37
testdata/bin/load-dependent-tables.sql
vendored
@@ -49,26 +49,6 @@ ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
|
||||
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=3)
|
||||
SET FILEFORMAT RCFILE;
|
||||
|
||||
-- Not really dependent: this table contains format errors and
|
||||
-- is accessed by the unit test: sequence-file-recover-test.
|
||||
CREATE DATABASE IF NOT EXISTS functional_seq_snap;
|
||||
USE functional_seq_snap;
|
||||
DROP TABLE IF EXISTS bad_seq_snap;
|
||||
CREATE EXTERNAL TABLE bad_seq_snap (field string) stored as SEQUENCEFILE
|
||||
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/bad_seq_snap';
|
||||
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/bad_seq_snap/bad_file' OVERWRITE INTO TABLE bad_seq_snap;
|
||||
|
||||
--- Error recovery test data for LZO compression.
|
||||
CREATE DATABASE IF NOT EXISTS functional_text_lzo;
|
||||
USE functional_text_lzo;
|
||||
DROP TABLE IF EXISTS bad_text;
|
||||
CREATE EXTERNAL TABLE bad_text (field string) stored as
|
||||
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
|
||||
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
|
||||
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/bad_text_lzo';
|
||||
|
||||
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/bad_text_lzo/bad_text.lzo' OVERWRITE INTO TABLE bad_text;
|
||||
|
||||
----
|
||||
-- Used by CatalogTest to confirm that non-external HBase tables are identified
|
||||
-- correctly (IMP-581)
|
||||
@@ -78,23 +58,6 @@ USE functional;
|
||||
CREATE TABLE internal_hbase_table(key int, value string)
|
||||
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
|
||||
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val");
|
||||
----
|
||||
-- For structured-type testing
|
||||
DROP TABLE IF EXISTS map_table;
|
||||
CREATE TABLE map_table(map_col map<int, string>);
|
||||
DROP TABLE IF EXISTS array_table;
|
||||
CREATE TABLE array_table(array_col array<int>);
|
||||
|
||||
----
|
||||
-- Create a table to test older rc files (pre hive9). The header for those files is
|
||||
-- different.
|
||||
CREATE DATABASE IF NOT EXISTS functional_rc;
|
||||
USE functional_rc;
|
||||
DROP TABLE IF EXISTS old_rcfile_table;
|
||||
CREATE EXTERNAL TABLE old_rcfile_table(key int, value string)
|
||||
STORED AS RCFILE
|
||||
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/old_rcfile';
|
||||
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/data/oldrcfile.rc' OVERWRITE into table old_rcfile_table;
|
||||
|
||||
---- Unsupported Impala table types
|
||||
USE functional;
|
||||
|
||||
@@ -1020,3 +1020,58 @@ INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}
|
||||
---- LOAD
|
||||
LOAD DATA LOCAL INPATH '{impala_home}/testdata/UnsupportedTypes/data.csv' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
||||
====
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
old_rcfile_table
|
||||
---- COLUMNS
|
||||
key INT
|
||||
value STRING
|
||||
---- DEPENDENT_LOAD
|
||||
LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/data/oldrcfile.rc' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
||||
====
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
bad_text_lzo
|
||||
---- COLUMNS
|
||||
field STRING
|
||||
---- DEPENDENT_LOAD
|
||||
-- Error recovery test data for LZO compression.
|
||||
LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/bad_text_lzo/bad_text.lzo' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
||||
====
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
bad_seq_snap
|
||||
---- COLUMNS
|
||||
field STRING
|
||||
---- DEPENDENT_LOAD
|
||||
-- This data file contains format errors and is accessed by the unit test: sequence-file-recover-test.
|
||||
LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/bad_seq_snap/bad_file' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
||||
====
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
map_table
|
||||
---- CREATE
|
||||
-- For structured-type testing
|
||||
DROP TABLE IF EXISTS map_table;
|
||||
CREATE TABLE {db_name}{db_suffix}.{table_name} (map_col map<int, string>);
|
||||
====
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
array_table
|
||||
---- CREATE
|
||||
-- For structured-type testing
|
||||
CREATE TABLE {db_name}{db_suffix}.{table_name} (array_col array<int>);
|
||||
====
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
array_table
|
||||
---- CREATE
|
||||
-- For structured-type testing
|
||||
CREATE TABLE {db_name}{db_suffix}.{table_name} (array_col array<int>);
|
||||
====
|
||||
|
||||
@@ -7,7 +7,14 @@ table_name:hbasealltypesagg, constraint:restrict_to, table_format:text/none/none
|
||||
table_name:hbasealltypeserror, constraint:restrict_to, table_format:text/none/none
|
||||
table_name:hbasealltypeserrornonulls, constraint:restrict_to, table_format:text/none/none
|
||||
table_name:hbasestringids, constraint:restrict_to, table_format:text/none/none
|
||||
table_name:escapechartesttable, constraint:exclude, file_format:parquet
|
||||
table_name:nulltable, constraint:exclude, file_format:parquet
|
||||
table_name:nullescapedtable, constraint:exclude, file_format:parquet
|
||||
table_name:TblWithRaggedColumns, constraint:exclude, file_format:parquet
|
||||
table_name:old_rcfile_table, constraint:restrict_to, table_format:rc/none/none
|
||||
table_name:bad_text_lzo, constraint:restrict_to, table_format:text/lzo/block
|
||||
table_name:bad_seq_snap, constraint:restrict_to, table_format:seq/snap/block
|
||||
|
||||
table_name:map_table, constraint:restrict_to, table_format:text/none/none
|
||||
table_name:array_table, constraint:restrict_to, table_format:text/none/none
|
||||
|
||||
table_name:escapechartesttable, constraint:exclude, table_format:parquet
|
||||
table_name:nulltable, constraint:exclude, table_format:parquet
|
||||
table_name:nullescapedtable, constraint:exclude, table_format:parquet
|
||||
table_name:TblWithRaggedColumns, constraint:exclude, table_format:parquet
|
||||
|
||||
|
@@ -174,7 +174,7 @@ int, int, int, boolean, tinyint, smallint, int, bigint, float, double, string, s
|
||||
2009,3,29,false,9,9,NULL,90,9,90.90000000000001,'03/01/09','9',2012-03-22 00:00:00
|
||||
====
|
||||
---- QUERY
|
||||
select count(*) from functional_text_lzo.bad_text
|
||||
select count(*) from functional_text_lzo.bad_text_lzo
|
||||
---- ERRORS
|
||||
Blocksize: 536870911 is greater than MAX_BLOCK_SIZE: 67108864
|
||||
---- TYPES
|
||||
|
||||
@@ -10,7 +10,7 @@ Format error in record or block header at offset: 1784325
|
||||
Format error in record or block header at offset: 1790563
|
||||
Format error in record or block header at offset: 1791244
|
||||
Format error in record or block header at end of file.
|
||||
First error while processing: hdfs: test-warehouse/bad_seq_snap/bad_file at offset: 899514
|
||||
First error while processing: hdfs: test-warehouse/bad_seq_snap_seq_snap/bad_file at offset: 899514
|
||||
---- TYPES
|
||||
bigint
|
||||
---- RESULTS
|
||||
|
||||
Reference in New Issue
Block a user