Fixed data loading bugs, moved most tables out of load-dependent-tables

This commit is contained in:
Lenni Kuff
2013-03-20 13:45:12 -07:00
committed by Henry Robinson
parent 7584312540
commit 831ee529be
9 changed files with 99 additions and 62 deletions

View File

@@ -81,17 +81,19 @@ if [ $? != 0 ]; then
fi
# Load the index files for corrupted lzo data.
hadoop fs -rm -f /test-warehouse/bad_text_lzo/bad_text.lzo.index
hadoop fs -rm -f /test-warehouse/bad_text_lzo_text_lzo/bad_text.lzo.index
hadoop fs -put ${IMPALA_HOME}/testdata/bad_text_lzo/bad_text.lzo.index \
/test-warehouse/bad_text_lzo/
/test-warehouse/bad_text_lzo_text_lzo/
hadoop fs -rm -r -f /bad_text_lzo/
hadoop fs -mv /test-warehouse/bad_text_lzo/ /
hadoop fs -rm -r -f /bad_text_lzo_text_lzo/
hadoop fs -mv /test-warehouse/bad_text_lzo_text_lzo/ /
# Clean up the old bad_text_lzo files, if they exist.
hadoop fs -rm -r -f /test-warehouse/bad_text_lzo/
# Index all lzo files in HDFS under /test-warehouse
${IMPALA_HOME}/testdata/bin/lzo_indexer.sh /test-warehouse
hadoop fs -mv /bad_text_lzo/ /test-warehouse/
hadoop fs -mv /bad_text_lzo_text_lzo/ /test-warehouse/
# Run compute stats over as many of the tables used in the Planner tests as possible.
# Due to Hive bugs HIVE-4119 and HIVE-4122, these tables need to be chosen carefully or

View File

@@ -291,12 +291,12 @@ class Statements(object):
self.load_base = list()
def write_to_file(self, filename):
# Only write to file if there's something to actually write
if self.create or self.load_base or self.load:
# Make sure we create the base tables first
output = self.create + self.load_base + self.load
with open(filename, 'w') as f:
f.write('\n\n'.join(output))
# Make sure we create the base tables first. It is important that we always write
# to the output file, even if there are no statements to generate: opening it for
# writing truncates it, so the user doesn't unexpectedly load stale statements.
output = self.create + self.load_base + self.load
with open(filename, 'w') as f:
f.write('\n\n'.join(output))
def generate_statements(output_name, test_vectors, sections,
schema_include_constraints, schema_exclude_constraints):
@@ -346,21 +346,21 @@ def generate_statements(output_name, test_vectors, sections,
# Hive does not allow hyphens in identifiers, so the database name drops the hyphen.
if data_set == 'hive-benchmark':
db_name = '{0}{1}'.format('hivebenchmark', options.scale_factor)
db = '{0}{1}'.format(db_name, db_suffix)
data_path = os.path.join(options.hive_warehouse_dir, hdfs_location)
if table_names and (table_name.lower() not in table_names):
print 'Skipping table: %s' % table_name
print 'Skipping table: %s.%s' % (db, table_name)
continue
if schema_include_constraints[table_name.lower()] and \
table_format not in schema_include_constraints[table_name.lower()]:
print 'Skipping \'%s\' due to include constraint match' % table_name
print 'Skipping \'%s.%s\' due to include constraint match' % (db, table_name)
continue
if schema_exclude_constraints[base_table_name.lower()] and\
table_format in schema_exclude_constraints[base_table_name.lower()]:
print 'Skipping \'%s\' due to exclude constraint match' % table_name
print 'Skipping \'%s.%s\' due to exclude constraint match' % (db, table_name)
continue
# If a CREATE section is provided, use that. Otherwise a COLUMNS section
@@ -390,8 +390,7 @@ def generate_statements(output_name, test_vectors, sections,
# The ALTER statement in Hive does not accept fully qualified table names,
# so a USE statement must be issued first.
if alter:
use_table = 'USE {db_name}{db_suffix};\n'.format(db_name=db_name,
db_suffix=db_suffix)
use_table = 'USE {db_name};\n'.format(db_name=db)
output.create.append(use_table + alter.format(table_name=table_name))
# If the directory already exists in HDFS, assume that data files already exist

View File

@@ -49,26 +49,6 @@ ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=3)
SET FILEFORMAT RCFILE;
-- Not really dependent: this table contains format errors and
-- is accessed by the unit test: sequence-file-recover-test.
CREATE DATABASE IF NOT EXISTS functional_seq_snap;
USE functional_seq_snap;
DROP TABLE IF EXISTS bad_seq_snap;
CREATE EXTERNAL TABLE bad_seq_snap (field string) stored as SEQUENCEFILE
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/bad_seq_snap';
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/bad_seq_snap/bad_file' OVERWRITE INTO TABLE bad_seq_snap;
--- Error recovery test data for LZO compression.
CREATE DATABASE IF NOT EXISTS functional_text_lzo;
USE functional_text_lzo;
DROP TABLE IF EXISTS bad_text;
CREATE EXTERNAL TABLE bad_text (field string) stored as
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/bad_text_lzo';
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/bad_text_lzo/bad_text.lzo' OVERWRITE INTO TABLE bad_text;
----
-- Used by CatalogTest to confirm that non-external HBase tables are identified
-- correctly (IMP-581)
@@ -78,23 +58,6 @@ USE functional;
CREATE TABLE internal_hbase_table(key int, value string)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val");
----
-- For structured-type testing
DROP TABLE IF EXISTS map_table;
CREATE TABLE map_table(map_col map<int, string>);
DROP TABLE IF EXISTS array_table;
CREATE TABLE array_table(array_col array<int>);
----
-- Create a table to test older rc files (pre hive9). The header of those files is
-- different.
CREATE DATABASE IF NOT EXISTS functional_rc;
USE functional_rc;
DROP TABLE IF EXISTS old_rcfile_table;
CREATE EXTERNAL TABLE old_rcfile_table(key int, value string)
STORED AS RCFILE
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/old_rcfile';
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/data/oldrcfile.rc' OVERWRITE into table old_rcfile_table;
---- Unsupported Impala table types
USE functional;

View File

@@ -1020,3 +1020,58 @@ INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}
---- LOAD
LOAD DATA LOCAL INPATH '{impala_home}/testdata/UnsupportedTypes/data.csv' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
====
---- DATASET
functional
---- BASE_TABLE_NAME
old_rcfile_table
---- COLUMNS
key INT
value STRING
---- DEPENDENT_LOAD
LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/data/oldrcfile.rc' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
====
---- DATASET
functional
---- BASE_TABLE_NAME
bad_text_lzo
---- COLUMNS
field STRING
---- DEPENDENT_LOAD
-- Error recovery test data for LZO compression.
LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/bad_text_lzo/bad_text.lzo' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
====
---- DATASET
functional
---- BASE_TABLE_NAME
bad_seq_snap
---- COLUMNS
field STRING
---- DEPENDENT_LOAD
-- This data file contains format errors and is accessed by the unit test: sequence-file-recover-test.
LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/bad_seq_snap/bad_file' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
====
---- DATASET
functional
---- BASE_TABLE_NAME
map_table
---- CREATE
-- For structured-type testing
DROP TABLE IF EXISTS map_table;
CREATE TABLE {db_name}{db_suffix}.{table_name} (map_col map<int, string>);
====
---- DATASET
functional
---- BASE_TABLE_NAME
array_table
---- CREATE
-- For structured-type testing
CREATE TABLE {db_name}{db_suffix}.{table_name} (array_col array<int>);
====
---- DATASET
functional
---- BASE_TABLE_NAME
array_table
---- CREATE
-- For structured-type testing
CREATE TABLE {db_name}{db_suffix}.{table_name} (array_col array<int>);
====

View File

@@ -7,7 +7,14 @@ table_name:hbasealltypesagg, constraint:restrict_to, table_format:text/none/none
table_name:hbasealltypeserror, constraint:restrict_to, table_format:text/none/none
table_name:hbasealltypeserrornonulls, constraint:restrict_to, table_format:text/none/none
table_name:hbasestringids, constraint:restrict_to, table_format:text/none/none
table_name:escapechartesttable, constraint:exclude, file_format:parquet
table_name:nulltable, constraint:exclude, file_format:parquet
table_name:nullescapedtable, constraint:exclude, file_format:parquet
table_name:TblWithRaggedColumns, constraint:exclude, file_format:parquet
table_name:old_rcfile_table, constraint:restrict_to, table_format:rc/none/none
table_name:bad_text_lzo, constraint:restrict_to, table_format:text/lzo/block
table_name:bad_seq_snap, constraint:restrict_to, table_format:seq/snap/block
table_name:map_table, constraint:restrict_to, table_format:text/none/none
table_name:array_table, constraint:restrict_to, table_format:text/none/none
table_name:escapechartesttable, constraint:exclude, table_format:parquet
table_name:nulltable, constraint:exclude, table_format:parquet
table_name:nullescapedtable, constraint:exclude, table_format:parquet
table_name:TblWithRaggedColumns, constraint:exclude, table_format:parquet
1 # Table level constraints:
7 table_name:hbasealltypeserror, constraint:restrict_to, table_format:text/none/none
8 table_name:hbasealltypeserrornonulls, constraint:restrict_to, table_format:text/none/none
9 table_name:hbasestringids, constraint:restrict_to, table_format:text/none/none
10 table_name:escapechartesttable, constraint:exclude, file_format:parquet table_name:old_rcfile_table, constraint:restrict_to, table_format:rc/none/none
11 table_name:nulltable, constraint:exclude, file_format:parquet table_name:bad_text_lzo, constraint:restrict_to, table_format:text/lzo/block
12 table_name:nullescapedtable, constraint:exclude, file_format:parquet table_name:bad_seq_snap, constraint:restrict_to, table_format:seq/snap/block
13 table_name:TblWithRaggedColumns, constraint:exclude, file_format:parquet table_name:map_table, constraint:restrict_to, table_format:text/none/none
14 table_name:array_table, constraint:restrict_to, table_format:text/none/none
15 table_name:escapechartesttable, constraint:exclude, table_format:parquet
16 table_name:nulltable, constraint:exclude, table_format:parquet
17 table_name:nullescapedtable, constraint:exclude, table_format:parquet
18 table_name:TblWithRaggedColumns, constraint:exclude, table_format:parquet
19
20

View File

@@ -174,7 +174,7 @@ int, int, int, boolean, tinyint, smallint, int, bigint, float, double, string, s
2009,3,29,false,9,9,NULL,90,9,90.90000000000001,'03/01/09','9',2012-03-22 00:00:00
====
---- QUERY
select count(*) from functional_text_lzo.bad_text
select count(*) from functional_text_lzo.bad_text_lzo
---- ERRORS
Blocksize: 536870911 is greater than MAX_BLOCK_SIZE: 67108864
---- TYPES

View File

@@ -10,7 +10,7 @@ Format error in record or block header at offset: 1784325
Format error in record or block header at offset: 1790563
Format error in record or block header at offset: 1791244
Format error in record or block header at end of file.
First error while processing: hdfs: test-warehouse/bad_seq_snap/bad_file at offset: 899514
First error while processing: hdfs: test-warehouse/bad_seq_snap_seq_snap/bad_file at offset: 899514
---- TYPES
bigint
---- RESULTS