From 84d91fca4fbfaedf3610b2f2e24add1855d35ab8 Mon Sep 17 00:00:00 2001 From: Lenni Kuff Date: Thu, 2 Aug 2012 09:38:14 -0700 Subject: [PATCH] Fix sequence file data loading for the alltypesmixedformat table Moved this out of the data loading framework because it is kind of a special case. I will consider how we can update the framework to address mixed format tables. --- testdata/bin/create-load-data.sh | 5 +- testdata/bin/load-dependent-tables.sql | 49 +++++++++++++++---- .../functional/functional_schema_template.sql | 32 ------------ 3 files changed, 42 insertions(+), 44 deletions(-) diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh index 58be60a7a..62fba0f80 100755 --- a/testdata/bin/create-load-data.sh +++ b/testdata/bin/create-load-data.sh @@ -24,9 +24,10 @@ if [ $? != 0 ]; then fi popd -# TODO: The multi-format table will move these files. So we need to copy them to a temporary location -# for that table to use. Should find a better way to handle this. +# TODO: The multi-format table will move these files. So we need to copy them to a +# temporary location for that table to use. Should find a better way to handle this. echo COPYING DATA FOR DEPENDENT TABLES +hadoop fs -rm -r -f /test-warehouse/alltypesmixedformat hadoop fs -rm -r -f /tmp/alltypes_rc hadoop fs -rm -r -f /tmp/alltypes_seq hadoop fs -mkdir -p /tmp/alltypes_seq/year=2009 diff --git a/testdata/bin/load-dependent-tables.sql b/testdata/bin/load-dependent-tables.sql index b49a46705..3965275ad 100644 --- a/testdata/bin/load-dependent-tables.sql +++ b/testdata/bin/load-dependent-tables.sql @@ -1,20 +1,49 @@ --- Load tables that depend upon data in the hive test-warehouse already existing +-- Create and load tables that depend upon data in the hive test-warehouse already existing -- Load a mixed-format table. Hive behaves oddly when mixing formats, -- but the following incantation ensures that the result is a -- three-partition table. First is text format, second is sequence -- file, third is RC file. Must be called after test-warehouse is -- successfully populated +DROP TABLE IF EXISTS alltypesmixedformat; +CREATE EXTERNAL TABLE alltypesmixedformat ( + id int, + bool_col boolean, + tinyint_col tinyint, + smallint_col smallint, + int_col int, + bigint_col bigint, + float_col float, + double_col double, + date_string_col string, + string_col string, + timestamp_col timestamp) +partitioned by (year int, month int) +row format delimited fields terminated by ',' escaped by '\\' +stored as TEXTFILE +LOCATION '${hiveconf:hive.metastore.warehouse.dir}/alltypesmixedformat'; + INSERT OVERWRITE TABLE alltypesmixedformat PARTITION (year=2009, month=1) -SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col -FROM alltypes WHERE year=2009 and month=1; +SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, +float_col, double_col, date_string_col, string_col, timestamp_col +FROM alltypes +WHERE year=2009 and month=1; ALTER TABLE alltypesmixedformat SET FILEFORMAT SEQUENCEFILE; -LOAD DATA INPATH '/tmp/alltypes_seq/year=2009/month=2/' OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=2); -ALTER TABLE alltypesmixedformat SET FILEFORMAT RCFILE; -LOAD DATA INPATH '/tmp/alltypes_rc/year=2009/month=3/' OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=3); +LOAD DATA INPATH '/tmp/alltypes_seq/year=2009/month=2/' +OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=2); -ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1) SET FILEFORMAT TEXTFILE; -ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1) SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\'); -ALTER TABLE alltypesmixedformat PARTITION(year=2009, month=2) SET SERDEPROPERTIES('field.delim'='\001'); -ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2) SET FILEFORMAT SEQUENCEFILE; +ALTER TABLE alltypesmixedformat SET FILEFORMAT RCFILE; +LOAD DATA INPATH '/tmp/alltypes_rc/year=2009/month=3/' +OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=3); + +ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1) + SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\'); +ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1) + SET FILEFORMAT TEXTFILE; +ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2) + SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\'); +ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2) + SET FILEFORMAT SEQUENCEFILE; +ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=3) + SET FILEFORMAT RCFILE; diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 1f292ec4d..863680943 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -856,38 +856,6 @@ insert into table %(table_name)s SELECT id, bool_col, tinyint_col, smallint_col, ==== functional ---- -alltypesmixedformat ----- -CREATE EXTERNAL TABLE %(table_name)s ( - id int, - bool_col boolean, - tinyint_col tinyint, - smallint_col smallint, - int_col int, - bigint_col bigint, - float_col float, - double_col double, - date_string_col string, - string_col string, - timestamp_col timestamp) -partitioned by (year int, month int) -row format delimited fields terminated by ',' escaped by '\\' -stored as %(file_format)s -LOCATION '${hiveconf:hive.metastore.warehouse.dir}/%(table_name)s'; - -ALTER TABLE %(table_name)s ADD PARTITION (year=2009, month=1); -ALTER TABLE %(table_name)s ADD PARTITION (year=2009, month=2); -ALTER TABLE %(table_name)s ADD PARTITION (year=2009, month=3); - -ALTER TABLE %(table_name)s PARTITION (year=2009, month=1) SET FILEFORMAT TEXTFILE; -ALTER TABLE %(table_name)s PARTITION (year=2009, month=1) SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\'); -ALTER TABLE %(table_name)s PARTITION(year=2009, month=2) SET SERDEPROPERTIES('field.delim'='\001'); -ALTER TABLE %(table_name)s PARTITION (year=2009, month=2) SET FILEFORMAT SEQUENCEFILE; ----- ----- -==== -functional ----- stringpartitionkey ---- -- Regression for IMP-163, failure to load tables partitioned by string column