Fix sequence file data loading for the alltypesmixedformat table

Moved this out of the data loading framework because it is kind of a special
case. I will consider how we can update the framework to address mixed format
tables.
This commit is contained in:
Lenni Kuff
2012-08-02 09:38:14 -07:00
committed by Henry Robinson
parent cef688d0fd
commit 84d91fca4f
3 changed files with 42 additions and 44 deletions

View File

@@ -24,9 +24,10 @@ if [ $? != 0 ]; then
fi
popd
# TODO: The multi-format table will move these files. So we need to copy them to a temporary location
# for that table to use. Should find a better way to handle this.
# TODO: The multi-format table will move these files. So we need to copy them to a
# temporary location for that table to use. Should find a better way to handle this.
echo COPYING DATA FOR DEPENDENT TABLES
hadoop fs -rm -r -f /test-warehouse/alltypesmixedformat
hadoop fs -rm -r -f /tmp/alltypes_rc
hadoop fs -rm -r -f /tmp/alltypes_seq
hadoop fs -mkdir -p /tmp/alltypes_seq/year=2009

View File

@@ -1,20 +1,49 @@
-- Load tables that depend upon data in the hive test-warehouse already existing
-- Create and load tables that depend upon data in the hive test-warehouse already existing
-- Load a mixed-format table. Hive behaves oddly when mixing formats,
-- but the following incantation ensures that the result is a
-- three-partition table. First is text format, second is sequence
-- file, third is RC file. Must be called after test-warehouse is
-- successfully populated
DROP TABLE IF EXISTS alltypesmixedformat;
CREATE EXTERNAL TABLE alltypesmixedformat (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int)
row format delimited fields terminated by ',' escaped by '\\'
stored as TEXTFILE
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/alltypesmixedformat';
INSERT OVERWRITE TABLE alltypesmixedformat PARTITION (year=2009, month=1)
SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
FROM alltypes WHERE year=2009 and month=1;
SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
FROM alltypes
WHERE year=2009 and month=1;
ALTER TABLE alltypesmixedformat SET FILEFORMAT SEQUENCEFILE;
LOAD DATA INPATH '/tmp/alltypes_seq/year=2009/month=2/' OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=2);
ALTER TABLE alltypesmixedformat SET FILEFORMAT RCFILE;
LOAD DATA INPATH '/tmp/alltypes_rc/year=2009/month=3/' OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=3);
LOAD DATA INPATH '/tmp/alltypes_seq/year=2009/month=2/'
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=2);
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1) SET FILEFORMAT TEXTFILE;
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1) SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
ALTER TABLE alltypesmixedformat PARTITION(year=2009, month=2) SET SERDEPROPERTIES('field.delim'='\001');
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2) SET FILEFORMAT SEQUENCEFILE;
ALTER TABLE alltypesmixedformat SET FILEFORMAT RCFILE;
LOAD DATA INPATH '/tmp/alltypes_rc/year=2009/month=3/'
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=3);
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1)
SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1)
SET FILEFORMAT TEXTFILE;
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
SET FILEFORMAT SEQUENCEFILE;
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=3)
SET FILEFORMAT RCFILE;

View File

@@ -856,38 +856,6 @@ insert into table %(table_name)s SELECT id, bool_col, tinyint_col, smallint_col,
====
functional
----
alltypesmixedformat
----
CREATE EXTERNAL TABLE %(table_name)s (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int)
row format delimited fields terminated by ',' escaped by '\\'
stored as %(file_format)s
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/%(table_name)s';
ALTER TABLE %(table_name)s ADD PARTITION (year=2009, month=1);
ALTER TABLE %(table_name)s ADD PARTITION (year=2009, month=2);
ALTER TABLE %(table_name)s ADD PARTITION (year=2009, month=3);
ALTER TABLE %(table_name)s PARTITION (year=2009, month=1) SET FILEFORMAT TEXTFILE;
ALTER TABLE %(table_name)s PARTITION (year=2009, month=1) SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
ALTER TABLE %(table_name)s PARTITION(year=2009, month=2) SET SERDEPROPERTIES('field.delim'='\001');
ALTER TABLE %(table_name)s PARTITION (year=2009, month=2) SET FILEFORMAT SEQUENCEFILE;
----
----
====
functional
----
stringpartitionkey
----
-- Regression for IMP-163, failure to load tables partitioned by string column