mirror of
https://github.com/apache/impala.git
synced 2025-12-30 03:01:44 -05:00
Fix sequence file data loading for the alltypesmixedformat table
Moved the loading of the alltypesmixedformat table out of the data loading framework because it is a special case: its partitions use different file formats. I will consider how the framework can be updated to handle mixed-format tables.
This commit is contained in:
committed by
Henry Robinson
parent
cef688d0fd
commit
84d91fca4f
5
testdata/bin/create-load-data.sh
vendored
5
testdata/bin/create-load-data.sh
vendored
@@ -24,9 +24,10 @@ if [ $? != 0 ]; then
|
||||
fi
|
||||
popd
|
||||
|
||||
# TODO: The multi-format table will move these files. So we need to copy them to a temporary location
|
||||
# for that table to use. Should find a better way to handle this.
|
||||
# TODO: The multi-format table will move these files. So we need to copy them to a
|
||||
# temporary location for that table to use. Should find a better way to handle this.
|
||||
echo COPYING DATA FOR DEPENDENT TABLES
|
||||
hadoop fs -rm -r -f /test-warehouse/alltypesmixedformat
|
||||
hadoop fs -rm -r -f /tmp/alltypes_rc
|
||||
hadoop fs -rm -r -f /tmp/alltypes_seq
|
||||
hadoop fs -mkdir -p /tmp/alltypes_seq/year=2009
|
||||
|
||||
49
testdata/bin/load-dependent-tables.sql
vendored
49
testdata/bin/load-dependent-tables.sql
vendored
@@ -1,20 +1,49 @@
|
||||
-- Load tables that depend upon data in the hive test-warehouse already existing
|
||||
-- Create and load tables that depend on data already existing in the Hive test-warehouse
|
||||
|
||||
-- Load a mixed-format table. Hive behaves oddly when mixing formats,
|
||||
-- but the following incantation ensures that the result is a
|
||||
-- three-partition table. First is text format, second is sequence
|
||||
-- file, third is RC file. Must be called after test-warehouse is
|
||||
-- successfully populated
|
||||
DROP TABLE IF EXISTS alltypesmixedformat;
|
||||
CREATE EXTERNAL TABLE alltypesmixedformat (
|
||||
id int,
|
||||
bool_col boolean,
|
||||
tinyint_col tinyint,
|
||||
smallint_col smallint,
|
||||
int_col int,
|
||||
bigint_col bigint,
|
||||
float_col float,
|
||||
double_col double,
|
||||
date_string_col string,
|
||||
string_col string,
|
||||
timestamp_col timestamp)
|
||||
partitioned by (year int, month int)
|
||||
row format delimited fields terminated by ',' escaped by '\\'
|
||||
stored as TEXTFILE
|
||||
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/alltypesmixedformat';
|
||||
|
||||
INSERT OVERWRITE TABLE alltypesmixedformat PARTITION (year=2009, month=1)
|
||||
SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
|
||||
FROM alltypes WHERE year=2009 and month=1;
|
||||
SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
float_col, double_col, date_string_col, string_col, timestamp_col
|
||||
FROM alltypes
|
||||
WHERE year=2009 and month=1;
|
||||
|
||||
ALTER TABLE alltypesmixedformat SET FILEFORMAT SEQUENCEFILE;
|
||||
LOAD DATA INPATH '/tmp/alltypes_seq/year=2009/month=2/' OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=2);
|
||||
ALTER TABLE alltypesmixedformat SET FILEFORMAT RCFILE;
|
||||
LOAD DATA INPATH '/tmp/alltypes_rc/year=2009/month=3/' OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=3);
|
||||
LOAD DATA INPATH '/tmp/alltypes_seq/year=2009/month=2/'
|
||||
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=2);
|
||||
|
||||
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1) SET FILEFORMAT TEXTFILE;
|
||||
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1) SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
|
||||
ALTER TABLE alltypesmixedformat PARTITION(year=2009, month=2) SET SERDEPROPERTIES('field.delim'='\001');
|
||||
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2) SET FILEFORMAT SEQUENCEFILE;
|
||||
ALTER TABLE alltypesmixedformat SET FILEFORMAT RCFILE;
|
||||
LOAD DATA INPATH '/tmp/alltypes_rc/year=2009/month=3/'
|
||||
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=3);
|
||||
|
||||
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1)
|
||||
SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
|
||||
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1)
|
||||
SET FILEFORMAT TEXTFILE;
|
||||
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
|
||||
SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
|
||||
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
|
||||
SET FILEFORMAT SEQUENCEFILE;
|
||||
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=3)
|
||||
SET FILEFORMAT RCFILE;
|
||||
|
||||
@@ -856,38 +856,6 @@ insert into table %(table_name)s SELECT id, bool_col, tinyint_col, smallint_col,
|
||||
====
|
||||
functional
|
||||
----
|
||||
alltypesmixedformat
|
||||
----
|
||||
CREATE EXTERNAL TABLE %(table_name)s (
|
||||
id int,
|
||||
bool_col boolean,
|
||||
tinyint_col tinyint,
|
||||
smallint_col smallint,
|
||||
int_col int,
|
||||
bigint_col bigint,
|
||||
float_col float,
|
||||
double_col double,
|
||||
date_string_col string,
|
||||
string_col string,
|
||||
timestamp_col timestamp)
|
||||
partitioned by (year int, month int)
|
||||
row format delimited fields terminated by ',' escaped by '\\'
|
||||
stored as %(file_format)s
|
||||
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/%(table_name)s';
|
||||
|
||||
ALTER TABLE %(table_name)s ADD PARTITION (year=2009, month=1);
|
||||
ALTER TABLE %(table_name)s ADD PARTITION (year=2009, month=2);
|
||||
ALTER TABLE %(table_name)s ADD PARTITION (year=2009, month=3);
|
||||
|
||||
ALTER TABLE %(table_name)s PARTITION (year=2009, month=1) SET FILEFORMAT TEXTFILE;
|
||||
ALTER TABLE %(table_name)s PARTITION (year=2009, month=1) SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
|
||||
ALTER TABLE %(table_name)s PARTITION(year=2009, month=2) SET SERDEPROPERTIES('field.delim'='\001');
|
||||
ALTER TABLE %(table_name)s PARTITION (year=2009, month=2) SET FILEFORMAT SEQUENCEFILE;
|
||||
----
|
||||
----
|
||||
====
|
||||
functional
|
||||
----
|
||||
stringpartitionkey
|
||||
----
|
||||
-- Regression for IMP-163, failure to load tables partitioned by string column
|
||||
|
||||
Reference in New Issue
Block a user