mirror of
https://github.com/apache/impala.git
synced 2026-01-02 12:00:33 -05:00
Fix a bug in which Impala only reads the first stream of a multi-stream bz2/gzip file. Changes the bz2 decoder to read the file in a streaming fashion rather than reading the entire file into memory before it can be decompressed. Change-Id: Icbe617d03a69953f0bf3aa0f7c30d34bc612f9f8 (cherry picked from commit b6d0b4e059329633dc50f1f73ebe35b7ac317a8e) Reviewed-on: http://gerrit.cloudera.org:8080/2219 Reviewed-by: Juan Yu <jyu@cloudera.com> Tested-by: Internal Jenkins
196 lines
8.4 KiB
Plaintext
196 lines
8.4 KiB
Plaintext
====
|
|
---- QUERY
|
|
## TODO: IMPALA-1862: Invalid bool value not reported as a scanner error
|
|
##
|
|
## TODO: the error info should be sufficient to pinpoint the data location: filename and
|
|
## offset
|
|
## TODO: printing the entire record will break column-level security (when it is
|
|
## implemented).
|
|
#select id, bool_col, tinyint_col, smallint_col from alltypeserror
|
|
#---- ERRORS
|
|
#Error converting column: 3 TO SMALLINT (Data is: abc3)
|
|
#file: hdfs://regex:.$
|
|
#record: 23,false,3,abc3,3,30,3.000000,30.300000,03/01/09,3,2020-10-10 60:10:10.123
|
|
#Error converting column: 2 TO TINYINT (Data is: abc7)
|
|
#file: hdfs://regex:.$
|
|
#record: 27,false,abc7,7,7,70,7.000000,70.700000,03/01/09,7,2020-10-10 10:10:10.123
|
|
#Error converting column: 2 TO TINYINT (Data is: err30)
|
|
#Error converting column: 3 TO SMALLINT (Data is: err30)
|
|
#file: hdfs://regex:.$
|
|
#record: 30,t\rue,err30,err30,err30,err300,err30..000000,err300.900000,01/01/10,10,0000-01-01 00:00:00
|
|
#Error converting column: 2 TO TINYINT (Data is: xyz5)
|
|
#file: hdfs://regex:.$
|
|
#record: 15,false,xyz5,5,5,50,5.000000,50.500000,02/01/09,5,0009-01-01 00:00:00
|
|
#Error converting column: 1 TO BOOLEAN (Data is: errfalse)
|
|
#file: hdfs://regex:.$
|
|
#record: 1,errfalse,,1,1,10,1.000000,10.100000,01/01/09,1,1999-10-10
|
|
#Error converting column: 2 TO TINYINT (Data is: err2)
|
|
#file: hdfs://regex:.$
|
|
#record: 2,true,err2,,2,20,2.000000,20.200000,01/01/09,2,1999-10-10 90:10:10
|
|
#Error converting column: 3 TO SMALLINT (Data is: err3)
|
|
#file: hdfs://regex:.$
|
|
#record: 3,false,3,err3,,30,3.000000,30.300000,01/01/09,3,2002-14-10 00:00:00
|
|
#Error converting column: 1 TO BOOLEAN (Data is: errtrue)
|
|
#Error converting column: 2 TO TINYINT (Data is: err9)
|
|
#Error converting column: 3 TO SMALLINT (Data is: err9)
|
|
#file: hdfs://regex:.$
|
|
#record: 9,errtrue,err9,err9,err9,err90,err9.000000,err90.900000,01/01/09,9,0000-01-01 00:00:00
|
|
#
|
|
#---- RESULTS
|
|
#0,NULL,NULL,0
|
|
#1,NULL,NULL,1
|
|
#10,NULL,NULL,NULL
|
|
#11,false,NULL,NULL
|
|
#12,true,2,NULL
|
|
#13,false,3,3
|
|
#14,true,4,4
|
|
#15,false,NULL,5
|
|
#16,NULL,NULL,NULL
|
|
#17,false,7,7
|
|
#18,true,8,8
|
|
#19,false,9,9
|
|
#2,true,NULL,NULL
|
|
#20,true,0,0
|
|
#21,false,1,1
|
|
#22,true,2,2
|
|
#23,false,3,NULL
|
|
#24,true,4,4
|
|
#25,false,5,5
|
|
#26,true,6,6
|
|
#27,false,NULL,7
|
|
#28,true,8,8
|
|
#29,false,9,9
|
|
#3,false,3,NULL
|
|
#30,NULL,NULL,NULL
|
|
#4,true,4,4
|
|
#5,false,5,5
|
|
#6,true,6,6
|
|
#7,NULL,NULL,7
|
|
#8,false,NULL,NULL
|
|
#9,NULL,NULL,NULL
|
|
#---- TYPES
|
|
#int, boolean, tinyint, smallint
|
|
#====
|
|
#---- QUERY
|
|
select count(*) from functional_text_lzo.bad_text_lzo
|
|
---- ERRORS
|
|
Blocksize: 536870911 is greater than LZO_MAX_BLOCK_SIZE: 67108864
|
|
---- RESULTS
|
|
5141
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(field) from functional_text_lzo.bad_text_lzo
|
|
---- ERRORS
|
|
Blocksize: 536870911 is greater than LZO_MAX_BLOCK_SIZE: 67108864
|
|
---- RESULTS
|
|
5141
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select * from alltypeserrornonulls
|
|
---- ERRORS
|
|
|
|
Error converting column: 3 TO SMALLINT (Data is: abc3)
|
|
file: hdfs://regex:.$
|
|
record: 23,false,3,abc3,3,30,3.000000,30.300000,03/01/09,3,2012-03-22 11:20:01.123
|
|
Error converting column: 4 TO INT (Data is: abc5)
|
|
Error converting column: 10 TO TIMESTAMP (Data is: 2012-Mar-22 11:20:01.123)
|
|
file: hdfs://regex:.$
|
|
record: 25,false,5,5,abc5,50,5.000000,50.500000,03/01/09,5,2012-Mar-22 11:20:01.123
|
|
Error converting column: 2 TO TINYINT (Data is: abc7)
|
|
file: hdfs://regex:.$
|
|
record: 27,false,abc7,7,7,70,7.000000,70.700000,03/01/09,7,2012-03-22 11:20:01.123
|
|
Error converting column: 10 TO TIMESTAMP (Data is: 11:20:01.123 2012-03-22 )
|
|
file: hdfs://regex:.$
|
|
record: 28,true,8,8,8,80,8.000000,80.800000,03/01/09,8,11:20:01.123 2012-03-22
|
|
Error converting column: 4 TO INT (Data is: abc9)
|
|
file: hdfs://regex:.$
|
|
record: 29,false,9,9,abc9,90,9.000000,90.900000,03/01/09,9,2012-03-22
|
|
Error converting column: 6 TO FLOAT (Data is: xyz3.000000)
|
|
Error converting column: 7 TO DOUBLE (Data is: xyz30.300000)
|
|
file: hdfs://regex:.$
|
|
record: 13,false,3,3,3,30,xyz3.000000,xyz30.300000,02/01/09,3,2012-03-22 11:20:01.123
|
|
Error converting column: 2 TO TINYINT (Data is: xyz5)
|
|
file: hdfs://regex:.$
|
|
record: 15,false,xyz5,5,5,50,5.000000,50.500000,02/01/09,5,2012-03-22 11:20:01.123
|
|
Error converting column: 7 TO DOUBLE (Data is: xyz70.700000)
|
|
file: hdfs://regex:.$
|
|
record: 17,false,7,7,7,70,7.000000,xyz70.700000,02/01/09,7,2012-03-22 11:20:01.123
|
|
Error converting column: 10 TO TIMESTAMP (Data is: 123456)
|
|
file: hdfs://regex:.$
|
|
record: 0,true,0,0,0,0,0.000000,0.000000,01/01/09,0,123456
|
|
Error converting column: 1 TO BOOLEAN (Data is: errfalse)
|
|
Error converting column: 10 TO TIMESTAMP (Data is: 1990-00-01 10:10:10)
|
|
file: hdfs://regex:.$
|
|
record: 1,errfalse,1,1,1,10,1.000000,10.100000,01/01/09,1,1990-00-01 10:10:10
|
|
Error converting column: 2 TO TINYINT (Data is: err2)
|
|
file: hdfs://regex:.$
|
|
record: 2,true,err2,2,2,20,2.000000,20.200000,01/01/09,2,2012-03-22 11:20:01.123
|
|
Error converting column: 3 TO SMALLINT (Data is: err3)
|
|
file: hdfs://regex:.$
|
|
record: 3,false,3,err3,3,30,3.000000,30.300000,01/01/09,3,2012-03-22 11:20:01.123
|
|
Error converting column: 4 TO INT (Data is: err4)
|
|
file: hdfs://regex:.$
|
|
record: 4,true,4,4,err4,40,4.000000,40.400000,01/01/09,4,2012-03-22 11:20:01.123
|
|
Error converting column: 5 TO BIGINT (Data is: err50)
|
|
file: hdfs://regex:.$
|
|
record: 5,false,5,5,5,err50,5.000000,50.500000,01/01/09,5,2012-03-22 11:20:01.123
|
|
Error converting column: 6 TO FLOAT (Data is: err6.000000)
|
|
file: hdfs://regex:.$
|
|
record: 6,true,6,6,6,60,err6.000000,60.600000,01/01/09,6,2012-03-22 11:20:01.123
|
|
Error converting column: 7 TO DOUBLE (Data is: err70.700000)
|
|
file: hdfs://regex:.$
|
|
record: 7,false,7,7,7,70,7.000000,err70.700000,01/01/09,7,2012-03-22 11:20:01.123
|
|
Error converting column: 1 TO BOOLEAN (Data is: errtrue)
|
|
Error converting column: 2 TO TINYINT (Data is: err9)
|
|
Error converting column: 3 TO SMALLINT (Data is: err9)
|
|
Error converting column: 4 TO INT (Data is: err9)
|
|
Error converting column: 5 TO BIGINT (Data is: err90)
|
|
Error converting column: 6 TO FLOAT (Data is: err9.000000)
|
|
Error converting column: 7 TO DOUBLE (Data is: err90.900000)
|
|
file: hdfs://regex:.$
|
|
record: 9,errtrue,err9,err9,err9,err90,err9.000000,err90.900000,01/01/09,9,2012-03-22 11:20:01.123
|
|
|
|
---- RESULTS
|
|
0,true,0,0,0,0,0,0,'01/01/09','0',NULL,2009,1
|
|
1,NULL,1,1,1,10,1,10.1,'01/01/09','1',NULL,2009,1
|
|
2,true,NULL,2,2,20,2,20.2,'01/01/09','2',2012-03-22 11:20:01.123000000,2009,1
|
|
3,false,3,NULL,3,30,3,30.3,'01/01/09','3',2012-03-22 11:20:01.123000000,2009,1
|
|
4,true,4,4,NULL,40,4,40.4,'01/01/09','4',2012-03-22 11:20:01.123000000,2009,1
|
|
5,false,5,5,5,NULL,5,50.5,'01/01/09','5',2012-03-22 11:20:01.123000000,2009,1
|
|
6,true,6,6,6,60,NULL,60.6,'01/01/09','6',2012-03-22 11:20:01.123000000,2009,1
|
|
7,false,7,7,7,70,7,NULL,'01/01/09','7',2012-03-22 11:20:01.123000000,2009,1
|
|
8,false,8,8,8,80,8,80.8,'01/01/09','8',2012-03-22 11:20:01.123000000,2009,1
|
|
9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/09','9',2012-03-22 11:20:01.123000000,2009,1
|
|
10,true,0,0,0,0,0,0,'02/01/09','0',2012-03-22 11:20:01.123000000,2009,2
|
|
11,false,1,1,1,10,1,10.1,'02/01/09','1',2012-03-22 11:20:01.123000000,2009,2
|
|
12,true,2,2,2,20,2,20.2,'02/01/09','2',2012-03-22 11:20:01.123000000,2009,2
|
|
13,false,3,3,3,30,NULL,NULL,'02/01/09','3',2012-03-22 11:20:01.123000000,2009,2
|
|
14,true,4,4,4,40,4,40.4,'02/01/09','4',2012-03-22 11:20:01.123000000,2009,2
|
|
15,false,NULL,5,5,50,5,50.5,'02/01/09','5',2012-03-22 11:20:01.123000000,2009,2
|
|
16,true,6,6,6,60,6,60.6,'02/01/09','6',2012-03-22 11:20:01.123000000,2009,2
|
|
17,false,7,7,7,70,7,NULL,'02/01/09','7',2012-03-22 11:20:01.123000000,2009,2
|
|
18,true,8,8,8,80,8,80.8,'02/01/09','8',2012-03-22 11:20:01.123000000,2009,2
|
|
19,false,9,9,9,90,9,90.90000000000001,'02/01/09','9',2012-03-22 11:20:01.123000000,2009,2
|
|
20,true,0,0,0,0,0,0,'03/01/09','0',2012-03-22 11:20:01.123000000,2009,3
|
|
21,false,1,1,1,10,1,10.1,'03/01/09','1',2012-03-22 11:20:01.123000000,2009,3
|
|
22,true,2,2,2,20,2,20.2,'03/01/09','2',2012-03-22 11:20:01.123000000,2009,3
|
|
23,false,3,NULL,3,30,3,30.3,'03/01/09','3',2012-03-22 11:20:01.123000000,2009,3
|
|
24,true,4,4,4,40,4,40.4,'03/01/09','4',2012-03-22 11:20:01.123000000,2009,3
|
|
25,false,5,5,NULL,50,5,50.5,'03/01/09','5',NULL,2009,3
|
|
26,true,6,6,6,60,6,60.6,'03/01/09','6',2012-03-22 11:20:01.123000000,2009,3
|
|
27,false,NULL,7,7,70,7,70.7,'03/01/09','7',2012-03-22 11:20:01.123000000,2009,3
|
|
28,true,8,8,8,80,8,80.8,'03/01/09','8',NULL,2009,3
|
|
29,false,9,9,NULL,90,9,90.90000000000001,'03/01/09','9',2012-03-22 00:00:00,2009,3
|
|
---- TYPES
|
|
int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp, int, int
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_text_gzip.bad_text_gzip
|
|
---- CATCH
|
|
Unexpected end of compressed file. File may be truncated.
|
|
====
|