Files
impala/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-scan-node-errors.test
Juan Yu c9b33ddf63 IMPALA-1886/IMPALA-2154: Add support for multi-stream bz2/gzip compressed files.
Fix a bug in which Impala only reads the first stream
of a multi-stream bz2/gzip file.
Changes the bz2 decoder to read the file in a streaming
fashion rather than reading the entire file into memory
before it can be decompressed.

Change-Id: Icbe617d03a69953f0bf3aa0f7c30d34bc612f9f8
(cherry picked from commit b6d0b4e059329633dc50f1f73ebe35b7ac317a8e)
Reviewed-on: http://gerrit.cloudera.org:8080/2219
Reviewed-by: Juan Yu <jyu@cloudera.com>
Tested-by: Internal Jenkins
2016-02-28 21:31:37 -08:00

196 lines
8.4 KiB
Plaintext

====
---- QUERY
## TODO: IMPALA-1862: Invalid bool value not reported as a scanner error
##
## TODO: the error info should be sufficient to pinpoint the data location: filename and
## offset
## TODO: printing the entire record will break column-level security (when it is
## implemented).
#select id, bool_col, tinyint_col, smallint_col from alltypeserror
#---- ERRORS
#Error converting column: 3 TO SMALLINT (Data is: abc3)
#file: hdfs://regex:.$
#record: 23,false,3,abc3,3,30,3.000000,30.300000,03/01/09,3,2020-10-10 60:10:10.123
#Error converting column: 2 TO TINYINT (Data is: abc7)
#file: hdfs://regex:.$
#record: 27,false,abc7,7,7,70,7.000000,70.700000,03/01/09,7,2020-10-10 10:10:10.123
#Error converting column: 2 TO TINYINT (Data is: err30)
#Error converting column: 3 TO SMALLINT (Data is: err30)
#file: hdfs://regex:.$
#record: 30,t\rue,err30,err30,err30,err300,err30..000000,err300.900000,01/01/10,10,0000-01-01 00:00:00
#Error converting column: 2 TO TINYINT (Data is: xyz5)
#file: hdfs://regex:.$
#record: 15,false,xyz5,5,5,50,5.000000,50.500000,02/01/09,5,0009-01-01 00:00:00
#Error converting column: 1 TO BOOLEAN (Data is: errfalse)
#file: hdfs://regex:.$
#record: 1,errfalse,,1,1,10,1.000000,10.100000,01/01/09,1,1999-10-10
#Error converting column: 2 TO TINYINT (Data is: err2)
#file: hdfs://regex:.$
#record: 2,true,err2,,2,20,2.000000,20.200000,01/01/09,2,1999-10-10 90:10:10
#Error converting column: 3 TO SMALLINT (Data is: err3)
#file: hdfs://regex:.$
#record: 3,false,3,err3,,30,3.000000,30.300000,01/01/09,3,2002-14-10 00:00:00
#Error converting column: 1 TO BOOLEAN (Data is: errtrue)
#Error converting column: 2 TO TINYINT (Data is: err9)
#Error converting column: 3 TO SMALLINT (Data is: err9)
#file: hdfs://regex:.$
#record: 9,errtrue,err9,err9,err9,err90,err9.000000,err90.900000,01/01/09,9,0000-01-01 00:00:00
#
#---- RESULTS
#0,NULL,NULL,0
#1,NULL,NULL,1
#10,NULL,NULL,NULL
#11,false,NULL,NULL
#12,true,2,NULL
#13,false,3,3
#14,true,4,4
#15,false,NULL,5
#16,NULL,NULL,NULL
#17,false,7,7
#18,true,8,8
#19,false,9,9
#2,true,NULL,NULL
#20,true,0,0
#21,false,1,1
#22,true,2,2
#23,false,3,NULL
#24,true,4,4
#25,false,5,5
#26,true,6,6
#27,false,NULL,7
#28,true,8,8
#29,false,9,9
#3,false,3,NULL
#30,NULL,NULL,NULL
#4,true,4,4
#5,false,5,5
#6,true,6,6
#7,NULL,NULL,7
#8,false,NULL,NULL
#9,NULL,NULL,NULL
#---- TYPES
#int, boolean, tinyint, smallint
#====
#---- QUERY
## Active test case. The test case above is commented out in full (including its
## closing '====' and the following '---- QUERY' line), so this query belongs to the
## uncommented '---- QUERY' header that precedes the disabled block.
## Row count over functional_text_lzo.bad_text_lzo: the scan is expected to report the
## oversized-blocksize error listed under ERRORS and still return the count listed
## under RESULTS.
select count(*) from functional_text_lzo.bad_text_lzo
---- ERRORS
Blocksize: 536870911 is greater than LZO_MAX_BLOCK_SIZE: 67108864
---- RESULTS
5141
---- TYPES
bigint
====
---- QUERY
## Same table as the count(*) case, but counting a named column. The expected error
## and the expected count are identical to the count(*) case.
## NOTE(review): presumably this exercises the path where a column is actually
## materialized by the scanner -- confirm against the scanner implementation.
select count(field) from functional_text_lzo.bad_text_lzo
---- ERRORS
Blocksize: 536870911 is greater than LZO_MAX_BLOCK_SIZE: 67108864
---- RESULTS
5141
---- TYPES
bigint
====
---- QUERY
## Full-row scan of alltypeserrornonulls.
## ERRORS: for each bad record, one "Error converting column" line per column that
## failed conversion, followed by the file and the raw record text.
## RESULTS: all 30 rows (ids 0-29) are still returned; each column that failed
## conversion appears as NULL in its row (e.g. id 9 has columns 1-7 NULL).
## A date-only timestamp (id 29, "2012-03-22") is not reported as an error and comes
## back as midnight of that day.
## NOTE(review): the two trailing int columns (2009,<n>) are presumably the table's
## partition keys -- confirm against the table definition.
select * from alltypeserrornonulls
---- ERRORS
Error converting column: 3 TO SMALLINT (Data is: abc3)
file: hdfs://regex:.$
record: 23,false,3,abc3,3,30,3.000000,30.300000,03/01/09,3,2012-03-22 11:20:01.123
Error converting column: 4 TO INT (Data is: abc5)
Error converting column: 10 TO TIMESTAMP (Data is: 2012-Mar-22 11:20:01.123)
file: hdfs://regex:.$
record: 25,false,5,5,abc5,50,5.000000,50.500000,03/01/09,5,2012-Mar-22 11:20:01.123
Error converting column: 2 TO TINYINT (Data is: abc7)
file: hdfs://regex:.$
record: 27,false,abc7,7,7,70,7.000000,70.700000,03/01/09,7,2012-03-22 11:20:01.123
Error converting column: 10 TO TIMESTAMP (Data is: 11:20:01.123 2012-03-22 )
file: hdfs://regex:.$
record: 28,true,8,8,8,80,8.000000,80.800000,03/01/09,8,11:20:01.123 2012-03-22
Error converting column: 4 TO INT (Data is: abc9)
file: hdfs://regex:.$
record: 29,false,9,9,abc9,90,9.000000,90.900000,03/01/09,9,2012-03-22
Error converting column: 6 TO FLOAT (Data is: xyz3.000000)
Error converting column: 7 TO DOUBLE (Data is: xyz30.300000)
file: hdfs://regex:.$
record: 13,false,3,3,3,30,xyz3.000000,xyz30.300000,02/01/09,3,2012-03-22 11:20:01.123
Error converting column: 2 TO TINYINT (Data is: xyz5)
file: hdfs://regex:.$
record: 15,false,xyz5,5,5,50,5.000000,50.500000,02/01/09,5,2012-03-22 11:20:01.123
Error converting column: 7 TO DOUBLE (Data is: xyz70.700000)
file: hdfs://regex:.$
record: 17,false,7,7,7,70,7.000000,xyz70.700000,02/01/09,7,2012-03-22 11:20:01.123
Error converting column: 10 TO TIMESTAMP (Data is: 123456)
file: hdfs://regex:.$
record: 0,true,0,0,0,0,0.000000,0.000000,01/01/09,0,123456
Error converting column: 1 TO BOOLEAN (Data is: errfalse)
Error converting column: 10 TO TIMESTAMP (Data is: 1990-00-01 10:10:10)
file: hdfs://regex:.$
record: 1,errfalse,1,1,1,10,1.000000,10.100000,01/01/09,1,1990-00-01 10:10:10
Error converting column: 2 TO TINYINT (Data is: err2)
file: hdfs://regex:.$
record: 2,true,err2,2,2,20,2.000000,20.200000,01/01/09,2,2012-03-22 11:20:01.123
Error converting column: 3 TO SMALLINT (Data is: err3)
file: hdfs://regex:.$
record: 3,false,3,err3,3,30,3.000000,30.300000,01/01/09,3,2012-03-22 11:20:01.123
Error converting column: 4 TO INT (Data is: err4)
file: hdfs://regex:.$
record: 4,true,4,4,err4,40,4.000000,40.400000,01/01/09,4,2012-03-22 11:20:01.123
Error converting column: 5 TO BIGINT (Data is: err50)
file: hdfs://regex:.$
record: 5,false,5,5,5,err50,5.000000,50.500000,01/01/09,5,2012-03-22 11:20:01.123
Error converting column: 6 TO FLOAT (Data is: err6.000000)
file: hdfs://regex:.$
record: 6,true,6,6,6,60,err6.000000,60.600000,01/01/09,6,2012-03-22 11:20:01.123
Error converting column: 7 TO DOUBLE (Data is: err70.700000)
file: hdfs://regex:.$
record: 7,false,7,7,7,70,7.000000,err70.700000,01/01/09,7,2012-03-22 11:20:01.123
Error converting column: 1 TO BOOLEAN (Data is: errtrue)
Error converting column: 2 TO TINYINT (Data is: err9)
Error converting column: 3 TO SMALLINT (Data is: err9)
Error converting column: 4 TO INT (Data is: err9)
Error converting column: 5 TO BIGINT (Data is: err90)
Error converting column: 6 TO FLOAT (Data is: err9.000000)
Error converting column: 7 TO DOUBLE (Data is: err90.900000)
file: hdfs://regex:.$
record: 9,errtrue,err9,err9,err9,err90,err9.000000,err90.900000,01/01/09,9,2012-03-22 11:20:01.123
---- RESULTS
0,true,0,0,0,0,0,0,'01/01/09','0',NULL,2009,1
1,NULL,1,1,1,10,1,10.1,'01/01/09','1',NULL,2009,1
2,true,NULL,2,2,20,2,20.2,'01/01/09','2',2012-03-22 11:20:01.123000000,2009,1
3,false,3,NULL,3,30,3,30.3,'01/01/09','3',2012-03-22 11:20:01.123000000,2009,1
4,true,4,4,NULL,40,4,40.4,'01/01/09','4',2012-03-22 11:20:01.123000000,2009,1
5,false,5,5,5,NULL,5,50.5,'01/01/09','5',2012-03-22 11:20:01.123000000,2009,1
6,true,6,6,6,60,NULL,60.6,'01/01/09','6',2012-03-22 11:20:01.123000000,2009,1
7,false,7,7,7,70,7,NULL,'01/01/09','7',2012-03-22 11:20:01.123000000,2009,1
8,false,8,8,8,80,8,80.8,'01/01/09','8',2012-03-22 11:20:01.123000000,2009,1
9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/09','9',2012-03-22 11:20:01.123000000,2009,1
10,true,0,0,0,0,0,0,'02/01/09','0',2012-03-22 11:20:01.123000000,2009,2
11,false,1,1,1,10,1,10.1,'02/01/09','1',2012-03-22 11:20:01.123000000,2009,2
12,true,2,2,2,20,2,20.2,'02/01/09','2',2012-03-22 11:20:01.123000000,2009,2
13,false,3,3,3,30,NULL,NULL,'02/01/09','3',2012-03-22 11:20:01.123000000,2009,2
14,true,4,4,4,40,4,40.4,'02/01/09','4',2012-03-22 11:20:01.123000000,2009,2
15,false,NULL,5,5,50,5,50.5,'02/01/09','5',2012-03-22 11:20:01.123000000,2009,2
16,true,6,6,6,60,6,60.6,'02/01/09','6',2012-03-22 11:20:01.123000000,2009,2
17,false,7,7,7,70,7,NULL,'02/01/09','7',2012-03-22 11:20:01.123000000,2009,2
18,true,8,8,8,80,8,80.8,'02/01/09','8',2012-03-22 11:20:01.123000000,2009,2
19,false,9,9,9,90,9,90.90000000000001,'02/01/09','9',2012-03-22 11:20:01.123000000,2009,2
20,true,0,0,0,0,0,0,'03/01/09','0',2012-03-22 11:20:01.123000000,2009,3
21,false,1,1,1,10,1,10.1,'03/01/09','1',2012-03-22 11:20:01.123000000,2009,3
22,true,2,2,2,20,2,20.2,'03/01/09','2',2012-03-22 11:20:01.123000000,2009,3
23,false,3,NULL,3,30,3,30.3,'03/01/09','3',2012-03-22 11:20:01.123000000,2009,3
24,true,4,4,4,40,4,40.4,'03/01/09','4',2012-03-22 11:20:01.123000000,2009,3
25,false,5,5,NULL,50,5,50.5,'03/01/09','5',NULL,2009,3
26,true,6,6,6,60,6,60.6,'03/01/09','6',2012-03-22 11:20:01.123000000,2009,3
27,false,NULL,7,7,70,7,70.7,'03/01/09','7',2012-03-22 11:20:01.123000000,2009,3
28,true,8,8,8,80,8,80.8,'03/01/09','8',NULL,2009,3
29,false,9,9,NULL,90,9,90.90000000000001,'03/01/09','9',2012-03-22 00:00:00,2009,3
---- TYPES
int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp, int, int
====
---- QUERY
## Row count over functional_text_gzip.bad_text_gzip. This case has no RESULTS
## section; the CATCH section holds the message of the failure the query is expected
## to produce (a truncated / prematurely ended compressed file).
select count(*) from functional_text_gzip.bad_text_gzip
---- CATCH
Unexpected end of compressed file. File may be truncated.
====