mirror of
https://github.com/apache/impala.git
synced 2026-01-05 12:01:11 -05:00
Logging file or table data is a bad idea, and doing it by default is particularly bad. This patch changes HdfsScanNode::LogRowParseError() to log a file and offset only. Testing: See rewritten tests. To support testing this change, we also fix IMPALA-3895, by introducing a canonical string __HDFS_FILENAME__ that all Hadoop filenames in the ERROR output are replaced with before comparing with the expected results. This fixes a number of issues with the old way of matching filenames which purported to be a regex, but really wasn't. In particular, we can now match the rest of an ERROR line after the filename, which was not possible before. In some cases, we don't want to substitute filenames because the ERROR output is looking for a very specific output. In that case we can write: $NAMENODE/<filename> and this patch will not perform _any_ filename substitutions on ERROR sections that contain the $NAMENODE string. Finally, this patch fixes a bug where a test that had an ERRORS section but no RESULTS section would silently pass without testing anything. Change-Id: I5a604f8784a9ff7b4bf878f82ee7f56697df3272 Reviewed-on: http://gerrit.cloudera.org:8080/4020 Reviewed-by: Henry Robinson <henry@cloudera.com> Tested-by: Internal Jenkins
169 lines
6.5 KiB
Plaintext
169 lines
6.5 KiB
Plaintext
====
|
|
---- QUERY
|
|
## TODO: IMPALA-1862: Invalid bool value not reported as a scanner error
|
|
##
|
|
## TODO: the error info should be sufficient to pinpoint the data location: filename and
|
|
## offset
|
|
## TODO: printing the entire record will break column-level security (when it is
|
|
## implemented).
|
|
#select id, bool_col, tinyint_col, smallint_col from alltypeserror
|
|
#---- ERRORS
|
|
#Error converting column: 3 to SMALLINT
|
|
#file: hdfs://regex:.$
|
|
#Error converting column: 2 to TINYINT
|
|
#file: hdfs://regex:.$
|
|
#Error converting column: 2 to TINYINT
|
|
#Error converting column: 3 to SMALLINT
|
|
#file: hdfs://regex:.$
|
|
#Error converting column: 2 to TINYINT
|
|
#file: hdfs://regex:.$
|
|
#Error converting column: 1 to BOOLEAN
|
|
#file: hdfs://regex:.$
|
|
#Error converting column: 2 to TINYINT
|
|
#file: hdfs://regex:.$
|
|
#Error converting column: 3 to SMALLINT
|
|
#file: hdfs://regex:.$
|
|
#Error converting column: 1 to BOOLEAN
|
|
#Error converting column: 2 to TINYINT
|
|
#Error converting column: 3 to SMALLINT
|
|
#file: hdfs://regex:.$
|
|
#
|
|
#---- RESULTS
|
|
#0,NULL,NULL,0
|
|
#1,NULL,NULL,1
|
|
#10,NULL,NULL,NULL
|
|
#11,false,NULL,NULL
|
|
#12,true,2,NULL
|
|
#13,false,3,3
|
|
#14,true,4,4
|
|
#15,false,NULL,5
|
|
#16,NULL,NULL,NULL
|
|
#17,false,7,7
|
|
#18,true,8,8
|
|
#19,false,9,9
|
|
#2,true,NULL,NULL
|
|
#20,true,0,0
|
|
#21,false,1,1
|
|
#22,true,2,2
|
|
#23,false,3,NULL
|
|
#24,true,4,4
|
|
#25,false,5,5
|
|
#26,true,6,6
|
|
#27,false,NULL,7
|
|
#28,true,8,8
|
|
#29,false,9,9
|
|
#3,false,3,NULL
|
|
#30,NULL,NULL,NULL
|
|
#4,true,4,4
|
|
#5,false,5,5
|
|
#6,true,6,6
|
|
#7,NULL,NULL,7
|
|
#8,false,NULL,NULL
|
|
#9,NULL,NULL,NULL
|
|
#---- TYPES
|
|
#int, boolean, tinyint, smallint
|
|
#====
|
|
#---- QUERY
|
|
select count(*) from functional_text_lzo.bad_text_lzo
|
|
---- ERRORS
|
|
Blocksize: 536870911 is greater than LZO_MAX_BLOCK_SIZE: 67108864
|
|
---- RESULTS
|
|
5141
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(field) from functional_text_lzo.bad_text_lzo
|
|
---- ERRORS
|
|
Blocksize: 536870911 is greater than LZO_MAX_BLOCK_SIZE: 67108864
|
|
---- RESULTS
|
|
5141
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select * from alltypeserrornonulls
|
|
---- ERRORS
|
|
Error converting column: 3 to SMALLINT
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 4 to INT
|
|
Error converting column: 10 to TIMESTAMP
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 2 to TINYINT
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 10 to TIMESTAMP
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 4 to INT
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 6 to FLOAT
|
|
Error converting column: 7 to DOUBLE
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 2 to TINYINT
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 7 to DOUBLE
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 10 to TIMESTAMP
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 1 to BOOLEAN
|
|
Error converting column: 10 to TIMESTAMP
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 2 to TINYINT
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 3 to SMALLINT
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 4 to INT
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 5 to BIGINT
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 6 to FLOAT
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 7 to DOUBLE
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
Error converting column: 1 to BOOLEAN
|
|
Error converting column: 2 to TINYINT
|
|
Error converting column: 3 to SMALLINT
|
|
Error converting column: 4 to INT
|
|
Error converting column: 5 to BIGINT
|
|
Error converting column: 6 to FLOAT
|
|
Error converting column: 7 to DOUBLE
|
|
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
|
---- RESULTS
|
|
0,true,0,0,0,0,0,0,'01/01/09','0',NULL,2009,1
|
|
1,NULL,1,1,1,10,1,10.1,'01/01/09','1',NULL,2009,1
|
|
2,true,NULL,2,2,20,2,20.2,'01/01/09','2',2012-03-22 11:20:01.123000000,2009,1
|
|
3,false,3,NULL,3,30,3,30.3,'01/01/09','3',2012-03-22 11:20:01.123000000,2009,1
|
|
4,true,4,4,NULL,40,4,40.4,'01/01/09','4',2012-03-22 11:20:01.123000000,2009,1
|
|
5,false,5,5,5,NULL,5,50.5,'01/01/09','5',2012-03-22 11:20:01.123000000,2009,1
|
|
6,true,6,6,6,60,NULL,60.6,'01/01/09','6',2012-03-22 11:20:01.123000000,2009,1
|
|
7,false,7,7,7,70,7,NULL,'01/01/09','7',2012-03-22 11:20:01.123000000,2009,1
|
|
8,false,8,8,8,80,8,80.8,'01/01/09','8',2012-03-22 11:20:01.123000000,2009,1
|
|
9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/09','9',2012-03-22 11:20:01.123000000,2009,1
|
|
10,true,0,0,0,0,0,0,'02/01/09','0',2012-03-22 11:20:01.123000000,2009,2
|
|
11,false,1,1,1,10,1,10.1,'02/01/09','1',2012-03-22 11:20:01.123000000,2009,2
|
|
12,true,2,2,2,20,2,20.2,'02/01/09','2',2012-03-22 11:20:01.123000000,2009,2
|
|
13,false,3,3,3,30,NULL,NULL,'02/01/09','3',2012-03-22 11:20:01.123000000,2009,2
|
|
14,true,4,4,4,40,4,40.4,'02/01/09','4',2012-03-22 11:20:01.123000000,2009,2
|
|
15,false,NULL,5,5,50,5,50.5,'02/01/09','5',2012-03-22 11:20:01.123000000,2009,2
|
|
16,true,6,6,6,60,6,60.6,'02/01/09','6',2012-03-22 11:20:01.123000000,2009,2
|
|
17,false,7,7,7,70,7,NULL,'02/01/09','7',2012-03-22 11:20:01.123000000,2009,2
|
|
18,true,8,8,8,80,8,80.8,'02/01/09','8',2012-03-22 11:20:01.123000000,2009,2
|
|
19,false,9,9,9,90,9,90.90000000000001,'02/01/09','9',2012-03-22 11:20:01.123000000,2009,2
|
|
20,true,0,0,0,0,0,0,'03/01/09','0',2012-03-22 11:20:01.123000000,2009,3
|
|
21,false,1,1,1,10,1,10.1,'03/01/09','1',2012-03-22 11:20:01.123000000,2009,3
|
|
22,true,2,2,2,20,2,20.2,'03/01/09','2',2012-03-22 11:20:01.123000000,2009,3
|
|
23,false,3,NULL,3,30,3,30.3,'03/01/09','3',2012-03-22 11:20:01.123000000,2009,3
|
|
24,true,4,4,4,40,4,40.4,'03/01/09','4',2012-03-22 11:20:01.123000000,2009,3
|
|
25,false,5,5,NULL,50,5,50.5,'03/01/09','5',NULL,2009,3
|
|
26,true,6,6,6,60,6,60.6,'03/01/09','6',2012-03-22 11:20:01.123000000,2009,3
|
|
27,false,NULL,7,7,70,7,70.7,'03/01/09','7',2012-03-22 11:20:01.123000000,2009,3
|
|
28,true,8,8,8,80,8,80.8,'03/01/09','8',NULL,2009,3
|
|
29,false,9,9,NULL,90,9,90.90000000000001,'03/01/09','9',2012-03-22 00:00:00,2009,3
|
|
---- TYPES
|
|
int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp, int, int
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_text_gzip.bad_text_gzip
|
|
---- CATCH
|
|
Unexpected end of compressed file. File may be truncated.
|
|
====
|