Files
impala/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-scan-node-errors.test
Henry Robinson 34b5f1c416 IMPALA-(3895,3859): Don't log file data on parse errors
Logging file or table data is a bad idea, and doing it by default is
particularly bad. This patch changes HdfsScanNode::LogRowParseError() to
log a file and offset only.

Testing: See rewritten tests.

To support testing this change, we also fix IMPALA-3895, by introducing
a canonical string __HDFS_FILENAME__ that all Hadoop filenames in the ERROR
output are replaced with before comparing with the expected
results. This fixes a number of issues with the old way of matching
filenames which purported to be a regex, but really wasn't. In
particular, we can now match the rest of an ERROR line after the
filename, which was not possible before.

In some cases, we don't want to substitute filenames because the ERRORS
section needs to match a very specific output. In that case we can write:

$NAMENODE/<filename>

and this patch will not perform _any_ filename substitutions on ERROR
sections that contain the $NAMENODE string.

Finally, this patch fixes a bug where a test that had an ERRORS section
but no RESULTS section would silently pass without testing anything.

Change-Id: I5a604f8784a9ff7b4bf878f82ee7f56697df3272
Reviewed-on: http://gerrit.cloudera.org:8080/4020
Reviewed-by: Henry Robinson <henry@cloudera.com>
Tested-by: Internal Jenkins
2016-08-25 10:20:36 +00:00

169 lines
6.5 KiB
Plaintext

====
---- QUERY
## The alltypeserror test below is disabled: every one of its lines, including its own
## '====' and the following '---- QUERY' delimiter, is prefixed with a single '#', so it
## stays inside this QUERY section as comment text. The only active query in this
## section is the count(*) on bad_text_lzo at the bottom.
##
## TODO: IMPALA-1862: Invalid bool value not reported as a scanner error
##
## TODO: the error info should be sufficient to pinpoint the data location: filename and
## offset
## TODO: printing the entire record will break column level security (when it is
## implemented).
#select id, bool_col, tinyint_col, smallint_col from alltypeserror
#---- ERRORS
#Error converting column: 3 to SMALLINT
#file: hdfs://regex:.$
#Error converting column: 2 to TINYINT
#file: hdfs://regex:.$
#Error converting column: 2 to TINYINT
#Error converting column: 3 to SMALLINT
#file: hdfs://regex:.$
#Error converting column: 2 to TINYINT
#file: hdfs://regex:.$
#Error converting column: 1 to BOOLEAN
#file: hdfs://regex:.$
#Error converting column: 2 to TINYINT
#file: hdfs://regex:.$
#Error converting column: 3 to SMALLINT
#file: hdfs://regex:.$
#Error converting column: 1 to BOOLEAN
#Error converting column: 2 to TINYINT
#Error converting column: 3 to SMALLINT
#file: hdfs://regex:.$
#
#---- RESULTS
#0,NULL,NULL,0
#1,NULL,NULL,1
#10,NULL,NULL,NULL
#11,false,NULL,NULL
#12,true,2,NULL
#13,false,3,3
#14,true,4,4
#15,false,NULL,5
#16,NULL,NULL,NULL
#17,false,7,7
#18,true,8,8
#19,false,9,9
#2,true,NULL,NULL
#20,true,0,0
#21,false,1,1
#22,true,2,2
#23,false,3,NULL
#24,true,4,4
#25,false,5,5
#26,true,6,6
#27,false,NULL,7
#28,true,8,8
#29,false,9,9
#3,false,3,NULL
#30,NULL,NULL,NULL
#4,true,4,4
#5,false,5,5
#6,true,6,6
#7,NULL,NULL,7
#8,false,NULL,NULL
#9,NULL,NULL,NULL
#---- TYPES
#int, boolean, tinyint, smallint
#====
#---- QUERY
## Active query: the scan is expected to report an oversized-LZO-block error (see ERRORS)
## and still return a row count (see RESULTS).
select count(*) from functional_text_lzo.bad_text_lzo
---- ERRORS
Blocksize: 536870911 is greater than LZO_MAX_BLOCK_SIZE: 67108864
---- RESULTS
5141
---- TYPES
bigint
====
---- QUERY
## Counts a named column instead of '*'. The expected error and count are identical to
## the count(*) query on the same table earlier in this file.
## NOTE(review): presumably this exercises the scan path that materializes a column
## rather than just counting rows -- confirm.
select count(field) from functional_text_lzo.bad_text_lzo
---- ERRORS
Blocksize: 536870911 is greater than LZO_MAX_BLOCK_SIZE: 67108864
---- RESULTS
5141
---- TYPES
bigint
====
---- QUERY
## Scans every column of alltypeserrornonulls. The ERRORS section lists groups of one or
## more 'Error converting column: N to TYPE' lines, each group followed by a row_regex
## line that matches only the file name and offset of the offending row (no row data).
## NOTE(review): the NULL values in RESULTS presumably correspond to the columns that
## failed conversion -- confirm against the table's source data.
select * from alltypeserrornonulls
---- ERRORS
Error converting column: 3 to SMALLINT
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 4 to INT
Error converting column: 10 to TIMESTAMP
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 2 to TINYINT
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 10 to TIMESTAMP
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 4 to INT
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 6 to FLOAT
Error converting column: 7 to DOUBLE
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 2 to TINYINT
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 7 to DOUBLE
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 10 to TIMESTAMP
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 1 to BOOLEAN
Error converting column: 10 to TIMESTAMP
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 2 to TINYINT
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 3 to SMALLINT
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 4 to INT
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 5 to BIGINT
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 6 to FLOAT
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 7 to DOUBLE
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
Error converting column: 1 to BOOLEAN
Error converting column: 2 to TINYINT
Error converting column: 3 to SMALLINT
Error converting column: 4 to INT
Error converting column: 5 to BIGINT
Error converting column: 6 to FLOAT
Error converting column: 7 to DOUBLE
row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
---- RESULTS
0,true,0,0,0,0,0,0,'01/01/09','0',NULL,2009,1
1,NULL,1,1,1,10,1,10.1,'01/01/09','1',NULL,2009,1
2,true,NULL,2,2,20,2,20.2,'01/01/09','2',2012-03-22 11:20:01.123000000,2009,1
3,false,3,NULL,3,30,3,30.3,'01/01/09','3',2012-03-22 11:20:01.123000000,2009,1
4,true,4,4,NULL,40,4,40.4,'01/01/09','4',2012-03-22 11:20:01.123000000,2009,1
5,false,5,5,5,NULL,5,50.5,'01/01/09','5',2012-03-22 11:20:01.123000000,2009,1
6,true,6,6,6,60,NULL,60.6,'01/01/09','6',2012-03-22 11:20:01.123000000,2009,1
7,false,7,7,7,70,7,NULL,'01/01/09','7',2012-03-22 11:20:01.123000000,2009,1
8,false,8,8,8,80,8,80.8,'01/01/09','8',2012-03-22 11:20:01.123000000,2009,1
9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/09','9',2012-03-22 11:20:01.123000000,2009,1
10,true,0,0,0,0,0,0,'02/01/09','0',2012-03-22 11:20:01.123000000,2009,2
11,false,1,1,1,10,1,10.1,'02/01/09','1',2012-03-22 11:20:01.123000000,2009,2
12,true,2,2,2,20,2,20.2,'02/01/09','2',2012-03-22 11:20:01.123000000,2009,2
13,false,3,3,3,30,NULL,NULL,'02/01/09','3',2012-03-22 11:20:01.123000000,2009,2
14,true,4,4,4,40,4,40.4,'02/01/09','4',2012-03-22 11:20:01.123000000,2009,2
15,false,NULL,5,5,50,5,50.5,'02/01/09','5',2012-03-22 11:20:01.123000000,2009,2
16,true,6,6,6,60,6,60.6,'02/01/09','6',2012-03-22 11:20:01.123000000,2009,2
17,false,7,7,7,70,7,NULL,'02/01/09','7',2012-03-22 11:20:01.123000000,2009,2
18,true,8,8,8,80,8,80.8,'02/01/09','8',2012-03-22 11:20:01.123000000,2009,2
19,false,9,9,9,90,9,90.90000000000001,'02/01/09','9',2012-03-22 11:20:01.123000000,2009,2
20,true,0,0,0,0,0,0,'03/01/09','0',2012-03-22 11:20:01.123000000,2009,3
21,false,1,1,1,10,1,10.1,'03/01/09','1',2012-03-22 11:20:01.123000000,2009,3
22,true,2,2,2,20,2,20.2,'03/01/09','2',2012-03-22 11:20:01.123000000,2009,3
23,false,3,NULL,3,30,3,30.3,'03/01/09','3',2012-03-22 11:20:01.123000000,2009,3
24,true,4,4,4,40,4,40.4,'03/01/09','4',2012-03-22 11:20:01.123000000,2009,3
25,false,5,5,NULL,50,5,50.5,'03/01/09','5',NULL,2009,3
26,true,6,6,6,60,6,60.6,'03/01/09','6',2012-03-22 11:20:01.123000000,2009,3
27,false,NULL,7,7,70,7,70.7,'03/01/09','7',2012-03-22 11:20:01.123000000,2009,3
28,true,8,8,8,80,8,80.8,'03/01/09','8',NULL,2009,3
29,false,9,9,NULL,90,9,90.90000000000001,'03/01/09','9',2012-03-22 00:00:00,2009,3
---- TYPES
int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp, int, int
====
---- QUERY
## Unlike the LZO cases earlier in this file, this section has no RESULTS: the query is
## expected to fail, and CATCH holds the error text to match.
## NOTE(review): presumably the backing gzip file is deliberately truncated -- confirm.
select count(*) from functional_text_gzip.bad_text_gzip
---- CATCH
Unexpected end of compressed file. File may be truncated.
====