mirror of
https://github.com/apache/impala.git
synced 2025-12-30 03:01:44 -05:00
This patch modifies DelimitedTextParser and StringValue to work with data containing null characters by using SSE instructions that take a length, rather than expecting null-terminated strings. It also adds some other minor changes to correctly handle data with nulls and to facilitate testing. I checked the execution time of a count(*) and a select(*) limit 1 query locally, and saw no difference for either text or sequence files. Change-Id: Ia920b35bea7048aa286f39ec83e313c2a39251d1 Reviewed-on: http://gerrit.ent.cloudera.com:8080/2110 Reviewed-by: Skye Wanderman-Milne <skye@cloudera.com> Tested-by: Skye Wanderman-Milne <skye@cloudera.com> Reviewed-on: http://gerrit.ent.cloudera.com:8080/2181
157 lines
2.4 KiB
Plaintext
157 lines
2.4 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Test to select from table with additional columns at the end that are not in the
|
|
# schema and with missing columns
|
|
select * from tblwithraggedcolumns
|
|
---- TYPES
|
|
string, int
|
|
---- RESULTS
|
|
'',NULL
|
|
'',NULL
|
|
'ColumnWithCarriageReturn',123
|
|
'NoDelimiter',0
|
|
'\r\r\n',NULL
|
|
'a',3
|
|
'at16bytes',NULL
|
|
'b',4
|
|
'c',NULL
|
|
'd',NULL
|
|
'foo',2
|
|
'hello',1
|
|
====
|
|
---- QUERY
|
|
# Project only the int column of the ragged-column table; the expected rows below
# contain NULL wherever the full-row test above reports NULL for the int column.
select int_col from tblwithraggedcolumns
|
|
---- TYPES
|
|
int
|
|
---- RESULTS
|
|
0
|
|
1
|
|
123
|
|
2
|
|
3
|
|
4
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
====
|
|
---- QUERY
|
|
# Project only the string column of the ragged-column table; the expected rows
# below match the string column of the full-row test above.
select str_col from tblwithraggedcolumns
|
|
---- TYPES
|
|
string
|
|
---- RESULTS
|
|
''
|
|
''
|
|
'ColumnWithCarriageReturn'
|
|
'NoDelimiter'
|
|
'\r\r\n'
|
|
'a'
|
|
'at16bytes'
|
|
'b'
|
|
'c'
|
|
'd'
|
|
'foo'
|
|
'hello'
|
|
====
|
|
---- QUERY
|
|
# Quoting test: all identifiers, including aliases, are backtick-quoted
|
|
SELECT `table_alias`.`int_col` AS `default_int_col`
|
|
FROM `functional`.`alltypes` `table_alias`
|
|
GROUP BY `default_int_col`
|
|
LIMIT 10
|
|
---- TYPES
|
|
int
|
|
---- RESULTS
|
|
0
|
|
7
|
|
3
|
|
9
|
|
4
|
|
6
|
|
1
|
|
5
|
|
2
|
|
8
|
|
====
|
|
---- QUERY
|
|
# Select from table with escape char that is same as delim char
|
|
SELECT * FROM escapechartesttable
|
|
---- TYPES
|
|
boolean, int
|
|
---- RESULTS
|
|
true,0
|
|
false,1
|
|
true,2
|
|
false,3
|
|
true,4
|
|
false,5
|
|
true,6
|
|
false,7
|
|
true,8
|
|
false,9
|
|
====
|
|
---- QUERY
|
|
# Test string-literal escape sequences
|
|
SELECT ASCII("\0"), ASCII("\\"), ASCII("\b"), ASCII("\n"), ASCII("\r"), ASCII("\t"), ASCII("\Z")
|
|
---- TYPES
|
|
int, int, int, int, int, int, int
|
|
---- RESULTS
|
|
0,92,8,10,13,9,26
|
|
====
|
|
---- QUERY
|
|
# Test escaping non-escape chars. We expect the escape to be simply removed.
|
|
SELECT ASCII("\a"), ASCII("\X"), ASCII("\z"), ASCII("\?"), ASCII("\*")
|
|
---- TYPES
|
|
int, int, int, int, int
|
|
---- RESULTS
|
|
97,88,122,63,42
|
|
====
|
|
---- QUERY
|
|
# Test escaping '%' and '_', which are handled specially.
|
|
# We expect '\\%' and '\%' to result in '\%' (similarly for '_')
|
|
SELECT "\%", "\\%", "\_", "\\_"
|
|
---- TYPES
|
|
string, string, string, string
|
|
---- RESULTS
|
|
'\%','\%','\_','\_'
|
|
====
|
|
---- QUERY
|
|
# Test query filed in IMPALA-65
|
|
SELECT "quote \"", 'quote \''
|
|
---- TYPES
|
|
string, string
|
|
---- RESULTS
|
|
'quote "','quote ''
|
|
====
|
|
#---- QUERY
|
|
# Select from table that contains unsupported primitive types
|
|
#SELECT int_col, str_col, bigint_col from functional.unsupported_types
|
|
#---- TYPES
|
|
#int, string, bigint
|
|
#---- RESULTS
|
|
#0,'aaaa',0
|
|
#1,'bbbb',10
|
|
#2,'cccc',20
|
|
#NULL,'NULL',NULL
|
|
#4,'eeee',40
|
|
#====
|
|
---- QUERY
|
|
# where clause is a SlotRef
|
|
SELECT count(*) from functional.alltypes where bool_col
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
3650
|
|
====
|
|
---- QUERY
|
|
# having clause is a SlotRef
|
|
SELECT count(*) from functional.alltypes group by bool_col having bool_col
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
3650
|
|
====
|
|
|