mirror of
https://github.com/apache/impala.git
synced 2026-01-04 00:00:56 -05:00
This fixes how we validate delimiters to be in line with Hive. A delimiter must fit in a single byte and, as far as I can tell (this is not documented), can be specified in any of the following formats: (1) a single ASCII or Unicode character (e.g. '|'); (2) an escape character in octal format (e.g. \001, which is stored in the metastore as the Unicode character \u0001); (3) a signed decimal integer in the range [-128, 127], which is used to support delimiters with character values in the range 128-255 (e.g. -2 maps to character value 254). Previously, we were not handling the "signed integer" case, so there was no way to specify a delimiter in the "extended" ASCII range of 128-255. To support result validation, the test infrastructure had to be updated to support reading/writing different character encodings. Change-Id: Ie3c4d444dc9c6e60192093ed0c0f6f151eab16bc Reviewed-on: http://gerrit.ent.cloudera.com:8080/1848 Reviewed-by: Lenni Kuff <lskuff@cloudera.com> Tested-by: jenkins Reviewed-on: http://gerrit.ent.cloudera.com:8080/1888
39 lines
1.0 KiB
Plaintext
39 lines
1.0 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Test querying a text table that uses "extended" ASCII (Latin-1) delimiters:
|
|
# fields terminated by '-2' -- signed form of byte 254 (0xFE), the thorn character
|
|
# escaped by '-22' -- signed form of byte 234 (0xEA), lowercase e with circumflex
|
|
# lines terminated by '\n'
# NOTE(review): the expected rows that contain thorn / e-circumflex inside the first
# column presumably come from escaped occurrences of those bytes in the table's data
# file -- confirm against the data file, which is not visible here.
|
|
select * from text_thorn_ecirc_newline
|
|
---- RESULTS
|
|
'one','two',3,4
|
|
'oneþone','two',3,4
|
|
'oneê','two',3,4
|
|
'oneêþone','two',3,4
|
|
'oneêê','two',3,4
|
|
---- TYPES
|
|
STRING,STRING,INT,INT
|
|
====
|
|
---- QUERY
|
|
# Create a new table (tecn) with the same definition as the table queried above, so
# that inserting into a table with these delimiters can be tested.
# NOTE(review): the source table name is unqualified while the new table is created in
# delim_text_test_db -- presumably the test runs with the source table's database as
# the current database; confirm.
|
|
create table delim_text_test_db.tecn like text_thorn_ecirc_newline;
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Insert data into the tecn table; the next test case reads it back to check the results.
# Only plain values are inserted here (none contain the field terminator or escape byte).
|
|
# TODO: Expand verification to cover inserting a field terminator and escape character.
|
|
# Some additional work needs to be done in the test framework to properly encode/decode
|
|
# the values going into the INSERT query before this will work.
|
|
insert overwrite delim_text_test_db.tecn values
|
|
('abc', 'xyz', 1, 2),
|
|
('efg', 'xyz', 3, 4)
|
|
====
|
|
---- QUERY
|
|
# Read back the tecn table; the expected rows are the two rows written by the
# INSERT OVERWRITE in the preceding test case.
select * from delim_text_test_db.tecn
|
|
---- RESULTS
|
|
'abc','xyz',1,2
|
|
'efg','xyz',3,4
|
|
---- TYPES
|
|
STRING,STRING,INT,INT
|
|
====
|