mirror of
https://github.com/apache/impala.git
synced 2026-01-26 12:02:21 -05:00
This patch fixes a URL-encoding bug in which certain Unicode characters were incorrectly encoded because bytes of their UTF-8 representation matched bytes in the set of characters to escape. For example, the string '运', which consists of the three bytes 0xe8 0xbf 0x90, was wrongly encoded as '\E8%FFFFFFBF\90', because the middle byte matched one of the two bytes that represent the "\u00FF" literal. Including "\u00FF" was likely a mistake from the beginning; it should have been '\x7F'. The patch makes three key changes: 1. Before the change, the set of characters that need to be escaped was stored as a string; the patch uses an unordered_set instead. 2. '\xFF', which is an invalid UTF-8 byte and whose inclusion was erroneous from the beginning, is replaced with '\x7F', the DELETE control character, ensuring consistency and correctness in URL encoding. 3. The list of characters to be escaped is extended to match the current list in Hive. Testing: Tests on both traditional Hive tables and Iceberg tables are included in unicode-column-name.test, insert.test, coding-util-test.cc and test_insert.py. Change-Id: I88c4aba5d811dfcec809583d0c16fcbc0ca730fb Reviewed-on: http://gerrit.cloudera.org:8080/21131 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
329 lines
7.3 KiB
Plaintext
329 lines
7.3 KiB
Plaintext
====
---- QUERY
# TEXTFILE table whose first column has a non-ASCII (Korean) name. The sections
# below cover create, IF NOT EXISTS, SHOW TABLES, DESCRIBE, insert/select and drop.
create table testtbl1(`세율대분류구분코드` int, s string COMMENT 'String col') stored as TEXTFILE;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Make sure creating a table with the same name doesn't throw an error when
# IF NOT EXISTS is specified.
create table if not exists testtbl1(`세율대분류구분코드` int, s string)
STORED AS TEXTFILE;
---- RESULTS
'Table already exists.'
====
---- QUERY
show tables;
---- RESULTS
'testtbl1'
---- TYPES
STRING
====
---- QUERY
# The unicode column name is expected back unchanged, along with the column comment.
describe testtbl1;
---- RESULTS: RAW_STRING
'세율대분류구분코드','int',''
's','string','String col'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
insert into table testtbl1 values(1, 'Alice');
====
---- QUERY
# Read back the single row inserted above.
select * from testtbl1;
---- RESULTS
1,'Alice'
---- TYPES
INT, STRING
====
---- QUERY
drop table testtbl1;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Kudu table whose primary-key column has a non-ASCII (Korean) name with an
# ASCII suffix. Covers create, SHOW TABLES, DESCRIBE, insert/select and drop.
create table testtbl_kudu(`시스템처리최종사용자번호a` int, s string COMMENT 'String col',
Primary key(`시스템처리최종사용자번호a`)) stored as KUDU;
---- RESULTS
'Table has been created.'
====
---- QUERY
show tables;
---- RESULTS
'testtbl_kudu'
---- TYPES
STRING
====
---- QUERY
# DESCRIBE on this table is expected to return ten string columns per row, with
# the unicode column name unchanged in the first one.
describe testtbl_kudu;
---- RESULTS: RAW_STRING
'시스템처리최종사용자번호a','int','','true','true','false','','AUTO_ENCODING','DEFAULT_COMPRESSION','0'
's','string','String col','false','','true','','AUTO_ENCODING','DEFAULT_COMPRESSION','0'
---- TYPES
STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
insert into table testtbl_kudu values(1, 'Alice');
====
---- QUERY
# Read back the single row inserted above.
select * from testtbl_kudu;
---- RESULTS
1,'Alice'
---- TYPES
INT, STRING
====
---- QUERY
drop table testtbl_kudu;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Iceberg table whose first column has a non-ASCII (Chinese) name. Covers
# create, IF NOT EXISTS, SHOW TABLES, DESCRIBE, insert/select and drop.
create table testtbl_iceberg(`我` int, s string COMMENT 'String col') stored as ICEBERG;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Make sure creating a table with the same name doesn't throw an error when
# IF NOT EXISTS is specified.
create table if not exists testtbl_iceberg(`我` int, s string)
STORED AS ICEBERG;
---- RESULTS
'Table already exists.'
====
---- QUERY
show tables;
---- RESULTS
'testtbl_iceberg'
---- TYPES
STRING
====
---- QUERY
# DESCRIBE on this table is expected to return four string columns per row,
# with the unicode column name unchanged in the first one.
describe testtbl_iceberg;
---- RESULTS: RAW_STRING
'我','int','','true'
's','string','String col','true'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
insert into table testtbl_iceberg values(1, 'Alice');
====
---- QUERY
# Read back the single row inserted above.
select * from testtbl_iceberg;
---- RESULTS
1,'Alice'
---- TYPES
INT, STRING
====
---- QUERY
drop table testtbl_iceberg;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# ORC table whose first column has a non-ASCII (Chinese) name. The table is
# created, described and dropped in QUERY sections, while the insert and the
# read-back further down are HIVE_QUERY sections marked IS_HDFS_ONLY.
# NOTE(review): the table name is qualified with $DATABASE throughout, presumably
# so that the HIVE_QUERY sections address the same table - confirm.
create table $DATABASE.testtbl_orc(`我` int, s string COMMENT 'String col') stored as ORC;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Make sure creating a table with the same name doesn't throw an error when
# IF NOT EXISTS is specified.
create table if not exists $DATABASE.testtbl_orc(`我` int, s string)
STORED AS ORC;
---- RESULTS
'Table already exists.'
====
---- QUERY
show tables;
---- RESULTS
'testtbl_orc'
---- TYPES
STRING
====
---- QUERY
# The unicode column name is expected back unchanged, along with the column comment.
describe $DATABASE.testtbl_orc;
---- RESULTS: RAW_STRING
'我','int',''
's','string','String col'
---- TYPES
STRING, STRING, STRING
====
---- IS_HDFS_ONLY
---- HIVE_QUERY
insert into table $DATABASE.testtbl_orc values(1, 'Alice');
====
---- IS_HDFS_ONLY
---- HIVE_QUERY
select * from $DATABASE.testtbl_orc;
---- RESULTS
1,'Alice'
---- TYPES
INT, STRING
====
---- QUERY
drop table $DATABASE.testtbl_orc;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# AVRO table whose first column has a non-ASCII (Chinese) name. The table is
# created, described and dropped in QUERY sections, while the insert and the
# read-back further down are HIVE_QUERY sections marked IS_HDFS_ONLY.
# NOTE(review): the table name is qualified with $DATABASE throughout, presumably
# so that the HIVE_QUERY sections address the same table - confirm.
create table $DATABASE.testtbl_avro(`我` int, s string COMMENT 'String col') stored as AVRO;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Make sure creating a table with the same name doesn't throw an error when
# IF NOT EXISTS is specified.
create table if not exists $DATABASE.testtbl_avro(`我` int, s string)
STORED AS AVRO;
---- RESULTS
'Table already exists.'
====
---- QUERY
show tables;
---- RESULTS
'testtbl_avro'
---- TYPES
STRING
====
---- QUERY
# The unicode column name is expected back unchanged. Note that the expected
# comment of the first column is 'from deserializer' rather than empty.
describe $DATABASE.testtbl_avro;
---- RESULTS: RAW_STRING
'我','int','from deserializer'
's','string','String col'
---- TYPES
STRING, STRING, STRING
====
---- IS_HDFS_ONLY
---- HIVE_QUERY
insert into table $DATABASE.testtbl_avro values(1, 'Alice');
====
---- IS_HDFS_ONLY
---- HIVE_QUERY
select * from $DATABASE.testtbl_avro;
---- RESULTS
1,'Alice'
---- TYPES
INT, STRING
====
---- QUERY
drop table $DATABASE.testtbl_avro;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Partitioned TEXTFILE table in which both a regular column and the partition
# column have non-ASCII names (Chinese and Korean respectively).
create table testtbl_part(`我` int, s string) PARTITIONED BY (`고객` int comment 'C') stored as TEXTFILE;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Partition columns are displayed as part of DESCRIBE <table>
describe testtbl_part;
---- RESULTS: RAW_STRING
'我','int',''
's','string',''
'고객','int','C'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# The next three sections each insert one row into a different static partition,
# referring to the partition column by its unicode name.
insert into table testtbl_part partition(`고객`=24) values(1, 'Alice');
====
---- QUERY
insert into table testtbl_part partition(`고객`=20) values(2, 'Alison');
====
---- QUERY
insert into table testtbl_part partition(`고객`=23) values(3, 'Zack');
====
---- QUERY
# All three rows are expected back, with the partition value as the last column.
select * from testtbl_part;
---- RESULTS
1,'Alice',24
2,'Alison',20
3,'Zack',23
---- TYPES
INT, STRING, INT
====
---- QUERY
drop table testtbl_part;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# PARQUET table whose first column has a non-ASCII (Chinese) name. Covers
# create, IF NOT EXISTS, SHOW TABLES, DESCRIBE, insert/select and drop.
create table testtbl_parquet(`我` int, s string COMMENT 'String col') stored as PARQUET;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Make sure creating a table with the same name doesn't throw an error when
# IF NOT EXISTS is specified.
create table if not exists testtbl_parquet(`我` int, s string)
STORED AS PARQUET;
---- RESULTS
'Table already exists.'
====
---- QUERY
show tables;
---- RESULTS
'testtbl_parquet'
---- TYPES
STRING
====
---- QUERY
# The unicode column name is expected back unchanged, along with the column comment.
describe testtbl_parquet;
---- RESULTS: RAW_STRING
'我','int',''
's','string','String col'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
insert into table testtbl_parquet values(1, 'Alice');
====
---- QUERY
# Read back the single row inserted above.
select * from testtbl_parquet;
---- RESULTS
1,'Alice'
---- TYPES
INT, STRING
====
---- QUERY
drop table testtbl_parquet;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Tests for IMPALA-11499
# Parquet table partitioned by a string column. The next section writes
# non-ASCII partition values into it.
create table unicode_partition_values (id int) partitioned by (p string) stored as parquet;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Insert one row into each of three partitions whose values are a multi-character
# Chinese string, a single Chinese character, and Chinese text followed by ASCII
# digits and punctuation. The RESULTS below are the expected rows of the final
# select, with the partition values returned unchanged.
insert into unicode_partition_values partition(p='运营业务数据') values (0);
insert into unicode_partition_values partition(p='运') values (0);
insert into unicode_partition_values partition(p='运营业务数据1234567890!@#$%^&*(){}[]') values (0);
select * from unicode_partition_values;
---- RESULTS: RAW_STRING
0,'运'
0,'运营业务数据'
0,'运营业务数据1234567890!@#$%^&*(){}[]'
---- TYPES
INT, STRING
====
---- QUERY
# Iceberg table with an identity partition on the string column p. The next
# section writes non-ASCII values into that column.
create table unicode_partition_values_iceberg (id int, p string) partitioned by spec (identity(p)) stored by iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Insert three rows whose p values are a multi-character Chinese string, a single
# Chinese character, and Chinese text followed by ASCII digits and punctuation.
# The RESULTS below are the expected rows of the final select, with the values
# returned unchanged.
insert into unicode_partition_values_iceberg values (0, '运营业务数据');
insert into unicode_partition_values_iceberg values (0, '运');
insert into unicode_partition_values_iceberg values (0, '运营业务数据1234567890!@#$%^&*(){}[]');
select * from unicode_partition_values_iceberg;
---- RESULTS: RAW_STRING
0,'运'
0,'运营业务数据'
0,'运营业务数据1234567890!@#$%^&*(){}[]'
---- TYPES
INT, STRING
====