mirror of
https://github.com/apache/impala.git
synced 2026-01-02 12:00:33 -05:00
This turned out to be slightly non-trivial as REPLACE is already a keyword, and thus the parser needs to be tweaked to allow this, since function names act as bare identifiers. It was difficult to get this to match performance of regexp_replace. For expanding patterns, the fact that regexp_replace copies the expansion inline means that it may in fact win on large strings with sparse matches that are > dcache size apart. Let's leave optimizing that for later. Testing: Added a full test for maximum size strings and got most of the boundary conditions I could identify. Manually ran queries on TPC-H dataset in impala to verify both performance and correctness. Added large string and exprs.test test clauses and ran the tests to verify they work as expected. Change-Id: I1780a7d8fee6d0db9dad148217fb6eb10f773329 Reviewed-on: http://gerrit.cloudera.org:8080/5776 Reviewed-by: Dan Hecht <dhecht@cloudera.com> Tested-by: Impala Public Jenkins
212 lines
8.7 KiB
Plaintext
212 lines
8.7 KiB
Plaintext
====
|
|
---- QUERY
|
|
# IMPALA-1619 group_concat() error
# Feeds l_comment from 50 UNION ALL copies of tpch_parquet.lineitem into a single
# group_concat() call with "!" as the separator. The query is expected to fail with
# the concatenated-string-length error listed under CATCH rather than return a length.
|
|
select length(group_concat(l_comment, "!")) from (
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem) a
|
|
---- CATCH
|
|
Concatenated string length larger than allowed limit of 1 GB character data
|
|
=====
|
|
---- QUERY
|
|
# IMPALA-2620: Allocations by UDFs/UDAs need to take the memory limit into account.
# Sets mem_limit=512m and then runs group_concat() over 50 UNION ALL copies of
# tpch_parquet.lineitem. The expected failure listed under CATCH is
# "Memory limit exceeded", not a string-length error.
|
|
SET mem_limit=512m;
|
|
select length(group_concat(l_comment, "!")) from (
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem union all
|
|
select l_comment from tpch_parquet.lineitem) a
|
|
---- CATCH
|
|
Memory limit exceeded
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3350: Results of string functions can exceed 1GB.
# Subquery t2 builds one string s by group_concat()-ing l_comment over two UNION ALL
# copies of tpch.lineitem. concat_ws() then joins four copies of s with ','.
# Expected to fail with the string-length error listed under CATCH.
|
|
select length(concat_ws(',', s, s, s, s)) from (
|
|
select group_concat(l_comment, "!") s from (
|
|
select l_comment from tpch.lineitem union all
|
|
select l_comment from tpch.lineitem) t1
|
|
) t2
|
|
---- CATCH
|
|
String length larger than allowed limit of 1 GB character data
|
|
=====
|
|
---- QUERY
|
|
# repeat(): s is the group_concat() of l_comment over two UNION ALL copies of
# tpch.lineitem, and the query asks for it to be repeated 10 times.
# Expected to fail with the string-length error listed under CATCH.
select length(repeat(s, 10)) from (
|
|
select group_concat(l_comment, "!") s from (
|
|
select l_comment from tpch.lineitem union all
|
|
select l_comment from tpch.lineitem) t1
|
|
) t2
|
|
---- CATCH
|
|
String length larger than allowed limit of 1 GB character data
|
|
=====
|
|
---- QUERY
|
|
# lpad(): requests a padded length of 1073741830, which is 2^30 (1073741824) + 6,
# using '!' as the pad string. s is the group_concat() of l_comment over two
# UNION ALL copies of tpch.lineitem.
# Expected to fail with the string-length error listed under CATCH.
select length(lpad(s, 1073741830, '!')) from (
|
|
select group_concat(l_comment, "!") s from (
|
|
select l_comment from tpch.lineitem union all
|
|
select l_comment from tpch.lineitem) t1
|
|
) t2
|
|
---- CATCH
|
|
String length larger than allowed limit of 1 GB character data
|
|
=====
|
|
---- QUERY
|
|
# rpad(): requests a padded length of 1073741830, which is 2^30 (1073741824) + 6,
# using '~' as the pad string. s is the group_concat() of l_comment over two
# UNION ALL copies of tpch.lineitem.
# Expected to fail with the string-length error listed under CATCH.
select length(rpad(s, 1073741830, '~')) from (
|
|
select group_concat(l_comment, "!") s from (
|
|
select l_comment from tpch.lineitem union all
|
|
select l_comment from tpch.lineitem) t1
|
|
) t2
|
|
---- CATCH
|
|
String length larger than allowed limit of 1 GB character data
|
|
=====
|
|
---- QUERY
|
|
# space(): asks for 1073741830 characters, which is 2^30 (1073741824) + 6.
# No table is read. Expected to fail with the string-length error listed under CATCH.
select space(1073741830);
|
|
---- CATCH
|
|
String length larger than allowed limit of 1 GB character data
|
|
=====
|
|
---- QUERY
|
|
# regexp_replace(): uses the regex pattern '.' with an eight-character replacement
# ('++++++++') on s, the group_concat() of l_comment over two UNION ALL copies of
# tpch.lineitem.
# Expected to fail with the string-length error listed under CATCH.
select length(regexp_replace(s, '.', '++++++++')) from (
|
|
select group_concat(l_comment, "!") s from (
|
|
select l_comment from tpch.lineitem union all
|
|
select l_comment from tpch.lineitem) t1
|
|
) t2
|
|
---- CATCH
|
|
String length larger than allowed limit of 1 GB character data
|
|
=====
|
|
---- QUERY
|
|
# replace(): replaces ' ' with '++++++++' in s, the group_concat() of l_comment over
# two UNION ALL copies of tpch.lineitem. This section has RESULTS instead of CATCH:
# the expected length is 625718301, which is below 2^30 (1073741824).
select length(replace(s, ' ', '++++++++')) from (
|
|
select group_concat(l_comment, "!") s from (
|
|
select l_comment from tpch.lineitem union all
|
|
select l_comment from tpch.lineitem) t1
|
|
) t2
|
|
---- RESULTS
|
|
625718301
|
|
=====
|
|
---- QUERY
|
|
# Nested replace(): the inner call (aliased x in subquery t3) replaces ' ' with
# '++++++++' in s; the outer call then replaces '+' with '000' in x.
# Expected to fail with the string-length error listed under CATCH.
select replace(x, '+', '000') from (select (replace(s, ' ', '++++++++')) x from (
|
|
select group_concat(l_comment, "!") s from (
|
|
select l_comment from tpch.lineitem union all
|
|
select l_comment from tpch.lineitem) t1
|
|
) t2) t3;
|
|
---- CATCH
|
|
String length larger than allowed limit of 1 GB character data
|
|
=====
|
|
---- QUERY
|
|
# trunc(): passes space(1073741830) as the second argument, i.e. a request for a
# string of 2^30 (1073741824) + 6 characters, over functional.alltypes.timestamp_col.
# Expected to fail with the string-length error listed under CATCH.
select trunc(timestamp_col, space(1073741830)) from functional.alltypes
|
|
---- CATCH
|
|
String length larger than allowed limit of 1 GB character data
|
|
=====
|
|
---- QUERY
|
|
# extract(): passes space(1073741830) as the second argument, i.e. a request for a
# string of 2^30 (1073741824) + 6 characters, over functional.alltypes.timestamp_col.
# Expected to fail with the string-length error listed under CATCH.
select extract(timestamp_col, space(1073741830)) from functional.alltypes
|
|
---- CATCH
|
|
String length larger than allowed limit of 1 GB character data
|
|
=====
|
|
---- QUERY
|
|
# madlib_encode_vector(): its argument is concat_ws() of four copies of s joined by
# ',', where s is the group_concat() of l_comment over two UNION ALL copies of
# tpch.lineitem.
# Expected to fail with the string-length error listed under CATCH.
# NOTE(review): the error presumably comes from the inner concat_ws() before
# madlib_encode_vector() runs; confirm against the backend.
select length(madlib_encode_vector(concat_ws(',', s, s, s, s))) from (
|
|
select group_concat(l_comment, "!") s from (
|
|
select l_comment from tpch.lineitem union all
|
|
select l_comment from tpch.lineitem) t1
|
|
) t2
|
|
---- CATCH
|
|
String length larger than allowed limit of 1 GB character data
|
|
=====
|
|
---- QUERY
|
|
# madlib_decode_vector(): its argument is concat_ws() of four copies of s joined by
# ',', where s is the group_concat() of l_comment over two UNION ALL copies of
# tpch.lineitem.
# Expected to fail with the string-length error listed under CATCH.
# NOTE(review): the error presumably comes from the inner concat_ws() before
# madlib_decode_vector() runs; confirm against the backend.
select length(madlib_decode_vector(concat_ws(',', s, s, s, s))) from (
|
|
select group_concat(l_comment, "!") s from (
|
|
select l_comment from tpch.lineitem union all
|
|
select l_comment from tpch.lineitem) t1
|
|
) t2
|
|
---- CATCH
|
|
String length larger than allowed limit of 1 GB character data
|
|
=====
|