mirror of
https://github.com/apache/impala.git
synced 2025-12-25 02:03:09 -05:00
Currently there is a problematic StringValue::Smallify() call in BufferedTupleStream. It modifies the string value of an existing tuple, which can corrupt the BufferedTupleStream. We should only smallify string values during deep copy, and only the target string value, never the source. To ensure this, this patch makes StringValue::Smallify() private and adds comments to warn callers. The same is true for Tuple::SmallifyStrings(). The bug was reproducible with a complex query against a few large tables: one JOIN builder crashed Impala during spilling due to a corrupted buffered tuple stream. create-tables-impala-13138.test and query-impala-13138.test contain the repro steps. Testing: * updated backend tests * added a test that crashes Impala without this fix Change-Id: I739048b37a59a81c41c85d475fad00cb520a5f99 Reviewed-on: http://gerrit.cloudera.org:8080/21502 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Daniel Becker <daniel.becker@cloudera.com>
30 lines
1.2 KiB
Plaintext
30 lines
1.2 KiB
Plaintext
====
---- QUERY
# Regression query for IMPALA-13138: StringValue::Smallify() was applied to a
# string inside an existing tuple, which could corrupt a BufferedTupleStream
# and crash a join builder while spilling.
#
# NOTE(review): the tables referenced below are not created in this file. They
# are presumably set up by create-tables-impala-13138.test - confirm before
# running this test on its own.
# NOTE(review): the query options and the join/aggregation shape are presumably
# what drives the plan into the spilling path, so keep the statement as is and
# do not simplify it (for example, the dim_letter join is never referenced by
# the select list or the predicates, but it is part of the original repro).
#
# Query outline:
#  * su keeps, for each letter_hash, the row with the highest id among
#    subscriptions with is_affiliate=0 (row_number() ... order by id desc,
#    filtered with latest=1).
#  * c is matched to su by a case-insensitive comparison of war.
#  * Only events dated 2024-06-01 or later, and with send_account_sk other than
#    43, are kept before the final GROUP BY.
#
# The RESULTS section uses VERIFY_IS_SUBSET with a single row_regex, so the
# check is only that a row dated 2024-06-07 is returned, not the full output.
set RUNTIME_FILTER_WAIT_TIME_MS=10000;
set MT_DOP=12;
set RUNTIME_FILTER_MIN_SIZE=8192;
set RUNTIME_FILTER_MAX_SIZE=2097152;
set MINMAX_FILTER_THRESHOLD=0.5;
set MINMAX_FILTERING_LEVEL=PAGE;
SELECT ecc.letter_mission_name, re.`date` as date_, c.war_group
FROM letter_marketing_response_events re
  LEFT JOIN letter_missions_combined ecc ON ecc.letter_mission=re.letter_mission
  LEFT JOIN (SELECT b.letter_hash, b.war
             FROM (SELECT letter_hash, war, id,
                     row_number() over(partition by letter_hash order by id desc) as latest
                   FROM subscriptions
                   WHERE is_affiliate=0
                   GROUP BY letter_hash, war, id) b
             WHERE latest=1
             GROUP BY b.letter_hash, b.war) su ON re.letter_hash=su.letter_hash
  LEFT JOIN letterwargroups c ON upper(c.war)=upper(su.war)
  LEFT JOIN dim_letter em ON em.letter_hash=re.letter_hash
WHERE re.`date`>='2024-06-01'
  AND re.send_account_sk not in (43)
GROUP BY ecc.letter_mission_name, re.`date`, c.war_group;
---- RESULTS: VERIFY_IS_SUBSET
row_regex:.*,'2024-06-07',.*
---- TYPES
STRING,STRING,STRING
====