mirror of
https://github.com/apache/impala.git
synced 2026-02-02 06:00:36 -05:00
The .test file parser implemented an unconventional method for parsing single-quoted strings in comma-separated value format. This didn't handle trailing commas in the string correctly. This commit switches to using a conventional method for parsing comma-separated value format: * Commas enclosed by single quotes are not treated as field separators * Single quotes can be escaped within a string by doubling them. I looked into using Python's .csv module for this, but it wouldn't work without modifying the test file format more because it automatically discards the quotes during parsing, which are actually semantically important in .test files. E.g. without the quotes we can't distinguish between the literal string 'regex:...' and the regex regex:.... Testing: Ran exhaustive tests and fixed .test files that required modifications. Will rerun before merging. Added a couple of tests to exercise edge cases in the test file parser. Change-Id: I18ddcb0440490ddf8184be66d3681038a1615dd9 Reviewed-on: http://gerrit.cloudera.org:8080/11800 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Tim Armstrong <tarmstrong@cloudera.com>
200 lines
7.6 KiB
Plaintext
200 lines
7.6 KiB
Plaintext
====
|
|
---- QUERY
|
|
# This test relies on a deterministic row order so we use "sort by (id)".
|
|
create table alltypes sort by (id) like functional_parquet.alltypes;
|
|
alter table alltypes set tblproperties("impala.enable.stats.extrapolation"="true");
|
|
insert into alltypes partition(year, month)
|
|
select * from functional_parquet.alltypes where year = 2009;
|
|
====
|
|
---- QUERY
|
|
# No stats are available.
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' stored statistics:'
|
|
' table: rows=unavailable size=unavailable'
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: unavailable'
|
|
row_regex:.* extrapolated-rows=unavailable.*
|
|
' tuple-ids=0 row-size=4B cardinality=unavailable'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
compute stats alltypes
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Only the table-level row count is stored. The partition row counts are extrapolated.
|
|
show table stats alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, EXTRAP #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'2009','1',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=1'
|
|
'2009','2',-1,288,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=2'
|
|
'2009','3',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=3'
|
|
'2009','4',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=4'
|
|
'2009','5',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=5'
|
|
'2009','6',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=6'
|
|
'2009','7',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=7'
|
|
'2009','8',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=8'
|
|
'2009','9',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=9'
|
|
'2009','10',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=10'
|
|
'2009','11',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=11'
|
|
'2009','12',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=12'
|
|
'Total','',3650,3650,12,regex:.*B,'0B','','','',''
|
|
---- TYPES
|
|
STRING,STRING,BIGINT,BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
# Stats are available now.
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=.*
|
|
'Codegen disabled by planner'
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
row_regex:.*Per-Host Resources: mem-estimate=.* mem-reservation=.*
|
|
'PLAN-ROOT SINK'
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
'|'
|
|
'00:SCAN HDFS [$DATABASE.alltypes]'
|
|
row_regex:.*partitions=12/12 files=12 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=3650 size=.*
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=3650.*
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
' tuple-ids=0 row-size=4B cardinality=3650'
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Select a subset of partitions.
|
|
explain select id from alltypes where month in (1, 2, 3);
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=.*
|
|
'Codegen disabled by planner'
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
'PLAN-ROOT SINK'
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
'|'
|
|
'00:SCAN HDFS [$DATABASE.alltypes]'
|
|
row_regex:.*partitions=3/12 files=3 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=3650 size=.*
|
|
' partitions: 0/3 rows=unavailable'
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=904.*
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
' tuple-ids=0 row-size=4B cardinality=904'
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Double the data in existing partitions.
|
|
insert into alltypes partition(year, month)
|
|
select * from functional_parquet.alltypes where year = 2009;
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=.*
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
row_regex:.*Per-Host Resources: mem-estimate=.* mem-reservation=.*
|
|
'PLAN-ROOT SINK'
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
'|'
|
|
'00:SCAN HDFS [$DATABASE.alltypes]'
|
|
row_regex:.*partitions=12/12 files=24 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=3650 size=.*
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=7300.*
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
' tuple-ids=0 row-size=4B cardinality=7300'
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Create new partitions and extrapolate their row count.
|
|
insert into alltypes partition(year, month)
|
|
select * from functional_parquet.alltypes where year = 2010;
|
|
explain select id from alltypes where year = 2010;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=16MB.*
|
|
'Codegen disabled by planner'
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
row_regex:.*Per-Host Resources: mem-estimate=.* mem-reservation=.*
|
|
'PLAN-ROOT SINK'
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
'|'
|
|
'00:SCAN HDFS [$DATABASE.alltypes]'
|
|
row_regex:.*partitions=12/24 files=12 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=3650 size=.*
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=3651.*
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
' tuple-ids=0 row-size=4B cardinality=3651'
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Compute stats and run the same query again.
|
|
compute stats alltypes;
|
|
explain select id from alltypes where year = 2010;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=16MB.*
|
|
'Codegen disabled by planner'
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
row_regex:.*Per-Host Resources: mem-estimate=.* mem-reservation=.*
|
|
'PLAN-ROOT SINK'
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
'|'
|
|
'00:SCAN HDFS [$DATABASE.alltypes]'
|
|
row_regex:.*partitions=12/24 files=12 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=10950 size=.*
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=3651
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
' tuple-ids=0 row-size=4B cardinality=3651'
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Test that dropping stats resets everything.
|
|
drop stats alltypes;
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' stored statistics:'
|
|
' table: rows=unavailable size=unavailable'
|
|
' partitions: 0/24 rows=unavailable'
|
|
' columns: unavailable'
|
|
row_regex:.* extrapolated-rows=unavailable.*
|
|
' tuple-ids=0 row-size=4B cardinality=unavailable'
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|