Files
impala/testdata/bin/load-dependent-tables.sql
poojanilangekar c6f9b61ec2 IMPALA-6625: Skip computing parquet conjuncts for non-Parquet scans
This change ensures that the planner computes parquet conjuncts
only when for scans containing parquet files. Additionally, it
also handles PARQUET_DICTIONARY_FILTERING and
PARQUET_READ_STATISTICS query options in the planner.

Testing was carried out independently on parquet and non-parquet
scans:
  1. Parquet scans were tested via the existing parquet-filtering
     planner test. Additionally, a new test
     [parquet-filtering-disabled] was added to ensure that the
     explain plan generated skips parquet predicates based on the
     query options.
  2. Non-parquet scans were tested manually to ensure that the
     functions to compute parquet conjucts were not invoked.
     Additional test cases were added to the parquet-filtering
     planner test to scan non parquet tables and ensure that the
     plans do not contain conjuncts based on parquet statistics.
  3. A parquet partition was added to the alltypesmixedformat
     table in the functional database. Planner tests were added
     to ensure that Parquet conjuncts are constructed only when
     the Parquet partition is included in the query.

Change-Id: I9d6c26d42db090c8a15c602f6419ad6399c329e7
Reviewed-on: http://gerrit.cloudera.org:8080/10704
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2018-07-06 02:06:50 +00:00

117 lines
4.7 KiB
SQL

-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-- Create and load tables that depend upon data in the hive test-warehouse already existing
-- Load a mixed-format table. Hive behaves oddly when mixing formats,
-- but the following incantation ensures that the result is a
-- three-partition table. First is text format, second is sequence
-- file, third is RC file. Must be called after test-warehouse is
-- successfully populated
USE functional;
DROP TABLE IF EXISTS alltypesmixedformat;
CREATE EXTERNAL TABLE alltypesmixedformat (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int)
row format delimited fields terminated by ',' escaped by '\\'
stored as TEXTFILE
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/alltypesmixedformat';
INSERT OVERWRITE TABLE alltypesmixedformat PARTITION (year=2009, month=1)
SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
FROM alltypes
WHERE year=2009 and month=1;
ALTER TABLE alltypesmixedformat SET FILEFORMAT SEQUENCEFILE;
LOAD DATA INPATH '/tmp/alltypes_seq/year=2009/month=2/'
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=2);
ALTER TABLE alltypesmixedformat SET FILEFORMAT RCFILE;
LOAD DATA INPATH '/tmp/alltypes_rc/year=2009/month=3/'
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=3);
ALTER TABLE alltypesmixedformat SET FILEFORMAT PARQUET;
LOAD DATA INPATH '/tmp/alltypes_parquet/year=2009/month=4'
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=4);
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1)
SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1)
SET FILEFORMAT TEXTFILE;
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
SET FILEFORMAT SEQUENCEFILE;
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=3)
SET FILEFORMAT RCFILE;
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=4)
SET FILEFORMAT PARQUET;
DROP TABLE IF EXISTS functional_parquet.chars_formats;
CREATE EXTERNAL TABLE functional_parquet.chars_formats
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
STORED AS PARQUET
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/chars_formats_parquet';
DROP TABLE IF EXISTS functional_orc_def.chars_formats;
CREATE EXTERNAL TABLE functional_orc_def.chars_formats
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
STORED AS ORC
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/chars_formats_orc_def';
DROP TABLE IF EXISTS functional.chars_formats;
CREATE EXTERNAL TABLE functional.chars_formats
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
ROW FORMAT delimited fields terminated by ',' escaped by '\\'
STORED AS TEXTFILE
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/chars_formats_text';
DROP TABLE IF EXISTS functional_avro_snap.chars_formats;
CREATE EXTERNAL TABLE functional_avro_snap.chars_formats
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
STORED AS AVRO
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/chars_formats_avro_snap'
TBLPROPERTIES ('avro.schema.literal'='{"type":"record",
"name":"CharTypesTest","doc":"Schema generated by Kite",
"fields":[
{"name":"cs","type":["null","string"], "doc":"Type inferred"},
{"name":"cl","type":["null","string"], "doc":"Type inferred"},
{"name":"vc","type":["null","string"], "doc":"Type inferred"}
]}');
---- Unsupported Impala table types
USE functional;
CREATE VIEW IF NOT EXISTS hive_view AS SELECT 1 AS int_col FROM alltypes limit 1;
USE functional;
DROP INDEX IF EXISTS hive_index ON alltypes;
CREATE INDEX hive_index ON TABLE alltypes (int_col)
AS 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler'
WITH DEFERRED REBUILD IN TABLE hive_index_tbl;