Files
impala/testdata/datasets/functional/schema_constraints.csv
stiga-huang 9672d94596 IMPALA-7784: Use unescaped string in partition pruning + fix duplicatedly unescaping strings
String values from external systems (HDFS, Hive, Kudu, etc.) are already
unescaped, the same as string values in Thrift objects deserialized in
coordinators. We should mark needsUnescaping_ as false in creating
StringLiterals for these values (in LiteralExpr#create()).

When comparing StringLiterals in partition pruning, we should also use
the unescaped values if needsUnescaping_ is true.

Tests:
 - Add tests for partition pruning on unescaped strings.
 - Add test coverage for all existing code paths using
   LiteralExpr#create().
 - Run core tests

Change-Id: Iea8070f16a74f9aeade294504f2834abb8b3b38f
Reviewed-on: http://gerrit.cloudera.org:8080/15278
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2020-03-09 06:29:35 +00:00

19 KiB

1# Table level constraints:
2# Allows for defining constraints on which file formats to generate for an individual
3# table. The table name should match the base table name defined in the schema template
4# file.
5table_name:stringids, constraint:restrict_to, table_format:hbase/none/none
6table_name:hbasecolumnfamilies, constraint:restrict_to, table_format:hbase/none/none
7table_name:insertalltypesagg, constraint:restrict_to, table_format:hbase/none/none
8table_name:alltypessmallbinary, constraint:restrict_to, table_format:hbase/none/none
9table_name:insertalltypesaggbinary, constraint:restrict_to, table_format:hbase/none/none
10table_name:hbasealltypeserror, constraint:restrict_to, table_format:hbase/none/none
11table_name:hbasealltypeserrornonulls, constraint:restrict_to, table_format:hbase/none/none
12table_name:alltypesinsert, constraint:restrict_to, table_format:text/none/none
13table_name:stringpartitionkey, constraint:restrict_to, table_format:text/none/none
14table_name:alltypesnopart_insert, constraint:restrict_to, table_format:text/none/none
15table_name:insert_overwrite_nopart, constraint:restrict_to, table_format:text/none/none
16table_name:insert_overwrite_partitioned, constraint:restrict_to, table_format:text/none/none
17table_name:insert_string_partitioned, constraint:restrict_to, table_format:text/none/none
18table_name:alltypesinsert, constraint:restrict_to, table_format:parquet/none/none
19table_name:alltypesnopart_insert, constraint:restrict_to, table_format:parquet/none/none
20table_name:alltypesinsert, constraint:restrict_to, table_format:text/none/none
21table_name:alltypesnopart_insert, constraint:restrict_to, table_format:text/none/none
22table_name:insert_overwrite_nopart, constraint:restrict_to, table_format:text/none/none
23table_name:insert_overwrite_partitioned, constraint:restrict_to, table_format:text/none/none
24table_name:insert_string_partitioned, constraint:restrict_to, table_format:text/none/none
25table_name:alltypesinsert, constraint:restrict_to, table_format:parquet/none/none
26table_name:alltypesnopart_insert, constraint:restrict_to, table_format:parquet/none/none
27table_name:insert_overwrite_nopart, constraint:restrict_to, table_format:parquet/none/none
28table_name:insert_overwrite_partitioned, constraint:restrict_to, table_format:parquet/none/none
29table_name:insert_string_partitioned, constraint:restrict_to, table_format:parquet/none/none
30table_name:old_rcfile_table, constraint:restrict_to, table_format:rc/none/none
31table_name:bad_text_lzo, constraint:restrict_to, table_format:text/lzo/block
32table_name:bad_text_gzip, constraint:restrict_to, table_format:text/gzip/block
33table_name:bad_seq_snap, constraint:restrict_to, table_format:seq/snap/block
34table_name:bad_avro_snap_strings, constraint:restrict_to, table_format:avro/snap/block
35table_name:bad_avro_snap_floats, constraint:restrict_to, table_format:avro/snap/block
36table_name:bad_avro_decimal_schema, constraint:restrict_to, table_format:avro/snap/block
37table_name:bad_avro_date_out_of_range, constraint:restrict_to, table_format:avro/snap/block
38table_name:hive2_bad_avro_date_pre_gregorian, constraint:restrict_to, table_format:avro/snap/block
39table_name:hive3_avro_date_pre_gregorian, constraint:restrict_to, table_format:avro/snap/block
40table_name:bad_parquet, constraint:restrict_to, table_format:parquet/none/none
41table_name:bad_parquet_strings_negative_len, constraint:restrict_to, table_format:parquet/none/none
42table_name:bad_parquet_strings_out_of_bounds, constraint:restrict_to, table_format:parquet/none/none
43table_name:bad_magic_number, constraint:restrict_to, table_format:parquet/none/none
44table_name:bad_metadata_len, constraint:restrict_to, table_format:parquet/none/none
45table_name:bad_dict_page_offset, constraint:restrict_to, table_format:parquet/none/none
46table_name:bad_compressed_size, constraint:restrict_to, table_format:parquet/none/none
47table_name:alltypesagg_hive_13_1, constraint:restrict_to, table_format:parquet/none/none
48table_name:kite_required_fields, constraint:restrict_to, table_format:parquet/none/none
49table_name:bad_column_metadata, constraint:restrict_to, table_format:parquet/none/none
50table_name:lineitem_multiblock, constraint:restrict_to, table_format:parquet/none/none
51table_name:lineitem_sixblocks, constraint:restrict_to, table_format:parquet/none/none
52table_name:lineitem_multiblock_one_row_group, constraint:restrict_to, table_format:parquet/none/none
53table_name:customer_multiblock, constraint:restrict_to, table_format:parquet/none/none
54table_name:hudi_partitioned, constraint:restrict_to, table_format:parquet/none/none
55table_name:hudi_non_partitioned, constraint:restrict_to, table_format:parquet/none/none
56table_name:hudi_as_parquet, constraint:restrict_to, table_format:parquet/none/none
57# TODO: Support Avro. Data loading currently fails for Avro because complex types
58# cannot be converted to the corresponding Avro types yet.
59table_name:allcomplextypes, constraint:restrict_to, table_format:text/none/none
60table_name:allcomplextypes, constraint:restrict_to, table_format:parquet/none/none
61table_name:allcomplextypes, constraint:restrict_to, table_format:hbase/none/none
62table_name:functional, constraint:restrict_to, table_format:text/none/none
63table_name:complextypes_fileformat, constraint:restrict_to, table_format:text/none/none
64table_name:complextypes_fileformat, constraint:restrict_to, table_format:parquet/none/none
65table_name:complextypes_fileformat, constraint:restrict_to, table_format:avro/snap/block
66table_name:complextypes_fileformat, constraint:restrict_to, table_format:rc/snap/block
67table_name:complextypes_fileformat, constraint:restrict_to, table_format:seq/snap/block
68table_name:complextypes_fileformat, constraint:restrict_to, table_format:orc/def/block
69table_name:complextypes_multifileformat, constraint:restrict_to, table_format:text/none/none
70# TODO: Avro
71table_name:complextypestbl, constraint:restrict_to, table_format:parquet/none/none
72table_name:complextypestbl, constraint:restrict_to, table_format:orc/def/block
73table_name:complextypestbl_medium, constraint:restrict_to, table_format:parquet/none/none
74table_name:complextypestbl_medium, constraint:restrict_to, table_format:orc/def/block
75table_name:alltypeserror, constraint:exclude, table_format:parquet/none/none
76table_name:alltypeserrornonulls, constraint:exclude, table_format:parquet/none/none
77table_name:unsupported_types, constraint:exclude, table_format:parquet/none/none
78table_name:escapechartesttable, constraint:exclude, table_format:parquet/none/none
79table_name:TblWithRaggedColumns, constraint:exclude, table_format:parquet/none/none
80# the text_ tables are for testing delimiters and escape chars in text files
81table_name:text_comma_backslash_newline, constraint:restrict_to, table_format:text/none/none
82table_name:text_dollar_hash_pipe, constraint:restrict_to, table_format:text/none/none
83table_name:text_thorn_ecirc_newline, constraint:restrict_to, table_format:text/none/none
84table_name:bad_serde, constraint:restrict_to, table_format:text/none/none
85table_name:rcfile_lazy_binary_serde, constraint:restrict_to, table_format:rc/none/none
86table_name:unsupported_partition_types, constraint:restrict_to, table_format:text/none/none
87table_name:nullformat_custom, constraint:exclude, table_format:parquet/none/none
88table_name:alltypes_view, constraint:restrict_to, table_format:text/none/none
89table_name:allcomplextypes_view, constraint:restrict_to, table_format:text/none/none
90table_name:alltypes_view, constraint:restrict_to, table_format:seq/snap/block
91table_name:alltypes_hive_view, constraint:restrict_to, table_format:text/none/none
92table_name:alltypes_view_sub, constraint:restrict_to, table_format:text/none/none
93table_name:alltypes_view_sub, constraint:restrict_to, table_format:seq/snap/block
94table_name:alltypes_parens, constraint:restrict_to, table_format:text/none/none
95table_name:complex_view, constraint:restrict_to, table_format:text/none/none
96table_name:complex_view, constraint:restrict_to, table_format:seq/snap/block
97table_name:view_view, constraint:restrict_to, table_format:text/none/none
98table_name:view_view, constraint:restrict_to, table_format:seq/snap/block
99table_name:subquery_view, constraint:restrict_to, table_format:seq/snap/block
100table_name:subquery_view, constraint:restrict_to, table_format:rc/none/none
101# liketbl, tblwithraggedcolumns and manynulls all have
102# NULLs in primary key columns. hbase does not support
103# writing NULLs to primary key columns.
104table_name:liketbl, constraint:exclude, table_format:hbase/none/none
105table_name:manynulls, constraint:exclude, table_format:hbase/none/none
106table_name:tblwithraggedcolumns, constraint:exclude, table_format:hbase/none/none
107# Tables with only one column are not supported in hbase.
108table_name:greptiny, constraint:exclude, table_format:hbase/none/none
109table_name:tinyinttable, constraint:exclude, table_format:hbase/none/none
110# overflow uses a manually constructed text file which doesn't make sense to write to
111# other table formats since the values that would be written are different (e.g. already
112# truncated.)
113table_name:overflow, constraint:restrict_to, table_format:text/none/none
114# widerow has a single column with a single row containing a 10MB string. hbase doesn't
115# seem to like this.
116table_name:widerow, constraint:exclude, table_format:hbase/none/none
117# nullformat_custom is used in null-insert tests, which use INSERT OVERWRITE,
118# which is not supported in hbase. The schema is also specified in HIVE_CREATE
119# with no corresponding LOAD statement.
120table_name:nullformat_custom, constraint:exclude, table_format:hbase/none/none
121table_name:unsupported_types, constraint:exclude, table_format:hbase/none/none
122# Decimal can only be tested on formats Impala can write to (text and parquet).
123# TODO: add Avro once Hive or Impala can write Avro decimals
124table_name:decimal_tbl, constraint:restrict_to, table_format:text/none/none
125table_name:decimal_tiny, constraint:restrict_to, table_format:text/none/none
126table_name:decimal_tbl, constraint:restrict_to, table_format:parquet/none/none
127table_name:decimal_tiny, constraint:restrict_to, table_format:parquet/none/none
128table_name:decimal_tbl, constraint:restrict_to, table_format:kudu/none/none
129table_name:decimal_tiny, constraint:restrict_to, table_format:kudu/none/none
130table_name:decimal_tbl, constraint:restrict_to, table_format:orc/def/block
131table_name:decimal_tiny, constraint:restrict_to, table_format:orc/def/block
132table_name:decimal_rtf_tbl, constraint:restrict_to, table_format:text/none/none
133table_name:decimal_rtf_tbl, constraint:restrict_to, table_format:parquet/none/none
134table_name:decimal_rtf_tbl, constraint:restrict_to, table_format:kudu/none/none
135table_name:decimal_rtf_tbl, constraint:restrict_to, table_format:orc/def/block
136table_name:decimal_rtf_tiny_tbl, constraint:restrict_to, table_format:text/none/none
137table_name:decimal_rtf_tiny_tbl, constraint:restrict_to, table_format:parquet/none/none
138table_name:decimal_rtf_tiny_tbl, constraint:restrict_to, table_format:kudu/none/none
139table_name:decimal_rtf_tiny_tbl, constraint:restrict_to, table_format:orc/def/block
140table_name:avro_decimal_tbl, constraint:restrict_to, table_format:avro/snap/block
141# CHAR is not supported by HBase.
142table_name:chars_tiny, constraint:exclude, table_format:hbase/none/none
143table_name:chars_medium, constraint:exclude, table_format:hbase/none/none
144# invalid_decimal_part_tbl[1,2,3] tables are used for testing invalid decimal
145# partition key values (see IMPALA-1040)
146table_name:invalid_decimal_part_tbl1, constraint:restrict_to, table_format:text/none/none
147table_name:invalid_decimal_part_tbl2, constraint:restrict_to, table_format:text/none/none
148table_name:invalid_decimal_part_tbl3, constraint:restrict_to, table_format:text/none/none
149table_name:avro_decimal_tbl, constraint:restrict_to, table_format:avro/snap/block
150# testescape tables are used for testing text scanner delimiter handling
151table_name:table_no_newline, constraint:restrict_to, table_format:text/none/none
152table_name:table_no_newline_part, constraint:restrict_to, table_format:text/none/none
153table_name:testescape_16_lf, constraint:restrict_to, table_format:text/none/none
154table_name:testescape_16_crlf, constraint:restrict_to, table_format:text/none/none
155table_name:testescape_17_lf, constraint:restrict_to, table_format:text/none/none
156table_name:testescape_17_crlf, constraint:restrict_to, table_format:text/none/none
157table_name:testescape_32_lf, constraint:restrict_to, table_format:text/none/none
158table_name:testescape_32_crlf, constraint:restrict_to, table_format:text/none/none
159# alltimezones is used to verify that impala properly deals with timezones
160table_name:alltimezones, constraint:restrict_to, table_format:text/none/none
161# Avro schema is inferred from the column definitions (IMPALA-1136)
162table_name:no_avro_schema, constraint:restrict_to, table_format:avro/snap/block
163table_name:avro_unicode_nulls, constraint:restrict_to, table_format:avro/snap/block
164# test single and multi stream bz2 files
165table_name:bzip2_tbl, constraint:restrict_to, table_format:text/bzip/block
166table_name:large_bzip2_tbl, constraint:restrict_to, table_format:text/bzip/block
167table_name:multistream_bzip2_tbl, constraint:restrict_to, table_format:text/bzip/block
168table_name:large_multistream_bzip2_tbl, constraint:restrict_to, table_format:text/bzip/block
169# Kudu can't handle certain types such as timestamp so we pick and choose the tables
170# we actually use for Kudu related tests.
171table_name:alltypes, constraint:only, table_format:kudu/none/none
172table_name:alltypessmall, constraint:only, table_format:kudu/none/none
173table_name:alltypestiny, constraint:only, table_format:kudu/none/none
174table_name:alltypesagg, constraint:only, table_format:kudu/none/none
175table_name:alltypesaggnonulls, constraint:only, table_format:kudu/none/none
176table_name:testtbl, constraint:only, table_format:kudu/none/none
177table_name:jointbl, constraint:only, table_format:kudu/none/none
178table_name:emptytable, constraint:only, table_format:kudu/none/none
179table_name:dimtbl, constraint:only, table_format:kudu/none/none
180table_name:tinytable, constraint:only, table_format:kudu/none/none
181table_name:tinyinttable, constraint:only, table_format:kudu/none/none
182table_name:zipcode_incomes, constraint:only, table_format:kudu/none/none
183table_name:nulltable, constraint:only, table_format:kudu/none/none
184table_name:nullrows, constraint:only, table_format:kudu/none/none
185table_name:nullescapedtable, constraint:only, table_format:kudu/none/none
186table_name:decimal_tbl, constraint:only, table_format:kudu/none/none
187table_name:decimal_rtf_tbl, constraint:only, table_format:kudu/none/none
188table_name:decimal_rtf_tiny_tbl, constraint:only, table_format:kudu/none/none
189table_name:decimal_tiny, constraint:only, table_format:kudu/none/none
190table_name:strings_with_quotes, constraint:only, table_format:kudu/none/none
191table_name:manynulls, constraint:only, table_format:kudu/none/none
192# Skipping header lines is only effective with text tables
193table_name:table_with_header, constraint:restrict_to, table_format:text/none/none
194table_name:table_with_header_2, constraint:restrict_to, table_format:text/none/none
195table_name:table_with_header_insert, constraint:restrict_to, table_format:text/none/none
196# We also test that skipping header lines works on compressed tables (IMPALA-5287)
197table_name:table_with_header, constraint:restrict_to, table_format:text/gzip/block
198table_name:table_with_header_2, constraint:restrict_to, table_format:text/gzip/block
199table_name:table_with_header_insert, constraint:restrict_to, table_format:text/gzip/block
200# Inserting into parquet tables should not be affected by the 'skip.header.line.count'
201# property, so we test parquet format as well.
202table_name:table_with_header_insert, constraint:restrict_to, table_format:parquet/none/none
203# IMPALA-7368/IMPALA-7370/IMPALA-8198 add DATE support for text, hbase, parquet and avro.
204# IMPALA-8801 adds DATE support for ORC.
205# Other file-formats will be introduced later.
206table_name:date_tbl, constraint:restrict_to, table_format:parquet/none/none
207table_name:date_tbl, constraint:restrict_to, table_format:avro/snap/block
208table_name:date_tbl, constraint:restrict_to, table_format:orc/def/block
209table_name:date_tbl, constraint:restrict_to, table_format:hbase/none/none
210table_name:date_tbl, constraint:restrict_to, table_format:text/none/none
211table_name:date_tbl, constraint:restrict_to, table_format:text/lzo/block
212table_name:date_tbl, constraint:restrict_to, table_format:text/bzip/block
213table_name:date_tbl, constraint:restrict_to, table_format:text/gzip/block
214table_name:date_tbl, constraint:restrict_to, table_format:text/snap/block
215table_name:date_tbl, constraint:restrict_to, table_format:text/def/block
216table_name:date_tbl_error, constraint:restrict_to, table_format:text/none/none
217table_name:date_tbl_error, constraint:restrict_to, table_format:text/lzo/block
218table_name:date_tbl_error, constraint:restrict_to, table_format:text/bzip/block
219table_name:date_tbl_error, constraint:restrict_to, table_format:text/gzip/block
220table_name:date_tbl_error, constraint:restrict_to, table_format:text/snap/block
221table_name:date_tbl_error, constraint:restrict_to, table_format:text/def/block
222table_name:insert_date_tbl, constraint:restrict_to, table_format:hbase/none/none
223# Full transactional tables are only supported for ORC
224table_name:full_transactional_table, constraint:restrict_to, table_format:orc/def/block
225# Insert-only transactional tables only work for file-format based tables
226table_name:insert_only_transactional_table, constraint:exclude, table_format:hbase/none/none
227table_name:insert_only_transactional_table, constraint:exclude, table_format:kudu/none/none
228table_name:insertonly_nopart_insert, constraint:restrict_to, table_format:text/none/none
229table_name:insertonly_nopart_insert, constraint:restrict_to, table_format:parquet/none/none
230table_name:insertonly_part_insert, constraint:restrict_to, table_format:text/none/none
231table_name:insertonly_part_insert, constraint:restrict_to, table_format:parquet/none/none
232# 'materialized_view' is based on 'insert_only_transactional_table' from the same
233# database, so it needs to be excluded where 'insert_only_transactional_table' is
234# excluded.
235table_name:materialized_view, constraint:exclude, table_format:hbase/none/none
236table_name:materialized_view, constraint:exclude, table_format:kudu/none/none
237table_name:insert_only_transactional_bucketed_table, constraint:exclude, table_format:hbase/none/none
238table_name:insert_only_transactional_bucketed_table, constraint:exclude, table_format:kudu/none/none
239# Bucketed tables only work for file-format based tables
240table_name:bucketed_ext_table, constraint:exclude, table_format:hbase/none/none
241table_name:bucketed_ext_table, constraint:exclude, table_format:kudu/none/none
242table_name:bucketed_table, constraint:exclude, table_format:hbase/none/none
243table_name:bucketed_table, constraint:exclude, table_format:kudu/none/none
244table_name:bucketed_table, constraint:exclude, table_format:text/lzo/block
245# The uncompressed ORC tables are mainly used in test_scanners_fuzz.py to avoid creating
246# them each time when running the test. Developers may run this test many times locally.
247table_name:uncomp_src_alltypes, constraint:restrict_to, table_format:orc/def/block
248table_name:uncomp_src_decimal_tbl, constraint:restrict_to, table_format:orc/def/block
249table_name:part_strings_with_quotes, constraint:restrict_to, table_format:text/none/none