Files
impala/testdata/datasets/functional/schema_constraints.csv
Matthew Jacobs f413e236a8 IMPALA-3579: Strict handling of numeric overflow in text parsing
Adds a query option 'strict_mode' which treats integer and
floating-point overflows as parse errors. In the past,
overflows were ignored and the max value was returned. When
this query option is set, overflowing values are treated as if
they were completely invalid data, i.e. NULL is returned.
When abort_on_error is enabled, this means the query is
aborted.

Notes:
* DECIMAL overflow/underflow is already treated as an error.
* The handling in text-converter treats underflows the same
  as overflows, so they would result in the same behavior.
  However, floating point parsing never returns an underflow
  today.
* We may also want to handle numeric values that are truncated
  when parsing to integer types, e.g. 10.5 -> 10.

Change-Id: I7409c31ec0cb6fe0b2d9842b9f58fe1670914836
Reviewed-on: http://gerrit.cloudera.org:8080/3150
Reviewed-by: Matthew Jacobs <mj@cloudera.com>
Tested-by: Internal Jenkins
2016-05-23 08:40:20 -07:00

11 KiB

1# Table level constraints:
2# Allows for defining constraints on which file formats to generate for an individual
3# table. The table name should match the base table name defined in the schema template
4# file.
5table_name:stringids, constraint:restrict_to, table_format:hbase/none/none
6table_name:hbasecolumnfamilies, constraint:restrict_to, table_format:hbase/none/none
7table_name:insertalltypesagg, constraint:restrict_to, table_format:hbase/none/none
8table_name:alltypessmallbinary, constraint:restrict_to, table_format:hbase/none/none
9table_name:insertalltypesaggbinary, constraint:restrict_to, table_format:hbase/none/none
10table_name:hbasealltypeserror, constraint:restrict_to, table_format:hbase/none/none
11table_name:hbasealltypeserrornonulls, constraint:restrict_to, table_format:hbase/none/none
12table_name:alltypesinsert, constraint:restrict_to, table_format:text/none/none
13table_name:alltypesnopart_insert, constraint:restrict_to, table_format:text/none/none
14table_name:insert_overwrite_nopart, constraint:restrict_to, table_format:text/none/none
15table_name:insert_overwrite_partitioned, constraint:restrict_to, table_format:text/none/none
16table_name:insert_string_partitioned, constraint:restrict_to, table_format:text/none/none
17table_name:alltypesinsert, constraint:restrict_to, table_format:parquet/none/none
18table_name:alltypesnopart_insert, constraint:restrict_to, table_format:parquet/none/none
19table_name:alltypesinsert, constraint:restrict_to, table_format:text/none/none
20table_name:alltypesnopart_insert, constraint:restrict_to, table_format:text/none/none
21table_name:insert_overwrite_nopart, constraint:restrict_to, table_format:text/none/none
22table_name:insert_overwrite_partitioned, constraint:restrict_to, table_format:text/none/none
23table_name:insert_string_partitioned, constraint:restrict_to, table_format:text/none/none
24table_name:alltypesinsert, constraint:restrict_to, table_format:parquet/none/none
25table_name:alltypesnopart_insert, constraint:restrict_to, table_format:parquet/none/none
26table_name:insert_overwrite_nopart, constraint:restrict_to, table_format:parquet/none/none
27table_name:insert_overwrite_partitioned, constraint:restrict_to, table_format:parquet/none/none
28table_name:insert_string_partitioned, constraint:restrict_to, table_format:parquet/none/none
29table_name:old_rcfile_table, constraint:restrict_to, table_format:rc/none/none
30table_name:bad_text_lzo, constraint:restrict_to, table_format:text/lzo/block
31table_name:bad_text_gzip, constraint:restrict_to, table_format:text/gzip/block
32table_name:bad_seq_snap, constraint:restrict_to, table_format:seq/snap/block
33table_name:bad_parquet, constraint:restrict_to, table_format:parquet/none/none
34table_name:bad_magic_number, constraint:restrict_to, table_format:parquet/none/none
35table_name:bad_metadata_len, constraint:restrict_to, table_format:parquet/none/none
36table_name:bad_dict_page_offset, constraint:restrict_to, table_format:parquet/none/none
37table_name:bad_compressed_size, constraint:restrict_to, table_format:parquet/none/none
38table_name:alltypesagg_hive_13_1, constraint:restrict_to, table_format:parquet/none/none
39table_name:kite_required_fields, constraint:restrict_to, table_format:parquet/none/none
40table_name:bad_column_metadata, constraint:restrict_to, table_format:parquet/none/none
41table_name:lineitem_multiblock, constraint:restrict_to, table_format:parquet/none/none
42table_name:lineitem_sixblocks, constraint:restrict_to, table_format:parquet/none/none
43table_name:lineitem_multiblock_one_row_group, constraint:restrict_to, table_format:parquet/none/none
44# TODO: Support Avro. Data loading currently fails for Avro because complex types
45# cannot be converted to the corresponding Avro types yet.
46table_name:allcomplextypes, constraint:restrict_to, table_format:text/none/none
47table_name:allcomplextypes, constraint:restrict_to, table_format:parquet/none/none
48table_name:allcomplextypes, constraint:restrict_to, table_format:hbase/none/none
49table_name:functional, constraint:restrict_to, table_format:text/none/none
50table_name:complextypes_fileformat, constraint:restrict_to, table_format:text/none/none
51table_name:complextypes_fileformat, constraint:restrict_to, table_format:parquet/none/none
52table_name:complextypes_fileformat, constraint:restrict_to, table_format:avro/snap/block
53table_name:complextypes_fileformat, constraint:restrict_to, table_format:rc/snap/block
54table_name:complextypes_fileformat, constraint:restrict_to, table_format:seq/snap/block
55table_name:complextypes_multifileformat, constraint:restrict_to, table_format:text/none/none
56# TODO: Avro
57table_name:complextypestbl, constraint:restrict_to, table_format:parquet/none/none
58table_name:alltypeserror, constraint:exclude, table_format:parquet/none/none
59table_name:alltypeserrornonulls, constraint:exclude, table_format:parquet/none/none
60table_name:unsupported_types, constraint:exclude, table_format:parquet/none/none
61table_name:escapechartesttable, constraint:exclude, table_format:parquet/none/none
62table_name:TblWithRaggedColumns, constraint:exclude, table_format:parquet/none/none
63# The text_ tables are for testing text delimiters and escape chars in text files
64table_name:text_comma_backslash_newline, constraint:restrict_to, table_format:text/none/none
65table_name:text_dollar_hash_pipe, constraint:restrict_to, table_format:text/none/none
66table_name:text_thorn_ecirc_newline, constraint:restrict_to, table_format:text/none/none
67table_name:bad_serde, constraint:restrict_to, table_format:text/none/none
68table_name:rcfile_lazy_binary_serde, constraint:restrict_to, table_format:rc/none/none
69table_name:unsupported_partition_types, constraint:restrict_to, table_format:text/none/none
70table_name:nullformat_custom, constraint:exclude, table_format:parquet/none/none
71table_name:alltypes_view, constraint:restrict_to, table_format:text/none/none
72table_name:allcomplextypes_view, constraint:restrict_to, table_format:text/none/none
73table_name:alltypes_view, constraint:restrict_to, table_format:seq/snap/block
74table_name:alltypes_hive_view, constraint:restrict_to, table_format:text/none/none
75table_name:alltypes_view_sub, constraint:restrict_to, table_format:text/none/none
76table_name:alltypes_view_sub, constraint:restrict_to, table_format:seq/snap/block
77table_name:alltypes_parens, constraint:restrict_to, table_format:text/none/none
78table_name:complex_view, constraint:restrict_to, table_format:text/none/none
79table_name:complex_view, constraint:restrict_to, table_format:seq/snap/block
80table_name:view_view, constraint:restrict_to, table_format:text/none/none
81table_name:view_view, constraint:restrict_to, table_format:seq/snap/block
82# liketbl and tblwithraggedcolumns both have
83# NULLs in primary key columns. hbase does not support
84# writing NULLs to primary key columns.
85table_name:liketbl, constraint:exclude, table_format:hbase/none/none
86table_name:tblwithraggedcolumns, constraint:exclude, table_format:hbase/none/none
87# Tables with only one column are not supported in hbase.
88table_name:greptiny, constraint:exclude, table_format:hbase/none/none
89table_name:tinyinttable, constraint:exclude, table_format:hbase/none/none
90# overflow uses a manually constructed text file which doesn't make sense to write to
91# other table formats since the values that would be written are different (e.g. already
92# truncated.)
93table_name:overflow, constraint:restrict_to, table_format:text/none/none
94# widerow has a single column with a single row containing a 10MB string. hbase doesn't
95# seem to like this.
96table_name:widerow, constraint:exclude, table_format:hbase/none/none
97# nullformat_custom is used in null-insert tests, which use insert overwrite,
98# which is not supported in hbase. The schema is also specified in HIVE_CREATE
99# with no corresponding LOAD statement.
100table_name:nullformat_custom, constraint:exclude, table_format:hbase/none/none
101table_name:unsupported_types, constraint:exclude, table_format:hbase/none/none
102# On CDH4, decimal can only be tested on formats Impala can write to (text and parquet)
103# TODO: add Avro once Hive or Impala can write Avro decimals
104table_name:decimal_tbl, constraint:restrict_to, table_format:text/none/none
105table_name:decimal_tiny, constraint:restrict_to, table_format:text/none/none
106table_name:decimal_tbl, constraint:restrict_to, table_format:parquet/none/none
107table_name:decimal_tiny, constraint:restrict_to, table_format:parquet/none/none
108table_name:avro_decimal_tbl, constraint:restrict_to, table_format:avro/snap/block
109# TODO first set of tests are for text/none/none
110table_name:chars_tiny, constraint:restrict_to, table_format:text/none/none
111# invalid_decimal_part_tbl[1,2,3] tables are used for testing invalid decimal
112# partition key values (see IMPALA-1040)
113table_name:invalid_decimal_part_tbl1, constraint:restrict_to, table_format:text/none/none
114table_name:invalid_decimal_part_tbl2, constraint:restrict_to, table_format:text/none/none
115table_name:invalid_decimal_part_tbl3, constraint:restrict_to, table_format:text/none/none
116table_name:avro_decimal_tbl, constraint:restrict_to, table_format:avro/snap/block
117# testescape tables are used for testing text scanner delimiter handling
118table_name:table_no_newline, constraint:restrict_to, table_format:text/none/none
119table_name:table_no_newline_part, constraint:restrict_to, table_format:text/none/none
120table_name:testescape_16_lf, constraint:restrict_to, table_format:text/none/none
121table_name:testescape_16_crlf, constraint:restrict_to, table_format:text/none/none
122table_name:testescape_17_lf, constraint:restrict_to, table_format:text/none/none
123table_name:testescape_17_crlf, constraint:restrict_to, table_format:text/none/none
124table_name:testescape_32_lf, constraint:restrict_to, table_format:text/none/none
125table_name:testescape_32_crlf, constraint:restrict_to, table_format:text/none/none
126# alltimezones is used to verify that impala properly deals with timezones
127table_name:alltimezones, constraint:restrict_to, table_format:text/none/none
128# Avro schema is inferred from the column definitions (IMPALA-1136)
129table_name:no_avro_schema, constraint:restrict_to, table_format:avro/snap/block
130table_name:avro_unicode_nulls, constraint:restrict_to, table_format:avro/snap/block
131# test single and multi stream bz2 files
132table_name:bzip2_tbl, constraint:restrict_to, table_format:text/bzip/block
133table_name:large_bzip2_tbl, constraint:restrict_to, table_format:text/bzip/block
134table_name:multistream_bzip2_tbl, constraint:restrict_to, table_format:text/bzip/block
135table_name:large_multistream_bzip2_tbl, constraint:restrict_to, table_format:text/bzip/block
136# Kudu can't handle certain types such as timestamp so we pick and choose the tables
137# we actually use for Kudu related tests.
138table_name:testtbl, constraint:only, table_format:kudu/none/none
139table_name:dimtbl, constraint:only, table_format:kudu/none/none
140table_name:text_comma_backslash_newline, constraint:only, table_format:kudu/none/none
141table_name:tinytable, constraint:only, table_format:kudu/none/none
142table_name:tinyinttable, constraint:only, table_format:kudu/none/none
143table_name:zipcode_incomes, constraint:only, table_format:kudu/none/none
144# Skipping header lines is only effective with text tables
145table_name:table_with_header, constraint:restrict_to, table_format:text/none/none
146table_name:table_with_header_2, constraint:restrict_to, table_format:text/none/none
147table_name:table_with_header_insert, constraint:restrict_to, table_format:text/none/none
148# Inserting into parquet tables should not be affected by the 'skip.header.line.count'
149# property, so we test parquet format as well.
150table_name:table_with_header_insert, constraint:restrict_to, table_format:parquet/none/none