mirror of
https://github.com/apache/impala.git
synced 2026-01-22 00:01:21 -05:00
Adds initial support for the functional-query test workload
for Kudu tables.
There are a few issues that make loading the functional
schema difficult on Kudu:
1) Kudu tables must have one or more columns that together
constitute a unique primary key.
a) Primary key columns must currently be the first columns
in the table definition (KUDU-1271).
b) Primary key columns cannot be nullable (KUDU-1570).
2) Kudu tables must be specified with distribution
parameters.
(1) limits the tables that can be loaded without ugly
workarounds. This patch only includes important tables that
are used for relevant tests, most notably the alltypes*
family. In particular, alltypesagg is important but it does
not have a set of columns that are non-nullable and form a unique
primary key. As a result, that table is created in Kudu with
a different name and an additional BIGINT column for a PK
that is a unique index and is generated at data loading time
using the ROW_NUMBER analytic function. A view is then
wrapped around the underlying table that matches the
alltypesagg schema exactly. When KUDU-1570 is resolved, this
can be simplified.
(2) requires some additional considerations and custom
syntax. As a result, the DDL to create the tables is
explicitly specified in CREATE_KUDU sections in the
functional_schema_constraints.csv, and an additional
DEPENDENT_LOAD_KUDU section was added to specify custom data
loading DML that differs from the existing DEPENDENT_LOAD.
TODO: IMPALA-4005: generate_schema_statements.py needs refactoring
Tests that are not relevant or not yet supported have been
marked with xfail and a skip where appropriate.
TODO: Support remaining functional tables/tests when possible.
Change-Id: Iada88e078352e4462745d9a9a1b5111260d21acc
Reviewed-on: http://gerrit.cloudera.org:8080/4175
Reviewed-by: Matthew Jacobs <mj@cloudera.com>
Tested-by: Internal Jenkins
12 KiB
12 KiB
| 1 | # Table level constraints: |
|---|---|
| 2 | # Allows for defining constraints on which file formats to generate for an individual |
| 3 | # table. The table name should match the base table name defined in the schema template |
| 4 | # file. |
| 5 | table_name:stringids, constraint:restrict_to, table_format:hbase/none/none |
| 6 | table_name:hbasecolumnfamilies, constraint:restrict_to, table_format:hbase/none/none |
| 7 | table_name:insertalltypesagg, constraint:restrict_to, table_format:hbase/none/none |
| 8 | table_name:alltypessmallbinary, constraint:restrict_to, table_format:hbase/none/none |
| 9 | table_name:insertalltypesaggbinary, constraint:restrict_to, table_format:hbase/none/none |
| 10 | table_name:hbasealltypeserror, constraint:restrict_to, table_format:hbase/none/none |
| 11 | table_name:hbasealltypeserrornonulls, constraint:restrict_to, table_format:hbase/none/none |
| 12 | table_name:alltypesinsert, constraint:restrict_to, table_format:text/none/none |
| 13 | table_name:alltypesnopart_insert, constraint:restrict_to, table_format:text/none/none |
| 14 | table_name:insert_overwrite_nopart, constraint:restrict_to, table_format:text/none/none |
| 15 | table_name:insert_overwrite_partitioned, constraint:restrict_to, table_format:text/none/none |
| 16 | table_name:insert_string_partitioned, constraint:restrict_to, table_format:text/none/none |
| 17 | table_name:alltypesinsert, constraint:restrict_to, table_format:parquet/none/none |
| 18 | table_name:alltypesnopart_insert, constraint:restrict_to, table_format:parquet/none/none |
| 19 | table_name:alltypesinsert, constraint:restrict_to, table_format:text/none/none |
| 20 | table_name:alltypesnopart_insert, constraint:restrict_to, table_format:text/none/none |
| 21 | table_name:insert_overwrite_nopart, constraint:restrict_to, table_format:text/none/none |
| 22 | table_name:insert_overwrite_partitioned, constraint:restrict_to, table_format:text/none/none |
| 23 | table_name:insert_string_partitioned, constraint:restrict_to, table_format:text/none/none |
| 24 | table_name:alltypesinsert, constraint:restrict_to, table_format:parquet/none/none |
| 25 | table_name:alltypesnopart_insert, constraint:restrict_to, table_format:parquet/none/none |
| 26 | table_name:insert_overwrite_nopart, constraint:restrict_to, table_format:parquet/none/none |
| 27 | table_name:insert_overwrite_partitioned, constraint:restrict_to, table_format:parquet/none/none |
| 28 | table_name:insert_string_partitioned, constraint:restrict_to, table_format:parquet/none/none |
| 29 | table_name:old_rcfile_table, constraint:restrict_to, table_format:rc/none/none |
| 30 | table_name:bad_text_lzo, constraint:restrict_to, table_format:text/lzo/block |
| 31 | table_name:bad_text_gzip, constraint:restrict_to, table_format:text/gzip/block |
| 32 | table_name:bad_seq_snap, constraint:restrict_to, table_format:seq/snap/block |
| 33 | table_name:bad_avro_snap_strings, constraint:restrict_to, table_format:avro/snap/block |
| 34 | table_name:bad_avro_snap_floats, constraint:restrict_to, table_format:avro/snap/block |
| 35 | table_name:bad_parquet, constraint:restrict_to, table_format:parquet/none/none |
| 36 | table_name:bad_parquet_strings_negative_len, constraint:restrict_to, table_format:parquet/none/none |
| 37 | table_name:bad_parquet_strings_out_of_bounds, constraint:restrict_to, table_format:parquet/none/none |
| 38 | table_name:bad_magic_number, constraint:restrict_to, table_format:parquet/none/none |
| 39 | table_name:bad_metadata_len, constraint:restrict_to, table_format:parquet/none/none |
| 40 | table_name:bad_dict_page_offset, constraint:restrict_to, table_format:parquet/none/none |
| 41 | table_name:bad_compressed_size, constraint:restrict_to, table_format:parquet/none/none |
| 42 | table_name:alltypesagg_hive_13_1, constraint:restrict_to, table_format:parquet/none/none |
| 43 | table_name:kite_required_fields, constraint:restrict_to, table_format:parquet/none/none |
| 44 | table_name:bad_column_metadata, constraint:restrict_to, table_format:parquet/none/none |
| 45 | table_name:lineitem_multiblock, constraint:restrict_to, table_format:parquet/none/none |
| 46 | table_name:lineitem_sixblocks, constraint:restrict_to, table_format:parquet/none/none |
| 47 | table_name:lineitem_multiblock_one_row_group, constraint:restrict_to, table_format:parquet/none/none |
| 48 | # TODO: Support Avro. Data loading currently fails for Avro because complex types |
| 49 | # cannot be converted to the corresponding Avro types yet. |
| 50 | table_name:allcomplextypes, constraint:restrict_to, table_format:text/none/none |
| 51 | table_name:allcomplextypes, constraint:restrict_to, table_format:parquet/none/none |
| 52 | table_name:allcomplextypes, constraint:restrict_to, table_format:hbase/none/none |
| 53 | table_name:functional, constraint:restrict_to, table_format:text/none/none |
| 54 | table_name:complextypes_fileformat, constraint:restrict_to, table_format:text/none/none |
| 55 | table_name:complextypes_fileformat, constraint:restrict_to, table_format:parquet/none/none |
| 56 | table_name:complextypes_fileformat, constraint:restrict_to, table_format:avro/snap/block |
| 57 | table_name:complextypes_fileformat, constraint:restrict_to, table_format:rc/snap/block |
| 58 | table_name:complextypes_fileformat, constraint:restrict_to, table_format:seq/snap/block |
| 59 | table_name:complextypes_multifileformat, constraint:restrict_to, table_format:text/none/none |
| 60 | # TODO: Avro |
| 61 | table_name:complextypestbl, constraint:restrict_to, table_format:parquet/none/none |
| 62 | table_name:alltypeserror, constraint:exclude, table_format:parquet/none/none |
| 63 | table_name:alltypeserrornonulls, constraint:exclude, table_format:parquet/none/none |
| 64 | table_name:unsupported_types, constraint:exclude, table_format:parquet/none/none |
| 65 | table_name:escapechartesttable, constraint:exclude, table_format:parquet/none/none |
| 66 | table_name:TblWithRaggedColumns, constraint:exclude, table_format:parquet/none/none |
| 67 | # the text_ tables are for testing delimiters and escape chars in text files |
| 68 | table_name:text_comma_backslash_newline, constraint:restrict_to, table_format:text/none/none |
| 69 | table_name:text_dollar_hash_pipe, constraint:restrict_to, table_format:text/none/none |
| 70 | table_name:text_thorn_ecirc_newline, constraint:restrict_to, table_format:text/none/none |
| 71 | table_name:bad_serde, constraint:restrict_to, table_format:text/none/none |
| 72 | table_name:rcfile_lazy_binary_serde, constraint:restrict_to, table_format:rc/none/none |
| 73 | table_name:unsupported_partition_types, constraint:restrict_to, table_format:text/none/none |
| 74 | table_name:nullformat_custom, constraint:exclude, table_format:parquet/none/none |
| 75 | table_name:alltypes_view, constraint:restrict_to, table_format:text/none/none |
| 76 | table_name:allcomplextypes_view, constraint:restrict_to, table_format:text/none/none |
| 77 | table_name:alltypes_view, constraint:restrict_to, table_format:seq/snap/block |
| 78 | table_name:alltypes_hive_view, constraint:restrict_to, table_format:text/none/none |
| 79 | table_name:alltypes_view_sub, constraint:restrict_to, table_format:text/none/none |
| 80 | table_name:alltypes_view_sub, constraint:restrict_to, table_format:seq/snap/block |
| 81 | table_name:alltypes_parens, constraint:restrict_to, table_format:text/none/none |
| 82 | table_name:complex_view, constraint:restrict_to, table_format:text/none/none |
| 83 | table_name:complex_view, constraint:restrict_to, table_format:seq/snap/block |
| 84 | table_name:view_view, constraint:restrict_to, table_format:text/none/none |
| 85 | table_name:view_view, constraint:restrict_to, table_format:seq/snap/block |
| 86 | table_name:subquery_view, constraint:restrict_to, table_format:seq/snap/block |
| 87 | table_name:subquery_view, constraint:restrict_to, table_format:rc/none/none |
| 88 | # liketbl and tblwithraggedcolumns both have |
| 89 | # NULLs in primary key columns. hbase does not support |
| 90 | # writing NULLs to primary key columns. |
| 91 | table_name:liketbl, constraint:exclude, table_format:hbase/none/none |
| 92 | table_name:tblwithraggedcolumns, constraint:exclude, table_format:hbase/none/none |
| 93 | # Tables with only one column are not supported in hbase. |
| 94 | table_name:greptiny, constraint:exclude, table_format:hbase/none/none |
| 95 | table_name:tinyinttable, constraint:exclude, table_format:hbase/none/none |
| 96 | # overflow uses a manually constructed text file which doesn't make sense to write to |
| 97 | # other table formats since the values that would be written are different (e.g. already |
| 98 | # truncated.) |
| 99 | table_name:overflow, constraint:restrict_to, table_format:text/none/none |
| 100 | # widerow has a single column with a single row containing a 10MB string. hbase doesn't |
| 101 | # seem to like this. |
| 102 | table_name:widerow, constraint:exclude, table_format:hbase/none/none |
| 103 | # nullformat_custom is used in null-insert tests, which use insert overwrite, |
| 104 | # which is not supported in hbase. The schema is also specified in HIVE_CREATE |
| 105 | # with no corresponding LOAD statement. |
| 106 | table_name:nullformat_custom, constraint:exclude, table_format:hbase/none/none |
| 107 | table_name:unsupported_types, constraint:exclude, table_format:hbase/none/none |
| 108 | # On CDH4, decimal can only be tested on formats Impala can write to (text and parquet) |
| 109 | # TODO: add Avro once Hive or Impala can write Avro decimals |
| 110 | table_name:decimal_tbl, constraint:restrict_to, table_format:text/none/none |
| 111 | table_name:decimal_tiny, constraint:restrict_to, table_format:text/none/none |
| 112 | table_name:decimal_tbl, constraint:restrict_to, table_format:parquet/none/none |
| 113 | table_name:decimal_tiny, constraint:restrict_to, table_format:parquet/none/none |
| 114 | table_name:avro_decimal_tbl, constraint:restrict_to, table_format:avro/snap/block |
| 115 | # TODO first set of tests are for text/none/none |
| 116 | table_name:chars_tiny, constraint:restrict_to, table_format:text/none/none |
| 117 | # invalid_decimal_part_tbl[1,2,3] tables are used for testing invalid decimal |
| 118 | # partition key values (see IMPALA-1040) |
| 119 | table_name:invalid_decimal_part_tbl1, constraint:restrict_to, table_format:text/none/none |
| 120 | table_name:invalid_decimal_part_tbl2, constraint:restrict_to, table_format:text/none/none |
| 121 | table_name:invalid_decimal_part_tbl3, constraint:restrict_to, table_format:text/none/none |
| 122 | table_name:avro_decimal_tbl, constraint:restrict_to, table_format:avro/snap/block |
| 123 | # testescape tables are used for testing text scanner delimiter handling |
| 124 | table_name:table_no_newline, constraint:restrict_to, table_format:text/none/none |
| 125 | table_name:table_no_newline_part, constraint:restrict_to, table_format:text/none/none |
| 126 | table_name:testescape_16_lf, constraint:restrict_to, table_format:text/none/none |
| 127 | table_name:testescape_16_crlf, constraint:restrict_to, table_format:text/none/none |
| 128 | table_name:testescape_17_lf, constraint:restrict_to, table_format:text/none/none |
| 129 | table_name:testescape_17_crlf, constraint:restrict_to, table_format:text/none/none |
| 130 | table_name:testescape_32_lf, constraint:restrict_to, table_format:text/none/none |
| 131 | table_name:testescape_32_crlf, constraint:restrict_to, table_format:text/none/none |
| 132 | # alltimezones is used to verify that impala properly deals with timezones |
| 133 | table_name:alltimezones, constraint:restrict_to, table_format:text/none/none |
| 134 | # Avro schema is inferred from the column definitions (IMPALA-1136) |
| 135 | table_name:no_avro_schema, constraint:restrict_to, table_format:avro/snap/block |
| 136 | table_name:avro_unicode_nulls, constraint:restrict_to, table_format:avro/snap/block |
| 137 | # test single and multi stream bz2 files |
| 138 | table_name:bzip2_tbl, constraint:restrict_to, table_format:text/bzip/block |
| 139 | table_name:large_bzip2_tbl, constraint:restrict_to, table_format:text/bzip/block |
| 140 | table_name:multistream_bzip2_tbl, constraint:restrict_to, table_format:text/bzip/block |
| 141 | table_name:large_multistream_bzip2_tbl, constraint:restrict_to, table_format:text/bzip/block |
| 142 | # Kudu can't handle certain types such as timestamp so we pick and choose the tables |
| 143 | # we actually use for Kudu related tests. |
| 144 | table_name:alltypes, constraint:only, table_format:kudu/none/none |
| 145 | table_name:alltypessmall, constraint:only, table_format:kudu/none/none |
| 146 | table_name:alltypestiny, constraint:only, table_format:kudu/none/none |
| 147 | table_name:alltypesagg, constraint:only, table_format:kudu/none/none |
| 148 | table_name:alltypesaggnonulls, constraint:only, table_format:kudu/none/none |
| 149 | table_name:testtbl, constraint:only, table_format:kudu/none/none |
| 150 | table_name:jointbl, constraint:only, table_format:kudu/none/none |
| 151 | table_name:emptytable, constraint:only, table_format:kudu/none/none |
| 152 | table_name:dimtbl, constraint:only, table_format:kudu/none/none |
| 153 | table_name:tinytable, constraint:only, table_format:kudu/none/none |
| 154 | table_name:tinyinttable, constraint:only, table_format:kudu/none/none |
| 155 | table_name:zipcode_incomes, constraint:only, table_format:kudu/none/none |
| 156 | table_name:nulltable, constraint:only, table_format:kudu/none/none |
| 157 | table_name:nullescapedtable, constraint:only, table_format:kudu/none/none |
| 158 | # Skipping header lines is only effective with text tables |
| 159 | table_name:table_with_header, constraint:restrict_to, table_format:text/none/none |
| 160 | table_name:table_with_header_2, constraint:restrict_to, table_format:text/none/none |
| 161 | table_name:table_with_header_insert, constraint:restrict_to, table_format:text/none/none |
| 162 | # Inserting into parquet tables should not be affected by the 'skip.header.line.count' |
| 163 | # property, so we test parquet format as well. |
| 164 | table_name:table_with_header_insert, constraint:restrict_to, table_format:parquet/none/none |