mirror of
https://github.com/apache/impala.git
synced 2026-02-02 15:00:38 -05:00
Loading data from S3 did not skip hidden files because the FileSystemUtil.listFiles() call was returning a RemoteIterator, which compared to RecursingIterator does not filter the hidden files. This would make a load fail because the hidden files likely have invalid magic string. This commit adds an extra condition to skip hidden files when creating the CREATE subquery. Testing: - Added E2E test - Ran E2E test on S3 build Change-Id: Iffd179383c2bb2529f6f9b5f8bf5cba5f3553652 Reviewed-on: http://gerrit.cloudera.org:8080/19441 Reviewed-by: Daniel Becker <daniel.becker@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Noemi Pap-Takacs <npaptakacs@cloudera.com> Reviewed-by: Zoltan Borok-Nagy <boroknagyz@cloudera.com>
156 lines
3.7 KiB
Plaintext
156 lines
3.7 KiB
Plaintext
====
|
|
---- QUERY
|
|
create table test_iceberg_load_parquet like iceberg_mixed_file_format_test
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
# Test 1-2: Recovery from child query failure, first file then directory location
|
|
set mem_limit=1;
|
|
load data inpath '/tmp/$DATABASE/parquet/00000-0-data-gfurnstahl_20220906113044_157fc172-f5d3-4c70-8653-fff150b6136a-job_16619542960420_0002-1-00001.parquet'
|
|
into table test_iceberg_load_parquet;
|
|
---- CATCH
|
|
minimum memory reservation is greater than memory available to the query for buffer reservations
|
|
====
|
|
---- QUERY
|
|
select count(*) from test_iceberg_load_parquet;
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
set mem_limit=1;
|
|
load data inpath '/tmp/$DATABASE/parquet/' into table test_iceberg_load_parquet;
|
|
---- CATCH
|
|
minimum memory reservation is greater than memory available to the query for buffer reservations
|
|
====
|
|
---- QUERY
|
|
select count(*) from test_iceberg_load_parquet;
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test 3-4: Load a parquet file then a directory into test table
|
|
load data inpath '/tmp/$DATABASE/parquet/00000-0-data-gfurnstahl_20220906113044_157fc172-f5d3-4c70-8653-fff150b6136a-job_16619542960420_0002-1-00001.parquet'
|
|
into table test_iceberg_load_parquet;
|
|
---- RESULTS
|
|
'Loaded 1 file(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select count(*) from test_iceberg_load_parquet;
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
load data inpath '/tmp/$DATABASE/parquet/' into table test_iceberg_load_parquet;
|
|
---- RESULTS
|
|
'Loaded 1 file(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select count(*) from test_iceberg_load_parquet;
|
|
---- RESULTS
|
|
2
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test 5-6: Load an orc file then a directory into test table
|
|
create table test_iceberg_load_orc like iceberg_mixed_file_format_test
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
load data inpath '/tmp/$DATABASE/orc/00000-0-data-gfurnstahl_20220906113255_8d49367d-e338-4996-ade5-ee500a19c1d1-job_16619542960420_0003-1-00001.orc'
|
|
into table test_iceberg_load_orc;
|
|
---- RESULTS
|
|
'Loaded 1 file(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select count(*) from test_iceberg_load_orc;
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
load data inpath '/tmp/$DATABASE/orc/' into table test_iceberg_load_orc;
|
|
---- RESULTS
|
|
'Loaded 1 file(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select count(*) from test_iceberg_load_orc;
|
|
---- RESULTS
|
|
2
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test 7: Overwrite the existing table from Test 1
|
|
load data inpath '/tmp/$DATABASE/overwrite/' overwrite into table test_iceberg_load_parquet;
|
|
---- RESULTS
|
|
'Loaded 1 file(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select count(*) from test_iceberg_load_parquet;
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test 8: Mismatching target table and file schema
|
|
create table test_iceberg_load_schema_mismatch (i int)
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
load data inpath '/tmp/$DATABASE/mismatching_schema/' overwrite into table test_iceberg_load_schema_mismatch;
|
|
---- CATCH
|
|
row_regex:.*AnalysisException: Target table .* has fewer columns \(1\) than the SELECT / VALUES clause returns \(4\).*
|
|
====
|
|
---- QUERY
|
|
# Test 9: Partitioned Iceberg table
|
|
create table test_iceberg_load_partitioned like functional_parquet.iceberg_partitioned
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
load data inpath '/tmp/$DATABASE/partitioned/' overwrite into table test_iceberg_load_partitioned;
|
|
---- RESULTS
|
|
'Loaded 1 file(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select count(*) from test_iceberg_load_partitioned;
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test 10: hidden files should be skipped and the one data file should be loaded
|
|
load data inpath '/tmp/$DATABASE/hidden/' into table test_iceberg_load_partitioned;
|
|
---- RESULTS
|
|
'Loaded 1 file(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select count(*) from test_iceberg_load_partitioned;
|
|
---- RESULTS
|
|
2
|
|
---- TYPES
|
|
BIGINT
|
|
==== |