IMPALA-11562: Revert support for o3fs as default filesystem

Reverts support for o3fs as a default filesystem added in IMPALA-9442.
Updates test setup to use ofs instead.

Munges absolute paths in Iceberg metadata to match the new location
required for ofs. Ozone has strict requirements on volume and bucket
names, so all tables must be created within a bucket (e.g. inside
/impala/test-warehouse/).

Change-Id: I45e90d30b2e68876dec0db3c43ac15ee510b17bd
Reviewed-on: http://gerrit.cloudera.org:8080/19001
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Author: Michael Smith
Date:   2022-09-13 15:38:13 -07:00
Committed-by: Impala Public Jenkins
Parent: 55194a9c83
Commit: 3577030df6

37 changed files with 355 additions and 241 deletions
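
A rough before/after sketch of the default-filesystem change described in the commit message above, hand-expanding the Ozone variables from the first config hunk below (the hostname is the usual minicluster value and is only illustrative; the port, volume "impala", and bucket names come from the diff):

  # o3fs: the default filesystem is rooted at a single bucket ("base" inside
  # volume "impala"), so /test-warehouse resolves within that bucket.
  export DEFAULT_FS="o3fs://base.impala.localhost:9862"

  # ofs: the default filesystem is rooted at the cluster, so volume and bucket
  # become path components and warehouse paths gain a "/impala" volume prefix,
  # e.g. ofs://localhost:9862/impala/test-warehouse.
  export DEFAULT_FS="ofs://localhost:9862"
  export FILESYSTEM_PREFIX="${DEFAULT_FS}/impala"
  export WAREHOUSE_LOCATION_PREFIX="/impala"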


@@ -682,8 +682,9 @@ elif [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then
elif [ "${TARGET_FILESYSTEM}" = "ozone" ]; then
export USE_OZONE_ENCRYPTION=${USE_OZONE_ENCRYPTION-true}
export OZONE_VOLUME="impala"
export OZONE_BUCKET="base"
export DEFAULT_FS="o3fs://${OZONE_BUCKET}.${OZONE_VOLUME}.${INTERNAL_LISTEN_HOST}:9862"
export DEFAULT_FS="ofs://${INTERNAL_LISTEN_HOST}:9862"
export FILESYSTEM_PREFIX="${DEFAULT_FS}/${OZONE_VOLUME}"
export WAREHOUSE_LOCATION_PREFIX="/${OZONE_VOLUME}"
else
echo "Unsupported filesystem '$TARGET_FILESYSTEM'"
echo "Valid values are: hdfs, isilon, s3, abfs, adls, gs, local, ozone"


@@ -197,7 +197,8 @@ do
TEST_RET_CODE=0
# Store a list of the files at the beginning of each iteration.
hdfs dfs -ls -R /test-warehouse > ${IMPALA_LOGS_DIR}/file-list-begin-${i}.log 2>&1
hdfs dfs -ls -R ${FILESYSTEM_PREFIX}/test-warehouse \
> ${IMPALA_LOGS_DIR}/file-list-begin-${i}.log 2>&1
# Try not restarting the cluster to save time. BE, FE, JDBC and EE tests require
# running on a cluster with default flags. We just need to restart the cluster when
@@ -334,7 +335,8 @@ do
# to the file-list-begin*.log from the beginning of the iteration to see if files
# are not being cleaned up. This is most useful on the first iteration, when
# the list of files is from dataload.
hdfs dfs -ls -R /test-warehouse > ${IMPALA_LOGS_DIR}/file-list-end-${i}.log 2>&1
hdfs dfs -ls -R ${FILESYSTEM_PREFIX}/test-warehouse \
> ${IMPALA_LOGS_DIR}/file-list-end-${i}.log 2>&1
if [[ $TEST_RET_CODE == 1 ]]; then
break


@@ -113,7 +113,6 @@ public class FileSystemUtil {
.add(SCHEME_ADL)
.add(SCHEME_HDFS)
.add(SCHEME_S3A)
.add(SCHEME_O3FS)
.add(SCHEME_OFS)
.add(SCHEME_GCS)
.add(SCHEME_COS)


@@ -31,7 +31,7 @@ IMPALAD=${IMPALAD:-localhost}
TPCDS_QUERY_HOME=$IMPALA_HOME/testdata/workloads/tpcds/queries/raw
# Target directory containing the testcase data.
TESTCASE_DATA_DIR=/test-warehouse/tpcds-testcase-data
TESTCASE_DATA_DIR=${FILESYSTEM_PREFIX}/test-warehouse/tpcds-testcase-data
COPY_TEST_CASE_PREFIX="COPY TESTCASE TO '$TESTCASE_DATA_DIR'"


@@ -32,6 +32,8 @@ if [[ $# -ne 1 ]]; then
exit 1
fi
: ${TEST_WAREHOUSE_DIR=/test-warehouse}
SNAPSHOT_FILE=$1
if [ ! -f ${SNAPSHOT_FILE} ]; then
echo "Metastore Snapshot file '${SNAPSHOT_FILE}' not found"
@@ -69,6 +71,13 @@ elif [[ "${DEFAULT_FS}" != "hdfs://localhost:20500" ]]; then
sed -i "s|hdfs://localhost:20500|${DEFAULT_FS}|g" ${TMP_SNAPSHOT_FILE}
fi
if [[ "${WAREHOUSE_LOCATION_PREFIX}" != "" ]]; then
echo "Adding prefix ${WAREHOUSE_LOCATION_PREFIX} to iceberg.catalog_location"
cloc='iceberg\.catalog_location\t'
sed -i "s|\(${cloc}\)\(${TEST_WAREHOUSE_DIR}\)|\1${WAREHOUSE_LOCATION_PREFIX}\2|g" \
${TMP_SNAPSHOT_FILE}
fi
# Drop and re-create the hive metastore database
dropdb -U hiveuser ${METASTORE_DB} 2> /dev/null || true
createdb -U hiveuser ${METASTORE_DB}
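
A hedged illustration of that catalog-location rewrite: the same sed applied to one made-up, tab-separated line of the metastore dump, assuming WAREHOUSE_LOCATION_PREFIX=/impala and TEST_WAREHOUSE_DIR=/test-warehouse (requires GNU sed, which understands \t, as the hunk above does):

  cloc='iceberg\.catalog_location\t'
  printf 'iceberg.catalog_location\t/test-warehouse/hadoop_catalog_test\n' \
    | sed "s|\(${cloc}\)\(/test-warehouse\)|\1/impala\2|g"
  # prints (tab preserved): iceberg.catalog_location  /impala/test-warehouse/hadoop_catalog_test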


@@ -110,6 +110,12 @@ if [ ! -f ${SNAPSHOT_STAGING_DIR}${TEST_WAREHOUSE_DIR}/githash.txt ]; then
exit 1
fi
if [ "${WAREHOUSE_LOCATION_PREFIX}" != "" ]; then
echo "Updating Iceberg locations with warehouse prefix ${WAREHOUSE_LOCATION_PREFIX}"
${IMPALA_HOME}/testdata/bin/rewrite-iceberg-metadata.py ${WAREHOUSE_LOCATION_PREFIX} \
$(find ${SNAPSHOT_STAGING_DIR}${TEST_WAREHOUSE_DIR}/iceberg_test -name "metadata")
fi
echo "Copying data to ${TARGET_FILESYSTEM}"
if [ "${TARGET_FILESYSTEM}" = "s3" ]; then
# hive does not yet work well with s3, so we won't need hive builtins.

testdata/bin/rewrite-iceberg-metadata.py (new executable file, 97 lines)

@@ -0,0 +1,97 @@
#!/usr/bin/env impala-python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import glob
import json
import os
import sys

from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter

args = sys.argv[1:]
if len(args) < 2:
  print("Usage: rewrite-iceberg-metadata.py PREFIX METADATA-dirs...")
  exit(1)

prefix = args[0]


def add_prefix_to_snapshot(snapshot):
  if 'manifest-list' in snapshot:
    snapshot['manifest-list'] = prefix + snapshot['manifest-list']
  if 'manifests' in snapshot:
    snapshot['manifests'] = map(lambda m: prefix + m, snapshot['manifests'])
  return snapshot


def add_prefix_to_mlog(metadata_log):
  metadata_log['metadata-file'] = prefix + metadata_log['metadata-file']
  return metadata_log


def add_prefix_to_snapshot_entry(entry):
  if 'manifest_path' in entry:
    entry['manifest_path'] = prefix + entry['manifest_path']
  if 'data_file' in entry:
    entry['data_file']['file_path'] = prefix + entry['data_file']['file_path']
  return entry


for arg in args[1:]:
  # Update metadata.json
  for mfile in glob.glob(os.path.join(arg, 'v*.metadata.json')):
    with open(mfile, 'r') as f:
      metadata = json.load(f)

    if 'format-version' not in metadata:
      print("WARN: skipping {}, missing format-version".format(f))
      continue
    version = metadata['format-version']
    if version < 1 or version > 2:
      print("WARN: skipping {}, unknown version {}".format(f, version))
      continue

    # metadata: required
    metadata['location'] = prefix + metadata['location']
    # snapshots: optional
    if 'snapshots' in metadata:
      metadata['snapshots'] = map(add_prefix_to_snapshot, metadata['snapshots'])
    # metadata-log: optional
    if 'metadata-log' in metadata:
      metadata['metadata-log'] = map(add_prefix_to_mlog, metadata['metadata-log'])

    with open(mfile + '.tmp', 'w') as f:
      json.dump(metadata, f)
    os.rename(mfile + '.tmp', mfile)

  for afile in glob.glob(os.path.join(arg, '*.avro')):
    with open(afile, 'rb') as f:
      with DataFileReader(f, DatumReader()) as reader:
        schema = reader.datum_reader.writers_schema
        lines = map(add_prefix_to_snapshot_entry, reader)

    with open(afile + '.tmp', 'wb') as f:
      with DataFileWriter(f, DatumWriter(), schema) as writer:
        for line in lines:
          writer.append(line)
    os.rename(afile + '.tmp', afile)
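
For reference, a sketch of how this script is driven during data load, mirroring the snapshot-loading hunk earlier in this change (SNAPSHOT_STAGING_DIR and TEST_WAREHOUSE_DIR are the variables used there):

  # Prepend the new warehouse prefix (e.g. /impala) to every absolute path
  # recorded under the staged Iceberg tables' metadata/ directories.
  ${IMPALA_HOME}/testdata/bin/rewrite-iceberg-metadata.py "${WAREHOUSE_LOCATION_PREFIX}" \
    $(find ${SNAPSHOT_STAGING_DIR}${TEST_WAREHOUSE_DIR}/iceberg_test -name "metadata")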


@@ -329,7 +329,7 @@ function start_cluster {
if [[ "${TARGET_FILESYSTEM}" = "ozone" ]]; then
local bucketkey=''
if $USE_OZONE_ENCRYPTION; then
echo "Ozone encryption enabled for ${OZONE_VOLUME}/${OZONE_BUCKET}"
echo "Ozone encryption enabled for ${OZONE_VOLUME}/test-warehouse"
# Encryption is done at the bucket level, so ensure the keys are available first.
${IMPALA_HOME}/testdata/bin/setup-dfs-keys.sh testkey
@@ -337,7 +337,7 @@ function start_cluster {
fi
ozone sh volume create ${OZONE_VOLUME} || true
ozone sh bucket create ${bucketkey} ${OZONE_VOLUME}/${OZONE_BUCKET} || true
ozone sh bucket create ${bucketkey} ${OZONE_VOLUME}/test-warehouse || true
fi
return $?


@@ -74,7 +74,7 @@ CREATE TABLE iceberg_hadoop_catalog(
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test');
ALTER TABLE iceberg_hadoop_catalog ADD COLUMNS(event_time TIMESTAMP, register_time DATE);
ALTER TABLE iceberg_hadoop_catalog ADD COLUMNS(message STRING, price DECIMAL(8,1));
ALTER TABLE iceberg_hadoop_catalog ADD COLUMNS(map_test MAP <STRING, array <STRING>>, struct_test STRUCT <f1: BIGINT, f2: BIGINT>);


@@ -139,7 +139,7 @@ create table ice_hadoop_catalog_no_part(
array < STRING > >
) stored as iceberg tblproperties(
'iceberg.catalog' = 'hadoop.catalog',
'iceberg.catalog_location' = '/test-warehouse/$DATABASE.db/cat_loc',
'iceberg.catalog_location' = '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc',
'iceberg.table_identifier' = 'id_a.id_b.ice_hadoop_catalog_no_part'
);
---- RESULTS
@@ -172,7 +172,7 @@ describe formatted ice_hadoop_catalog_no_part_clone;
'col_map','map<string,array<string>>','NULL'
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/cat_loc/$DATABASE/ice_hadoop_catalog_no_part_clone','NULL'
'','iceberg.catalog ','hadoop.catalog '
'','iceberg.catalog_location','/test-warehouse/$DATABASE.db/cat_loc'
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc'
'','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
'','write.format.default','parquet '
'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL'
@@ -202,7 +202,7 @@ create table ice_hadoop_catalog(
array < STRING > >
) partitioned by spec (bucket(3, id)) stored as iceberg tblproperties(
'iceberg.catalog' = 'hadoop.catalog',
'iceberg.catalog_location' = '/test-warehouse/$DATABASE.db/cat_loc',
'iceberg.catalog_location' = '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc',
'iceberg.table_identifier' = 'id_a.id_b.ice_hadoop_catalog'
);
---- RESULTS
@@ -238,7 +238,7 @@ describe formatted ice_hadoop_catalog_clone;
'id','BUCKET[3]','NULL'
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/cat_loc/$DATABASE/ice_hadoop_catalog_clone','NULL'
'','iceberg.catalog ','hadoop.catalog '
'','iceberg.catalog_location','/test-warehouse/$DATABASE.db/cat_loc'
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc'
'','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
'','write.format.default','parquet '
'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL'


@@ -70,13 +70,13 @@ PARTITIONED BY SPEC
TRUNCATE(15, level)
)
STORED AS ICEBERG
LOCATION '/$DATABASE.iceberg_test_with_location'
LOCATION '$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE.iceberg_test_with_location'
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external(
level STRING
)
STORED AS ICEBERG
LOCATION '/$DATABASE.iceberg_test_with_location'
LOCATION '$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE.iceberg_test_with_location'
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
@@ -97,7 +97,7 @@ STRING,BIGINT,BIGINT
---- QUERY
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external_empty_col
STORED AS ICEBERG
LOCATION '/$DATABASE.iceberg_test_with_location'
LOCATION '$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE.iceberg_test_with_location'
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
@@ -137,7 +137,7 @@ PARTITIONED BY SPEC
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test');
---- RESULTS
'Table has been created.'
====
@@ -171,13 +171,13 @@ PARTITIONED BY SPEC
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test');
CREATE EXTERNAL TABLE iceberg_hadoop_cat_external(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test', 'iceberg.table_identifier'='$DATABASE.iceberg_hadoop_catalog');
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test', 'iceberg.table_identifier'='$DATABASE.iceberg_hadoop_catalog');
---- RESULTS
'Table has been created.'
====
@@ -200,13 +200,13 @@ CREATE TABLE iceberg_hadoop_cat_drop(
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
CREATE TABLE iceberg_hadoop_cat_query(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
DROP TABLE iceberg_hadoop_cat_drop;
SELECT * FROM iceberg_hadoop_cat_query;
---- TYPES
@@ -217,7 +217,7 @@ string
DESCRIBE FORMATTED iceberg_hadoop_cat_query;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/$DATABASE/iceberg_hadoop_cat_query','NULL'
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
'','write.format.default','parquet '
'','iceberg.catalog ','hadoop.catalog '
---- TYPES
@@ -229,12 +229,12 @@ CREATE TABLE iceberg_hadoop_cat_with_ident(
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
'iceberg.table_identifier'='org.db.tbl');
DESCRIBE FORMATTED iceberg_hadoop_cat_with_ident;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/org/db/tbl','NULL'
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
'','write.format.default','parquet '
'','iceberg.catalog ','hadoop.catalog '
---- TYPES
@@ -247,7 +247,7 @@ INSERT INTO iceberg_hadoop_cat_with_ident values ("ice");
CREATE EXTERNAL TABLE iceberg_hadoop_cat_with_ident_ext
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
'iceberg.table_identifier'='org.db.tbl');
---- RESULTS
'Table has been created.'
@@ -256,7 +256,7 @@ TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
DESCRIBE FORMATTED iceberg_hadoop_cat_with_ident_ext;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/org/db/tbl','NULL'
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
'','write.format.default','parquet '
'','iceberg.catalog ','hadoop.catalog '
---- TYPES
@@ -288,7 +288,7 @@ PARTITIONED BY SPEC
)
STORED AS ICEBERG
TBLPROPERTIES('write.format.default'='orc','iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
---- RESULTS
'Table has been created.'
====
@@ -475,7 +475,7 @@ create table ice_part_hadoop_catalog (
col_identity
) stored as iceberg TBLPROPERTIES(
'iceberg.catalog' = 'hadoop.catalog',
'iceberg.catalog_location' = '/$DATABASE/hadoop_catalog_test'
'iceberg.catalog_location' = '$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test'
);
---- RESULTS
'Table has been created.'


@@ -75,7 +75,7 @@ STRING,BIGINT,BIGINT
# Use PARTITIONED BY SPEC
CREATE TABLE ice_ctas_hadoop_tables_part PARTITIONED BY SPEC (month(d))
STORED AS ICEBERG
LOCATION '/test-warehouse/$DATABASE.db/loc_test'
LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/loc_test'
TBLPROPERTIES ('iceberg.catalog'='hadoop.tables') AS SELECT s, ts, d FROM value_tbl;
SELECT * FROM ice_ctas_hadoop_tables_part where d='2021-02-26';
---- RESULTS
@@ -134,7 +134,7 @@ STRING,BIGINT,BIGINT
CREATE TABLE ice_ctas_hadoop_catalog_part PARTITIONED BY SPEC (truncate(3, s))
STORED AS ICEBERG
TBLPROPERTIES ('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/cat_loc',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc',
'iceberg.table_identifier'='ns1.ns2.ctas')
AS SELECT cast(t as INT), s, d FROM value_tbl;
INSERT INTO ice_ctas_hadoop_catalog_part VALUES (1, 'lion', '2021-02-27');


@@ -102,7 +102,7 @@ select * from ts_iceberg;
create table iceberg_hadoop_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/hadoop_catalog_test');
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test');
insert into iceberg_hadoop_cat values (1), (2), (3);
---- RESULTS
: 3
@@ -128,7 +128,7 @@ STRING, STRING, STRING
create table iceberg_hadoop_cat_ti (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/hadoop_catalog_test',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test',
'iceberg.table_identifier'='test.custom_db.int_table');
insert into iceberg_hadoop_cat_ti values (1), (2), (3);
---- RESULTS
@@ -170,7 +170,7 @@ INT
# Query external Iceberg table
create external table iceberg_hive_cat_ext (i int)
stored as iceberg
location '/test-warehouse/$DATABASE.db/iceberg_hive_cat'
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog',
'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
---- RESULTS
@@ -223,7 +223,7 @@ INT
# Create another external Iceberg table
create external table iceberg_hive_cat_ext_2 (i int)
stored as iceberg
location '/test-warehouse/$DATABASE.db/iceberg_hive_cat'
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog',
'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
select * from iceberg_hive_cat_ext_2
@@ -248,7 +248,7 @@ Table does not exist
# Insert into hive catalog with custom location.
create table iceberg_hive_cat_custom_loc (i int)
stored as iceberg
location '/test-warehouse/$DATABASE.db/custom_hive_cat'
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/custom_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog');
insert into iceberg_hive_cat_custom_loc values (1), (2), (3);
---- RESULTS


@@ -37,7 +37,7 @@ AnalysisException: Table is not partitioned: functional_parquet.iceberg_non_part
CREATE TABLE iceberg_table_hadoop_tables_cat_loc(i INT)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.tables',
'iceberg.catalog_location'='/test-warehouse/cat_loc')
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/cat_loc')
---- CATCH
iceberg.catalog_location cannot be set for Iceberg table stored in hadoop.tables
====
@@ -46,7 +46,7 @@ CREATE TABLE iceberg_table_hadoop_catalog(
level STRING
)
STORED AS ICEBERG
LOCATION '/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog');
---- CATCH
AnalysisException: Location cannot be set for Iceberg table with 'hadoop.catalog'.
@@ -63,9 +63,9 @@ AnalysisException: Table property 'iceberg.catalog_location' is necessary for Ic
---- QUERY
CREATE EXTERNAL TABLE iceberg_external_table_hadoop_catalog
STORED AS ICEBERG
LOCATION '/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/fake_table',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/fake_table',
'iceberg.table_identifier'='fake_db.fake_table');
---- CATCH
AnalysisException: Location cannot be set for Iceberg table with 'hadoop.catalog'.
@@ -99,7 +99,7 @@ Table property 'iceberg.table_identifier' is necessary for external Iceberg tabl
CREATE EXTERNAL TABLE fake_iceberg_table_hadoop_catalog
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/fake_table',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/fake_table',
'iceberg.table_identifier'='fake_db.fake_table');
SHOW CREATE TABLE fake_iceberg_table_hadoop_catalog;
---- CATCH
@@ -144,7 +144,7 @@ AnalysisException: The Iceberg table has multiple partition specs.
CREATE TABLE iceberg_hive_cat_with_cat_locaction (i int)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hive.catalog',
'iceberg.catalog_location'='/test-warehouse/catalog_loc')
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog_loc')
---- CATCH
iceberg.catalog_location cannot be set for Iceberg table stored in hive.catalog
====
@@ -152,7 +152,7 @@ iceberg.catalog_location cannot be set for Iceberg table stored in hive.catalog
CREATE TABLE iceberg_hadoop_tables_with_metdata_locaction (i int)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.tables',
'metadata_location'='/test-warehouse/catalog/metadata_loc')
'metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog/metadata_loc')
---- CATCH
metadata_location cannot be set for Iceberg tables
====
@@ -160,8 +160,8 @@ metadata_location cannot be set for Iceberg tables
CREATE TABLE iceberg_hadoop_cat_with_metadata_locaction (i int)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/catalog',
'metadata_location'='/test-warehouse/catalog/metadata_loc')
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog',
'metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog/metadata_loc')
---- CATCH
metadata_location cannot be set for Iceberg tables
====
@@ -169,7 +169,7 @@ metadata_location cannot be set for Iceberg tables
CREATE TABLE iceberg_hive_cat_with_metadata_locaction (i int)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hive.catalog',
'metadata_location'='/test-warehouse/catalog/metadata_loc')
'metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog/metadata_loc')
---- CATCH
metadata_location cannot be set for Iceberg tables
====
@@ -218,7 +218,7 @@ CREATE TABLE iceberg_table_hadoop_catalog(
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test');
ALTER TABLE iceberg_table_hadoop_catalog RENAME TO iceberg_table_hadoop_catalog_new;
---- CATCH
UnsupportedOperationException: Cannot rename Iceberg tables that use 'hadoop.catalog' as catalog.
@@ -254,7 +254,7 @@ ALTER TABLE iceberg_table_hadoop_catalog unset TBLPROPERTIES('iceberg.table_iden
AnalysisException: Unsetting the 'iceberg.table_identifier' table property is not supported for Iceberg table.
====
---- QUERY
ALTER TABLE iceberg_table_hadoop_catalog set TBLPROPERTIES('metadata_location'='/test-warehouse/metadata_loc');
ALTER TABLE iceberg_table_hadoop_catalog set TBLPROPERTIES('metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/metadata_loc');
---- CATCH
AnalysisException: Changing the 'metadata_location' table property is not supported for Iceberg table.
====
@@ -264,7 +264,7 @@ ALTER TABLE iceberg_table_hadoop_catalog unset TBLPROPERTIES('metadata_location'
AnalysisException: Unsetting the 'metadata_location' table property is not supported for Iceberg table.
====
---- QUERY
ALTER TABLE iceberg_table_hive_catalog set TBLPROPERTIES('metadata_location'='/test-warehouse/metadata_loc');
ALTER TABLE iceberg_table_hive_catalog set TBLPROPERTIES('metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/metadata_loc');
---- CATCH
AnalysisException: Changing the 'metadata_location' table property is not supported for Iceberg table.
====
@@ -314,7 +314,7 @@ Iceberg tables cannot have Hive ACID table properties.
CREATE TABLE iceberg_insert_only(i int)
STORED AS ICEBERG
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/$EXTERNAL_WAREHOUSE_DIR/specified_location',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/$EXTERNAL_WAREHOUSE_DIR/specified_location',
'transactional'='true', 'transactional_properties'='insert_only');
---- CATCH
Iceberg tables cannot have Hive ACID table properties.


@@ -5,7 +5,7 @@ STORED AS ICEBERG
TBLPROPERTIES(
'iceberg.file_format'='orc',
'iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc',
'iceberg.table_identifier'='functional_parquet.iceberg_partitioned_orc'
);
ALTER TABLE iceberg_partitioned_orc_external_old_fileformat
@@ -13,7 +13,7 @@ UNSET TBLPROPERTIES IF EXISTS ('write.format.default');
DESCRIBE FORMATTED iceberg_partitioned_orc_external_old_fileformat;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL'
'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
'','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc'
'','iceberg.file_format','orc '
'','iceberg.catalog ','hadoop.catalog '


@@ -370,7 +370,7 @@ string, string, string
describe formatted hadoop_catalog_test_external;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/hadoop_catalog_test/functional_parquet/hadoop_catalog_test','NULL'
'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/hadoop_catalog_test'
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/hadoop_catalog_test'
'','iceberg.table_identifier','functional_parquet.hadoop_catalog_test'
'','write.format.default','parquet '
'','iceberg.catalog ','hadoop.catalog '
@@ -381,7 +381,7 @@ string, string, string
describe formatted iceberg_partitioned_orc_external;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL'
'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
'','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc'
'','write.format.default','orc '
'','iceberg.catalog ','hadoop.catalog '
@@ -451,7 +451,7 @@ bigint,bigint
describe formatted iceberg_resolution_test_external;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_resolution_test/functional_parquet/iceberg_resolution_test','NULL'
'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_resolution_test'
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/iceberg_resolution_test'
'','iceberg.table_identifier','functional_parquet.iceberg_resolution_test'
'','write.format.default','parquet '
'','iceberg.catalog ','hadoop.catalog '


@@ -120,14 +120,14 @@ INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIM
# Note: intentionally not using $SECONDARY_FILESYSTEM so that the partition points
# to the default filesystem.
alter table alltypes add partition(year=2010, month=3)
location '/test-warehouse/alltypes_parquet/year=2010/month=3'
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/alltypes_parquet/year=2010/month=3'
---- RESULTS
====
---- QUERY
# Note: intentionally not using $SECONDARY_FILESYSTEM so that the partition points
# to the default filesystem.
alter table alltypes add partition(year=2010, month=4)
location '/test-warehouse/alltypes_parquet/year=2010/month=4'
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/alltypes_parquet/year=2010/month=4'
---- RESULTS
====
---- QUERY
@@ -187,7 +187,7 @@ INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIM
# Note: intentionally not using $SECONDARY_FILESYSTEM so that the partition points
# to the default filesystem.
alter table alltypes add partition(year=2009, month=5)
location '/test-warehouse/alltypes_parquet/year=2009/month=5'
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/alltypes_parquet/year=2009/month=5'
---- RESULTS
====
---- QUERY
@@ -199,7 +199,7 @@ location '$SECONDARY_FILESYSTEM/multi_fs_tests/$DATABASE.db/alltypes_parquet/yea
# Note: intentionally not using $SECONDARY_FILESYSTEM so that the partition points
# to the default filesystem.
alter table alltypes add partition(year=2010, month=5)
location '/test-warehouse/alltypes_parquet/year=2010/month=5'
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/alltypes_parquet/year=2010/month=5'
---- RESULTS
====
---- QUERY
@@ -306,7 +306,7 @@ year=2009/month=1: 310
# ADD PARTITION on the default filesystem.
# Point to unique database so we don't overwrite someone else's data.
alter table alltypes_multipart_insert add partition(year=2009, month=2)
location '/test-warehouse/$DATABASE.db/alltypes_multipart_insert/year=2009/month=2'
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/alltypes_multipart_insert/year=2009/month=2'
---- RESULTS
====
---- QUERY


@@ -12,7 +12,7 @@ PARTITIONED BY (
float_col FLOAT,
double_col DOUBLE,
string_col STRING
) LOCATION '/test-warehouse/$DATABASE.db/all_insert_partition_col_types'
) LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/all_insert_partition_col_types'
====
---- QUERY
DESCRIBE all_insert_partition_col_types
@@ -193,13 +193,13 @@ STRING, STRING, STRING
ALTER TABLE all_partition_col_types
ADD PARTITION (bool_col=FALSE, tinyint_col=1, smallint_col=1, int_col=1,
bigint_col=10, float_col=0, double_col=1.1, string_col='1')
LOCATION '/test-warehouse/$DATABASE.db/all_insert_partition_col_types/tinyint_col=1/smallint_col=1/int_col=1/bigint_col=10/float_col=0/double_col=1.1/string_col=1/'
LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/all_insert_partition_col_types/tinyint_col=1/smallint_col=1/int_col=1/bigint_col=10/float_col=0/double_col=1.1/string_col=1/'
====
---- QUERY
ALTER TABLE all_partition_col_types
ADD PARTITION (bool_col=TRUE, tinyint_col=2, smallint_col=2, int_col=2,
bigint_col=20, float_col=0, double_col=1.1, string_col='2')
LOCATION '/test-warehouse/$DATABASE.db/all_insert_partition_col_types/tinyint_col=2/smallint_col=2/int_col=2/bigint_col=20/float_col=0/double_col=1.1/string_col=2/'
LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/all_insert_partition_col_types/tinyint_col=2/smallint_col=2/int_col=2/bigint_col=20/float_col=0/double_col=1.1/string_col=2/'
====
---- QUERY
EXPLAIN


@@ -652,7 +652,7 @@ TBLPROPERTIES('write.format.default'='parquet',
'write.parquet.page-size-bytes'='65536',
'write.parquet.dict-size-bytes'='131072',
'iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test')
'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test')
---- RESULTS-HIVE-3
CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_test2 (
level STRING NULL
@@ -667,7 +667,7 @@ TBLPROPERTIES ('external.table.purge'='TRUE',
'write.parquet.dict-size-bytes'='131072',
'engine.hive.enabled'='true',
'iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test')
'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test')
====
---- CREATE_TABLE
CREATE TABLE iceberg_test3 (
@@ -681,7 +681,7 @@ TBLPROPERTIES('write.format.default'='parquet',
'write.parquet.page-size-bytes'='65536',
'write.parquet.dict-size-bytes'='131072',
'iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test',
'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test',
'iceberg.table_identifier'='org.my_db.my_table')
---- RESULTS-HIVE-3
CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_test3 (
@@ -697,7 +697,7 @@ TBLPROPERTIES ('external.table.purge'='TRUE',
'write.parquet.dict-size-bytes'='131072',
'engine.hive.enabled'='true',
'iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test',
'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test',
'iceberg.table_identifier'='org.my_db.my_table')
====
---- CREATE_TABLE
@@ -725,7 +725,7 @@ TBLPROPERTIES('write.format.default'='parquet',
'write.parquet.page-size-bytes'='65536',
'write.parquet.dict-size-bytes'='131072',
'iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test')
'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test')
---- RESULTS-HIVE-3
CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_test1_partitioned (
level STRING NULL,
@@ -753,7 +753,7 @@ TBLPROPERTIES ('external.table.purge'='TRUE',
'write.parquet.dict-size-bytes'='131072',
'engine.hive.enabled'='true',
'iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test')
'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test')
====
---- CREATE_TABLE
CREATE TABLE iceberg_test_orc (


@@ -32,6 +32,7 @@ from tests.common.test_dimensions import (create_client_protocol_dimension,
create_exec_option_dimension, create_orc_dimension)
from tests.util.hdfs_util import NAMENODE
from tests.util.calculation_util import get_random_id
from tests.util.filesystem_utils import WAREHOUSE_PREFIX
ADMIN = "admin"
RANGER_AUTH = ("admin", "admin")
@@ -285,7 +286,7 @@ class TestRanger(CustomClusterTestSuite):
admin_client.execute("revoke {0} on server from user {1}".format(privilege, user))
def _test_show_grant_basic(self, admin_client, kw, id, unique_database, unique_table):
uri = '/tmp'
uri = WAREHOUSE_PREFIX + '/tmp'
try:
# Grant server privileges and verify
admin_client.execute("grant all on server to {0} {1}".format(kw, id), user=ADMIN)
@@ -306,7 +307,7 @@ class TestRanger(CustomClusterTestSuite):
result = self.client.execute("show grant {0} {1} on uri '{2}'"
.format(kw, id, uri))
TestRanger._check_privileges(result, [
[kw, id, "", "", "", "{0}{1}".format(NAMENODE, uri), "", "all", "false"]])
[kw, id, "", "", "", "{0}{1}".format(NAMENODE, '/tmp'), "", "all", "false"]])
# Revoke uri privileges and verify
admin_client.execute("revoke all on uri '{0}' from {1} {2}"


@@ -21,9 +21,10 @@
import os
import re
import tempfile
from subprocess import check_call
from tests.util.filesystem_utils import get_fs_path
from tests.util.filesystem_utils import get_fs_path, WAREHOUSE_PREFIX
def create_iceberg_table_from_directory(impala_client, unique_database, table_name,
@@ -38,6 +39,15 @@ def create_iceberg_table_from_directory(impala_client, unique_database, table_na
os.environ['IMPALA_HOME'], 'testdata/data/iceberg_test/{0}'.format(table_name))
assert os.path.isdir(local_dir)
# If using a prefix, rewrite iceberg metadata to use the prefix
if WAREHOUSE_PREFIX:
tmp_dir = tempfile.mktemp(table_name)
check_call(['cp', '-r', local_dir, tmp_dir])
rewrite = os.path.join(
os.environ['IMPALA_HOME'], 'testdata/bin/rewrite-iceberg-metadata.py')
check_call([rewrite, WAREHOUSE_PREFIX, os.path.join(tmp_dir, 'metadata')])
local_dir = tmp_dir
# Put the directory in the database's directory (not the table directory)
hdfs_parent_dir = get_fs_path("/test-warehouse")
@@ -47,7 +57,7 @@ def create_iceberg_table_from_directory(impala_client, unique_database, table_na
check_call(['hdfs', 'dfs', '-rm', '-f', '-r', hdfs_dir])
# Note: -d skips a staging copy
check_call(['hdfs', 'dfs', '-put', '-d', local_dir, hdfs_parent_dir])
check_call(['hdfs', 'dfs', '-put', '-d', local_dir, hdfs_dir])
# Create external table
qualified_table_name = '{0}.{1}'.format(unique_database, table_name)


@@ -491,7 +491,8 @@ class ImpalaTestSuite(BaseTestSuite):
"MANAGED_WAREHOUSE_DIR",
"EXTERNAL_WAREHOUSE_DIR"])
repl.update({
'$SECONDARY_FILESYSTEM': os.environ.get("SECONDARY_FILESYSTEM", ""),
'$SECONDARY_FILESYSTEM': os.getenv("SECONDARY_FILESYSTEM", ""),
'$WAREHOUSE_LOCATION_PREFIX': os.getenv("WAREHOUSE_LOCATION_PREFIX", ""),
'$USER': getuser()})
if use_db:


@@ -25,6 +25,7 @@ import tempfile
from impala_py_lib.helpers import find_all_files, is_core_dump
from tests.common.file_utils import assert_file_in_dir_contains
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.util.filesystem_utils import get_fs_path
LOG = logging.getLogger('test_startup_filesystem_checks')
@@ -38,12 +39,19 @@ class TestStartupFilesystemChecks(CustomClusterTestSuite):
setup_method().
"""
NONEXISTENT_PATH = "/nonexistent_path"
NONDIRECTORY_PATH = "/test-warehouse/alltypes/year=2009/month=1/090101.txt"
VALID_SUBDIRECTORY = "/test-warehouse"
# Use get_fs_path because testdata in Ozone requires a volume prefix and does not
# accept underscore as a bucket name (the first element after volume prefix).
NONEXISTENT_PATH = get_fs_path("/nonexistent-path")
NONDIRECTORY_PATH = \
get_fs_path("/test-warehouse/alltypes/year=2009/month=1/090101.txt")
VALID_SUBDIRECTORY = get_fs_path("/test-warehouse")
# Test multiple valid directories along with an empty entry
MULTIPLE_VALID_DIRECTORIES = \
"/,/test-warehouse/zipcode_incomes,,/test-warehouse/alltypes"
MULTIPLE_VALID_DIRECTORIES = ",".join([
"/",
get_fs_path("/test-warehouse/zipcode_incomes"),
"",
get_fs_path("/test-warehouse/alltypes")]
)
LOG_DIR = tempfile.mkdtemp(prefix="test_startup_filesystem_checks_",
dir=os.getenv("LOG_DIR"))
MINIDUMP_PATH = tempfile.mkdtemp()


@@ -38,6 +38,7 @@ from tests.common.test_vector import ImpalaTestDimension
from tests.util.filesystem_utils import (
get_fs_path,
WAREHOUSE,
WAREHOUSE_PREFIX,
IS_HDFS,
IS_S3,
IS_ADLS,
@@ -47,8 +48,11 @@ from tests.common.impala_cluster import ImpalaCluster
from tests.util.filesystem_utils import FILESYSTEM_PREFIX
TRASH_PATH = ('.Trash/{0}/Current' if IS_OZONE else 'user/{0}/.Trash/Current').\
format(getpass.getuser())
def get_trash_path(bucket, path):
if IS_OZONE:
return get_fs_path('/{0}/.Trash/{1}/Current{2}/{0}/{3}'.format(bucket,
getpass.getuser(), WAREHOUSE_PREFIX, path))
return '/user/{0}/.Trash/Current/{1}/{2}'.format(getpass.getuser(), bucket, path)
# Validates DDL statements (create, drop)
class TestDdlStatements(TestDdlBase):
@@ -59,20 +63,16 @@ class TestDdlStatements(TestDdlBase):
self.client.execute("create table {0}.t1(i int)".format(unique_database))
self.client.execute("create table {0}.t2(i int)".format(unique_database))
# Create sample test data files under the table directories
self.filesystem_client.create_file("test-warehouse/{0}.db/t1/t1.txt".\
format(unique_database), file_data='t1')
self.filesystem_client.create_file("test-warehouse/{0}.db/t2/t2.txt".\
format(unique_database), file_data='t2')
dbpath = "{0}/{1}.db".format(WAREHOUSE, unique_database)
self.filesystem_client.create_file("{}/t1/t1.txt".format(dbpath), file_data='t1')
self.filesystem_client.create_file("{}/t2/t2.txt".format(dbpath), file_data='t2')
# Drop the table (without purge) and make sure it exists in trash
self.client.execute("drop table {0}.t1".format(unique_database))
assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/t1.txt".\
format(unique_database))
assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/".\
format(unique_database))
assert self.filesystem_client.exists(
'{0}/test-warehouse/{1}.db/t1/t1.txt'.format(TRASH_PATH, unique_database))
assert self.filesystem_client.exists(
'{0}/test-warehouse/{1}.db/t1'.format(TRASH_PATH, unique_database))
assert not self.filesystem_client.exists("{}/t1/t1.txt".format(dbpath))
assert not self.filesystem_client.exists("{}/t1/".format(dbpath))
trash = get_trash_path("test-warehouse", unique_database + ".db")
assert self.filesystem_client.exists("{}/t1/t1.txt".format(trash))
assert self.filesystem_client.exists("{}/t1".format(trash))
# Drop the table (with purge) and make sure it doesn't exist in trash
self.client.execute("drop table {0}.t2 purge".format(unique_database))
if not IS_S3 and not IS_ADLS:
@@ -82,27 +82,20 @@ class TestDdlStatements(TestDdlBase):
# consistent.
# The ADLS Python client is not strongly consistent, so these files may still be
# visible after a DROP. (Remove after IMPALA-5335 is resolved)
assert not self.filesystem_client.exists("test-warehouse/{0}.db/t2/".\
format(unique_database))
assert not self.filesystem_client.exists("test-warehouse/{0}.db/t2/t2.txt".\
format(unique_database))
assert not self.filesystem_client.exists(
'{0}/test-warehouse/{1}.db/t2/t2.txt'.format(TRASH_PATH, unique_database))
assert not self.filesystem_client.exists(
'{0}/test-warehouse/{1}.db/t2'.format(TRASH_PATH, unique_database))
assert not self.filesystem_client.exists("{}/t2/".format(dbpath))
assert not self.filesystem_client.exists("{}/t2/t2.txt".format(dbpath))
assert not self.filesystem_client.exists("{}/t2/t2.txt".format(trash))
assert not self.filesystem_client.exists("{}/t2".format(trash))
# Create an external table t3 and run the same test as above. Make
# sure the data is not deleted
self.filesystem_client.make_dir(
"test-warehouse/{0}.db/data_t3/".format(unique_database), permission=777)
self.filesystem_client.make_dir("{}/data_t3/".format(dbpath), permission=777)
self.filesystem_client.create_file(
"test-warehouse/{0}.db/data_t3/data.txt".format(unique_database), file_data='100')
"{}/data_t3/data.txt".format(dbpath), file_data='100')
self.client.execute("create external table {0}.t3(i int) stored as "
"textfile location \'/test-warehouse/{0}.db/data_t3\'" .format(unique_database))
"textfile location \'{1}/data_t3\'".format(unique_database, dbpath))
self.client.execute("drop table {0}.t3 purge".format(unique_database))
assert self.filesystem_client.exists(
"test-warehouse/{0}.db/data_t3/data.txt".format(unique_database))
self.filesystem_client.delete_file_dir(
"test-warehouse/{0}.db/data_t3".format(unique_database), recursive=True)
assert self.filesystem_client.exists("{}/data_t3/data.txt".format(dbpath))
self.filesystem_client.delete_file_dir("{}/data_t3".format(dbpath), recursive=True)
@SkipIfFS.eventually_consistent
@SkipIfLocal.hdfs_client
@@ -110,24 +103,24 @@ class TestDdlStatements(TestDdlBase):
self.client.execute('use default')
# Verify the db directory exists
assert self.filesystem_client.exists(
"test-warehouse/{0}.db/".format(unique_database))
"{1}/{0}.db/".format(unique_database, WAREHOUSE))
self.client.execute("create table {0}.t1(i int)".format(unique_database))
# Verify the table directory exists
assert self.filesystem_client.exists(
"test-warehouse/{0}.db/t1/".format(unique_database))
"{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))
# Dropping the table removes the table's directory and preserves the db's directory
self.client.execute("drop table {0}.t1".format(unique_database))
assert not self.filesystem_client.exists(
"test-warehouse/{0}.db/t1/".format(unique_database))
"{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))
assert self.filesystem_client.exists(
"test-warehouse/{0}.db/".format(unique_database))
"{1}/{0}.db/".format(unique_database, WAREHOUSE))
# Dropping the db removes the db's directory
self.client.execute("drop database {0}".format(unique_database))
assert not self.filesystem_client.exists(
"test-warehouse/{0}.db/".format(unique_database))
"{1}/{0}.db/".format(unique_database, WAREHOUSE))
# Dropping the db using "cascade" removes all tables' and db's directories
# but keeps the external tables' directory
@@ -138,17 +131,17 @@ class TestDdlStatements(TestDdlBase):
"location '{1}/{0}/t3/'".format(unique_database, WAREHOUSE))
self.client.execute("drop database {0} cascade".format(unique_database))
assert not self.filesystem_client.exists(
"test-warehouse/{0}.db/".format(unique_database))
"{1}/{0}.db/".format(unique_database, WAREHOUSE))
assert not self.filesystem_client.exists(
"test-warehouse/{0}.db/t1/".format(unique_database))
"{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))
assert not self.filesystem_client.exists(
"test-warehouse/{0}.db/t2/".format(unique_database))
"{1}/{0}.db/t2/".format(unique_database, WAREHOUSE))
assert self.filesystem_client.exists(
"test-warehouse/{0}/t3/".format(unique_database))
"{1}/{0}/t3/".format(unique_database, WAREHOUSE))
self.filesystem_client.delete_file_dir(
"test-warehouse/{0}/t3/".format(unique_database), recursive=True)
"{1}/{0}/t3/".format(unique_database, WAREHOUSE), recursive=True)
assert not self.filesystem_client.exists(
"test-warehouse/{0}/t3/".format(unique_database))
"{1}/{0}/t3/".format(unique_database, WAREHOUSE))
# Re-create database to make unique_database teardown succeed.
self._create_db(unique_database)
@@ -157,12 +150,12 @@ class TestDdlStatements(TestDdlBase):
def test_truncate_cleans_hdfs_files(self, unique_database):
# Verify the db directory exists
assert self.filesystem_client.exists(
"test-warehouse/{0}.db/".format(unique_database))
"{1}/{0}.db/".format(unique_database, WAREHOUSE))
self.client.execute("create table {0}.t1(i int)".format(unique_database))
# Verify the table directory exists
assert self.filesystem_client.exists(
"test-warehouse/{0}.db/t1/".format(unique_database))
"{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))
try:
# If we're testing S3, we want the staging directory to be created.
@@ -170,31 +163,31 @@ class TestDdlStatements(TestDdlBase):
# Should have created one file in the table's dir
self.client.execute("insert into {0}.t1 values (1)".format(unique_database))
assert len(self.filesystem_client.ls(
"test-warehouse/{0}.db/t1/".format(unique_database))) == 2
"{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 2
# Truncating the table removes the data files and preserves the table's directory
self.client.execute("truncate table {0}.t1".format(unique_database))
assert len(self.filesystem_client.ls(
"test-warehouse/{0}.db/t1/".format(unique_database))) == 1
"{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 1
self.client.execute(
"create table {0}.t2(i int) partitioned by (p int)".format(unique_database))
# Verify the table directory exists
assert self.filesystem_client.exists(
"test-warehouse/{0}.db/t2/".format(unique_database))
"{1}/{0}.db/t2/".format(unique_database, WAREHOUSE))
# Should have created the partition dir, which should contain exactly one file
self.client.execute(
"insert into {0}.t2 partition(p=1) values (1)".format(unique_database))
assert len(self.filesystem_client.ls(
"test-warehouse/{0}.db/t2/p=1".format(unique_database))) == 1
"{1}/{0}.db/t2/p=1".format(unique_database, WAREHOUSE))) == 1
# Truncating the table removes the data files and preserves the partition's directory
self.client.execute("truncate table {0}.t2".format(unique_database))
assert self.filesystem_client.exists(
"test-warehouse/{0}.db/t2/p=1".format(unique_database))
"{1}/{0}.db/t2/p=1".format(unique_database, WAREHOUSE))
assert len(self.filesystem_client.ls(
"test-warehouse/{0}.db/t2/p=1".format(unique_database))) == 0
"{1}/{0}.db/t2/p=1".format(unique_database, WAREHOUSE))) == 0
finally:
# Reset to its default value.
self.client.execute("set s3_skip_insert_staging=true")
@@ -455,11 +448,10 @@ class TestDdlStatements(TestDdlBase):
# use the (key=value) format. The directory is automatically cleanup up
# by the unique_database fixture.
self.client.execute("create table {0}.part_data (i int)".format(unique_database))
assert self.filesystem_client.exists(
"test-warehouse/{0}.db/part_data".format(unique_database))
dbpath = "{1}/{0}.db".format(unique_database, WAREHOUSE)
assert self.filesystem_client.exists("{}/part_data".format(dbpath))
self.filesystem_client.create_file(
"test-warehouse/{0}.db/part_data/data.txt".format(unique_database),
file_data='1984')
"{}/part_data/data.txt".format(dbpath), file_data='1984')
self.run_test_case('QueryTest/alter-table', vector, use_db=unique_database,
multiple_impalad=self._use_multiple_impalad(vector))
@@ -483,20 +475,16 @@ class TestDdlStatements(TestDdlBase):
# Add two partitions (j=1) and (j=2) to table t1
self.client.execute("alter table {0}.t1 add partition(j=1)".format(unique_database))
self.client.execute("alter table {0}.t1 add partition(j=2)".format(unique_database))
self.filesystem_client.create_file(\
"test-warehouse/{0}.db/t1/j=1/j1.txt".format(unique_database), file_data='j1')
self.filesystem_client.create_file(\
"test-warehouse/{0}.db/t1/j=2/j2.txt".format(unique_database), file_data='j2')
dbpath = "{1}/{0}.db".format(unique_database, WAREHOUSE)
self.filesystem_client.create_file("{}/t1/j=1/j1.txt".format(dbpath), file_data='j1')
self.filesystem_client.create_file("{}/t1/j=2/j2.txt".format(dbpath), file_data='j2')
# Drop the partition (j=1) without purge and make sure it exists in trash
self.client.execute("alter table {0}.t1 drop partition(j=1)".format(unique_database))
assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=1/j1.txt".\
format(unique_database))
assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=1".\
format(unique_database))
assert self.filesystem_client.exists(
'{0}/test-warehouse/{1}.db/t1/j=1/j1.txt'.format(TRASH_PATH, unique_database))
assert self.filesystem_client.exists(
'{0}/test-warehouse/{1}.db/t1/j=1'.format(TRASH_PATH, unique_database))
assert not self.filesystem_client.exists("{}/t1/j=1/j1.txt".format(dbpath))
assert not self.filesystem_client.exists("{}/t1/j=1".format(dbpath))
trash = get_trash_path("test-warehouse", unique_database + ".db")
assert self.filesystem_client.exists('{}/t1/j=1/j1.txt'.format(trash))
assert self.filesystem_client.exists('{}/t1/j=1'.format(trash))
# Drop the partition (with purge) and make sure it doesn't exist in trash
self.client.execute("alter table {0}.t1 drop partition(j=2) purge".\
format(unique_database));
@@ -507,14 +495,10 @@ class TestDdlStatements(TestDdlBase):
# consistent.
# The ADLS Python client is not strongly consistent, so these files may still be
# visible after a DROP. (Remove after IMPALA-5335 is resolved)
assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=2/j2.txt".\
format(unique_database))
assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=2".\
format(unique_database))
assert not self.filesystem_client.exists(
'{0}/test-warehouse/{1}.db/t1/j=2/j2.txt'.format(TRASH_PATH, unique_database))
assert not self.filesystem_client.exists(
'{0}/test-warehouse/{1}.db/t1/j=2'.format(TRASH_PATH, unique_database))
assert not self.filesystem_client.exists("{}/t1/j=2/j2.txt".format(dbpath))
assert not self.filesystem_client.exists("{}/t1/j=2".format(dbpath))
assert not self.filesystem_client.exists('{}/t1/j=2/j2.txt'.format(trash))
assert not self.filesystem_client.exists('{}/t1/j=2'.format(trash))
@UniqueDatabase.parametrize(sync_ddl=True)
def test_views_ddl(self, vector, unique_database):
@@ -751,8 +735,8 @@ class TestDdlStatements(TestDdlBase):
tbl_name = "test_tbl"
self.execute_query_expect_success(self.client, "create table {0}.{1} (c1 string)"
.format(unique_database, tbl_name))
self.filesystem_client.create_file("test-warehouse/{0}.db/{1}/f".
format(unique_database, tbl_name),
self.filesystem_client.create_file("{2}/{0}.db/{1}/f".
format(unique_database, tbl_name, WAREHOUSE),
file_data="\nfoo\n")
self.execute_query_expect_success(self.client,
"alter table {0}.{1} set tblproperties"


@@ -210,8 +210,8 @@ class TestExplainEmptyPartition(ImpalaTestSuite):
"ALTER TABLE %s.empty_partition ADD PARTITION (p=NULL)" % self.TEST_DB_NAME)
# Put an empty file in the partition so we have > 0 files, but 0 rows
self.filesystem_client.create_file(
"test-warehouse/%s.db/empty_partition/p=__HIVE_DEFAULT_PARTITION__/empty" %
self.TEST_DB_NAME, "")
"{1}/{0}.db/empty_partition/p=__HIVE_DEFAULT_PARTITION__/empty".
format(self.TEST_DB_NAME, WAREHOUSE), "")
self.client.execute("REFRESH %s.empty_partition" % self.TEST_DB_NAME)
self.client.execute("COMPUTE STATS %s.empty_partition" % self.TEST_DB_NAME)
assert "NULL\t0\t1" in str(
@@ -224,8 +224,8 @@ class TestExplainEmptyPartition(ImpalaTestSuite):
# that its lack of stats is correctly identified
self.client.execute(
"ALTER TABLE %s.empty_partition ADD PARTITION (p=1)" % self.TEST_DB_NAME)
self.filesystem_client.create_file("test-warehouse/%s.db/empty_partition/p=1/rows" %
self.TEST_DB_NAME, "1")
self.filesystem_client.create_file(
"{1}/{0}.db/empty_partition/p=1/rows".format(self.TEST_DB_NAME, WAREHOUSE), "1")
self.client.execute("REFRESH %s.empty_partition" % self.TEST_DB_NAME)
explain_result = str(
self.client.execute("EXPLAIN SELECT * FROM %s.empty_partition" % self.TEST_DB_NAME))


@@ -28,13 +28,13 @@ from tests.common.test_dimensions import (
create_uncompressed_text_dimension)
from tests.common.skip import SkipIfLocal
from tests.common.test_vector import ImpalaTestDimension
from tests.util.filesystem_utils import (WAREHOUSE)
from tests.util.filesystem_utils import WAREHOUSE
TEST_TBL_PART = "test_load"
TEST_TBL_NOPART = "test_load_nopart"
STAGING_PATH = 'test-warehouse/test_load_staging'
ALLTYPES_PATH = "test-warehouse/alltypes/year=2010/month=1/100101.txt"
MULTIAGG_PATH = 'test-warehouse/alltypesaggmultifiles/year=2010/month=1/day=1'
STAGING_PATH = '%s/test_load_staging' % WAREHOUSE
ALLTYPES_PATH = "%s/alltypes/year=2010/month=1/100101.txt" % WAREHOUSE
MULTIAGG_PATH = '%s/alltypesaggmultifiles/year=2010/month=1/day=1' % WAREHOUSE
HIDDEN_FILES = ["{0}/3/.100101.txt".format(STAGING_PATH),
"{0}/3/_100101.txt".format(STAGING_PATH)]
@@ -155,7 +155,7 @@ class TestAsyncLoadData(ImpalaTestSuite):
# Create a table with the staging path
self.client.execute("create table {0} like functional.alltypesnopart \
location \'/{1}\'".format(qualified_table_name, staging_path))
location \'{1}\'".format(qualified_table_name, staging_path))
try:
@@ -171,7 +171,7 @@ class TestAsyncLoadData(ImpalaTestSuite):
enable_async_load_data
delay = "CRS_DELAY_BEFORE_LOAD_DATA:SLEEP@3000"
new_vector.get_value('exec_option')['debug_action'] = "{0}".format(delay)
load_stmt = "load data inpath \'/{1}\' \
load_stmt = "load data inpath \'{1}\' \
into table {0}".format(qualified_table_name, staging_path)
exec_start = time.time()
handle = self.execute_query_async_using_client(client, load_stmt, new_vector)


@@ -55,7 +55,7 @@ class TestRecoverPartitions(ImpalaTestSuite):
create_uncompressed_text_dimension(cls.get_workload()))
def __get_fs_location(self, db_name, table_name):
return 'test-warehouse/%s.db/%s/' % (db_name, table_name)
return '%s/%s.db/%s/' % (WAREHOUSE, db_name, table_name)
@SkipIfLocal.hdfs_client
def test_recover_partitions(self, vector, unique_database):


@@ -86,36 +86,33 @@ class TestRecursiveListing(ImpalaTestSuite):
fq_tbl_name, part_path = self._init_test_table(unique_database, partitioned)
# Add a file inside a nested directory and refresh.
self.filesystem_client.make_dir("{0}/dir1".format(part_path[1:]))
self.filesystem_client.create_file("{0}/dir1/file1.txt".format(part_path[1:]),
"file1")
self.filesystem_client.make_dir("{0}/dir1".format(part_path))
self.filesystem_client.create_file("{0}/dir1/file1.txt".format(part_path), "file1")
self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name))
assert len(self._show_files(fq_tbl_name)) == 1
assert len(self._get_rows(fq_tbl_name)) == 1
# Add another file inside the same directory, make sure it shows up.
self.filesystem_client.create_file("{0}/dir1/file2.txt".format(part_path[1:]),
"file2")
self.filesystem_client.create_file("{0}/dir1/file2.txt".format(part_path), "file2")
self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name))
assert len(self._show_files(fq_tbl_name)) == 2
assert len(self._get_rows(fq_tbl_name)) == 2
# Add a file at the top level, make sure it shows up.
self.filesystem_client.create_file("{0}/file3.txt".format(part_path[1:]),
"file3")
self.filesystem_client.create_file("{0}/file3.txt".format(part_path), "file3")
self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name))
assert len(self._show_files(fq_tbl_name)) == 3
assert len(self._get_rows(fq_tbl_name)) == 3
# Create files in the nested hidden directories and refresh. Make sure it does not
# show up
self.filesystem_client.make_dir("{0}/.hive-staging".format(part_path[1:]))
self.filesystem_client.make_dir("{0}/.hive-staging".format(part_path))
self.filesystem_client.create_file(
"{0}/.hive-staging/file3.txt".format(part_path[1:]),
"{0}/.hive-staging/file3.txt".format(part_path),
"data-should-be-ignored-by-impala")
self.filesystem_client.make_dir("{0}/_tmp.base_000000_1".format(part_path[1:]))
self.filesystem_client.make_dir("{0}/_tmp.base_000000_1".format(part_path))
self.filesystem_client.create_file(
"{0}/_tmp.base_000000_1/000000_0.manifest".format(part_path[1:]),
"{0}/_tmp.base_000000_1/000000_0.manifest".format(part_path),
"manifest-file_contents")
self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name))
assert len(self._show_files(fq_tbl_name)) == 3
@@ -135,8 +132,7 @@ class TestRecursiveListing(ImpalaTestSuite):
assert len(self._get_rows(fq_tbl_name)) == 3
# Remove the dir with two files. One should remain.
self.filesystem_client.delete_file_dir("{0}/dir1".format(part_path[1:]),
recursive=True)
self.filesystem_client.delete_file_dir("{0}/dir1".format(part_path), recursive=True)
self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name))
assert len(self._show_files(fq_tbl_name)) == 1
assert len(self._get_rows(fq_tbl_name)) == 1
@@ -201,18 +197,15 @@ class TestRecursiveListing(ImpalaTestSuite):
response = requests.get(self.enable_fs_tracing_url)
assert response.status_code == requests.codes.ok
try:
# self.filesystem_client is a DelegatingHdfsClient. It delegates delete_file_dir()
# and make_dir() to the underlying PyWebHdfsClient which expects the HDFS path
# without a leading '/'. So we use large_dir[1:] to remove the leading '/'.
self.filesystem_client.delete_file_dir(large_dir[1:], recursive=True)
self.filesystem_client.make_dir(large_dir[1:])
self.filesystem_client.delete_file_dir(large_dir, recursive=True)
self.filesystem_client.make_dir(large_dir)
self.filesystem_client.touch(files)
LOG.info("created staging files under " + large_dir)
handle = self.execute_query_async(refresh_stmt)
# Wait a moment to let REFRESH finish expected partial listing on the dir.
time.sleep(pause_ms_before_file_cleanup / 1000.0)
LOG.info("removing staging dir " + large_dir)
self.filesystem_client.delete_file_dir(large_dir[1:], recursive=True)
self.filesystem_client.delete_file_dir(large_dir, recursive=True)
LOG.info("removed staging dir " + large_dir)
try:
self.client.fetch(refresh_stmt, handle)
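These call sites previously stripped the leading '/' because the path ultimately reached PyWebHdfsClient; with this patch the stripping is centralized in DelegatingHdfsClient._normalize_path (see the hdfs_util changes further down), so the tests pass paths unchanged. A small sketch of the call-site difference, using a hypothetical partition path:

# Sketch only; part_path is an assumed example value.
part_path = "/test-warehouse/recursive_listing_db.db/tbl/p=1"
before = "{0}/dir1".format(part_path[1:])   # old: the caller stripped the '/'
after = "{0}/dir1".format(part_path)        # new: the client normalizes internally
print(before)  # test-warehouse/recursive_listing_db.db/tbl/p=1/dir1
print(after)   # /test-warehouse/recursive_listing_db.db/tbl/p=1/dir1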

View File

@@ -24,6 +24,7 @@ from tests.common.skip import SkipIf, SkipIfHive2
from tests.common.test_dimensions import create_uncompressed_text_dimension
from tests.util.test_file_parser import QueryTestSectionReader, remove_comments
from tests.common.environ import HIVE_MAJOR_VERSION
from tests.util.filesystem_utils import WAREHOUSE
# The purpose of the show create table tests are to ensure that the "SHOW CREATE TABLE"
@@ -105,7 +106,7 @@ class TestShowCreateTable(ImpalaTestSuite):
if not test_case.existing_table:
# create table in Impala
self.__exec(test_case.create_table_sql)
self.__exec(self.__replace_warehouse(test_case.create_table_sql))
# execute "SHOW CREATE TABLE ..."
result = self.__exec(test_case.show_create_table_sql)
create_table_result = self.__normalize(result.data[0])
@@ -115,9 +116,9 @@ class TestShowCreateTable(ImpalaTestSuite):
self.__exec(test_case.drop_table_sql)
# check the result matches the expected result
expected_result = self.__normalize(self.__replace_uri(
expected_result = self.__normalize(self.__replace_warehouse(self.__replace_uri(
test_case.expected_result,
self.__get_location_uri(create_table_result)))
self.__get_location_uri(create_table_result))))
self.__compare_result(expected_result, create_table_result)
if test_case.existing_table:
@@ -205,6 +206,9 @@ class TestShowCreateTable(ImpalaTestSuite):
def __replace_uri(self, s, uri):
return s if uri is None else s.replace("$$location_uri$$", uri)
def __replace_warehouse(self, s):
return s.replace("$$warehouse$$", WAREHOUSE)
# Represents one show-create-table test case. Performs validation of the test sections
# and provides SQL to execute for each section.
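The new __replace_warehouse() hook lets the show-create-table .test files refer to the warehouse through a $$warehouse$$ placeholder instead of a hard-coded location, mirroring the existing $$location_uri$$ substitution. A sketch of the substitution, with an assumed placeholder usage and URI:

# Sketch only; the SQL text and WAREHOUSE value are assumed examples.
WAREHOUSE = "ofs://localhost:9862/impala/test-warehouse"

def replace_warehouse(s):
    # Mirrors __replace_warehouse() for illustration.
    return s.replace("$$warehouse$$", WAREHOUSE)

sql = "CREATE TABLE t1 (i INT) LOCATION '$$warehouse$$/t1'"
print(replace_warehouse(sql))
# CREATE TABLE t1 (i INT) LOCATION 'ofs://localhost:9862/impala/test-warehouse/t1'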

View File

@@ -40,7 +40,7 @@ class TestTestcaseBuilder(ImpalaTestSuite):
tmp_path = get_fs_path("/tmp")
# Make sure /tmp dir exists
if not self.filesystem_client.exists(tmp_path):
self.filesystem_client.make_dir('tmp')
self.filesystem_client.make_dir(tmp_path)
# Generate Testcase Data for query without table reference
testcase_generate_query = """COPY TESTCASE TO '%s' SELECT 5 * 20""" % tmp_path
result = self.execute_query_expect_success(self.client, testcase_generate_query)
@@ -48,7 +48,7 @@ class TestTestcaseBuilder(ImpalaTestSuite):
# Check file exists
testcase_path = str(result.data)[1: -1]
index = testcase_path.index('/tmp')
index = testcase_path.index(tmp_path)
hdfs_path = testcase_path[index:-1]
assert self.filesystem_client.exists(hdfs_path), \
"File not generated {0}".format(hdfs_path)

View File

@@ -27,7 +27,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_dimensions import create_single_exec_option_dimension
from tests.common.test_result_verifier import verify_query_result_is_equal
from tests.common.test_vector import ImpalaTestDimension
from tests.util.filesystem_utils import get_fs_path
from tests.util.filesystem_utils import get_fs_path, WAREHOUSE
# (file extension, table suffix) pairs
@@ -65,14 +65,14 @@ class TestCompressedFormatsBase(ImpalaTestSuite):
new table. Unless expected_error is set, it expects the query to run successfully.
"""
# Calculate locations for the source table
base_dir = '/test-warehouse'
base_dir = WAREHOUSE
src_table = "functional{0}.{1}".format(db_suffix, table_name)
src_table_dir = join(base_dir, table_name + db_suffix)
file_basename = self.filesystem_client.ls(src_table_dir)[0]
src_file = join(src_table_dir, file_basename)
# Calculate locations for the destination table
dest_table_dir = "/test-warehouse/{0}.db/{1}".format(unique_database, table_name)
dest_table_dir = "{2}/{0}.db/{1}".format(unique_database, table_name, WAREHOUSE)
dest_table = "{0}.{1}".format(unique_database, table_name)
dest_file = join(dest_table_dir, file_basename + dest_extension)
@@ -240,7 +240,7 @@ class TestLargeCompressedFile(ImpalaTestSuite):
# is generated from a string of 50176 bytes.
payload_size = 50176
hdfs_cat = subprocess.Popen(["hadoop", "fs", "-cat",
"/test-warehouse/compressed_payload.snap"], stdout=subprocess.PIPE)
"%s/compressed_payload.snap" % WAREHOUSE], stdout=subprocess.PIPE)
compressed_payload = hdfs_cat.stdout.read()
compressed_size = len(compressed_payload)
hdfs_cat.stdout.close()

View File

@@ -17,6 +17,7 @@
import getpass
import grp
import os
import pwd
import pytest
import re
@@ -25,7 +26,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.parametrize import UniqueDatabase
from tests.common.skip import (SkipIfFS, SkipIfLocal, SkipIfDockerizedCluster,
SkipIfCatalogV2)
from tests.util.filesystem_utils import WAREHOUSE, get_fs_path, IS_S3
from tests.util.filesystem_utils import WAREHOUSE, IS_S3
@SkipIfLocal.hdfs_client
@@ -52,8 +53,8 @@ class TestInsertBehaviour(ImpalaTestSuite):
@pytest.mark.execute_serially
def test_insert_removes_staging_files(self):
TBL_NAME = "insert_overwrite_nopart"
insert_staging_dir = ("test-warehouse/functional.db/%s/"
"_impala_insert_staging" % TBL_NAME)
insert_staging_dir = ("%s/functional.db/%s/"
"_impala_insert_staging" % (WAREHOUSE, TBL_NAME))
self.filesystem_client.delete_file_dir(insert_staging_dir, recursive=True)
self.client.execute("INSERT OVERWRITE functional.%s "
"SELECT int_col FROM functional.tinyinttable" % TBL_NAME)
@@ -64,7 +65,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
def test_insert_preserves_hidden_files(self):
"""Test that INSERT OVERWRITE preserves hidden files in the root table directory"""
TBL_NAME = "insert_overwrite_nopart"
table_dir = "test-warehouse/functional.db/%s/" % TBL_NAME
table_dir = "%s/functional.db/%s/" % (WAREHOUSE, TBL_NAME)
hidden_file_locations = [".hidden", "_hidden"]
dir_locations = ["dir", ".hidden_dir"]
@@ -103,8 +104,10 @@ class TestInsertBehaviour(ImpalaTestSuite):
def test_insert_alter_partition_location(self, unique_database):
"""Test that inserts after changing the location of a partition work correctly,
including the creation of a non-existent partition dir"""
part_dir = "tmp/{0}".format(unique_database)
qualified_part_dir = get_fs_path('/' + part_dir)
# Moved to WAREHOUSE for Ozone because it does not allow rename between buckets.
work_dir = "%s/tmp" % WAREHOUSE
self.filesystem_client.make_dir(work_dir)
part_dir = os.path.join(work_dir, unique_database)
table_name = "`{0}`.`insert_alter_partition_location`".format(unique_database)
self.execute_query_expect_success(self.client, "DROP TABLE IF EXISTS %s" % table_name)
@@ -119,7 +122,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
self.execute_query_expect_success(
self.client,
"ALTER TABLE %s PARTITION(p=1) SET LOCATION '%s'" % (table_name,
qualified_part_dir))
part_dir))
self.execute_query_expect_success(
self.client,
"INSERT OVERWRITE %s PARTITION(p=1) VALUES(1)" % table_name)
@@ -492,7 +495,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
"should {1}exist but does {2}exist.".format(
path, '' if should_exist else 'not ', 'not ' if should_exist else '')
db_path = "test-warehouse/%s.db/" % self.TEST_DB_NAME
db_path = "%s/%s.db/" % (WAREHOUSE, self.TEST_DB_NAME)
table_path = db_path + "test_insert_empty_result"
partition_path = "{0}/year=2009/month=1".format(table_path)
check_path_exists(table_path, False)
@@ -600,14 +603,13 @@ class TestInsertBehaviour(ImpalaTestSuite):
if self.exploration_strategy() != 'exhaustive' and IS_S3:
pytest.skip("only runs in exhaustive")
table = "{0}.insert_clustered".format(unique_database)
table_path = "test-warehouse/{0}.db/insert_clustered".format(unique_database)
table_location = get_fs_path("/" + table_path)
table_path = "{1}/{0}.db/insert_clustered".format(unique_database, WAREHOUSE)
create_stmt = """create table {0} like functional.alltypes""".format(table)
self.execute_query_expect_success(self.client, create_stmt)
set_location_stmt = """alter table {0} set location '{1}'""".format(
table, table_location)
table, table_path)
self.execute_query_expect_success(self.client, set_location_stmt)
# Setting a lower batch size will result in multiple row batches being written.
@@ -635,8 +637,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
if self.exploration_strategy() != 'exhaustive':
pytest.skip("only runs in exhaustive")
table = "{0}.insert_clustered".format(unique_database)
table_path = "test-warehouse/{0}.db/insert_clustered".format(unique_database)
table_location = get_fs_path("/" + table_path)
table_path = "{1}/{0}.db/insert_clustered".format(unique_database, WAREHOUSE)
create_stmt = """create table {0} (
l_orderkey BIGINT,
@@ -659,7 +660,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
self.execute_query_expect_success(self.client, create_stmt)
set_location_stmt = """alter table {0} set location '{1}'""".format(
table, table_location)
table, table_path)
self.execute_query_expect_success(self.client, set_location_stmt)
# Setting a lower parquet file size will result in multiple files being written.

View File

@@ -32,7 +32,7 @@ from tests.common.skip import SkipIfEC, SkipIfFS, SkipIfLocal
from tests.common.test_dimensions import create_exec_option_dimension
from tests.common.test_result_verifier import verify_query_result_is_equal
from tests.common.test_vector import ImpalaTestDimension
from tests.util.filesystem_utils import get_fs_path
from tests.util.filesystem_utils import get_fs_path, WAREHOUSE
from tests.util.get_parquet_metadata import (decode_stats_value,
get_parquet_metadata_from_hdfs_folder)
@@ -328,8 +328,8 @@ class TestHdfsParquetTableWriter(ImpalaTestSuite):
to have columns with different signed integer logical types. The test verifies
that parquet file written by the hdfs parquet table writer using the generated
file has the same column type metadata as the generated one."""
hdfs_path = (os.environ['DEFAULT_FS'] + "/test-warehouse/{0}.db/"
"signed_integer_logical_types.parquet").format(unique_database)
hdfs_path = "{1}/{0}.db/signed_integer_logical_types.parquet".\
format(unique_database, WAREHOUSE)
self.filesystem_client.copy_from_local(os.environ['IMPALA_HOME'] +
'/testdata/data/signed_integer_logical_types.parquet', hdfs_path)
# Create table with signed integer logical types

View File

@@ -20,7 +20,7 @@ from datetime import datetime
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfFS, SkipIfLocal, SkipIfNotHdfsMinicluster
from tests.util.filesystem_utils import IS_EC
from tests.util.filesystem_utils import IS_EC, WAREHOUSE
from time import sleep
from RuntimeProfile.ttypes import TRuntimeProfileFormat
import pytest
@@ -424,7 +424,7 @@ class TestObservability(ImpalaTestSuite):
def test_query_profile_contains_all_events(self, unique_database):
"""Test that the expected events show up in a query profile for various queries"""
# make a data file to load data from
path = "test-warehouse/{0}.db/data_file".format(unique_database)
path = "{1}/{0}.db/data_file".format(unique_database, WAREHOUSE)
self.filesystem_client.create_file(path, "1")
use_query = "use {0}".format(unique_database)
self.execute_query(use_query)
@@ -441,7 +441,7 @@ class TestObservability(ImpalaTestSuite):
'explain select * from impala_6568',
'describe impala_6568',
'alter table impala_6568 set tblproperties(\'numRows\'=\'10\')',
"load data inpath '/{0}' into table impala_6568".format(path)
"load data inpath '{0}' into table impala_6568".format(path)
]
# run each query...
for query in queries:

View File

@@ -29,6 +29,7 @@ from tests.common.skip import SkipIfEC, SkipIfLocal, SkipIfFS
from tests.common.test_dimensions import add_exec_option_dimension
from tests.common.test_vector import ImpalaTestDimension
from tests.verifiers.metric_verifier import MetricVerifier
from tests.util.filesystem_utils import WAREHOUSE
# slow_build_timeout is set to 200000 to avoid failures like IMPALA-8064 where the
# runtime filters don't arrive in time.
@@ -320,12 +321,10 @@ class TestOverlapMinMaxFilters(ImpalaTestSuite):
tbl_name = "part_col_in_data_file"
self.execute_query("CREATE TABLE {0}.{1} (i INT) PARTITIONED BY (d DATE) "
"STORED AS PARQUET".format(unique_database, tbl_name))
tbl_loc = self._get_table_location("{0}.{1}".format(unique_database, tbl_name),
vector)
self.filesystem_client.make_dir(tbl_loc[tbl_loc.find('test-warehouse'):] +
'/d=2022-02-22/')
tbl_loc = "%s/%s/%s/d=2022-02-22/" % (WAREHOUSE, unique_database, tbl_name)
self.filesystem_client.make_dir(tbl_loc)
self.filesystem_client.copy_from_local(os.environ['IMPALA_HOME'] +
'/testdata/data/partition_col_in_parquet.parquet', tbl_loc + '/d=2022-02-22/')
'/testdata/data/partition_col_in_parquet.parquet', tbl_loc)
self.execute_query("ALTER TABLE {0}.{1} RECOVER PARTITIONS".format(
unique_database, tbl_name))
self.execute_query("SET PARQUET_FALLBACK_SCHEMA_RESOLUTION=NAME")

View File

@@ -82,3 +82,4 @@ def get_secondary_fs_path(path):
WAREHOUSE = get_fs_path('/test-warehouse')
FILESYSTEM_NAME = get_fs_name(FILESYSTEM)
WAREHOUSE_PREFIX = os.getenv("WAREHOUSE_LOCATION_PREFIX")
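WAREHOUSE_PREFIX simply exposes WAREHOUSE_LOCATION_PREFIX to the Python tests; WAREHOUSE itself is still derived through get_fs_path(). A minimal sketch of how these constants are assumed to compose (the real logic lives in tests/util/filesystem_utils.py):

import os

# Assumption: get_fs_path() prepends FILESYSTEM_PREFIX to an absolute path.
FILESYSTEM_PREFIX = os.getenv("FILESYSTEM_PREFIX", "")

def get_fs_path(path):
    return "%s%s" % (FILESYSTEM_PREFIX, path)

WAREHOUSE = get_fs_path('/test-warehouse')
WAREHOUSE_PREFIX = os.getenv("WAREHOUSE_LOCATION_PREFIX")
print(WAREHOUSE)         # "/test-warehouse" locally; a full ofs:// URI on Ozone
print(WAREHOUSE_PREFIX)  # e.g. "/impala" on Ozone, None elsewhere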

View File

@@ -67,9 +67,11 @@ class DelegatingHdfsClient(BaseFilesystem):
super(DelegatingHdfsClient, self).__init__()
def create_file(self, path, file_data, overwrite=True):
path = self._normalize_path(path)
return self.webhdfs_client.create_file(path, file_data, overwrite=overwrite)
def make_dir(self, path, permission=None):
path = self._normalize_path(path)
if permission:
return self.webhdfs_client.make_dir(path, permission=permission)
else:
@@ -82,18 +84,23 @@ class DelegatingHdfsClient(BaseFilesystem):
self.hdfs_filesystem_client.copy_from_local(src, dst)
def ls(self, path):
path = self._normalize_path(path)
return self.webhdfs_client.ls(path)
def exists(self, path):
path = self._normalize_path(path)
return self.webhdfs_client.exists(path)
def delete_file_dir(self, path, recursive=False):
path = self._normalize_path(path)
return self.webhdfs_client.delete_file_dir(path, recursive=recursive)
def get_file_dir_status(self, path):
path = self._normalize_path(path)
return self.webhdfs_client.get_file_dir_status(path)
def get_all_file_sizes(self, path):
path = self._normalize_path(path)
return self.webhdfs_client.get_all_file_sizes(path)
def chmod(self, path, permission):
@@ -111,6 +118,11 @@ class DelegatingHdfsClient(BaseFilesystem):
def touch(self, paths):
return self.hdfs_filesystem_client.touch(paths)
def _normalize_path(self, path):
"""Paths passed in may include a leading slash. Remove it as the underlying
PyWebHdfsClient expects the HDFS path without a leading '/'."""
return path[1:] if path.startswith('/') else path
class PyWebHdfsClientWithChmod(PyWebHdfsClient):
def chmod(self, path, permission):
"""Set the permission of 'path' to 'permission' (specified as an octal string, e.g.
@@ -212,20 +224,18 @@ class HadoopFsCommandLineClient(BaseFilesystem):
def create_file(self, path, file_data, overwrite=True):
"""Creates a temporary file with the specified file_data on the local filesystem,
then puts it into the specified path."""
fixed_path = self._normalize_path(path)
if not overwrite and self.exists(fixed_path): return False
if not overwrite and self.exists(path): return False
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
tmp_file.write(file_data)
put_cmd_params = ['-put', '-d']
if overwrite: put_cmd_params.append('-f')
put_cmd_params.extend([tmp_file.name, fixed_path])
put_cmd_params.extend([tmp_file.name, path])
(status, stdout, stderr) = self._hadoop_fs_shell(put_cmd_params)
return status == 0
def make_dir(self, path, permission=None):
"""Create a directory at the specified path. Permissions are not supported."""
fixed_path = self._normalize_path(path)
(status, stdout, stderr) = self._hadoop_fs_shell(['-mkdir', '-p', fixed_path])
(status, stdout, stderr) = self._hadoop_fs_shell(['-mkdir', '-p', path])
return status == 0
def copy(self, src, dst, overwrite=False):
@@ -233,11 +243,9 @@ class HadoopFsCommandLineClient(BaseFilesystem):
'Skip[s] creation of temporary file with the suffix ._COPYING_.' to avoid extraneous
copies on S3. If overwrite is true, the destination file is overwritten, set to false
by default for backwards compatibility."""
fixed_src = self._normalize_path(src)
fixed_dst = self._normalize_path(dst)
cp_cmd_params = ['-cp', '-d']
if overwrite: cp_cmd_params.append('-f')
cp_cmd_params.extend([fixed_src, fixed_dst])
cp_cmd_params.extend([src, dst])
(status, stdout, stderr) = self._hadoop_fs_shell(cp_cmd_params)
assert status == 0, \
'{0} copy failed: '.format(self.filesystem_type) + stderr + "; " + stdout
@@ -259,8 +267,7 @@ class HadoopFsCommandLineClient(BaseFilesystem):
def _inner_ls(self, path):
"""List names, lengths, and mode for files/directories under the specified path."""
fixed_path = self._normalize_path(path)
(status, stdout, stderr) = self._hadoop_fs_shell(['-ls', fixed_path])
(status, stdout, stderr) = self._hadoop_fs_shell(['-ls', path])
# Trim the "Found X items" line and trailing new-line
entries = stdout.split("\n")[1:-1]
files = []
@@ -275,9 +282,8 @@ class HadoopFsCommandLineClient(BaseFilesystem):
def ls(self, path):
"""Returns a list of all file and directory names in 'path'"""
fixed_path = self._normalize_path(path)
files = []
for f in self._inner_ls(fixed_path):
for f in self._inner_ls(path):
fname = os.path.basename(f['name'])
if not fname == '':
files += [fname]
@@ -285,31 +291,23 @@ class HadoopFsCommandLineClient(BaseFilesystem):
def exists(self, path):
"""Checks if a particular path exists"""
fixed_path = self._normalize_path(path)
(status, stdout, stderr) = self._hadoop_fs_shell(['-test', '-e', fixed_path])
(status, stdout, stderr) = self._hadoop_fs_shell(['-test', '-e', path])
return status == 0
def delete_file_dir(self, path, recursive=False):
"""Delete the file or directory given by the specified path. Recursive must be true
for directories."""
fixed_path = self._normalize_path(path)
rm_command = ['-rm', fixed_path]
rm_command = ['-rm', path]
if recursive:
rm_command = ['-rm', '-r', fixed_path]
rm_command = ['-rm', '-r', path]
(status, stdout, stderr) = self._hadoop_fs_shell(rm_command)
return status == 0
def get_all_file_sizes(self, path):
"""Returns a list of integers which are all the file sizes of files found
under 'path'."""
fixed_path = self._normalize_path(path)
return [f['length'] for f in
self._inner_ls(fixed_path) if f['mode'][0] == "-"]
def _normalize_path(self, path):
"""Paths passed in may lack a leading slash. This adds a leading slash if it is
missing."""
return path if path.startswith('/') else '/' + path
self._inner_ls(path) if f['mode'][0] == "-"]
def touch(self, paths):
"""Updates the access and modification times of the files specified by 'paths' to