Mirror of https://github.com/apache/impala.git (synced 2025-12-19 18:12:08 -05:00)

IMPALA-11562: Revert support for o3fs as default filesystem

Reverts support for o3fs as a default filesystem added in IMPALA-9442.
Updates test setup to use ofs instead. Munges absolute paths in Iceberg
metadata to match the new location required for ofs. Ozone has strict
requirements on volume and bucket names, so all tables must be created
within a bucket (e.g. inside /impala/test-warehouse/).

Change-Id: I45e90d30b2e68876dec0db3c43ac15ee510b17bd
Reviewed-on: http://gerrit.cloudera.org:8080/19001
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>

Committed by: Impala Public Jenkins
Parent: 55194a9c83
Commit: 3577030df6
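
For orientation, a minimal sketch (not part of the commit) of how the same warehouse
path is addressed before and after this change, assuming the minicluster values used
in the diff below (volume "impala", old bucket "base", Ozone Manager on localhost:9862;
the table path is illustrative):

# Before (o3fs): the volume and bucket are encoded in the filesystem authority, so the
# default filesystem is already rooted inside bucket "base".
DEFAULT_FS="o3fs://base.impala.localhost:9862"
echo "${DEFAULT_FS}/test-warehouse/tbl"
# -> o3fs://base.impala.localhost:9862/test-warehouse/tbl

# After (ofs): the authority is only the Ozone Manager; the volume becomes a leading
# path component and "test-warehouse" itself is the bucket, hence the new
# WAREHOUSE_LOCATION_PREFIX that is spliced into warehouse locations.
DEFAULT_FS="ofs://localhost:9862"
WAREHOUSE_LOCATION_PREFIX="/impala"
echo "${DEFAULT_FS}${WAREHOUSE_LOCATION_PREFIX}/test-warehouse/tbl"
# -> ofs://localhost:9862/impala/test-warehouse/tbl
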
@@ -682,8 +682,9 @@ elif [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then
 elif [ "${TARGET_FILESYSTEM}" = "ozone" ]; then
   export USE_OZONE_ENCRYPTION=${USE_OZONE_ENCRYPTION-true}
   export OZONE_VOLUME="impala"
-  export OZONE_BUCKET="base"
-  export DEFAULT_FS="o3fs://${OZONE_BUCKET}.${OZONE_VOLUME}.${INTERNAL_LISTEN_HOST}:9862"
+  export DEFAULT_FS="ofs://${INTERNAL_LISTEN_HOST}:9862"
+  export FILESYSTEM_PREFIX="${DEFAULT_FS}/${OZONE_VOLUME}"
+  export WAREHOUSE_LOCATION_PREFIX="/${OZONE_VOLUME}"
 else
   echo "Unsupported filesystem '$TARGET_FILESYSTEM'"
   echo "Valid values are: hdfs, isilon, s3, abfs, adls, gs, local, ozone"

@@ -197,7 +197,8 @@ do
   TEST_RET_CODE=0

   # Store a list of the files at the beginning of each iteration.
-  hdfs dfs -ls -R /test-warehouse > ${IMPALA_LOGS_DIR}/file-list-begin-${i}.log 2>&1
+  hdfs dfs -ls -R ${FILESYSTEM_PREFIX}/test-warehouse \
+    > ${IMPALA_LOGS_DIR}/file-list-begin-${i}.log 2>&1

   # Try not restarting the cluster to save time. BE, FE, JDBC and EE tests require
   # running on a cluster with default flags. We just need to restart the cluster when

@@ -334,7 +335,8 @@ do
   # to the file-list-begin*.log from the beginning of the iteration to see if files
   # are not being cleaned up. This is most useful on the first iteration, when
   # the list of files is from dataload.
-  hdfs dfs -ls -R /test-warehouse > ${IMPALA_LOGS_DIR}/file-list-end-${i}.log 2>&1
+  hdfs dfs -ls -R ${FILESYSTEM_PREFIX}/test-warehouse \
+    > ${IMPALA_LOGS_DIR}/file-list-end-${i}.log 2>&1

   if [[ $TEST_RET_CODE == 1 ]]; then
     break

@@ -113,7 +113,6 @@ public class FileSystemUtil {
       .add(SCHEME_ADL)
       .add(SCHEME_HDFS)
       .add(SCHEME_S3A)
-      .add(SCHEME_O3FS)
       .add(SCHEME_OFS)
       .add(SCHEME_GCS)
       .add(SCHEME_COS)

testdata/bin/create-tpcds-testcase-files.sh (2 changed lines)
@@ -31,7 +31,7 @@ IMPALAD=${IMPALAD:-localhost}

 TPCDS_QUERY_HOME=$IMPALA_HOME/testdata/workloads/tpcds/queries/raw
 # Target directory containing the testcase data.
-TESTCASE_DATA_DIR=/test-warehouse/tpcds-testcase-data
+TESTCASE_DATA_DIR=${FILESYSTEM_PREFIX}/test-warehouse/tpcds-testcase-data

 COPY_TEST_CASE_PREFIX="COPY TESTCASE TO '$TESTCASE_DATA_DIR'"

testdata/bin/load-metastore-snapshot.sh (9 changed lines)
@@ -32,6 +32,8 @@ if [[ $# -ne 1 ]]; then
   exit 1
 fi

+: ${TEST_WAREHOUSE_DIR=/test-warehouse}
+
 SNAPSHOT_FILE=$1
 if [ ! -f ${SNAPSHOT_FILE} ]; then
   echo "Metastore Snapshot file '${SNAPSHOT_FILE}' not found"

@@ -69,6 +71,13 @@ elif [[ "${DEFAULT_FS}" != "hdfs://localhost:20500" ]]; then
   sed -i "s|hdfs://localhost:20500|${DEFAULT_FS}|g" ${TMP_SNAPSHOT_FILE}
 fi

+if [[ "${WAREHOUSE_LOCATION_PREFIX}" != "" ]]; then
+  echo "Adding prefix ${WAREHOUSE_LOCATION_PREFIX} to iceberg.catalog_location"
+  cloc='iceberg\.catalog_location\t'
+  sed -i "s|\(${cloc}\)\(${TEST_WAREHOUSE_DIR}\)|\1${WAREHOUSE_LOCATION_PREFIX}\2|g" \
+    ${TMP_SNAPSHOT_FILE}
+fi
+
 # Drop and re-create the hive metastore database
 dropdb -U hiveuser ${METASTORE_DB} 2> /dev/null || true
 createdb -U hiveuser ${METASTORE_DB}

testdata/bin/load-test-warehouse-snapshot.sh (6 changed lines)
@@ -110,6 +110,12 @@ if [ ! -f ${SNAPSHOT_STAGING_DIR}${TEST_WAREHOUSE_DIR}/githash.txt ]; then
   exit 1
 fi

+if [ "${WAREHOUSE_LOCATION_PREFIX}" != "" ]; then
+  echo "Updating Iceberg locations with warehouse prefix ${WAREHOUSE_LOCATION_PREFIX}"
+  ${IMPALA_HOME}/testdata/bin/rewrite-iceberg-metadata.py ${WAREHOUSE_LOCATION_PREFIX} \
+    $(find ${SNAPSHOT_STAGING_DIR}${TEST_WAREHOUSE_DIR}/iceberg_test -name "metadata")
+fi
+
 echo "Copying data to ${TARGET_FILESYSTEM}"
 if [ "${TARGET_FILESYSTEM}" = "s3" ]; then
   # hive does not yet work well with s3, so we won't need hive builtins.

testdata/bin/rewrite-iceberg-metadata.py (new executable file, 97 lines)
@@ -0,0 +1,97 @@
#!/usr/bin/env impala-python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import glob
import json
import os
import sys

from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter

args = sys.argv[1:]
if len(args) < 2:
  print("Usage: rewrite-iceberg-metadata.py PREFIX METADATA-dirs...")
  exit(1)

prefix = args[0]


def add_prefix_to_snapshot(snapshot):
  if 'manifest-list' in snapshot:
    snapshot['manifest-list'] = prefix + snapshot['manifest-list']
  if 'manifests' in snapshot:
    snapshot['manifests'] = map(lambda m: prefix + m, snapshot['manifests'])
  return snapshot


def add_prefix_to_mlog(metadata_log):
  metadata_log['metadata-file'] = prefix + metadata_log['metadata-file']
  return metadata_log


def add_prefix_to_snapshot_entry(entry):
  if 'manifest_path' in entry:
    entry['manifest_path'] = prefix + entry['manifest_path']
  if 'data_file' in entry:
    entry['data_file']['file_path'] = prefix + entry['data_file']['file_path']
  return entry


for arg in args[1:]:
  # Update metadata.json
  for mfile in glob.glob(os.path.join(arg, 'v*.metadata.json')):
    with open(mfile, 'r') as f:
      metadata = json.load(f)

      if 'format-version' not in metadata:
        print("WARN: skipping {}, missing format-version".format(f))
        continue

      version = metadata['format-version']
      if version < 1 or version > 2:
        print("WARN: skipping {}, unknown version {}".format(f, version))
        continue

      # metadata: required
      metadata['location'] = prefix + metadata['location']

      # snapshots: optional
      if 'snapshots' in metadata:
        metadata['snapshots'] = map(add_prefix_to_snapshot, metadata['snapshots'])

      # metadata-log: optional
      if 'metadata-log' in metadata:
        metadata['metadata-log'] = map(add_prefix_to_mlog, metadata['metadata-log'])

    with open(mfile + '.tmp', 'w') as f:
      json.dump(metadata, f)
    os.rename(mfile + '.tmp', mfile)

  for afile in glob.glob(os.path.join(arg, '*.avro')):
    with open(afile, 'rb') as f:
      with DataFileReader(f, DatumReader()) as reader:
        schema = reader.datum_reader.writers_schema
        lines = map(add_prefix_to_snapshot_entry, reader)

        with open(afile + '.tmp', 'wb') as f:
          with DataFileWriter(f, DatumWriter(), schema) as writer:
            for line in lines:
              writer.append(line)
    os.rename(afile + '.tmp', afile)

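A usage sketch for the new script, not part of the commit: the prefix mirrors the
ofs volume prefix and the staging path is purely illustrative; the real callers are
load-test-warehouse-snapshot.sh above and the Iceberg test utilities further below.

# Prepend the Ozone volume prefix "/impala" to every absolute path recorded in the
# staged Iceberg metadata (both the v*.metadata.json files and the Avro manifests).
testdata/bin/rewrite-iceberg-metadata.py /impala \
  $(find /tmp/test-warehouse-staging/test-warehouse/iceberg_test -name "metadata")
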
testdata/cluster/admin (4 changed lines)
@@ -329,7 +329,7 @@ function start_cluster {
   if [[ "${TARGET_FILESYSTEM}" = "ozone" ]]; then
     local bucketkey=''
     if $USE_OZONE_ENCRYPTION; then
-      echo "Ozone encryption enabled for ${OZONE_VOLUME}/${OZONE_BUCKET}"
+      echo "Ozone encryption enabled for ${OZONE_VOLUME}/test-warehouse"

       # Encryption is done at the bucket level, so ensure the keys are available first.
       ${IMPALA_HOME}/testdata/bin/setup-dfs-keys.sh testkey

@@ -337,7 +337,7 @@ function start_cluster {
     fi

     ozone sh volume create ${OZONE_VOLUME} || true
-    ozone sh bucket create ${bucketkey} ${OZONE_VOLUME}/${OZONE_BUCKET} || true
+    ozone sh bucket create ${bucketkey} ${OZONE_VOLUME}/test-warehouse || true
   fi

   return $?

@@ -74,7 +74,7 @@ CREATE TABLE iceberg_hadoop_catalog(
|
||||
)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test');
|
||||
ALTER TABLE iceberg_hadoop_catalog ADD COLUMNS(event_time TIMESTAMP, register_time DATE);
|
||||
ALTER TABLE iceberg_hadoop_catalog ADD COLUMNS(message STRING, price DECIMAL(8,1));
|
||||
ALTER TABLE iceberg_hadoop_catalog ADD COLUMNS(map_test MAP <STRING, array <STRING>>, struct_test STRUCT <f1: BIGINT, f2: BIGINT>);
|
||||
|
||||
@@ -139,7 +139,7 @@ create table ice_hadoop_catalog_no_part(
|
||||
array < STRING > >
|
||||
) stored as iceberg tblproperties(
|
||||
'iceberg.catalog' = 'hadoop.catalog',
|
||||
'iceberg.catalog_location' = '/test-warehouse/$DATABASE.db/cat_loc',
|
||||
'iceberg.catalog_location' = '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc',
|
||||
'iceberg.table_identifier' = 'id_a.id_b.ice_hadoop_catalog_no_part'
|
||||
);
|
||||
---- RESULTS
|
||||
@@ -172,7 +172,7 @@ describe formatted ice_hadoop_catalog_no_part_clone;
|
||||
'col_map','map<string,array<string>>','NULL'
|
||||
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/cat_loc/$DATABASE/ice_hadoop_catalog_no_part_clone','NULL'
|
||||
'','iceberg.catalog ','hadoop.catalog '
|
||||
'','iceberg.catalog_location','/test-warehouse/$DATABASE.db/cat_loc'
|
||||
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc'
|
||||
'','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
|
||||
'','write.format.default','parquet '
|
||||
'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL'
|
||||
@@ -202,7 +202,7 @@ create table ice_hadoop_catalog(
|
||||
array < STRING > >
|
||||
) partitioned by spec (bucket(3, id)) stored as iceberg tblproperties(
|
||||
'iceberg.catalog' = 'hadoop.catalog',
|
||||
'iceberg.catalog_location' = '/test-warehouse/$DATABASE.db/cat_loc',
|
||||
'iceberg.catalog_location' = '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc',
|
||||
'iceberg.table_identifier' = 'id_a.id_b.ice_hadoop_catalog'
|
||||
);
|
||||
---- RESULTS
|
||||
@@ -238,7 +238,7 @@ describe formatted ice_hadoop_catalog_clone;
|
||||
'id','BUCKET[3]','NULL'
|
||||
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/cat_loc/$DATABASE/ice_hadoop_catalog_clone','NULL'
|
||||
'','iceberg.catalog ','hadoop.catalog '
|
||||
'','iceberg.catalog_location','/test-warehouse/$DATABASE.db/cat_loc'
|
||||
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc'
|
||||
'','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
|
||||
'','write.format.default','parquet '
|
||||
'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL'
|
||||
|
||||
@@ -70,13 +70,13 @@ PARTITIONED BY SPEC
|
||||
TRUNCATE(15, level)
|
||||
)
|
||||
STORED AS ICEBERG
|
||||
LOCATION '/$DATABASE.iceberg_test_with_location'
|
||||
LOCATION '$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE.iceberg_test_with_location'
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
|
||||
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external(
|
||||
level STRING
|
||||
)
|
||||
STORED AS ICEBERG
|
||||
LOCATION '/$DATABASE.iceberg_test_with_location'
|
||||
LOCATION '$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE.iceberg_test_with_location'
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
|
||||
---- RESULTS
|
||||
'Table has been created.'
|
||||
@@ -97,7 +97,7 @@ STRING,BIGINT,BIGINT
|
||||
---- QUERY
|
||||
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external_empty_col
|
||||
STORED AS ICEBERG
|
||||
LOCATION '/$DATABASE.iceberg_test_with_location'
|
||||
LOCATION '$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE.iceberg_test_with_location'
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
|
||||
---- RESULTS
|
||||
'Table has been created.'
|
||||
@@ -137,7 +137,7 @@ PARTITIONED BY SPEC
|
||||
)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test');
|
||||
---- RESULTS
|
||||
'Table has been created.'
|
||||
====
|
||||
@@ -171,13 +171,13 @@ PARTITIONED BY SPEC
|
||||
)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test');
|
||||
CREATE EXTERNAL TABLE iceberg_hadoop_cat_external(
|
||||
level STRING
|
||||
)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test', 'iceberg.table_identifier'='$DATABASE.iceberg_hadoop_catalog');
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test', 'iceberg.table_identifier'='$DATABASE.iceberg_hadoop_catalog');
|
||||
---- RESULTS
|
||||
'Table has been created.'
|
||||
====
|
||||
@@ -200,13 +200,13 @@ CREATE TABLE iceberg_hadoop_cat_drop(
|
||||
)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
|
||||
CREATE TABLE iceberg_hadoop_cat_query(
|
||||
level STRING
|
||||
)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
|
||||
DROP TABLE iceberg_hadoop_cat_drop;
|
||||
SELECT * FROM iceberg_hadoop_cat_query;
|
||||
---- TYPES
|
||||
@@ -217,7 +217,7 @@ string
|
||||
DESCRIBE FORMATTED iceberg_hadoop_cat_query;
|
||||
---- RESULTS: VERIFY_IS_SUBSET
|
||||
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/$DATABASE/iceberg_hadoop_cat_query','NULL'
|
||||
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
|
||||
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
|
||||
'','write.format.default','parquet '
|
||||
'','iceberg.catalog ','hadoop.catalog '
|
||||
---- TYPES
|
||||
@@ -229,12 +229,12 @@ CREATE TABLE iceberg_hadoop_cat_with_ident(
|
||||
)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
|
||||
'iceberg.table_identifier'='org.db.tbl');
|
||||
DESCRIBE FORMATTED iceberg_hadoop_cat_with_ident;
|
||||
---- RESULTS: VERIFY_IS_SUBSET
|
||||
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/org/db/tbl','NULL'
|
||||
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
|
||||
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
|
||||
'','write.format.default','parquet '
|
||||
'','iceberg.catalog ','hadoop.catalog '
|
||||
---- TYPES
|
||||
@@ -247,7 +247,7 @@ INSERT INTO iceberg_hadoop_cat_with_ident values ("ice");
|
||||
CREATE EXTERNAL TABLE iceberg_hadoop_cat_with_ident_ext
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
|
||||
'iceberg.table_identifier'='org.db.tbl');
|
||||
---- RESULTS
|
||||
'Table has been created.'
|
||||
@@ -256,7 +256,7 @@ TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
DESCRIBE FORMATTED iceberg_hadoop_cat_with_ident_ext;
|
||||
---- RESULTS: VERIFY_IS_SUBSET
|
||||
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/org/db/tbl','NULL'
|
||||
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
|
||||
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
|
||||
'','write.format.default','parquet '
|
||||
'','iceberg.catalog ','hadoop.catalog '
|
||||
---- TYPES
|
||||
@@ -288,7 +288,7 @@ PARTITIONED BY SPEC
|
||||
)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('write.format.default'='orc','iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
|
||||
---- RESULTS
|
||||
'Table has been created.'
|
||||
====
|
||||
@@ -475,7 +475,7 @@ create table ice_part_hadoop_catalog (
|
||||
col_identity
|
||||
) stored as iceberg TBLPROPERTIES(
|
||||
'iceberg.catalog' = 'hadoop.catalog',
|
||||
'iceberg.catalog_location' = '/$DATABASE/hadoop_catalog_test'
|
||||
'iceberg.catalog_location' = '$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test'
|
||||
);
|
||||
---- RESULTS
|
||||
'Table has been created.'
|
||||
|
||||
@@ -75,7 +75,7 @@ STRING,BIGINT,BIGINT
|
||||
# Use PARTITIONED BY SPEC
|
||||
CREATE TABLE ice_ctas_hadoop_tables_part PARTITIONED BY SPEC (month(d))
|
||||
STORED AS ICEBERG
|
||||
LOCATION '/test-warehouse/$DATABASE.db/loc_test'
|
||||
LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/loc_test'
|
||||
TBLPROPERTIES ('iceberg.catalog'='hadoop.tables') AS SELECT s, ts, d FROM value_tbl;
|
||||
SELECT * FROM ice_ctas_hadoop_tables_part where d='2021-02-26';
|
||||
---- RESULTS
|
||||
@@ -134,7 +134,7 @@ STRING,BIGINT,BIGINT
|
||||
CREATE TABLE ice_ctas_hadoop_catalog_part PARTITIONED BY SPEC (truncate(3, s))
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES ('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/cat_loc',
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/cat_loc',
|
||||
'iceberg.table_identifier'='ns1.ns2.ctas')
|
||||
AS SELECT cast(t as INT), s, d FROM value_tbl;
|
||||
INSERT INTO ice_ctas_hadoop_catalog_part VALUES (1, 'lion', '2021-02-27');
|
||||
|
||||
@@ -102,7 +102,7 @@ select * from ts_iceberg;
|
||||
create table iceberg_hadoop_cat (i int)
|
||||
stored as iceberg
|
||||
tblproperties('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/hadoop_catalog_test');
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test');
|
||||
insert into iceberg_hadoop_cat values (1), (2), (3);
|
||||
---- RESULTS
|
||||
: 3
|
||||
@@ -128,7 +128,7 @@ STRING, STRING, STRING
|
||||
create table iceberg_hadoop_cat_ti (i int)
|
||||
stored as iceberg
|
||||
tblproperties('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/hadoop_catalog_test',
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test',
|
||||
'iceberg.table_identifier'='test.custom_db.int_table');
|
||||
insert into iceberg_hadoop_cat_ti values (1), (2), (3);
|
||||
---- RESULTS
|
||||
@@ -170,7 +170,7 @@ INT
|
||||
# Query external Iceberg table
|
||||
create external table iceberg_hive_cat_ext (i int)
|
||||
stored as iceberg
|
||||
location '/test-warehouse/$DATABASE.db/iceberg_hive_cat'
|
||||
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat'
|
||||
tblproperties('iceberg.catalog'='hive.catalog',
|
||||
'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
|
||||
---- RESULTS
|
||||
@@ -223,7 +223,7 @@ INT
|
||||
# Create another external Iceberg table
|
||||
create external table iceberg_hive_cat_ext_2 (i int)
|
||||
stored as iceberg
|
||||
location '/test-warehouse/$DATABASE.db/iceberg_hive_cat'
|
||||
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat'
|
||||
tblproperties('iceberg.catalog'='hive.catalog',
|
||||
'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
|
||||
select * from iceberg_hive_cat_ext_2
|
||||
@@ -248,7 +248,7 @@ Table does not exist
|
||||
# Insert into hive catalog with custom location.
|
||||
create table iceberg_hive_cat_custom_loc (i int)
|
||||
stored as iceberg
|
||||
location '/test-warehouse/$DATABASE.db/custom_hive_cat'
|
||||
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/custom_hive_cat'
|
||||
tblproperties('iceberg.catalog'='hive.catalog');
|
||||
insert into iceberg_hive_cat_custom_loc values (1), (2), (3);
|
||||
---- RESULTS
|
||||
|
||||
@@ -37,7 +37,7 @@ AnalysisException: Table is not partitioned: functional_parquet.iceberg_non_part
|
||||
CREATE TABLE iceberg_table_hadoop_tables_cat_loc(i INT)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.tables',
|
||||
'iceberg.catalog_location'='/test-warehouse/cat_loc')
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/cat_loc')
|
||||
---- CATCH
|
||||
iceberg.catalog_location cannot be set for Iceberg table stored in hadoop.tables
|
||||
====
|
||||
@@ -46,7 +46,7 @@ CREATE TABLE iceberg_table_hadoop_catalog(
|
||||
level STRING
|
||||
)
|
||||
STORED AS ICEBERG
|
||||
LOCATION '/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
|
||||
LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog');
|
||||
---- CATCH
|
||||
AnalysisException: Location cannot be set for Iceberg table with 'hadoop.catalog'.
|
||||
@@ -63,9 +63,9 @@ AnalysisException: Table property 'iceberg.catalog_location' is necessary for Ic
|
||||
---- QUERY
|
||||
CREATE EXTERNAL TABLE iceberg_external_table_hadoop_catalog
|
||||
STORED AS ICEBERG
|
||||
LOCATION '/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
|
||||
LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/fake_table',
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/fake_table',
|
||||
'iceberg.table_identifier'='fake_db.fake_table');
|
||||
---- CATCH
|
||||
AnalysisException: Location cannot be set for Iceberg table with 'hadoop.catalog'.
|
||||
@@ -99,7 +99,7 @@ Table property 'iceberg.table_identifier' is necessary for external Iceberg tabl
|
||||
CREATE EXTERNAL TABLE fake_iceberg_table_hadoop_catalog
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/fake_table',
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/fake_table',
|
||||
'iceberg.table_identifier'='fake_db.fake_table');
|
||||
SHOW CREATE TABLE fake_iceberg_table_hadoop_catalog;
|
||||
---- CATCH
|
||||
@@ -144,7 +144,7 @@ AnalysisException: The Iceberg table has multiple partition specs.
|
||||
CREATE TABLE iceberg_hive_cat_with_cat_locaction (i int)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hive.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/catalog_loc')
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog_loc')
|
||||
---- CATCH
|
||||
iceberg.catalog_location cannot be set for Iceberg table stored in hive.catalog
|
||||
====
|
||||
@@ -152,7 +152,7 @@ iceberg.catalog_location cannot be set for Iceberg table stored in hive.catalog
|
||||
CREATE TABLE iceberg_hadoop_tables_with_metdata_locaction (i int)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.tables',
|
||||
'metadata_location'='/test-warehouse/catalog/metadata_loc')
|
||||
'metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog/metadata_loc')
|
||||
---- CATCH
|
||||
metadata_location cannot be set for Iceberg tables
|
||||
====
|
||||
@@ -160,8 +160,8 @@ metadata_location cannot be set for Iceberg tables
|
||||
CREATE TABLE iceberg_hadoop_cat_with_metadata_locaction (i int)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/catalog',
|
||||
'metadata_location'='/test-warehouse/catalog/metadata_loc')
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog',
|
||||
'metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog/metadata_loc')
|
||||
---- CATCH
|
||||
metadata_location cannot be set for Iceberg tables
|
||||
====
|
||||
@@ -169,7 +169,7 @@ metadata_location cannot be set for Iceberg tables
|
||||
CREATE TABLE iceberg_hive_cat_with_metadata_locaction (i int)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hive.catalog',
|
||||
'metadata_location'='/test-warehouse/catalog/metadata_loc')
|
||||
'metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/catalog/metadata_loc')
|
||||
---- CATCH
|
||||
metadata_location cannot be set for Iceberg tables
|
||||
====
|
||||
@@ -218,7 +218,7 @@ CREATE TABLE iceberg_table_hadoop_catalog(
|
||||
)
|
||||
STORED AS ICEBERG
|
||||
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/other/$DATABASE/hadoop_catalog_test');
|
||||
ALTER TABLE iceberg_table_hadoop_catalog RENAME TO iceberg_table_hadoop_catalog_new;
|
||||
---- CATCH
|
||||
UnsupportedOperationException: Cannot rename Iceberg tables that use 'hadoop.catalog' as catalog.
|
||||
@@ -254,7 +254,7 @@ ALTER TABLE iceberg_table_hadoop_catalog unset TBLPROPERTIES('iceberg.table_iden
|
||||
AnalysisException: Unsetting the 'iceberg.table_identifier' table property is not supported for Iceberg table.
|
||||
====
|
||||
---- QUERY
|
||||
ALTER TABLE iceberg_table_hadoop_catalog set TBLPROPERTIES('metadata_location'='/test-warehouse/metadata_loc');
|
||||
ALTER TABLE iceberg_table_hadoop_catalog set TBLPROPERTIES('metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/metadata_loc');
|
||||
---- CATCH
|
||||
AnalysisException: Changing the 'metadata_location' table property is not supported for Iceberg table.
|
||||
====
|
||||
@@ -264,7 +264,7 @@ ALTER TABLE iceberg_table_hadoop_catalog unset TBLPROPERTIES('metadata_location'
|
||||
AnalysisException: Unsetting the 'metadata_location' table property is not supported for Iceberg table.
|
||||
====
|
||||
---- QUERY
|
||||
ALTER TABLE iceberg_table_hive_catalog set TBLPROPERTIES('metadata_location'='/test-warehouse/metadata_loc');
|
||||
ALTER TABLE iceberg_table_hive_catalog set TBLPROPERTIES('metadata_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/metadata_loc');
|
||||
---- CATCH
|
||||
AnalysisException: Changing the 'metadata_location' table property is not supported for Iceberg table.
|
||||
====
|
||||
@@ -314,7 +314,7 @@ Iceberg tables cannot have Hive ACID table properties.
|
||||
CREATE TABLE iceberg_insert_only(i int)
|
||||
STORED AS ICEBERG
|
||||
tblproperties('iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/$EXTERNAL_WAREHOUSE_DIR/specified_location',
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/$EXTERNAL_WAREHOUSE_DIR/specified_location',
|
||||
'transactional'='true', 'transactional_properties'='insert_only');
|
||||
---- CATCH
|
||||
Iceberg tables cannot have Hive ACID table properties.
|
||||
|
||||
@@ -5,7 +5,7 @@ STORED AS ICEBERG
|
||||
TBLPROPERTIES(
|
||||
'iceberg.file_format'='orc',
|
||||
'iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc',
|
||||
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc',
|
||||
'iceberg.table_identifier'='functional_parquet.iceberg_partitioned_orc'
|
||||
);
|
||||
ALTER TABLE iceberg_partitioned_orc_external_old_fileformat
|
||||
@@ -13,7 +13,7 @@ UNSET TBLPROPERTIES IF EXISTS ('write.format.default');
|
||||
DESCRIBE FORMATTED iceberg_partitioned_orc_external_old_fileformat;
|
||||
---- RESULTS: VERIFY_IS_SUBSET
|
||||
'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL'
|
||||
'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
|
||||
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
|
||||
'','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc'
|
||||
'','iceberg.file_format','orc '
|
||||
'','iceberg.catalog ','hadoop.catalog '
|
||||
|
||||
@@ -370,7 +370,7 @@ string, string, string
|
||||
describe formatted hadoop_catalog_test_external;
|
||||
---- RESULTS: VERIFY_IS_SUBSET
|
||||
'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/hadoop_catalog_test/functional_parquet/hadoop_catalog_test','NULL'
|
||||
'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/hadoop_catalog_test'
|
||||
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/hadoop_catalog_test'
|
||||
'','iceberg.table_identifier','functional_parquet.hadoop_catalog_test'
|
||||
'','write.format.default','parquet '
|
||||
'','iceberg.catalog ','hadoop.catalog '
|
||||
@@ -381,7 +381,7 @@ string, string, string
|
||||
describe formatted iceberg_partitioned_orc_external;
|
||||
---- RESULTS: VERIFY_IS_SUBSET
|
||||
'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc/functional_parquet/iceberg_partitioned_orc','NULL'
|
||||
'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
|
||||
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/iceberg_partitioned_orc'
|
||||
'','iceberg.table_identifier','functional_parquet.iceberg_partitioned_orc'
|
||||
'','write.format.default','orc '
|
||||
'','iceberg.catalog ','hadoop.catalog '
|
||||
@@ -451,7 +451,7 @@ bigint,bigint
|
||||
describe formatted iceberg_resolution_test_external;
|
||||
---- RESULTS: VERIFY_IS_SUBSET
|
||||
'Location: ','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/iceberg_resolution_test/functional_parquet/iceberg_resolution_test','NULL'
|
||||
'','iceberg.catalog_location','/test-warehouse/iceberg_test/hadoop_catalog/iceberg_resolution_test'
|
||||
'','iceberg.catalog_location','$WAREHOUSE_LOCATION_PREFIX/test-warehouse/iceberg_test/hadoop_catalog/iceberg_resolution_test'
|
||||
'','iceberg.table_identifier','functional_parquet.iceberg_resolution_test'
|
||||
'','write.format.default','parquet '
|
||||
'','iceberg.catalog ','hadoop.catalog '
|
||||
|
||||
@@ -120,14 +120,14 @@ INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIM
|
||||
# Note: intentionally not using $SECONDARY_FILESYSTEM so that the partition points
|
||||
# to the default filesystem.
|
||||
alter table alltypes add partition(year=2010, month=3)
|
||||
location '/test-warehouse/alltypes_parquet/year=2010/month=3'
|
||||
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/alltypes_parquet/year=2010/month=3'
|
||||
---- RESULTS
|
||||
====
|
||||
---- QUERY
|
||||
# Note: intentionally not using $SECONDARY_FILESYSTEM so that the partition points
|
||||
# to the default filesystem.
|
||||
alter table alltypes add partition(year=2010, month=4)
|
||||
location '/test-warehouse/alltypes_parquet/year=2010/month=4'
|
||||
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/alltypes_parquet/year=2010/month=4'
|
||||
---- RESULTS
|
||||
====
|
||||
---- QUERY
|
||||
@@ -187,7 +187,7 @@ INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIM
|
||||
# Note: intentionally not using $SECONDARY_FILESYSTEM so that the partition points
|
||||
# to the default filesystem.
|
||||
alter table alltypes add partition(year=2009, month=5)
|
||||
location '/test-warehouse/alltypes_parquet/year=2009/month=5'
|
||||
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/alltypes_parquet/year=2009/month=5'
|
||||
---- RESULTS
|
||||
====
|
||||
---- QUERY
|
||||
@@ -199,7 +199,7 @@ location '$SECONDARY_FILESYSTEM/multi_fs_tests/$DATABASE.db/alltypes_parquet/yea
|
||||
# Note: intentionally not using $SECONDARY_FILESYSTEM so that the partition points
|
||||
# to the default filesystem.
|
||||
alter table alltypes add partition(year=2010, month=5)
|
||||
location '/test-warehouse/alltypes_parquet/year=2010/month=5'
|
||||
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/alltypes_parquet/year=2010/month=5'
|
||||
---- RESULTS
|
||||
====
|
||||
---- QUERY
|
||||
@@ -306,7 +306,7 @@ year=2009/month=1: 310
|
||||
# ADD PARTITION on the default filesystem.
|
||||
# Point to unique database so we don't overwrite someone else's data.
|
||||
alter table alltypes_multipart_insert add partition(year=2009, month=2)
|
||||
location '/test-warehouse/$DATABASE.db/alltypes_multipart_insert/year=2009/month=2'
|
||||
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/alltypes_multipart_insert/year=2009/month=2'
|
||||
---- RESULTS
|
||||
====
|
||||
---- QUERY
|
||||
|
||||
@@ -12,7 +12,7 @@ PARTITIONED BY (
|
||||
float_col FLOAT,
|
||||
double_col DOUBLE,
|
||||
string_col STRING
|
||||
) LOCATION '/test-warehouse/$DATABASE.db/all_insert_partition_col_types'
|
||||
) LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/all_insert_partition_col_types'
|
||||
====
|
||||
---- QUERY
|
||||
DESCRIBE all_insert_partition_col_types
|
||||
@@ -193,13 +193,13 @@ STRING, STRING, STRING
|
||||
ALTER TABLE all_partition_col_types
|
||||
ADD PARTITION (bool_col=FALSE, tinyint_col=1, smallint_col=1, int_col=1,
|
||||
bigint_col=10, float_col=0, double_col=1.1, string_col='1')
|
||||
LOCATION '/test-warehouse/$DATABASE.db/all_insert_partition_col_types/tinyint_col=1/smallint_col=1/int_col=1/bigint_col=10/float_col=0/double_col=1.1/string_col=1/'
|
||||
LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/all_insert_partition_col_types/tinyint_col=1/smallint_col=1/int_col=1/bigint_col=10/float_col=0/double_col=1.1/string_col=1/'
|
||||
====
|
||||
---- QUERY
|
||||
ALTER TABLE all_partition_col_types
|
||||
ADD PARTITION (bool_col=TRUE, tinyint_col=2, smallint_col=2, int_col=2,
|
||||
bigint_col=20, float_col=0, double_col=1.1, string_col='2')
|
||||
LOCATION '/test-warehouse/$DATABASE.db/all_insert_partition_col_types/tinyint_col=2/smallint_col=2/int_col=2/bigint_col=20/float_col=0/double_col=1.1/string_col=2/'
|
||||
LOCATION '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/all_insert_partition_col_types/tinyint_col=2/smallint_col=2/int_col=2/bigint_col=20/float_col=0/double_col=1.1/string_col=2/'
|
||||
====
|
||||
---- QUERY
|
||||
EXPLAIN
|
||||
|
||||
@@ -652,7 +652,7 @@ TBLPROPERTIES('write.format.default'='parquet',
|
||||
'write.parquet.page-size-bytes'='65536',
|
||||
'write.parquet.dict-size-bytes'='131072',
|
||||
'iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test')
|
||||
'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test')
|
||||
---- RESULTS-HIVE-3
|
||||
CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_test2 (
|
||||
level STRING NULL
|
||||
@@ -667,7 +667,7 @@ TBLPROPERTIES ('external.table.purge'='TRUE',
|
||||
'write.parquet.dict-size-bytes'='131072',
|
||||
'engine.hive.enabled'='true',
|
||||
'iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test')
|
||||
'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test')
|
||||
====
|
||||
---- CREATE_TABLE
|
||||
CREATE TABLE iceberg_test3 (
|
||||
@@ -681,7 +681,7 @@ TBLPROPERTIES('write.format.default'='parquet',
|
||||
'write.parquet.page-size-bytes'='65536',
|
||||
'write.parquet.dict-size-bytes'='131072',
|
||||
'iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test',
|
||||
'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test',
|
||||
'iceberg.table_identifier'='org.my_db.my_table')
|
||||
---- RESULTS-HIVE-3
|
||||
CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_test3 (
|
||||
@@ -697,7 +697,7 @@ TBLPROPERTIES ('external.table.purge'='TRUE',
|
||||
'write.parquet.dict-size-bytes'='131072',
|
||||
'engine.hive.enabled'='true',
|
||||
'iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test',
|
||||
'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test',
|
||||
'iceberg.table_identifier'='org.my_db.my_table')
|
||||
====
|
||||
---- CREATE_TABLE
|
||||
@@ -725,7 +725,7 @@ TBLPROPERTIES('write.format.default'='parquet',
|
||||
'write.parquet.page-size-bytes'='65536',
|
||||
'write.parquet.dict-size-bytes'='131072',
|
||||
'iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test')
|
||||
'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test')
|
||||
---- RESULTS-HIVE-3
|
||||
CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_test1_partitioned (
|
||||
level STRING NULL,
|
||||
@@ -753,7 +753,7 @@ TBLPROPERTIES ('external.table.purge'='TRUE',
|
||||
'write.parquet.dict-size-bytes'='131072',
|
||||
'engine.hive.enabled'='true',
|
||||
'iceberg.catalog'='hadoop.catalog',
|
||||
'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test')
|
||||
'iceberg.catalog_location'='$$warehouse$$/hadoop_catalog_test')
|
||||
====
|
||||
---- CREATE_TABLE
|
||||
CREATE TABLE iceberg_test_orc (
|
||||
|
||||
@@ -32,6 +32,7 @@ from tests.common.test_dimensions import (create_client_protocol_dimension,
|
||||
create_exec_option_dimension, create_orc_dimension)
|
||||
from tests.util.hdfs_util import NAMENODE
|
||||
from tests.util.calculation_util import get_random_id
|
||||
from tests.util.filesystem_utils import WAREHOUSE_PREFIX
|
||||
|
||||
ADMIN = "admin"
|
||||
RANGER_AUTH = ("admin", "admin")
|
||||
@@ -285,7 +286,7 @@ class TestRanger(CustomClusterTestSuite):
|
||||
admin_client.execute("revoke {0} on server from user {1}".format(privilege, user))
|
||||
|
||||
def _test_show_grant_basic(self, admin_client, kw, id, unique_database, unique_table):
|
||||
uri = '/tmp'
|
||||
uri = WAREHOUSE_PREFIX + '/tmp'
|
||||
try:
|
||||
# Grant server privileges and verify
|
||||
admin_client.execute("grant all on server to {0} {1}".format(kw, id), user=ADMIN)
|
||||
@@ -306,7 +307,7 @@ class TestRanger(CustomClusterTestSuite):
|
||||
result = self.client.execute("show grant {0} {1} on uri '{2}'"
|
||||
.format(kw, id, uri))
|
||||
TestRanger._check_privileges(result, [
|
||||
[kw, id, "", "", "", "{0}{1}".format(NAMENODE, uri), "", "all", "false"]])
|
||||
[kw, id, "", "", "", "{0}{1}".format(NAMENODE, '/tmp'), "", "all", "false"]])
|
||||
|
||||
# Revoke uri privileges and verify
|
||||
admin_client.execute("revoke all on uri '{0}' from {1} {2}"
|
||||
|
||||
@@ -21,9 +21,10 @@
|
||||
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from subprocess import check_call
|
||||
|
||||
from tests.util.filesystem_utils import get_fs_path
|
||||
from tests.util.filesystem_utils import get_fs_path, WAREHOUSE_PREFIX
|
||||
|
||||
|
||||
def create_iceberg_table_from_directory(impala_client, unique_database, table_name,
|
||||
@@ -38,6 +39,15 @@ def create_iceberg_table_from_directory(impala_client, unique_database, table_na
|
||||
os.environ['IMPALA_HOME'], 'testdata/data/iceberg_test/{0}'.format(table_name))
|
||||
assert os.path.isdir(local_dir)
|
||||
|
||||
# If using a prefix, rewrite iceberg metadata to use the prefix
|
||||
if WAREHOUSE_PREFIX:
|
||||
tmp_dir = tempfile.mktemp(table_name)
|
||||
check_call(['cp', '-r', local_dir, tmp_dir])
|
||||
rewrite = os.path.join(
|
||||
os.environ['IMPALA_HOME'], 'testdata/bin/rewrite-iceberg-metadata.py')
|
||||
check_call([rewrite, WAREHOUSE_PREFIX, os.path.join(tmp_dir, 'metadata')])
|
||||
local_dir = tmp_dir
|
||||
|
||||
# Put the directory in the database's directory (not the table directory)
|
||||
hdfs_parent_dir = get_fs_path("/test-warehouse")
|
||||
|
||||
@@ -47,7 +57,7 @@ def create_iceberg_table_from_directory(impala_client, unique_database, table_na
|
||||
check_call(['hdfs', 'dfs', '-rm', '-f', '-r', hdfs_dir])
|
||||
|
||||
# Note: -d skips a staging copy
|
||||
check_call(['hdfs', 'dfs', '-put', '-d', local_dir, hdfs_parent_dir])
|
||||
check_call(['hdfs', 'dfs', '-put', '-d', local_dir, hdfs_dir])
|
||||
|
||||
# Create external table
|
||||
qualified_table_name = '{0}.{1}'.format(unique_database, table_name)
|
||||
|
||||
@@ -491,7 +491,8 @@ class ImpalaTestSuite(BaseTestSuite):
|
||||
"MANAGED_WAREHOUSE_DIR",
|
||||
"EXTERNAL_WAREHOUSE_DIR"])
|
||||
repl.update({
|
||||
'$SECONDARY_FILESYSTEM': os.environ.get("SECONDARY_FILESYSTEM", ""),
|
||||
'$SECONDARY_FILESYSTEM': os.getenv("SECONDARY_FILESYSTEM", ""),
|
||||
'$WAREHOUSE_LOCATION_PREFIX': os.getenv("WAREHOUSE_LOCATION_PREFIX", ""),
|
||||
'$USER': getuser()})
|
||||
|
||||
if use_db:
|
||||
|
||||
@@ -25,6 +25,7 @@ import tempfile
|
||||
from impala_py_lib.helpers import find_all_files, is_core_dump
|
||||
from tests.common.file_utils import assert_file_in_dir_contains
|
||||
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
|
||||
from tests.util.filesystem_utils import get_fs_path
|
||||
|
||||
LOG = logging.getLogger('test_startup_filesystem_checks')
|
||||
|
||||
@@ -38,12 +39,19 @@ class TestStartupFilesystemChecks(CustomClusterTestSuite):
|
||||
setup_method().
|
||||
"""
|
||||
|
||||
NONEXISTENT_PATH = "/nonexistent_path"
|
||||
NONDIRECTORY_PATH = "/test-warehouse/alltypes/year=2009/month=1/090101.txt"
|
||||
VALID_SUBDIRECTORY = "/test-warehouse"
|
||||
# Use get_fs_path because testdata in Ozone requires a volume prefix and does not
|
||||
# accept underscore as a bucket name (the first element after volume prefix).
|
||||
NONEXISTENT_PATH = get_fs_path("/nonexistent-path")
|
||||
NONDIRECTORY_PATH = \
|
||||
get_fs_path("/test-warehouse/alltypes/year=2009/month=1/090101.txt")
|
||||
VALID_SUBDIRECTORY = get_fs_path("/test-warehouse")
|
||||
# Test multiple valid directories along with an empty entry
|
||||
MULTIPLE_VALID_DIRECTORIES = \
|
||||
"/,/test-warehouse/zipcode_incomes,,/test-warehouse/alltypes"
|
||||
MULTIPLE_VALID_DIRECTORIES = ",".join([
|
||||
"/",
|
||||
get_fs_path("/test-warehouse/zipcode_incomes"),
|
||||
"",
|
||||
get_fs_path("/test-warehouse/alltypes")]
|
||||
)
|
||||
LOG_DIR = tempfile.mkdtemp(prefix="test_startup_filesystem_checks_",
|
||||
dir=os.getenv("LOG_DIR"))
|
||||
MINIDUMP_PATH = tempfile.mkdtemp()
|
||||
|
||||
@@ -38,6 +38,7 @@ from tests.common.test_vector import ImpalaTestDimension
|
||||
from tests.util.filesystem_utils import (
|
||||
get_fs_path,
|
||||
WAREHOUSE,
|
||||
WAREHOUSE_PREFIX,
|
||||
IS_HDFS,
|
||||
IS_S3,
|
||||
IS_ADLS,
|
||||
@@ -47,8 +48,11 @@ from tests.common.impala_cluster import ImpalaCluster
|
||||
from tests.util.filesystem_utils import FILESYSTEM_PREFIX
|
||||
|
||||
|
||||
TRASH_PATH = ('.Trash/{0}/Current' if IS_OZONE else 'user/{0}/.Trash/Current').\
|
||||
format(getpass.getuser())
|
||||
def get_trash_path(bucket, path):
|
||||
if IS_OZONE:
|
||||
return get_fs_path('/{0}/.Trash/{1}/Current{2}/{0}/{3}'.format(bucket,
|
||||
getpass.getuser(), WAREHOUSE_PREFIX, path))
|
||||
return '/user/{0}/.Trash/Current/{1}/{2}'.format(getpass.getuser(), bucket, path)
|
||||
|
||||
# Validates DDL statements (create, drop)
|
||||
class TestDdlStatements(TestDdlBase):
|
||||
@@ -59,20 +63,16 @@ class TestDdlStatements(TestDdlBase):
|
||||
self.client.execute("create table {0}.t1(i int)".format(unique_database))
|
||||
self.client.execute("create table {0}.t2(i int)".format(unique_database))
|
||||
# Create sample test data files under the table directories
|
||||
self.filesystem_client.create_file("test-warehouse/{0}.db/t1/t1.txt".\
|
||||
format(unique_database), file_data='t1')
|
||||
self.filesystem_client.create_file("test-warehouse/{0}.db/t2/t2.txt".\
|
||||
format(unique_database), file_data='t2')
|
||||
dbpath = "{0}/{1}.db".format(WAREHOUSE, unique_database)
|
||||
self.filesystem_client.create_file("{}/t1/t1.txt".format(dbpath), file_data='t1')
|
||||
self.filesystem_client.create_file("{}/t2/t2.txt".format(dbpath), file_data='t2')
|
||||
# Drop the table (without purge) and make sure it exists in trash
|
||||
self.client.execute("drop table {0}.t1".format(unique_database))
|
||||
assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/t1.txt".\
|
||||
format(unique_database))
|
||||
assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/".\
|
||||
format(unique_database))
|
||||
assert self.filesystem_client.exists(
|
||||
'{0}/test-warehouse/{1}.db/t1/t1.txt'.format(TRASH_PATH, unique_database))
|
||||
assert self.filesystem_client.exists(
|
||||
'{0}/test-warehouse/{1}.db/t1'.format(TRASH_PATH, unique_database))
|
||||
assert not self.filesystem_client.exists("{}/t1/t1.txt".format(dbpath))
|
||||
assert not self.filesystem_client.exists("{}/t1/".format(dbpath))
|
||||
trash = get_trash_path("test-warehouse", unique_database + ".db")
|
||||
assert self.filesystem_client.exists("{}/t1/t1.txt".format(trash))
|
||||
assert self.filesystem_client.exists("{}/t1".format(trash))
|
||||
# Drop the table (with purge) and make sure it doesn't exist in trash
|
||||
self.client.execute("drop table {0}.t2 purge".format(unique_database))
|
||||
if not IS_S3 and not IS_ADLS:
|
||||
@@ -82,27 +82,20 @@ class TestDdlStatements(TestDdlBase):
|
||||
# consistent.
|
||||
# The ADLS Python client is not strongly consistent, so these files may still be
|
||||
# visible after a DROP. (Remove after IMPALA-5335 is resolved)
|
||||
assert not self.filesystem_client.exists("test-warehouse/{0}.db/t2/".\
|
||||
format(unique_database))
|
||||
assert not self.filesystem_client.exists("test-warehouse/{0}.db/t2/t2.txt".\
|
||||
format(unique_database))
|
||||
assert not self.filesystem_client.exists(
|
||||
'{0}/test-warehouse/{1}.db/t2/t2.txt'.format(TRASH_PATH, unique_database))
|
||||
assert not self.filesystem_client.exists(
|
||||
'{0}/test-warehouse/{1}.db/t2'.format(TRASH_PATH, unique_database))
|
||||
assert not self.filesystem_client.exists("{}/t2/".format(dbpath))
|
||||
assert not self.filesystem_client.exists("{}/t2/t2.txt".format(dbpath))
|
||||
assert not self.filesystem_client.exists("{}/t2/t2.txt".format(trash))
|
||||
assert not self.filesystem_client.exists("{}/t2".format(trash))
|
||||
# Create an external table t3 and run the same test as above. Make
|
||||
# sure the data is not deleted
|
||||
self.filesystem_client.make_dir(
|
||||
"test-warehouse/{0}.db/data_t3/".format(unique_database), permission=777)
|
||||
self.filesystem_client.make_dir("{}/data_t3/".format(dbpath), permission=777)
|
||||
self.filesystem_client.create_file(
|
||||
"test-warehouse/{0}.db/data_t3/data.txt".format(unique_database), file_data='100')
|
||||
"{}/data_t3/data.txt".format(dbpath), file_data='100')
|
||||
self.client.execute("create external table {0}.t3(i int) stored as "
|
||||
"textfile location \'/test-warehouse/{0}.db/data_t3\'" .format(unique_database))
|
||||
"textfile location \'{1}/data_t3\'".format(unique_database, dbpath))
|
||||
self.client.execute("drop table {0}.t3 purge".format(unique_database))
|
||||
assert self.filesystem_client.exists(
|
||||
"test-warehouse/{0}.db/data_t3/data.txt".format(unique_database))
|
||||
self.filesystem_client.delete_file_dir(
|
||||
"test-warehouse/{0}.db/data_t3".format(unique_database), recursive=True)
|
||||
assert self.filesystem_client.exists("{}/data_t3/data.txt".format(dbpath))
|
||||
self.filesystem_client.delete_file_dir("{}/data_t3".format(dbpath), recursive=True)
|
||||
|
||||
@SkipIfFS.eventually_consistent
|
||||
@SkipIfLocal.hdfs_client
|
||||
@@ -110,24 +103,24 @@ class TestDdlStatements(TestDdlBase):
|
||||
self.client.execute('use default')
|
||||
# Verify the db directory exists
|
||||
assert self.filesystem_client.exists(
|
||||
"test-warehouse/{0}.db/".format(unique_database))
|
||||
"{1}/{0}.db/".format(unique_database, WAREHOUSE))
|
||||
|
||||
self.client.execute("create table {0}.t1(i int)".format(unique_database))
|
||||
# Verify the table directory exists
|
||||
assert self.filesystem_client.exists(
|
||||
"test-warehouse/{0}.db/t1/".format(unique_database))
|
||||
"{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))
|
||||
|
||||
# Dropping the table removes the table's directory and preserves the db's directory
|
||||
self.client.execute("drop table {0}.t1".format(unique_database))
|
||||
assert not self.filesystem_client.exists(
|
||||
"test-warehouse/{0}.db/t1/".format(unique_database))
|
||||
"{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))
|
||||
assert self.filesystem_client.exists(
|
||||
"test-warehouse/{0}.db/".format(unique_database))
|
||||
"{1}/{0}.db/".format(unique_database, WAREHOUSE))
|
||||
|
||||
# Dropping the db removes the db's directory
|
||||
self.client.execute("drop database {0}".format(unique_database))
|
||||
assert not self.filesystem_client.exists(
|
||||
"test-warehouse/{0}.db/".format(unique_database))
|
||||
"{1}/{0}.db/".format(unique_database, WAREHOUSE))
|
||||
|
||||
# Dropping the db using "cascade" removes all tables' and db's directories
|
||||
# but keeps the external tables' directory
|
||||
@@ -138,17 +131,17 @@ class TestDdlStatements(TestDdlBase):
|
||||
"location '{1}/{0}/t3/'".format(unique_database, WAREHOUSE))
|
||||
self.client.execute("drop database {0} cascade".format(unique_database))
|
||||
assert not self.filesystem_client.exists(
|
||||
"test-warehouse/{0}.db/".format(unique_database))
|
||||
"{1}/{0}.db/".format(unique_database, WAREHOUSE))
|
||||
assert not self.filesystem_client.exists(
|
||||
"test-warehouse/{0}.db/t1/".format(unique_database))
|
||||
"{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))
|
||||
assert not self.filesystem_client.exists(
|
||||
"test-warehouse/{0}.db/t2/".format(unique_database))
|
||||
"{1}/{0}.db/t2/".format(unique_database, WAREHOUSE))
|
||||
assert self.filesystem_client.exists(
|
||||
"test-warehouse/{0}/t3/".format(unique_database))
|
||||
"{1}/{0}/t3/".format(unique_database, WAREHOUSE))
|
||||
self.filesystem_client.delete_file_dir(
|
||||
"test-warehouse/{0}/t3/".format(unique_database), recursive=True)
|
||||
"{1}/{0}/t3/".format(unique_database, WAREHOUSE), recursive=True)
|
||||
assert not self.filesystem_client.exists(
|
||||
"test-warehouse/{0}/t3/".format(unique_database))
|
||||
"{1}/{0}/t3/".format(unique_database, WAREHOUSE))
|
||||
# Re-create database to make unique_database teardown succeed.
|
||||
self._create_db(unique_database)
|
||||
|
||||
@@ -157,12 +150,12 @@ class TestDdlStatements(TestDdlBase):
|
||||
def test_truncate_cleans_hdfs_files(self, unique_database):
# Verify the db directory exists
assert self.filesystem_client.exists(
- "test-warehouse/{0}.db/".format(unique_database))
+ "{1}/{0}.db/".format(unique_database, WAREHOUSE))

self.client.execute("create table {0}.t1(i int)".format(unique_database))
# Verify the table directory exists
assert self.filesystem_client.exists(
- "test-warehouse/{0}.db/t1/".format(unique_database))
+ "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))

try:
# If we're testing S3, we want the staging directory to be created.
@@ -170,31 +163,31 @@ class TestDdlStatements(TestDdlBase):
# Should have created one file in the table's dir
self.client.execute("insert into {0}.t1 values (1)".format(unique_database))
assert len(self.filesystem_client.ls(
- "test-warehouse/{0}.db/t1/".format(unique_database))) == 2
+ "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 2

# Truncating the table removes the data files and preserves the table's directory
self.client.execute("truncate table {0}.t1".format(unique_database))
assert len(self.filesystem_client.ls(
- "test-warehouse/{0}.db/t1/".format(unique_database))) == 1
+ "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 1

self.client.execute(
"create table {0}.t2(i int) partitioned by (p int)".format(unique_database))
# Verify the table directory exists
assert self.filesystem_client.exists(
- "test-warehouse/{0}.db/t2/".format(unique_database))
+ "{1}/{0}.db/t2/".format(unique_database, WAREHOUSE))

# Should have created the partition dir, which should contain exactly one file
self.client.execute(
"insert into {0}.t2 partition(p=1) values (1)".format(unique_database))
assert len(self.filesystem_client.ls(
- "test-warehouse/{0}.db/t2/p=1".format(unique_database))) == 1
+ "{1}/{0}.db/t2/p=1".format(unique_database, WAREHOUSE))) == 1

# Truncating the table removes the data files and preserves the partition's directory
self.client.execute("truncate table {0}.t2".format(unique_database))
assert self.filesystem_client.exists(
- "test-warehouse/{0}.db/t2/p=1".format(unique_database))
+ "{1}/{0}.db/t2/p=1".format(unique_database, WAREHOUSE))
assert len(self.filesystem_client.ls(
- "test-warehouse/{0}.db/t2/p=1".format(unique_database))) == 0
+ "{1}/{0}.db/t2/p=1".format(unique_database, WAREHOUSE))) == 0
finally:
# Reset to its default value.
self.client.execute("set s3_skip_insert_staging=true")
@@ -455,11 +448,10 @@ class TestDdlStatements(TestDdlBase):
# use the (key=value) format. The directory is automatically cleanup up
# by the unique_database fixture.
self.client.execute("create table {0}.part_data (i int)".format(unique_database))
- assert self.filesystem_client.exists(
- "test-warehouse/{0}.db/part_data".format(unique_database))
+ dbpath = "{1}/{0}.db".format(unique_database, WAREHOUSE)
+ assert self.filesystem_client.exists("{}/part_data".format(dbpath))
self.filesystem_client.create_file(
- "test-warehouse/{0}.db/part_data/data.txt".format(unique_database),
- file_data='1984')
+ "{}/part_data/data.txt".format(dbpath), file_data='1984')
self.run_test_case('QueryTest/alter-table', vector, use_db=unique_database,
multiple_impalad=self._use_multiple_impalad(vector))
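
The edits above all follow one pattern: a hard-coded "test-warehouse/..." string becomes a path built from the WAREHOUSE constant, which the tests/util/filesystem_utils hunk near the end of this diff defines as get_fs_path('/test-warehouse'). A minimal sketch of how such a helper can qualify warehouse paths against a prefix taken from the environment; the FILESYSTEM_PREFIX variable name here is an illustrative assumption, not necessarily the project's exact implementation:

import os

# Assumed environment variable: empty on a plain HDFS minicluster, and a fully
# qualified URI such as "ofs://localhost:9862/volume" when Ozone is the default
# filesystem.
FILESYSTEM_PREFIX = os.getenv("FILESYSTEM_PREFIX", "")

def get_fs_path(path):
  # Qualify a warehouse-relative path so the same test works on HDFS, S3 or Ozone.
  return "%s%s" % (FILESYSTEM_PREFIX, path)

WAREHOUSE = get_fs_path('/test-warehouse')

# "{1}/{0}.db/t1/".format("mydb", WAREHOUSE) then yields a fully qualified
# location such as ofs://localhost:9862/volume/test-warehouse/mydb.db/t1/.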
@@ -483,20 +475,16 @@ class TestDdlStatements(TestDdlBase):
# Add two partitions (j=1) and (j=2) to table t1
self.client.execute("alter table {0}.t1 add partition(j=1)".format(unique_database))
self.client.execute("alter table {0}.t1 add partition(j=2)".format(unique_database))
- self.filesystem_client.create_file(\
- "test-warehouse/{0}.db/t1/j=1/j1.txt".format(unique_database), file_data='j1')
- self.filesystem_client.create_file(\
- "test-warehouse/{0}.db/t1/j=2/j2.txt".format(unique_database), file_data='j2')
+ dbpath = "{1}/{0}.db".format(unique_database, WAREHOUSE)
+ self.filesystem_client.create_file("{}/t1/j=1/j1.txt".format(dbpath), file_data='j1')
+ self.filesystem_client.create_file("{}/t1/j=2/j2.txt".format(dbpath), file_data='j2')
# Drop the partition (j=1) without purge and make sure it exists in trash
self.client.execute("alter table {0}.t1 drop partition(j=1)".format(unique_database))
- assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=1/j1.txt".\
- format(unique_database))
- assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=1".\
- format(unique_database))
- assert self.filesystem_client.exists(
- '{0}/test-warehouse/{1}.db/t1/j=1/j1.txt'.format(TRASH_PATH, unique_database))
- assert self.filesystem_client.exists(
- '{0}/test-warehouse/{1}.db/t1/j=1'.format(TRASH_PATH, unique_database))
+ assert not self.filesystem_client.exists("{}/t1/j=1/j1.txt".format(dbpath))
+ assert not self.filesystem_client.exists("{}/t1/j=1".format(dbpath))
+ trash = get_trash_path("test-warehouse", unique_database + ".db")
+ assert self.filesystem_client.exists('{}/t1/j=1/j1.txt'.format(trash))
+ assert self.filesystem_client.exists('{}/t1/j=1'.format(trash))
# Drop the partition (with purge) and make sure it doesn't exist in trash
self.client.execute("alter table {0}.t1 drop partition(j=2) purge".\
format(unique_database));
@@ -507,14 +495,10 @@ class TestDdlStatements(TestDdlBase):
# consistent.
# The ADLS Python client is not strongly consistent, so these files may still be
# visible after a DROP. (Remove after IMPALA-5335 is resolved)
- assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=2/j2.txt".\
- format(unique_database))
- assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=2".\
- format(unique_database))
- assert not self.filesystem_client.exists(
- '{0}/test-warehouse/{1}.db/t1/j=2/j2.txt'.format(TRASH_PATH, unique_database))
- assert not self.filesystem_client.exists(
- '{0}/test-warehouse/{1}.db/t1/j=2'.format(TRASH_PATH, unique_database))
+ assert not self.filesystem_client.exists("{}/t1/j=2/j2.txt".format(dbpath))
+ assert not self.filesystem_client.exists("{}/t1/j=2".format(dbpath))
+ assert not self.filesystem_client.exists('{}/t1/j=2/j2.txt'.format(trash))
+ assert not self.filesystem_client.exists('{}/t1/j=2'.format(trash))
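
The old assertions built trash locations by hand from a TRASH_PATH constant; the new ones ask a get_trash_path() helper for them. That helper's implementation is not part of this excerpt. A plausible sketch, assuming the usual Hadoop per-user trash layout /user/<user>/.Trash/Current/<original path> and reusing get_fs_path(); the exact signature and layout in the repository may differ:

import getpass
from tests.util.filesystem_utils import get_fs_path

def get_trash_path(bucket, path):
  # e.g. get_trash_path("test-warehouse", "mydb.db") ->
  #   .../user/<current user>/.Trash/Current/test-warehouse/mydb.db
  return get_fs_path('/user/{0}/.Trash/Current/{1}/{2}'.format(
      getpass.getuser(), bucket, path))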

@UniqueDatabase.parametrize(sync_ddl=True)
def test_views_ddl(self, vector, unique_database):
@@ -751,8 +735,8 @@ class TestDdlStatements(TestDdlBase):
tbl_name = "test_tbl"
self.execute_query_expect_success(self.client, "create table {0}.{1} (c1 string)"
.format(unique_database, tbl_name))
- self.filesystem_client.create_file("test-warehouse/{0}.db/{1}/f".
- format(unique_database, tbl_name),
+ self.filesystem_client.create_file("{2}/{0}.db/{1}/f".
+ format(unique_database, tbl_name, WAREHOUSE),
file_data="\nfoo\n")
self.execute_query_expect_success(self.client,
"alter table {0}.{1} set tblproperties"

@@ -210,8 +210,8 @@ class TestExplainEmptyPartition(ImpalaTestSuite):
"ALTER TABLE %s.empty_partition ADD PARTITION (p=NULL)" % self.TEST_DB_NAME)
# Put an empty file in the partition so we have > 0 files, but 0 rows
self.filesystem_client.create_file(
- "test-warehouse/%s.db/empty_partition/p=__HIVE_DEFAULT_PARTITION__/empty" %
- self.TEST_DB_NAME, "")
+ "{1}/{0}.db/empty_partition/p=__HIVE_DEFAULT_PARTITION__/empty".
+ format(self.TEST_DB_NAME, WAREHOUSE), "")
self.client.execute("REFRESH %s.empty_partition" % self.TEST_DB_NAME)
self.client.execute("COMPUTE STATS %s.empty_partition" % self.TEST_DB_NAME)
assert "NULL\t0\t1" in str(
@@ -224,8 +224,8 @@ class TestExplainEmptyPartition(ImpalaTestSuite):
# that its lack of stats is correctly identified
self.client.execute(
"ALTER TABLE %s.empty_partition ADD PARTITION (p=1)" % self.TEST_DB_NAME)
- self.filesystem_client.create_file("test-warehouse/%s.db/empty_partition/p=1/rows" %
- self.TEST_DB_NAME, "1")
+ self.filesystem_client.create_file(
+ "{1}/{0}.db/empty_partition/p=1/rows".format(self.TEST_DB_NAME, WAREHOUSE), "1")
self.client.execute("REFRESH %s.empty_partition" % self.TEST_DB_NAME)
explain_result = str(
self.client.execute("EXPLAIN SELECT * FROM %s.empty_partition" % self.TEST_DB_NAME))

@@ -28,13 +28,13 @@ from tests.common.test_dimensions import (
create_uncompressed_text_dimension)
from tests.common.skip import SkipIfLocal
from tests.common.test_vector import ImpalaTestDimension
- from tests.util.filesystem_utils import (WAREHOUSE)
+ from tests.util.filesystem_utils import WAREHOUSE

TEST_TBL_PART = "test_load"
TEST_TBL_NOPART = "test_load_nopart"
- STAGING_PATH = 'test-warehouse/test_load_staging'
- ALLTYPES_PATH = "test-warehouse/alltypes/year=2010/month=1/100101.txt"
- MULTIAGG_PATH = 'test-warehouse/alltypesaggmultifiles/year=2010/month=1/day=1'
+ STAGING_PATH = '%s/test_load_staging' % WAREHOUSE
+ ALLTYPES_PATH = "%s/alltypes/year=2010/month=1/100101.txt" % WAREHOUSE
+ MULTIAGG_PATH = '%s/alltypesaggmultifiles/year=2010/month=1/day=1' % WAREHOUSE
HIDDEN_FILES = ["{0}/3/.100101.txt".format(STAGING_PATH),
"{0}/3/_100101.txt".format(STAGING_PATH)]

@@ -155,7 +155,7 @@ class TestAsyncLoadData(ImpalaTestSuite):

# Create a table with the staging path
self.client.execute("create table {0} like functional.alltypesnopart \
- location \'/{1}\'".format(qualified_table_name, staging_path))
+ location \'{1}\'".format(qualified_table_name, staging_path))

try:

@@ -171,7 +171,7 @@ class TestAsyncLoadData(ImpalaTestSuite):
enable_async_load_data
delay = "CRS_DELAY_BEFORE_LOAD_DATA:SLEEP@3000"
new_vector.get_value('exec_option')['debug_action'] = "{0}".format(delay)
- load_stmt = "load data inpath \'/{1}\' \
+ load_stmt = "load data inpath \'{1}\' \
into table {0}".format(qualified_table_name, staging_path)
exec_start = time.time()
handle = self.execute_query_async_using_client(client, load_stmt, new_vector)

@@ -55,7 +55,7 @@ class TestRecoverPartitions(ImpalaTestSuite):
create_uncompressed_text_dimension(cls.get_workload()))

def __get_fs_location(self, db_name, table_name):
- return 'test-warehouse/%s.db/%s/' % (db_name, table_name)
+ return '%s/%s.db/%s/' % (WAREHOUSE, db_name, table_name)

@SkipIfLocal.hdfs_client
def test_recover_partitions(self, vector, unique_database):

@@ -86,36 +86,33 @@ class TestRecursiveListing(ImpalaTestSuite):
fq_tbl_name, part_path = self._init_test_table(unique_database, partitioned)

# Add a file inside a nested directory and refresh.
- self.filesystem_client.make_dir("{0}/dir1".format(part_path[1:]))
- self.filesystem_client.create_file("{0}/dir1/file1.txt".format(part_path[1:]),
- "file1")
+ self.filesystem_client.make_dir("{0}/dir1".format(part_path))
+ self.filesystem_client.create_file("{0}/dir1/file1.txt".format(part_path), "file1")
self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name))
assert len(self._show_files(fq_tbl_name)) == 1
assert len(self._get_rows(fq_tbl_name)) == 1

# Add another file inside the same directory, make sure it shows up.
- self.filesystem_client.create_file("{0}/dir1/file2.txt".format(part_path[1:]),
- "file2")
+ self.filesystem_client.create_file("{0}/dir1/file2.txt".format(part_path), "file2")
self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name))
assert len(self._show_files(fq_tbl_name)) == 2
assert len(self._get_rows(fq_tbl_name)) == 2

# Add a file at the top level, make sure it shows up.
- self.filesystem_client.create_file("{0}/file3.txt".format(part_path[1:]),
- "file3")
+ self.filesystem_client.create_file("{0}/file3.txt".format(part_path), "file3")
self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name))
assert len(self._show_files(fq_tbl_name)) == 3
assert len(self._get_rows(fq_tbl_name)) == 3

# Create files in the nested hidden directories and refresh. Make sure it does not
# show up
- self.filesystem_client.make_dir("{0}/.hive-staging".format(part_path[1:]))
+ self.filesystem_client.make_dir("{0}/.hive-staging".format(part_path))
self.filesystem_client.create_file(
- "{0}/.hive-staging/file3.txt".format(part_path[1:]),
+ "{0}/.hive-staging/file3.txt".format(part_path),
"data-should-be-ignored-by-impala")
- self.filesystem_client.make_dir("{0}/_tmp.base_000000_1".format(part_path[1:]))
+ self.filesystem_client.make_dir("{0}/_tmp.base_000000_1".format(part_path))
self.filesystem_client.create_file(
- "{0}/_tmp.base_000000_1/000000_0.manifest".format(part_path[1:]),
+ "{0}/_tmp.base_000000_1/000000_0.manifest".format(part_path),
"manifest-file_contents")
self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name))
assert len(self._show_files(fq_tbl_name)) == 3
@@ -135,8 +132,7 @@ class TestRecursiveListing(ImpalaTestSuite):
assert len(self._get_rows(fq_tbl_name)) == 3

# Remove the dir with two files. One should remain.
- self.filesystem_client.delete_file_dir("{0}/dir1".format(part_path[1:]),
- recursive=True)
+ self.filesystem_client.delete_file_dir("{0}/dir1".format(part_path), recursive=True)
self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name))
assert len(self._show_files(fq_tbl_name)) == 1
assert len(self._get_rows(fq_tbl_name)) == 1
@@ -201,18 +197,15 @@ class TestRecursiveListing(ImpalaTestSuite):
response = requests.get(self.enable_fs_tracing_url)
assert response.status_code == requests.codes.ok
try:
- # self.filesystem_client is a DelegatingHdfsClient. It delegates delete_file_dir()
- # and make_dir() to the underlying PyWebHdfsClient which expects the HDFS path
- # without a leading '/'. So we use large_dir[1:] to remove the leading '/'.
- self.filesystem_client.delete_file_dir(large_dir[1:], recursive=True)
- self.filesystem_client.make_dir(large_dir[1:])
+ self.filesystem_client.delete_file_dir(large_dir, recursive=True)
+ self.filesystem_client.make_dir(large_dir)
self.filesystem_client.touch(files)
LOG.info("created staging files under " + large_dir)
handle = self.execute_query_async(refresh_stmt)
# Wait a moment to let REFRESH finish expected partial listing on the dir.
time.sleep(pause_ms_before_file_cleanup / 1000.0)
LOG.info("removing staging dir " + large_dir)
- self.filesystem_client.delete_file_dir(large_dir[1:], recursive=True)
+ self.filesystem_client.delete_file_dir(large_dir, recursive=True)
LOG.info("removed staging dir " + large_dir)
try:
self.client.fetch(refresh_stmt, handle)

@@ -24,6 +24,7 @@ from tests.common.skip import SkipIf, SkipIfHive2
from tests.common.test_dimensions import create_uncompressed_text_dimension
from tests.util.test_file_parser import QueryTestSectionReader, remove_comments
from tests.common.environ import HIVE_MAJOR_VERSION
+ from tests.util.filesystem_utils import WAREHOUSE


# The purpose of the show create table tests are to ensure that the "SHOW CREATE TABLE"
@@ -105,7 +106,7 @@ class TestShowCreateTable(ImpalaTestSuite):

if not test_case.existing_table:
# create table in Impala
- self.__exec(test_case.create_table_sql)
+ self.__exec(self.__replace_warehouse(test_case.create_table_sql))
# execute "SHOW CREATE TABLE ..."
result = self.__exec(test_case.show_create_table_sql)
create_table_result = self.__normalize(result.data[0])
@@ -115,9 +116,9 @@ class TestShowCreateTable(ImpalaTestSuite):
self.__exec(test_case.drop_table_sql)

# check the result matches the expected result
- expected_result = self.__normalize(self.__replace_uri(
+ expected_result = self.__normalize(self.__replace_warehouse(self.__replace_uri(
test_case.expected_result,
- self.__get_location_uri(create_table_result)))
+ self.__get_location_uri(create_table_result))))
self.__compare_result(expected_result, create_table_result)

if test_case.existing_table:
@@ -205,6 +206,9 @@ class TestShowCreateTable(ImpalaTestSuite):
def __replace_uri(self, s, uri):
return s if uri is None else s.replace("$$location_uri$$", uri)

+ def __replace_warehouse(self, s):
+ return s.replace("$$warehouse$$", WAREHOUSE)


# Represents one show-create-table test case. Performs validation of the test sections
# and provides SQL to execute for each section.

@@ -40,7 +40,7 @@ class TestTestcaseBuilder(ImpalaTestSuite):
tmp_path = get_fs_path("/tmp")
# Make sure /tmp dir exists
if not self.filesystem_client.exists(tmp_path):
- self.filesystem_client.make_dir('tmp')
+ self.filesystem_client.make_dir(tmp_path)
# Generate Testcase Data for query without table reference
testcase_generate_query = """COPY TESTCASE TO '%s' SELECT 5 * 20""" % tmp_path
result = self.execute_query_expect_success(self.client, testcase_generate_query)
@@ -48,7 +48,7 @@ class TestTestcaseBuilder(ImpalaTestSuite):

# Check file exists
testcase_path = str(result.data)[1: -1]
- index = testcase_path.index('/tmp')
+ index = testcase_path.index(tmp_path)
hdfs_path = testcase_path[index:-1]
assert self.filesystem_client.exists(hdfs_path), \
"File not generated {0}".format(hdfs_path)

@@ -27,7 +27,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_dimensions import create_single_exec_option_dimension
from tests.common.test_result_verifier import verify_query_result_is_equal
from tests.common.test_vector import ImpalaTestDimension
- from tests.util.filesystem_utils import get_fs_path
+ from tests.util.filesystem_utils import get_fs_path, WAREHOUSE


# (file extension, table suffix) pairs
@@ -65,14 +65,14 @@ class TestCompressedFormatsBase(ImpalaTestSuite):
new table. Unless expected_error is set, it expects the query to run successfully.
"""
# Calculate locations for the source table
- base_dir = '/test-warehouse'
+ base_dir = WAREHOUSE
src_table = "functional{0}.{1}".format(db_suffix, table_name)
src_table_dir = join(base_dir, table_name + db_suffix)
file_basename = self.filesystem_client.ls(src_table_dir)[0]
src_file = join(src_table_dir, file_basename)

# Calculate locations for the destination table
- dest_table_dir = "/test-warehouse/{0}.db/{1}".format(unique_database, table_name)
+ dest_table_dir = "{2}/{0}.db/{1}".format(unique_database, table_name, WAREHOUSE)
dest_table = "{0}.{1}".format(unique_database, table_name)
dest_file = join(dest_table_dir, file_basename + dest_extension)

@@ -240,7 +240,7 @@ class TestLargeCompressedFile(ImpalaTestSuite):
# is generated from a string of 50176 bytes.
payload_size = 50176
hdfs_cat = subprocess.Popen(["hadoop", "fs", "-cat",
- "/test-warehouse/compressed_payload.snap"], stdout=subprocess.PIPE)
+ "%s/compressed_payload.snap" % WAREHOUSE], stdout=subprocess.PIPE)
compressed_payload = hdfs_cat.stdout.read()
compressed_size = len(compressed_payload)
hdfs_cat.stdout.close()

@@ -17,6 +17,7 @@

import getpass
import grp
+ import os
import pwd
import pytest
import re
@@ -25,7 +26,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.parametrize import UniqueDatabase
from tests.common.skip import (SkipIfFS, SkipIfLocal, SkipIfDockerizedCluster,
SkipIfCatalogV2)
- from tests.util.filesystem_utils import WAREHOUSE, get_fs_path, IS_S3
+ from tests.util.filesystem_utils import WAREHOUSE, IS_S3


@SkipIfLocal.hdfs_client
@@ -52,8 +53,8 @@ class TestInsertBehaviour(ImpalaTestSuite):
@pytest.mark.execute_serially
def test_insert_removes_staging_files(self):
TBL_NAME = "insert_overwrite_nopart"
- insert_staging_dir = ("test-warehouse/functional.db/%s/"
- "_impala_insert_staging" % TBL_NAME)
+ insert_staging_dir = ("%s/functional.db/%s/"
+ "_impala_insert_staging" % (WAREHOUSE, TBL_NAME))
self.filesystem_client.delete_file_dir(insert_staging_dir, recursive=True)
self.client.execute("INSERT OVERWRITE functional.%s "
"SELECT int_col FROM functional.tinyinttable" % TBL_NAME)
@@ -64,7 +65,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
def test_insert_preserves_hidden_files(self):
"""Test that INSERT OVERWRITE preserves hidden files in the root table directory"""
TBL_NAME = "insert_overwrite_nopart"
- table_dir = "test-warehouse/functional.db/%s/" % TBL_NAME
+ table_dir = "%s/functional.db/%s/" % (WAREHOUSE, TBL_NAME)
hidden_file_locations = [".hidden", "_hidden"]
dir_locations = ["dir", ".hidden_dir"]

@@ -103,8 +104,10 @@ class TestInsertBehaviour(ImpalaTestSuite):
def test_insert_alter_partition_location(self, unique_database):
"""Test that inserts after changing the location of a partition work correctly,
including the creation of a non-existant partition dir"""
- part_dir = "tmp/{0}".format(unique_database)
- qualified_part_dir = get_fs_path('/' + part_dir)
+ # Moved to WAREHOUSE for Ozone because it does not allow rename between buckets.
+ work_dir = "%s/tmp" % WAREHOUSE
+ self.filesystem_client.make_dir(work_dir)
+ part_dir = os.path.join(work_dir, unique_database)
table_name = "`{0}`.`insert_alter_partition_location`".format(unique_database)

self.execute_query_expect_success(self.client, "DROP TABLE IF EXISTS %s" % table_name)
@@ -119,7 +122,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
self.execute_query_expect_success(
self.client,
"ALTER TABLE %s PARTITION(p=1) SET LOCATION '%s'" % (table_name,
- qualified_part_dir))
+ part_dir))
self.execute_query_expect_success(
self.client,
"INSERT OVERWRITE %s PARTITION(p=1) VALUES(1)" % table_name)
@@ -492,7 +495,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
"should {1}exist but does {2}exist.".format(
path, '' if should_exist else 'not ', 'not ' if should_exist else '')

- db_path = "test-warehouse/%s.db/" % self.TEST_DB_NAME
+ db_path = "%s/%s.db/" % (WAREHOUSE, self.TEST_DB_NAME)
table_path = db_path + "test_insert_empty_result"
partition_path = "{0}/year=2009/month=1".format(table_path)
check_path_exists(table_path, False)
@@ -600,14 +603,13 @@ class TestInsertBehaviour(ImpalaTestSuite):
if self.exploration_strategy() != 'exhaustive' and IS_S3:
pytest.skip("only runs in exhaustive")
table = "{0}.insert_clustered".format(unique_database)
- table_path = "test-warehouse/{0}.db/insert_clustered".format(unique_database)
- table_location = get_fs_path("/" + table_path)
+ table_path = "{1}/{0}.db/insert_clustered".format(unique_database, WAREHOUSE)

create_stmt = """create table {0} like functional.alltypes""".format(table)
self.execute_query_expect_success(self.client, create_stmt)

set_location_stmt = """alter table {0} set location '{1}'""".format(
- table, table_location)
+ table, table_path)
self.execute_query_expect_success(self.client, set_location_stmt)

# Setting a lower batch size will result in multiple row batches being written.
@@ -635,8 +637,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
if self.exploration_strategy() != 'exhaustive':
pytest.skip("only runs in exhaustive")
table = "{0}.insert_clustered".format(unique_database)
- table_path = "test-warehouse/{0}.db/insert_clustered".format(unique_database)
- table_location = get_fs_path("/" + table_path)
+ table_path = "{1}/{0}.db/insert_clustered".format(unique_database, WAREHOUSE)

create_stmt = """create table {0} (
l_orderkey BIGINT,
@@ -659,7 +660,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
self.execute_query_expect_success(self.client, create_stmt)

set_location_stmt = """alter table {0} set location '{1}'""".format(
- table, table_location)
+ table, table_path)
self.execute_query_expect_success(self.client, set_location_stmt)

# Setting a lower parquet file size will result in multiple files being written.

@@ -32,7 +32,7 @@ from tests.common.skip import SkipIfEC, SkipIfFS, SkipIfLocal
from tests.common.test_dimensions import create_exec_option_dimension
from tests.common.test_result_verifier import verify_query_result_is_equal
from tests.common.test_vector import ImpalaTestDimension
- from tests.util.filesystem_utils import get_fs_path
+ from tests.util.filesystem_utils import get_fs_path, WAREHOUSE
from tests.util.get_parquet_metadata import (decode_stats_value,
get_parquet_metadata_from_hdfs_folder)

@@ -328,8 +328,8 @@ class TestHdfsParquetTableWriter(ImpalaTestSuite):
to have columns with different signed integer logical types. The test verifies
that parquet file written by the hdfs parquet table writer using the generated
file has the same column type metadata as the generated one."""
- hdfs_path = (os.environ['DEFAULT_FS'] + "/test-warehouse/{0}.db/"
- "signed_integer_logical_types.parquet").format(unique_database)
+ hdfs_path = "{1}/{0}.db/signed_integer_logical_types.parquet".\
+ format(unique_database, WAREHOUSE)
self.filesystem_client.copy_from_local(os.environ['IMPALA_HOME'] +
'/testdata/data/signed_integer_logical_types.parquet', hdfs_path)
# Create table with signed integer logical types

@@ -20,7 +20,7 @@ from datetime import datetime
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfFS, SkipIfLocal, SkipIfNotHdfsMinicluster
- from tests.util.filesystem_utils import IS_EC
+ from tests.util.filesystem_utils import IS_EC, WAREHOUSE
from time import sleep
from RuntimeProfile.ttypes import TRuntimeProfileFormat
import pytest
@@ -424,7 +424,7 @@ class TestObservability(ImpalaTestSuite):
def test_query_profile_contains_all_events(self, unique_database):
"""Test that the expected events show up in a query profile for various queries"""
# make a data file to load data from
- path = "test-warehouse/{0}.db/data_file".format(unique_database)
+ path = "{1}/{0}.db/data_file".format(unique_database, WAREHOUSE)
self.filesystem_client.create_file(path, "1")
use_query = "use {0}".format(unique_database)
self.execute_query(use_query)
@@ -441,7 +441,7 @@ class TestObservability(ImpalaTestSuite):
'explain select * from impala_6568',
'describe impala_6568',
'alter table impala_6568 set tblproperties(\'numRows\'=\'10\')',
- "load data inpath '/{0}' into table impala_6568".format(path)
+ "load data inpath '{0}' into table impala_6568".format(path)
]
# run each query...
for query in queries:

@@ -29,6 +29,7 @@ from tests.common.skip import SkipIfEC, SkipIfLocal, SkipIfFS
from tests.common.test_dimensions import add_exec_option_dimension
from tests.common.test_vector import ImpalaTestDimension
from tests.verifiers.metric_verifier import MetricVerifier
+ from tests.util.filesystem_utils import WAREHOUSE

# slow_build_timeout is set to 200000 to avoid failures like IMPALA-8064 where the
# runtime filters don't arrive in time.
@@ -320,12 +321,10 @@ class TestOverlapMinMaxFilters(ImpalaTestSuite):
tbl_name = "part_col_in_data_file"
self.execute_query("CREATE TABLE {0}.{1} (i INT) PARTITIONED BY (d DATE) "
"STORED AS PARQUET".format(unique_database, tbl_name))
- tbl_loc = self._get_table_location("{0}.{1}".format(unique_database, tbl_name),
- vector)
- self.filesystem_client.make_dir(tbl_loc[tbl_loc.find('test-warehouse'):] +
- '/d=2022-02-22/')
+ tbl_loc = "%s/%s/%s/d=2022-02-22/" % (WAREHOUSE, unique_database, tbl_name)
+ self.filesystem_client.make_dir(tbl_loc)
self.filesystem_client.copy_from_local(os.environ['IMPALA_HOME'] +
- '/testdata/data/partition_col_in_parquet.parquet', tbl_loc + '/d=2022-02-22/')
+ '/testdata/data/partition_col_in_parquet.parquet', tbl_loc)
self.execute_query("ALTER TABLE {0}.{1} RECOVER PARTITIONS".format(
unique_database, tbl_name))
self.execute_query("SET PARQUET_FALLBACK_SCHEMA_RESOLUTION=NAME")

@@ -82,3 +82,4 @@ def get_secondary_fs_path(path):

WAREHOUSE = get_fs_path('/test-warehouse')
FILESYSTEM_NAME = get_fs_name(FILESYSTEM)
+ WAREHOUSE_PREFIX = os.getenv("WAREHOUSE_LOCATION_PREFIX")
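
The new WAREHOUSE_PREFIX constant picks up the WAREHOUSE_LOCATION_PREFIX environment variable, which lets the test warehouse sit under an extra path component (for example an Ozone volume) instead of directly at the filesystem root. A hypothetical helper showing how such a prefix composes with a warehouse-relative path; this function is an illustration, not code from the repository:

import os

def prefixed_warehouse_path(relative_path):
  # "" keeps the classic layout; "/volume" nests the warehouse one level down.
  prefix = os.getenv("WAREHOUSE_LOCATION_PREFIX") or ""
  return "%s%s" % (prefix, relative_path)

# prefixed_warehouse_path("/test-warehouse/foo.db")
#   -> "/test-warehouse/foo.db"         when the prefix is unset
#   -> "/volume/test-warehouse/foo.db"  when WAREHOUSE_LOCATION_PREFIX=/volume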

@@ -67,9 +67,11 @@ class DelegatingHdfsClient(BaseFilesystem):
super(DelegatingHdfsClient, self).__init__()

def create_file(self, path, file_data, overwrite=True):
+ path = self._normalize_path(path)
return self.webhdfs_client.create_file(path, file_data, overwrite=overwrite)

def make_dir(self, path, permission=None):
+ path = self._normalize_path(path)
if permission:
return self.webhdfs_client.make_dir(path, permission=permission)
else:
@@ -82,18 +84,23 @@ class DelegatingHdfsClient(BaseFilesystem):
self.hdfs_filesystem_client.copy_from_local(src, dst)

def ls(self, path):
+ path = self._normalize_path(path)
return self.webhdfs_client.ls(path)

def exists(self, path):
+ path = self._normalize_path(path)
return self.webhdfs_client.exists(path)

def delete_file_dir(self, path, recursive=False):
+ path = self._normalize_path(path)
return self.webhdfs_client.delete_file_dir(path, recursive=recursive)

def get_file_dir_status(self, path):
+ path = self._normalize_path(path)
return self.webhdfs_client.get_file_dir_status(path)

def get_all_file_sizes(self, path):
+ path = self._normalize_path(path)
return self.webhdfs_client.get_all_file_sizes(path)

def chmod(self, path, permission):
@@ -111,6 +118,11 @@ class DelegatingHdfsClient(BaseFilesystem):
def touch(self, paths):
return self.hdfs_filesystem_client.touch(paths)

+ def _normalize_path(self, path):
+ """Paths passed in may include a leading slash. Remove it as the underlying
+ PyWebHdfsClient expects the HDFS path without a leading '/'."""
+ return path[1:] if path.startswith('/') else path
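
A standalone restatement of that normalization rule makes it easy to see what the webhdfs delegate now receives; the two assertions mirror the spellings used by the tests above, with and without a leading slash:

def normalize_path(path):
  # Same rule as DelegatingHdfsClient._normalize_path: strip one leading '/'.
  return path[1:] if path.startswith('/') else path

assert normalize_path("/test-warehouse/foo.db/t1") == "test-warehouse/foo.db/t1"
assert normalize_path("test-warehouse/foo.db/t1") == "test-warehouse/foo.db/t1"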

class PyWebHdfsClientWithChmod(PyWebHdfsClient):
def chmod(self, path, permission):
"""Set the permission of 'path' to 'permission' (specified as an octal string, e.g.
@@ -212,20 +224,18 @@ class HadoopFsCommandLineClient(BaseFilesystem):
def create_file(self, path, file_data, overwrite=True):
"""Creates a temporary file with the specified file_data on the local filesystem,
then puts it into the specified path."""
- fixed_path = self._normalize_path(path)
- if not overwrite and self.exists(fixed_path): return False
+ if not overwrite and self.exists(path): return False
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
tmp_file.write(file_data)
put_cmd_params = ['-put', '-d']
if overwrite: put_cmd_params.append('-f')
- put_cmd_params.extend([tmp_file.name, fixed_path])
+ put_cmd_params.extend([tmp_file.name, path])
(status, stdout, stderr) = self._hadoop_fs_shell(put_cmd_params)
return status == 0

def make_dir(self, path, permission=None):
"""Create a directory at the specified path. Permissions are not supported."""
- fixed_path = self._normalize_path(path)
- (status, stdout, stderr) = self._hadoop_fs_shell(['-mkdir', '-p', fixed_path])
+ (status, stdout, stderr) = self._hadoop_fs_shell(['-mkdir', '-p', path])
return status == 0

def copy(self, src, dst, overwrite=False):
@@ -233,11 +243,9 @@ class HadoopFsCommandLineClient(BaseFilesystem):
'Skip[s] creation of temporary file with the suffix ._COPYING_.' to avoid extraneous
copies on S3. If overwrite is true, the destination file is overwritten, set to false
by default for backwards compatibility."""
- fixed_src = self._normalize_path(src)
- fixed_dst = self._normalize_path(dst)
cp_cmd_params = ['-cp', '-d']
if overwrite: cp_cmd_params.append('-f')
- cp_cmd_params.extend([fixed_src, fixed_dst])
+ cp_cmd_params.extend([src, dst])
(status, stdout, stderr) = self._hadoop_fs_shell(cp_cmd_params)
assert status == 0, \
'{0} copy failed: '.format(self.filesystem_type) + stderr + "; " + stdout
@@ -259,8 +267,7 @@ class HadoopFsCommandLineClient(BaseFilesystem):

def _inner_ls(self, path):
"""List names, lengths, and mode for files/directories under the specified path."""
- fixed_path = self._normalize_path(path)
- (status, stdout, stderr) = self._hadoop_fs_shell(['-ls', fixed_path])
+ (status, stdout, stderr) = self._hadoop_fs_shell(['-ls', path])
# Trim the "Found X items" line and trailing new-line
entries = stdout.split("\n")[1:-1]
files = []
@@ -275,9 +282,8 @@ class HadoopFsCommandLineClient(BaseFilesystem):

def ls(self, path):
"""Returns a list of all file and directory names in 'path'"""
- fixed_path = self._normalize_path(path)
files = []
- for f in self._inner_ls(fixed_path):
+ for f in self._inner_ls(path):
fname = os.path.basename(f['name'])
if not fname == '':
files += [fname]
@@ -285,31 +291,23 @@ class HadoopFsCommandLineClient(BaseFilesystem):

def exists(self, path):
"""Checks if a particular path exists"""
- fixed_path = self._normalize_path(path)
- (status, stdout, stderr) = self._hadoop_fs_shell(['-test', '-e', fixed_path])
+ (status, stdout, stderr) = self._hadoop_fs_shell(['-test', '-e', path])
return status == 0

def delete_file_dir(self, path, recursive=False):
"""Delete the file or directory given by the specified path. Recursive must be true
for directories."""
- fixed_path = self._normalize_path(path)
- rm_command = ['-rm', fixed_path]
+ rm_command = ['-rm', path]
if recursive:
- rm_command = ['-rm', '-r', fixed_path]
+ rm_command = ['-rm', '-r', path]
(status, stdout, stderr) = self._hadoop_fs_shell(rm_command)
return status == 0

def get_all_file_sizes(self, path):
"""Returns a list of integers which are all the file sizes of files found
under 'path'."""
- fixed_path = self._normalize_path(path)
return [f['length'] for f in
- self._inner_ls(fixed_path) if f['mode'][0] == "-"]
+ self._inner_ls(path) if f['mode'][0] == "-"]
-
- def _normalize_path(self, path):
- """Paths passed in may lack a leading slash. This adds a leading slash if it is
- missing."""
- return path if path.startswith('/') else '/' + path
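
With the add-a-leading-slash normalization gone, HadoopFsCommandLineClient hands whatever path it is given, whether a plain absolute path or a fully qualified URI, to the hadoop fs shell unchanged. A rough stand-in for that plumbing (the client's _hadoop_fs_shell helper is referenced but not shown in this excerpt), illustrating the kind of command both path styles produce; host and volume names in the example are placeholders:

import subprocess

def hadoop_fs(*args):
  # Run "hadoop fs <args>" and return (exit status, stdout, stderr).
  proc = subprocess.Popen(['hadoop', 'fs'] + list(args),
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout, stderr = proc.communicate()
  return proc.returncode, stdout, stderr

# Both forms now pass through untouched; the second is what WAREHOUSE-based
# paths look like when the default filesystem is Ozone.
hadoop_fs('-test', '-e', '/test-warehouse')
hadoop_fs('-test', '-e', 'ofs://localhost:9862/volume/test-warehouse')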
def touch(self, paths):
"""Updates the access and modification times of the files specified by 'paths' to