Files
impala/tests/util/filesystem_utils.py
Sean Mackrory 7a022cf36a IMPALA-7681. Add Azure Blob File System (ADLS Gen2) support.
HADOOP-15407 adds a new FileSystem implementation called "ABFS" for the
ADLS Gen2 service. It's in the hadoop-azure module as a replacement for
WASB. Filesystem semantics should be the same, so skipped tests and
other behavior changes have simply mirrored what is done for ADLS Gen1
by default. Tests skipped on ADLS Gen1 due to eventual consistency of
the Python client can be run against ADLS Gen2.

Change-Id: I5120b071760e7655e78902dce8483f8f54de445d
Reviewed-on: http://gerrit.cloudera.org:8080/11630
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2018-10-20 06:43:00 +00:00

67 lines
2.7 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Utilities for supporting different filesystems.
import os
# FILESYSTEM_PREFIX is the prefix to put in front of paths used in queries. It is
# the empty string when the tests run against the default filesystem
# (fs.defaultFS); against a secondary filesystem it is the scheme and authority
# portion of the qualified path.
FILESYSTEM_PREFIX = os.environ.get("FILESYSTEM_PREFIX") or ""
SECONDARY_FILESYSTEM = os.environ.get("SECONDARY_FILESYSTEM") or ""

# Name of the filesystem under test (None when TARGET_FILESYSTEM is unset), plus
# one boolean flag per recognized value.
FILESYSTEM = os.environ.get("TARGET_FILESYSTEM")
IS_S3 = FILESYSTEM == "s3"
IS_ISILON = FILESYSTEM == "isilon"
IS_LOCAL = FILESYSTEM == "local"
IS_HDFS = FILESYSTEM == "hdfs"
IS_ADLS = FILESYSTEM == "adls"
IS_ABFS = FILESYSTEM == "abfs"
IS_EC = os.environ.get("ERASURE_CODING") == "true"

# The default filesystem is assumed in either of these situations:
# - FILESYSTEM_PREFIX is empty, which covers both the environment variable being
#   set to an empty string and the environment variable being unset (None).
# - The local filesystem is used; it should always be the default filesystem.
IS_DEFAULT_FS = IS_LOCAL or not FILESYSTEM_PREFIX

# Isilon specific values.
ISILON_NAMENODE = os.environ.get("ISILON_NAMENODE") or ""
ISILON_WEBHDFS_PORT = 8082

# S3 specific values (None when the environment variable is unset).
S3_BUCKET_NAME = os.environ.get("S3_BUCKET")

# ADLS / ABFS specific values (each is None when its environment variable is
# unset).
ABFS_ACCOUNT_NAME = os.environ.get("azure_storage_account_name")
ABFS_CONTAINER_NAME = os.environ.get("azure_storage_container_name")
ADLS_STORE_NAME = os.environ.get("azure_data_lake_store_name")
ADLS_CLIENT_ID = os.environ.get("azure_client_id")
ADLS_TENANT_ID = os.environ.get("azure_tenant_id")
ADLS_CLIENT_SECRET = os.environ.get("azure_client_secret")
def prepend_with_fs(fs, path):
  """Return 'path' prefixed with 'fs', leaving it untouched if already prefixed.

  An empty 'fs' is a prefix of every string, so 'path' is returned unchanged in
  that case.
  """
  if path.startswith(fs):
    return path
  return "%s%s" % (fs, path)
def get_fs_path(path):
  """Return 'path' with FILESYSTEM_PREFIX prepended, unless it already starts
  with that prefix."""
  qualified_path = prepend_with_fs(FILESYSTEM_PREFIX, path)
  return qualified_path
def get_secondary_fs_path(path):
  """Return 'path' with SECONDARY_FILESYSTEM prepended, unless it already starts
  with that prefix."""
  qualified_path = prepend_with_fs(SECONDARY_FILESYSTEM, path)
  return qualified_path
# '/test-warehouse' qualified with FILESYSTEM_PREFIX (see get_fs_path).
WAREHOUSE = get_fs_path("/test-warehouse")