IMPALA-13279 upgraded gcovr to 7.2 and moved it from Python 2 to Python 3.8.
gcovr has several dependencies that require native compilation, and this
increased the cost of initializing the Python 3 virtualenv substantially:

  Without gcovr:        1m43.279s
  With gcovr and deps:  6m35.107s

This moves gcovr to its own requirements file and only installs gcovr if this
is a coverage build (detected from the .cmake_build_type file).

Testing:
- Verified that a coverage build does install gcovr and produce a report

Change-Id: I1d0fd6d21273053aaf2acee39fcb83d9093d49a2
Reviewed-on: http://gerrit.cloudera.org:8080/21849
Reviewed-by: Laszlo Gaal <laszlo.gaal@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
#!/usr/bin/python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Implement the basic 'pip download' functionality in a way that gives us more
# control over which archive type is downloaded and what post-download steps are
# executed. This script requires Python 2.7+.

from __future__ import absolute_import, division, print_function
import hashlib
import multiprocessing.pool
import os
import os.path
import re
import subprocess
import sys
from random import randint
from time import sleep

NUM_DOWNLOAD_ATTEMPTS = 8

PYPI_MIRROR = os.environ.get('PYPI_MIRROR', 'https://pypi.python.org')

# The requirement files that list all of the required packages and versions.
REQUIREMENTS_FILES = ['requirements.txt', 'setuptools-requirements.txt',
                      'kudu-requirements.txt', 'adls-requirements.txt',
                      'py2-requirements.txt', 'py3-requirements.txt',
                      'gcovr-requirements.txt']
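# Note: gcovr-requirements.txt is downloaded here like the others, but per
# IMPALA-13279 (see the commit message above) the installation script only
# installs it for coverage builds.
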
def check_digest(filename, algorithm, expected_digest):
  '''Returns True iff the digest of 'filename', computed with 'algorithm',
     matches 'expected_digest'.'''
  try:
    supported_algorithms = hashlib.algorithms_available
  except AttributeError:
    # Fall back to a hardcoded set if hashlib.algorithms_available doesn't exist.
    supported_algorithms = set(['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512'])
  if algorithm not in supported_algorithms:
    print('Hash algorithm {0} is not supported by hashlib'.format(algorithm))
    return False
  h = hashlib.new(algorithm)
  with open(filename, mode='rb') as f:
    h.update(f.read())
  actual_digest = h.hexdigest()
  return actual_digest == expected_digest
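# check_digest() is used both to skip archives that are already present with a
# matching digest and to verify freshly downloaded archives; see
# download_package() below.
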
def retry(func):
  '''Retry decorator.'''

  def wrapper(*args, **kwargs):
    for try_num in range(NUM_DOWNLOAD_ATTEMPTS):
      if try_num > 0:
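        # Randomized exponential backoff: with NUM_DOWNLOAD_ATTEMPTS = 8, the
        # upper bound on the sleep grows from 10 * 2**1 = 20 seconds on the
        # first retry to 10 * 2**7 = 1280 seconds before the final attempt.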
        sleep_len = randint(5, 10 * 2 ** try_num)
        print('Sleeping for {0} seconds before retrying'.format(sleep_len))
        sleep(sleep_len)
      try:
        result = func(*args, **kwargs)
        if result:
          return result
      except Exception as e:
        print(e)
    print('Download failed after several attempts.')
    sys.exit(1)

  return wrapper


def get_package_info(pkg_name, pkg_version):
  '''Returns the file name, path, hash algorithm and digest of the package.'''
  # We store each matching result in the candidates list instead of returning
  # right away, so that we can sort them and return the first value in
  # alphabetical order. This ensures that the same result is always returned
  # even if the ordering changes on the server.
  candidates = []
  normalized_name = re.sub(r"[-_.]+", "-", pkg_name).lower()
  url = '{0}/simple/{1}/'.format(PYPI_MIRROR, normalized_name)
  print('Getting package info from {0}'.format(url))
  # The web page should be in PEP 503 format (https://www.python.org/dev/peps/pep-0503/).
  # We parse the page with a regex instead of an HTML parser, because the latter
  # would require downloading an extra package before running this script. Since
  # the HTML is guaranteed to be formatted according to PEP 503, this is acceptable.
  pkg_info = subprocess.check_output(
      ["wget", "-q", "-O", "-", url], universal_newlines=True)
  regex = r'<a .*?href=\".*?packages/(.*?)#(.*?)=(.*?)\".*?>(.*?)<\/a>'
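  # For illustration, a PEP 503 anchor has the (schematic) shape
  #   <a href="https://host/packages/aa/bb/pkg-1.0.tar.gz#sha256=abc123...">pkg-1.0.tar.gz</a>
  # so the four groups capture the path, the hash algorithm, the digest and the
  # file name, respectively.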
  for match in re.finditer(regex, pkg_info):
    path = match.group(1)
    hash_algorithm = match.group(2)
    digest = match.group(3)
    file_name = match.group(4)
    # Consider only non-wheel archives, because wheels are not supported.
    if (file_name.endswith('-{0}.tar.gz'.format(pkg_version)) or
        file_name.endswith('-{0}.tar.bz2'.format(pkg_version)) or
        file_name.endswith('-{0}.zip'.format(pkg_version))):
      candidates.append((file_name, path, hash_algorithm, digest))
  if not candidates:
    print('Could not find archive to download for {0} {1}'.format(pkg_name, pkg_version))
    return (None, None, None, None)
  return sorted(candidates)[0]


@retry
def download_package(pkg_name, pkg_version):
  file_name, path, hash_algorithm, expected_digest = get_package_info(pkg_name,
                                                                      pkg_version)
  if not file_name:
    return False
  if os.path.isfile(file_name) and check_digest(file_name, hash_algorithm,
                                                expected_digest):
    print('File with matching digest already exists, skipping {0}'.format(file_name))
    return True
  pkg_url = '{0}/packages/{1}'.format(PYPI_MIRROR, path)
  print('Downloading {0} from {1}'.format(file_name, pkg_url))
  # wget returns a non-zero exit code on failure. Using subprocess.call() rather
  # than check_call() (which raises instead of ever returning non-zero) makes
  # this check meaningful; either way the @retry wrapper triggers another attempt.
  if 0 != subprocess.call(["wget", pkg_url, "-q", "-O", file_name]):
    return False
  if check_digest(file_name, hash_algorithm, expected_digest):
    return True
  else:
    print('Hash digest check failed for file {0}.'.format(file_name))
    return False


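# When invoked with no arguments, this script downloads every package listed in
# the requirements files in parallel; when invoked with an explicit
# "<name> <version>" pair (e.g. "gcovr 7.2"), it downloads just that package.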
def main():
  if len(sys.argv) > 1:
    _, pkg_name, pkg_version = sys.argv
    download_package(pkg_name, pkg_version)
    return

  pool = multiprocessing.pool.ThreadPool(processes=min(multiprocessing.cpu_count(), 4))
  results = []

  for requirements_file in REQUIREMENTS_FILES:
    # If the package name and version are not specified on the command line,
    # download the packages listed in the requirements files, which follow the
    # standard pip grammar.
    for line in open(requirements_file):
      # A hash symbol ("#") starts a comment that should be ignored.
      line = line.split("#")[0]
      # A semicolon (";") specifies an additional condition for when the package
      # should be installed (for example, a specific OS). We can ignore it and
      # download the package anyway, because the installation script
      # (bootstrap_virtualenv.py) takes it into account.
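      # For example, a (hypothetical) requirements line like
      #   six == 1.14.0 ; python_version < "3.0"  # py2 only
      # reduces to "six == 1.14.0", yielding pkg_name "six" and pkg_version "1.14.0".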
      l = line.split(";")[0].strip()
      if not l:
        continue
      pkg_name, pkg_version = l.split('==')
      results.append(pool.apply_async(
          download_package, args=[pkg_name.strip(), pkg_version.strip()]))

  for x in results:
    x.get()


if __name__ == '__main__':
  main()