# glazier/lib/download.py
# Snapshot metadata: 2017-01-23 10:57:59 -05:00, 368 lines, 11 KiB, Python
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Download files over HTTPS.
> Resource Requirements
* resources/ca_certs.crt
A certificate file containing permitted root certs for SSL validation.
"""
import hashlib
import logging
import os
import re
import socket
import subprocess
import sys
import tempfile
import time
import urllib2
# Size of the chunks read from the network stream and written to disk (64 KiB).
CHUNK_BYTE_SIZE = 65536
def Transform(string, build_info):
  """Transforms abbreviated file names to absolute file paths.

  Short name support:
    #: A reference to the active release branch location.
    @: A reference to the binary storage root.

  Args:
    string: The configuration string to be transformed.
    build_info: the current build information

  Returns:
    The adjusted file name string to be used in the manifest.
  """
  # Expansion callables are evaluated lazily so that the release path is only
  # compiled when the '#' token is actually present in the string.
  expansions = [
      ('#', lambda: '%s/' % PathCompile(build_info)),
      ('@', lambda: str(build_info.BinaryPath())),
  ]
  for token, expand in expansions:
    if token in string:
      string = string.replace(token, expand())
  return string
def PathCompile(build_info, file_name=None, base=None):
  """Compile the active path from the base path and the active conf path.

  Attempt to do a reasonable job of joining path components with single
  slashes.

  The three main parts considered are the _base_url (or base arg), any
  subdirectories from _conf_path, and the optional file name arg. These are
  combined into [https://base.url][/conf/path/parts][/filename.ext]

  We attempt to strip trailing slashes, so paths without a filename return
  with no trailing /.

  Args:
    build_info: the current build information
    file_name: append a filename to the path
    base: use a non-default base path

  Returns:
    The compiled URL as a string.
  """
  # Collect each path segment with its surrounding slashes stripped, then
  # join with single '/' separators.
  segments = [(base or build_info.ReleasePath()).rstrip('/')]
  conf_path = build_info.ActiveConfigPath()
  if conf_path:
    segments.append('/'.join(conf_path).strip('/'))
  if file_name:
    segments.append(file_name.lstrip('/'))
  return '/'.join(segments)
class DownloadError(Exception):
  """The transfer of the file failed."""
class BaseDownloader(object):
  """Downloads files over HTTPS."""

  def __init__(self, show_progress=False):
    """Initializer.

    Args:
      show_progress: Whether to print download progress to stdout by default.
    """
    # Debugging data collected when a transfer fails; see _StoreDebugInfo.
    self._debug_info = {}
    # Destination path of the active download; set by DownloadFile[Temp].
    self._save_location = None
    self._default_show_progress = show_progress

  def _ConvertBytes(self, num_bytes):
    """Converts number of bytes to a human readable format.

    Args:
      num_bytes: The number to convert to a more human readable format (int).

    Returns:
      The number of bytes in human readable format (string), e.g. '1.25MB'.
    """
    num_bytes = float(num_bytes)
    # Largest unit first so the first threshold match is the correct one.
    for unit_size, suffix in [(1099511627776, 'TB'), (1073741824, 'GB'),
                              (1048576, 'MB'), (1024, 'KB')]:
      if num_bytes >= unit_size:
        return '%.2f%s' % (num_bytes / unit_size, suffix)
    return '%.2fB' % num_bytes

  def _GetHandlers(self):
    """Returns the urllib2 handlers used when opening connections."""
    return [urllib2.HTTPSHandler()]

  def _DownloadFile(self, url, max_retries=5, show_progress=None):
    """Downloads a file and saves it to the preset save location.

    Args:
      url: The address of the file to be downloaded.
      max_retries: The number of times to attempt to download
          a file if the first attempt fails.
      show_progress: Print download progress to stdout (overrides default).

    Raises:
      DownloadError: The host returned an unexpected status code, or all
          retries were exhausted without a successful connection.
    """
    attempt = 0
    file_stream = None
    opener = urllib2.OpenerDirector()
    for handler in self._GetHandlers():
      opener.add_handler(handler)
    urllib2.install_opener(opener)
    while True:
      try:
        attempt += 1
        file_stream = urllib2.urlopen(url)
      # HTTPError is a subclass of URLError, so it must be caught first.
      except urllib2.HTTPError:
        logging.error('File not found on remote server: %s.', url)
      except urllib2.URLError as e:
        logging.error('Error connecting to remote server to download file '
                      '"%s". The error was: %s', url, e)
      if file_stream:
        if file_stream.getcode() == 200:
          break
        raise DownloadError('Invalid return code for file %s. [%d]' %
                            (url, file_stream.getcode()))
      if attempt < max_retries:
        logging.info('Sleeping for 20 seconds and then retrying the download.')
        time.sleep(20)
      else:
        raise DownloadError('Permanent download failure for file %s.' % url)
    self._StreamToDisk(file_stream, show_progress)

  def DownloadFile(self, url, save_location, max_retries=5, show_progress=None):
    """Downloads a file to a specific location on disk.

    Args:
      url: The address of the file to be downloaded.
      save_location: The full path of where the file should be saved.
      max_retries: The number of times to attempt to download
          a file if the first attempt fails.
      show_progress: Print download progress to stdout (overrides default).

    Raises:
      DownloadError: The download failed; see _DownloadFile.
    """
    self._save_location = save_location
    self._DownloadFile(url, max_retries, show_progress)

  def DownloadFileTemp(self, url, max_retries=5, show_progress=None):
    """Downloads a file to temporary storage.

    Args:
      url: The address of the file to be downloaded.
      max_retries: The number of times to attempt to download
          a file if the first attempt fails.
      show_progress: Print download progress to stdout (overrides default).

    Returns:
      A string containing a path to the temporary file.

    Raises:
      DownloadError: The download failed; see _DownloadFile.
    """
    # The temp file is created only to reserve a unique name; it is closed
    # (and, with the default delete=True, removed) before _StreamToDisk
    # reopens the path for writing. NOTE(review): this leaves a small window
    # where another process could claim the same name — confirm acceptable.
    destination = tempfile.NamedTemporaryFile()
    self._save_location = destination.name
    destination.close()
    self._DownloadFile(url, max_retries, show_progress)
    return self._save_location

  def _DownloadChunkReport(self, bytes_so_far, total_size):
    """Prints download progress information to stdout.

    Args:
      bytes_so_far: The number of bytes downloaded so far.
      total_size: The total size of the file being downloaded.
    """
    if total_size > 0:
      percent = round((float(bytes_so_far) / total_size) * 100, 2)
    else:
      # A zero (or missing) Content-Length previously caused a
      # ZeroDivisionError here; treat an empty download as complete.
      percent = 100.0
    # The trailing spaces erase leftovers from a previous, longer status line.
    message = (('\rDownloaded %s of %s (%0.2f%%)' + (' ' * 10)) %
               (self._ConvertBytes(bytes_so_far),
                self._ConvertBytes(total_size), percent))
    sys.stdout.write(message)
    sys.stdout.flush()
    if bytes_so_far >= total_size:
      sys.stdout.write('\n')

  def _StoreDebugInfo(self, file_stream, socket_error=None):
    """Gathers debug information for use when file downloads fail.

    Stores the response headers, the failure time, and any socket error
    message in self._debug_info for later display by PrintDebugInfo.

    Args:
      file_stream: The file stream object of the file being downloaded.
      socket_error: Store the error raised from the socket class with
          other debug info.
    """
    if socket_error:
      self._debug_info['socket_error'] = socket_error
    if file_stream:
      for header in file_stream.info().header_items():
        self._debug_info[header[0]] = header[1]
    self._debug_info['current_time'] = time.strftime(
        '%A, %d %B %Y %H:%M:%S UTC')

  def PrintDebugInfo(self):
    """Print the debugging information to the screen."""
    if self._debug_info:
      print('\n\n\n\n')
      print('---------------')
      print('Debugging info: ')
      print('---------------')
      for key, value in self._debug_info.items():
        print('%s: %s' % (key, value))
      print('\n\n\n')

  def _StreamToDisk(self, file_stream, show_progress=None):
    """Save a file stream to disk at self._save_location.

    Args:
      file_stream: The file stream returned by a successful urlopen()
      show_progress: Print download progress to stdout (overrides default).

    Raises:
      DownloadError: Error retrieving file or saving to disk.
    """
    progress = self._default_show_progress
    if show_progress is not None:
      progress = show_progress
    bytes_so_far = 0
    url = file_stream.geturl()
    # NOTE(review): this assumes the server always sends Content-Length;
    # a missing header would raise AttributeError here — confirm upstream.
    total_size = int(file_stream.info().getheader('Content-Length').strip())
    try:
      with open(self._save_location, 'wb') as output_file:
        logging.info('Downloading file "%s" to "%s".', url, self._save_location)
        while True:
          chunk = file_stream.read(CHUNK_BYTE_SIZE)
          bytes_so_far += len(chunk)
          if not chunk:
            break
          output_file.write(chunk)
          if progress:
            self._DownloadChunkReport(bytes_so_far, total_size)
    except socket.error as e:
      self._StoreDebugInfo(file_stream, str(e))
      raise DownloadError('Socket error during download.')
    except IOError:
      raise DownloadError('File location could not be opened for writing: %s' %
                          self._save_location)
    self._Validate(file_stream, total_size)
    file_stream.close()

  def _Validate(self, file_stream, expected_size):
    """Validate the downloaded file against the expected byte count.

    Args:
      file_stream: The file stream returned by a successful urlopen()
      expected_size: The total size of the file being downloaded.

    Raises:
      DownloadError: File failed validation.
    """
    if not os.path.exists(self._save_location):
      self._StoreDebugInfo(file_stream)
      raise DownloadError('Could not locate file at %s' % self._save_location)
    actual_file_size = os.path.getsize(self._save_location)
    if actual_file_size != expected_size:
      self._StoreDebugInfo(file_stream)
      # Bug fix: the message was previously assembled as a tuple
      # ('fmt', a, b) instead of a %-formatted string, so the raised
      # exception carried an unreadable tuple.
      raise DownloadError(
          'File size of %s bytes did not match expected size of %s!' %
          (actual_file_size, expected_size))

  def VerifyShaHash(self, file_path, expected):
    """Verifies the SHA256 hash of a file.

    Args:
      file_path: The path to the file that will be checked.
      expected: The expected SHA hash as a string.

    Returns:
      True if the calculated hash matches the expected hash.
      False if the calculated hash does not match the expected hash or if
      there was an error reading the file.
    """
    sha_object = hashlib.new('sha256')
    # Read the file in 4MB chunks to avoid running out of memory
    # while processing very large files.
    try:
      with open(file_path, 'rb') as f:
        while True:
          current_chunk = f.read(4194304)
          if not current_chunk:
            break
          sha_object.update(current_chunk)
    except IOError:
      logging.error('Unable to read file %s for SHA verification.', file_path)
      return False
    file_hash = sha_object.hexdigest()
    # hexdigest() is lowercase; normalize the expected hash to match.
    expected = expected.lower()
    if file_hash == expected:
      logging.info('SHA256 hash for %s matched expected hash of %s.', file_path,
                   expected)
      return True
    logging.error(
        'SHA256 hash for %s was %s, which did not match expected hash of %s.',
        file_path, file_hash, expected)
    return False
# Set our downloader of choice. The alias lets the concrete downloader
# implementation be swapped without changing consumers of this module.
Download = BaseDownloader