Mirror of https://github.com/Azure/MachineLearningNotebooks.git, synced 2025-12-19 17:17:04 -05:00.
update samples from Release-97 as a part of SDK release
@@ -21,7 +21,7 @@ def fetch_openml_with_retries(data_id, max_retries=4, retry_delay=60):
             print("Download attempt {0} of {1}".format(i + 1, max_retries))
             data = fetch_openml(data_id=data_id, as_frame=True)
             break
-        except Exception as e:
+        except Exception as e:  # noqa: B902
             print("Download attempt failed with exception:")
             print(e)
             if i + 1 != max_retries:
@@ -47,7 +47,7 @@ _categorical_columns = [
 
 
 def fetch_census_dataset():
-    """Fetch the Adult Census Dataset
+    """Fetch the Adult Census Dataset.
 
     This uses a particular URL for the Adult Census dataset. The code
     is a simplified version of fetch_openml() in sklearn.
@@ -63,17 +63,35 @@ def fetch_census_dataset():
 
     filename = "1595261.gz"
     data_url = "https://rainotebookscdn.blob.core.windows.net/datasets/"
-    urlretrieve(data_url + filename, filename)
-
-    http_stream = gzip.GzipFile(filename=filename, mode='rb')
-
-    with closing(http_stream):
-        def _stream_generator(response):
-            for line in response:
-                yield line.decode('utf-8')
-
-        stream = _stream_generator(http_stream)
-        data = arff.load(stream)
+    remaining_attempts = 5
+    sleep_duration = 10
+    while remaining_attempts > 0:
+        try:
+            urlretrieve(data_url + filename, filename)
+
+            http_stream = gzip.GzipFile(filename=filename, mode='rb')
+
+            with closing(http_stream):
+                def _stream_generator(response):
+                    for line in response:
+                        yield line.decode('utf-8')
+
+                stream = _stream_generator(http_stream)
+                data = arff.load(stream)
+        except Exception as exc:  # noqa: B902
+            remaining_attempts -= 1
+            print("Error downloading dataset from {} ({} attempt(s) remaining)"
+                  .format(data_url, remaining_attempts))
+            print(exc)
+            time.sleep(sleep_duration)
+            sleep_duration *= 2
+            continue
+        else:
+            # dataset successfully downloaded
+            break
+    else:
+        raise Exception("Could not retrieve dataset from {}.".format(data_url))
 
     attributes = OrderedDict(data['attributes'])
     arff_columns = list(attributes)
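
For reference, the change above replaces a single download call with a retry loop that doubles its sleep duration after each failure. The sketch below is a minimal, self-contained illustration of that exponential-backoff pattern; it is not part of the commit, and the function name, URL, and attempt/delay values are illustrative placeholders.

import time
from urllib.request import urlretrieve


def download_with_backoff(url, filename, max_attempts=5, initial_delay=10):
    """Download url to filename, retrying with exponential backoff."""
    delay = initial_delay
    for attempt in range(1, max_attempts + 1):
        try:
            urlretrieve(url, filename)
        except Exception as exc:
            print("Attempt {} of {} failed: {}".format(attempt, max_attempts, exc))
            if attempt == max_attempts:
                raise Exception("Could not retrieve dataset from {}.".format(url))
            time.sleep(delay)
            delay *= 2  # wait twice as long before the next attempt
        else:
            return filename  # download succeeded

With the defaults, a call such as download_with_backoff("https://example.com/datasets/1595261.gz", "1595261.gz") makes up to five attempts, sleeping 10, 20, 40, and 80 seconds between failures before giving up.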