Update samples from Release-97 as part of the SDK release

This commit is contained in:
amlrelsa-ms
2021-05-24 17:39:23 +00:00
parent 467630f955
commit ec9a5a061d
40 changed files with 644 additions and 361 deletions

View File

@@ -21,7 +21,7 @@ def fetch_openml_with_retries(data_id, max_retries=4, retry_delay=60):
print("Download attempt {0} of {1}".format(i + 1, max_retries))
data = fetch_openml(data_id=data_id, as_frame=True)
break
except Exception as e:
except Exception as e: # noqa: B902
print("Download attempt failed with exception:")
print(e)
if i + 1 != max_retries:
@@ -47,7 +47,7 @@ _categorical_columns = [
def fetch_census_dataset():
"""Fetch the Adult Census Dataset
"""Fetch the Adult Census Dataset.
This uses a particular URL for the Adult Census dataset. The code
is a simplified version of fetch_openml() in sklearn.
@@ -63,17 +63,35 @@ def fetch_census_dataset():
filename = "1595261.gz"
data_url = "https://rainotebookscdn.blob.core.windows.net/datasets/"
urlretrieve(data_url + filename, filename)
http_stream = gzip.GzipFile(filename=filename, mode='rb')
remaining_attempts = 5
sleep_duration = 10
while remaining_attempts > 0:
try:
urlretrieve(data_url + filename, filename)
with closing(http_stream):
def _stream_generator(response):
for line in response:
yield line.decode('utf-8')
http_stream = gzip.GzipFile(filename=filename, mode='rb')
stream = _stream_generator(http_stream)
data = arff.load(stream)
with closing(http_stream):
def _stream_generator(response):
for line in response:
yield line.decode('utf-8')
stream = _stream_generator(http_stream)
data = arff.load(stream)
except Exception as exc: # noqa: B902
remaining_attempts -= 1
print("Error downloading dataset from {} ({} attempt(s) remaining)"
.format(data_url, remaining_attempts))
print(exc)
time.sleep(sleep_duration)
sleep_duration *= 2
continue
else:
# dataset successfully downloaded
break
else:
raise Exception("Could not retrieve dataset from {}.".format(data_url))
attributes = OrderedDict(data['attributes'])
arff_columns = list(attributes)