Mirror of https://github.com/Azure/MachineLearningNotebooks.git, synced 2025-12-19 17:17:04 -05:00.
update samples from Release-97 as a part of SDK release
@@ -21,7 +21,7 @@ def fetch_openml_with_retries(data_id, max_retries=4, retry_delay=60):
             print("Download attempt {0} of {1}".format(i + 1, max_retries))
             data = fetch_openml(data_id=data_id, as_frame=True)
             break
-        except Exception as e:
+        except Exception as e:  # noqa: B902
             print("Download attempt failed with exception:")
             print(e)
             if i + 1 != max_retries:
@@ -47,7 +47,7 @@ _categorical_columns = [
 
 
 def fetch_census_dataset():
-    """Fetch the Adult Census Dataset
+    """Fetch the Adult Census Dataset.
 
     This uses a particular URL for the Adult Census dataset. The code
     is a simplified version of fetch_openml() in sklearn.
@@ -63,17 +63,35 @@ def fetch_census_dataset():
 
     filename = "1595261.gz"
     data_url = "https://rainotebookscdn.blob.core.windows.net/datasets/"
-    urlretrieve(data_url + filename, filename)
-
-    http_stream = gzip.GzipFile(filename=filename, mode='rb')
-
-    with closing(http_stream):
-        def _stream_generator(response):
-            for line in response:
-                yield line.decode('utf-8')
-
-        stream = _stream_generator(http_stream)
-        data = arff.load(stream)
+    remaining_attempts = 5
+    sleep_duration = 10
+    while remaining_attempts > 0:
+        try:
+            urlretrieve(data_url + filename, filename)
+
+            http_stream = gzip.GzipFile(filename=filename, mode='rb')
+
+            with closing(http_stream):
+                def _stream_generator(response):
+                    for line in response:
+                        yield line.decode('utf-8')
+
+                stream = _stream_generator(http_stream)
+                data = arff.load(stream)
+        except Exception as exc:  # noqa: B902
+            remaining_attempts -= 1
+            print("Error downloading dataset from {} ({} attempt(s) remaining)"
+                  .format(data_url, remaining_attempts))
+            print(exc)
+            time.sleep(sleep_duration)
+            sleep_duration *= 2
+            continue
+        else:
+            # dataset successfully downloaded
+            break
+    else:
+        raise Exception("Could not retrieve dataset from {}.".format(data_url))
 
     attributes = OrderedDict(data['attributes'])
     arff_columns = list(attributes)
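
For reference, the change above replaces a single download call with a retry loop that doubles its sleep duration after each failure. The sketch below is a minimal, self-contained illustration of that exponential-backoff pattern; it is not part of the commit, and the function name, URL, and attempt/delay values are illustrative placeholders.

import time
from urllib.request import urlretrieve


def download_with_backoff(url, filename, max_attempts=5, initial_delay=10):
    """Download url to filename, retrying with exponential backoff."""
    delay = initial_delay
    for attempt in range(1, max_attempts + 1):
        try:
            urlretrieve(url, filename)
        except Exception as exc:
            print("Attempt {} of {} failed: {}".format(attempt, max_attempts, exc))
            if attempt == max_attempts:
                raise Exception("Could not retrieve dataset from {}.".format(url))
            time.sleep(delay)
            delay *= 2  # wait twice as long before the next attempt
        else:
            return filename  # download succeeded

With the defaults, a call such as download_with_backoff("https://example.com/datasets/1595261.gz", "1595261.gz") makes up to five attempts, sleeping 10, 20, 40, and 80 seconds between failures before giving up.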