update samples from Release-96 as a part of SDK release

2025-12-20 01:27:06 -05:00 · 2021-05-10 18:38:34 +00:00
parent 441a5b0141
commit eac6b69bae
117 changed files with 451 additions and 2252 deletions
--- a/tutorials/compute-instance-quickstarts/quickstart-azureml-python-sdk/src/train.py
+++ b/tutorials/compute-instance-quickstarts/quickstart-azureml-python-sdk/src/train.py
@@ -0,0 +1,85 @@
+"""Train a logistic regression classifier on MNIST and record it in an Azure ML run.
+
+Reads the four gzipped MNIST IDX files found under ``--data-folder``, fits a
+scikit-learn ``LogisticRegression``, logs the regularization rate and the test
+accuracy to the current run, and saves the fitted model under ``outputs/``.
+"""
+import argparse
+import os
+import numpy as np
+import glob
+
+from sklearn.linear_model import LogisticRegression
+import joblib
+
+from azureml.core import Run
+from utils import load_data
+
+# let user feed in 2 parameters, the dataset to mount or download,
+# and the regularization rate of the logistic regression model
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--data-folder",
+    type=str,
+    dest="data_folder",
+    required=True,  # every step below needs it; fail with a usage message instead
+    help="data folder mounting point",
+)
+parser.add_argument(
+    "--regularization", type=float, dest="reg", default=0.01, help="regularization rate"
+)
+args = parser.parse_args()
+# C is computed as 1 / reg below, so zero would divide by zero and a negative rate gives a negative C
+if args.reg <= 0:
+    parser.error("--regularization must be a positive number")
+
+data_folder = args.data_folder
+print("Data folder:", data_folder)
+
+
+def _find_data_file(name):
+    """Return the first path named ``name`` found anywhere below ``data_folder``.
+
+    Raises:
+        FileNotFoundError: if the recursive search matches nothing, so a wrong
+            mount point is reported by file name instead of as an IndexError.
+    """
+    matches = glob.glob(os.path.join(data_folder, "**/" + name), recursive=True)
+    if not matches:
+        raise FileNotFoundError("{} not found under {!r}".format(name, data_folder))
+    return matches[0]
+
+
+# load train and test set into numpy arrays
+# note we scale the pixel intensity values to 0-1 (by dividing it with 255.0) so the model can converge faster.
+X_train = load_data(_find_data_file("train-images-idx3-ubyte.gz"), False) / 255.0
+X_test = load_data(_find_data_file("t10k-images-idx3-ubyte.gz"), False) / 255.0
+# load_data returns labels as an (n, 1) column; flatten them to 1-D
+y_train = load_data(_find_data_file("train-labels-idx1-ubyte.gz"), True).reshape(-1)
+y_test = load_data(_find_data_file("t10k-labels-idx1-ubyte.gz"), True).reshape(-1)
+
+print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")
+
+# get hold of the current run
+run = Run.get_context()
+
+print("Train a logistic regression model with regularization rate of", args.reg)
+clf = LogisticRegression(
+    C=1.0 / args.reg, solver="liblinear", multi_class="auto", random_state=42
+)
+clf.fit(X_train, y_train)
+
+print("Predict the test set")
+y_hat = clf.predict(X_test)
+
+# calculate accuracy on the prediction
+acc = np.average(y_hat == y_test)
+print("Accuracy is", acc)
+
+# use the builtin float: np.float was only an alias for it and was removed in NumPy 1.24
+run.log("regularization rate", float(args.reg))
+run.log("accuracy", float(acc))
+
+os.makedirs("outputs", exist_ok=True)
+# note file saved in the outputs folder is automatically uploaded into experiment record
+joblib.dump(value=clf, filename="outputs/sklearn_mnist_model.pkl")
--- a/tutorials/compute-instance-quickstarts/quickstart-azureml-python-sdk/src/utils.py
+++ b/tutorials/compute-instance-quickstarts/quickstart-azureml-python-sdk/src/utils.py
@@ -0,0 +1,46 @@
+import gzip
+import numpy as np
+import struct
+
+
+def load_data(filename, label=False):
+    """Load a gzip-compressed MNIST IDX file into a ``uint8`` numpy array.
+
+    Args:
+        filename: path of the gzip file to read.
+        label: True to parse a label file (one byte per item), False to
+            parse an image file (``n_rows * n_cols`` bytes per item).
+
+    Returns:
+        Array of shape ``(n_items, n_rows * n_cols)`` for images, or
+        ``(n_items, 1)`` for labels.
+
+    Raises:
+        ValueError: if the magic number does not match ``label`` or the file
+            holds fewer data bytes than its header announces.
+    """
+    with gzip.open(filename) as gz:
+        # IDX header fields are big-endian 32-bit integers. The magic number
+        # is 0x00000801 for label files and 0x00000803 for image files, so
+        # checking it catches a file passed with the wrong ``label`` flag.
+        magic, n_items = struct.unpack(">II", gz.read(8))
+        expected_magic = 0x0801 if label else 0x0803
+        if magic != expected_magic:
+            raise ValueError(
+                "{}: IDX magic number is {:#06x}, expected {:#06x}".format(
+                    filename, magic, expected_magic
+                )
+            )
+        item_size = 1
+        if not label:
+            n_rows, n_cols = struct.unpack(">II", gz.read(8))
+            item_size = n_rows * n_cols
+        payload = gz.read(n_items * item_size)
+    if len(payload) != n_items * item_size:
+        raise ValueError("{}: file is shorter than its header announces".format(filename))
+    return np.frombuffer(payload, dtype=np.uint8).reshape(n_items, item_size)
+
+
+def one_hot_encode(array, num_of_classes):
+    """Flatten ``array`` and return, per entry, that row of the ``num_of_classes`` identity matrix."""
+    return np.identity(num_of_classes)[array.reshape(-1)]