update samples from Release-130 as a part of SDK release

This commit is contained in:
amlrelsa-ms
2022-03-29 22:33:38 +00:00
parent 796798cb49
commit 95b0392ed2
534 changed files with 151904 additions and 27048 deletions

View File

@@ -0,0 +1,72 @@
import argparse
import os
import numpy as np
import glob
# import joblib
import mlflow
from sklearn.linear_model import LogisticRegression
from utils import load_data
# let user feed in 2 parameters, the dataset to mount or download,
# and the regularization rate of the logistic regression model
parser = argparse.ArgumentParser()
parser.add_argument(
    "--data-folder",
    type=str,
    dest="data_folder",
    # Without a folder os.path.join() would fail later with an opaque
    # TypeError, so let argparse report the missing option instead.
    required=True,
    help="data folder mounting point",
)
parser.add_argument(
    "--regularization", type=float, dest="reg", default=0.01, help="regularization rate"
)
args = parser.parse_args()

# The rate is inverted below (C = 1 / rate), so zero or negative values
# cannot produce a valid model; fail early with a readable message.
if args.reg <= 0:
    parser.error("--regularization must be a positive number")

data_folder = args.data_folder
print("Data folder:", data_folder)


def _load_mnist_file(folder, filename, is_label):
    """Find *filename* anywhere below *folder* and load it with ``load_data``.

    Args:
        folder: Root directory that is searched recursively.
        filename: Base name of the gzip file to look for.
        is_label: Forwarded to ``load_data`` as its second argument.

    Returns:
        Whatever ``load_data`` returns for the first matching file.

    Raises:
        FileNotFoundError: If no file named *filename* exists under *folder*.
    """
    matches = glob.glob(os.path.join(folder, "**/" + filename), recursive=True)
    if not matches:
        raise FileNotFoundError(
            "Could not find '{}' under data folder '{}'".format(filename, folder)
        )
    return load_data(matches[0], is_label)


# load train and test set into numpy arrays
# note we scale the pixel intensity values to 0-1 (by dividing it with 255.0) so the model can converge faster.
X_train = _load_mnist_file(data_folder, "train-images-idx3-ubyte.gz", False) / 255.0
X_test = _load_mnist_file(data_folder, "t10k-images-idx3-ubyte.gz", False) / 255.0
# labels are flattened to 1-D because that is what the classifier is fitted on
y_train = _load_mnist_file(data_folder, "train-labels-idx1-ubyte.gz", True).reshape(-1)
y_test = _load_mnist_file(data_folder, "t10k-labels-idx1-ubyte.gz", True).reshape(-1)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")

# use mlflow autologging
mlflow.autolog()

print("Train a logistic regression model with regularization rate of", args.reg)
# C is the inverse of the regularization rate
clf = LogisticRegression(
    C=1.0 / args.reg, solver="liblinear", multi_class="auto", random_state=42
)
clf.fit(X_train, y_train)

print("Predict the test set")
y_hat = clf.predict(X_test)

# calculate accuracy on the prediction
acc = np.average(y_hat == y_test)
print("Accuracy is", acc)

View File

@@ -0,0 +1,24 @@
import gzip
import numpy as np
import struct
# load compressed MNIST gz files and return numpy arrays
def load_data(filename, label=False):
    """Load a gzip-compressed MNIST IDX file into a NumPy array.

    Args:
        filename: Path of the ``.gz`` file to read.
        label: ``True`` when the file stores labels (header is a magic
            number followed by the item count); ``False`` when it stores
            images (header additionally carries row and column counts).

    Returns:
        A ``uint8`` array of shape ``(n_items, 1)`` for labels or
        ``(n_items, n_rows * n_cols)`` for images. The array is a view on
        the bytes that were read and is therefore read-only.

    Raises:
        struct.error: If the file ends before the header is complete.
        ValueError: If the payload is shorter than the header announces.
    """
    with gzip.open(filename) as gz:
        # Every header field is a big-endian unsigned 32-bit integer. The
        # magic number is consumed only to advance the stream.
        _magic, n_items = struct.unpack(">II", gz.read(8))
        if label:
            item_size = 1
        else:
            n_rows, n_cols = struct.unpack(">II", gz.read(8))
            item_size = n_rows * n_cols
        expected = n_items * item_size
        payload = gz.read(expected)

    # A truncated download would otherwise surface as a confusing reshape
    # error; report the real cause instead.
    if len(payload) != expected:
        raise ValueError(
            "{}: expected {} data bytes but found only {}".format(
                filename, expected, len(payload)
            )
        )
    return np.frombuffer(payload, dtype=np.uint8).reshape(n_items, item_size)
# one-hot encode a 1-D array
def one_hot_encode(array, num_of_classes):
    """Return the one-hot encoding of integer class labels.

    Args:
        array: Class indices as a NumPy array or any array-like (list,
            tuple); it is flattened to 1-D before encoding.
        num_of_classes: Number of columns in the encoding; every label
            must be a valid row index into an identity matrix of this size.

    Returns:
        A float array of shape ``(n_labels, num_of_classes)`` where row
        ``i`` is the identity-matrix row selected by label ``i``.

    Raises:
        IndexError: If a label is out of range for *num_of_classes*.
    """
    labels = np.asarray(array).reshape(-1)
    if labels.size == 0:
        # An empty plain sequence becomes a float array, which NumPy
        # rejects as an index; an integer dtype yields a (0, n) result.
        labels = labels.astype(np.intp)
    return np.eye(num_of_classes)[labels]