mirror of https://github.com/Azure/MachineLearningNotebooks.git
update samples from Release-96 as a part of SDK release
@@ -0,0 +1,82 @@
import argparse
import os
import numpy as np
import glob

from sklearn.linear_model import LogisticRegression
import joblib

from azureml.core import Run
from utils import load_data

# let the user feed in 2 parameters: the dataset folder to mount or download,
# and the regularization rate of the logistic regression model
parser = argparse.ArgumentParser()
parser.add_argument(
    "--data-folder", type=str, dest="data_folder", help="data folder mounting point"
)
parser.add_argument(
    "--regularization", type=float, dest="reg", default=0.01, help="regularization rate"
)
args = parser.parse_args()

data_folder = args.data_folder
print("Data folder:", data_folder)

# load the train and test sets into numpy arrays
# note we scale the pixel intensity values to 0-1 (by dividing by 255.0) so the model can converge faster
X_train = (
    load_data(
        glob.glob(
            os.path.join(data_folder, "**/train-images-idx3-ubyte.gz"), recursive=True
        )[0],
        False,
    )
    / 255.0
)
X_test = (
    load_data(
        glob.glob(
            os.path.join(data_folder, "**/t10k-images-idx3-ubyte.gz"), recursive=True
        )[0],
        False,
    )
    / 255.0
)
y_train = load_data(
    glob.glob(
        os.path.join(data_folder, "**/train-labels-idx1-ubyte.gz"), recursive=True
    )[0],
    True,
).reshape(-1)
y_test = load_data(
    glob.glob(
        os.path.join(data_folder, "**/t10k-labels-idx1-ubyte.gz"), recursive=True
    )[0],
    True,
).reshape(-1)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")

# get hold of the current run
run = Run.get_context()

print("Train a logistic regression model with regularization rate of", args.reg)
clf = LogisticRegression(
    C=1.0 / args.reg, solver="liblinear", multi_class="auto", random_state=42
)
clf.fit(X_train, y_train)

print("Predict the test set")
y_hat = clf.predict(X_test)

# calculate accuracy on the prediction
acc = np.average(y_hat == y_test)
print("Accuracy is", acc)

# log the regularization rate and accuracy as run metrics
run.log("regularization rate", float(args.reg))
run.log("accuracy", float(acc))

os.makedirs("outputs", exist_ok=True)
# note: files saved to the outputs folder are automatically uploaded into the experiment record
joblib.dump(value=clf, filename="outputs/sklearn_mnist_model.pkl")
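Saved as a training script (for instance as train.py, a hypothetical name, next to utils.py), the file above is typically submitted to a remote compute target rather than executed inside the notebook. The snippet below is a minimal submission sketch using the v1 ScriptRunConfig API; the workspace config file, the "mnist-dataset" FileDataset, the "cpu-cluster" compute target, and the conda_dependencies.yml environment file are all placeholder names for resources assumed to exist in your workspace.

from azureml.core import Dataset, Environment, Experiment, ScriptRunConfig, Workspace

ws = Workspace.from_config()                          # reads config.json for the target workspace
dataset = Dataset.get_by_name(ws, "mnist-dataset")    # hypothetical registered FileDataset with the MNIST .gz files

env = Environment.from_conda_specification(
    name="sklearn-env", file_path="conda_dependencies.yml"  # hypothetical conda spec with scikit-learn, joblib, azureml-core
)

src = ScriptRunConfig(
    source_directory=".",            # folder holding the training script and utils.py
    script="train.py",               # hypothetical file name for the script above
    arguments=["--data-folder", dataset.as_mount(), "--regularization", 0.5],
    compute_target="cpu-cluster",    # hypothetical AmlCompute cluster name
    environment=env,
)

run = Experiment(workspace=ws, name="sklearn-mnist").submit(src)
run.wait_for_completion(show_output=True)

Because the script obtains its run context via Run.get_context(), the regularization rate and accuracy it logs appear as metrics on the submitted run, and the model pickled into outputs/ is attached to the run record automatically.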
@@ -0,0 +1,24 @@
import gzip
import numpy as np
import struct


# load compressed MNIST gz files and return numpy arrays
def load_data(filename, label=False):
    with gzip.open(filename) as gz:
        # read and discard the 4-byte magic number, then read the item count (big-endian)
        struct.unpack("I", gz.read(4))
        n_items = struct.unpack(">I", gz.read(4))
        if not label:
            # image files also carry the row and column counts in the header
            n_rows = struct.unpack(">I", gz.read(4))[0]
            n_cols = struct.unpack(">I", gz.read(4))[0]
            res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)
            res = res.reshape(n_items[0], n_rows * n_cols)
        else:
            res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)
            res = res.reshape(n_items[0], 1)
    return res


# one-hot encode a 1-D array
def one_hot_encode(array, num_of_classes):
    return np.eye(num_of_classes)[array.reshape(-1)]
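For a quick local check of these helpers, the sketch below assumes the MNIST .gz files have already been downloaded somewhere under a local ./data folder (the folder name is hypothetical; the shapes shown are for the standard 60,000-image MNIST training set, not taken from this commit).

import glob
import os

from utils import load_data, one_hot_encode

data_folder = "./data"  # hypothetical local folder containing the MNIST .gz files

# images: one flattened 28x28 digit per row, scaled to 0-1 as in the training script
X = load_data(
    glob.glob(os.path.join(data_folder, "**/train-images-idx3-ubyte.gz"), recursive=True)[0],
    False,
) / 255.0

# labels: integer digit classes, flattened to a 1-D array
y = load_data(
    glob.glob(os.path.join(data_folder, "**/train-labels-idx1-ubyte.gz"), recursive=True)[0],
    True,
).reshape(-1)

print(X.shape)                       # (60000, 784) for the standard MNIST training set
print(y.shape)                       # (60000,)
print(one_hot_encode(y, 10).shape)   # (60000, 10): one column per digit class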