mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-20 01:27:06 -05:00
73 lines
1.9 KiB
Python
73 lines
1.9 KiB
Python
import argparse
|
|
import os
|
|
import numpy as np
|
|
import glob
|
|
# import joblib
|
|
import mlflow
|
|
|
|
from sklearn.linear_model import LogisticRegression
|
|
from utils import load_data
|
|
|
|
# let user feed in 2 parameters, the dataset to mount or download,
|
|
# and the regularization rate of the logistic regression model
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument(
|
|
"--data-folder", type=str, dest="data_folder", help="data folder mounting point"
|
|
)
|
|
parser.add_argument(
|
|
"--regularization", type=float, dest="reg", default=0.01, help="regularization rate"
|
|
)
|
|
args = parser.parse_args()
|
|
|
|
data_folder = args.data_folder
|
|
print("Data folder:", data_folder)
|
|
|
|
# load train and test set into numpy arrays
|
|
# note we scale the pixel intensity values to 0-1 (by dividing it with 255.0) so the model can converge faster.
|
|
X_train = (
|
|
load_data(
|
|
glob.glob(
|
|
os.path.join(data_folder, "**/train-images-idx3-ubyte.gz"), recursive=True
|
|
)[0],
|
|
False,
|
|
) / 255.0
|
|
)
|
|
X_test = (
|
|
load_data(
|
|
glob.glob(
|
|
os.path.join(data_folder, "**/t10k-images-idx3-ubyte.gz"), recursive=True
|
|
)[0],
|
|
False,
|
|
) / 255.0
|
|
)
|
|
y_train = load_data(
|
|
glob.glob(
|
|
os.path.join(data_folder, "**/train-labels-idx1-ubyte.gz"), recursive=True
|
|
)[0],
|
|
True,
|
|
).reshape(-1)
|
|
y_test = load_data(
|
|
glob.glob(
|
|
os.path.join(data_folder, "**/t10k-labels-idx1-ubyte.gz"), recursive=True
|
|
)[0],
|
|
True,
|
|
).reshape(-1)
|
|
|
|
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")
|
|
|
|
# use mlflow autologging
|
|
mlflow.autolog()
|
|
|
|
print("Train a logistic regression model with regularization rate of", args.reg)
|
|
clf = LogisticRegression(
|
|
C=1.0 / args.reg, solver="liblinear", multi_class="auto", random_state=42
|
|
)
|
|
clf.fit(X_train, y_train)
|
|
|
|
print("Predict the test set")
|
|
y_hat = clf.predict(X_test)
|
|
|
|
# calculate accuracy on the prediction
|
|
acc = np.average(y_hat == y_test)
|
|
print("Accuracy is", acc)
|