Update notebooks

This commit is contained in:
Roope Astala
2018-09-14 15:14:43 -04:00
parent 01a12c0b74
commit 8178484586
40 changed files with 14985 additions and 67 deletions

View File

@@ -110,7 +110,7 @@
"experiment_name = 'sklearn-mnist'\n",
"\n",
"from azureml.core import Experiment\n",
"exp = Experiment(workspace = ws, name = experiment_name)"
"exp = Experiment(workspace=ws, name=experiment_name)"
]
},
{
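For readers following along outside the diff: the cleaned-up cell above creates the handle that every later step submits against. A minimal sketch of browsing past runs through that same handle (assuming the `ws` and `experiment_name` variables defined in the cell):

    from azureml.core import Experiment

    exp = Experiment(workspace=ws, name=experiment_name)
    # list the runs already recorded under this experiment, newest first
    for run in exp.get_runs():
        print(run.id, run.get_status())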
@@ -143,25 +143,25 @@
"\n",
"try:\n",
" # look for the existing cluster by name\n",
" compute_target = ComputeTarget(workspace = ws, name = batchai_cluster_name)\n",
" compute_target = ComputeTarget(workspace=ws, name=batchai_cluster_name)\n",
" if compute_target is BatchAiCompute:\n",
" print('found compute target {}, just use it.'.format(batchai_cluster_name))\n",
" else:\n",
" print('{} exists but it is not a Batch AI cluster. Please choose a different name.'.format(batchai_cluster_name))\n",
"except ComputeTargetException:\n",
" print('creating a new compute target...')\n",
" compute_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # small CPU-based VM\n",
" #vm_priority = 'lowpriority', # optional\n",
" autoscale_enabled = True,\n",
" cluster_min_nodes = 0, \n",
" cluster_max_nodes = 4)\n",
" compute_config = BatchAiCompute.provisioning_configuration(vm_size=\"STANDARD_D2_V2\", # small CPU-based VM\n",
" #vm_priority='lowpriority', # optional\n",
" autoscale_enabled=True,\n",
" cluster_min_nodes=0, \n",
" cluster_max_nodes=4)\n",
"\n",
" # create the cluster\n",
" compute_target = ComputeTarget.create(ws, batchai_cluster_name, compute_config)\n",
" \n",
" # can poll for a minimum number of nodes and for a specific timeout. \n",
" # if no min node count is provided it uses the scale settings for the cluster\n",
" compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
" \n",
" # Use the 'status' property to get a detailed status for the current cluster. \n",
" print(compute_target.status.serialize())"
@@ -197,10 +197,10 @@
"\n",
"os.makedirs('./data', exist_ok = True)\n",
"\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename = './data/train-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename = './data/train-labels.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/test-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/test-labels.gz')"
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename='./data/train-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename='./data/train-labels.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename='./data/test-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename='./data/test-labels.gz')"
]
},
{
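A quick way to sanity-check these downloads before training: the MNIST files use the IDX format, whose first 16 bytes in an image file are four big-endian unsigned integers (magic number, image count, rows, columns). A minimal, self-contained check:

    import gzip
    import struct

    # expect magic=2051, n_images=60000, n_rows=28, n_cols=28 for train-images
    with gzip.open('./data/train-images.gz', 'rb') as f:
        magic, n_images, n_rows, n_cols = struct.unpack('>IIII', f.read(16))
    print(magic, n_images, n_rows, n_cols)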
@@ -237,8 +237,8 @@
" plt.subplot(1, sample_size, count)\n",
" plt.axhline('')\n",
" plt.axvline('')\n",
" plt.text(x = 10, y = -10, s = y_train[i], fontsize = 18)\n",
" plt.imshow(X_train[i].reshape(28, 28), cmap = plt.cm.Greys)\n",
" plt.text(x=10, y=-10, s=y_train[i], fontsize=18)\n",
" plt.imshow(X_train[i].reshape(28, 28), cmap=plt.cm.Greys)\n",
"plt.show()"
]
},
@@ -264,7 +264,7 @@
"ds = ws.get_default_datastore()\n",
"print(ds.datastore_type, ds.account_name, ds.container_name)\n",
"\n",
"ds.upload(src_dir = './data', target_path = 'mnist', overwrite = True, show_progress = True)"
"ds.upload(src_dir='./data', target_path='mnist', overwrite=True, show_progress=True)"
]
},
{
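The 'mnist' folder uploaded here is what the remote training job mounts later. The estimator hunk further down shows only part of its script_params dictionary; the usual pattern for wiring the datastore into it, hedged as a sketch rather than this commit's exact text, is:

    # resolved to a mount point on the compute target at run time;
    # train.py appends 'mnist' to this root itself
    script_params = {
        '--data-folder': ds.as_mount(),
        '--regularization': 0.8
    }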
@@ -339,7 +339,7 @@
"source": [
"import os\n",
"script_folder = './sklearn-mnist'\n",
"os.makedirs(script_folder, exist_ok = True)"
"os.makedirs(script_folder, exist_ok=True)"
]
},
{
@@ -371,8 +371,8 @@
"\n",
"# let user feed in 2 parameters, the location of the data files (from datastore), and the regularization rate of the logistic regression model\n",
"parser = argparse.ArgumentParser()\n",
"parser.add_argument('--data-folder', type = str, dest = 'data_folder', help = 'data folder mounting point')\n",
"parser.add_argument('--regularization', type = float, dest = 'reg', default = 0.01, help = 'regularization rate')\n",
"parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point')\n",
"parser.add_argument('--regularization', type=float, dest='reg', default=0.01, help='regularization rate')\n",
"args = parser.parse_args()\n",
"\n",
"data_folder = os.path.join(args.data_folder, 'mnist')\n",
@@ -389,25 +389,23 @@
"# get hold of the current run\n",
"run = Run.get_submitted_run()\n",
"\n",
"# train a logistic regression model with specified regularization rate\n",
"print('Train a logistic regression model with regularizaion rate of', args.reg)\n",
"clf = LogisticRegression(C = 1.0/args.reg, random_state = 42)\n",
"clf = LogisticRegression(C=1.0/args.reg, random_state=42)\n",
"clf.fit(X_train, y_train)\n",
"\n",
"print('Predict the test set')\n",
"# predict on the test set\n",
"y_hat = clf.predict(X_test)\n",
"\n",
"# calculate accuracy on the prediction\n",
"acc = np.average(y_hat == y_test)\n",
"print('Accuracy is', acc)\n",
"\n",
"# log regularization rate and accuracy \n",
"run.log('regularization rate', np.float(args.reg))\n",
"run.log('accuracy', np.float(acc))\n",
"\n",
"os.makedirs('outputs', exist_ok = True)\n",
"joblib.dump(value = clf, filename = 'outputs/sklearn_mnist_model.pkl')"
"os.makedirs('outputs', exist_ok=True)\n",
"# note file saved in the outputs folder is automatically uploaded into experiment record\n",
"joblib.dump(value=clf, filename='outputs/sklearn_mnist_model.pkl')"
]
},
{
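One forward-looking note on the logging lines above: np.float is simply an alias of the built-in float (and is deprecated in newer NumPy releases), so the equivalent calls are:

    run.log('regularization rate', float(args.reg))
    run.log('accuracy', float(acc))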
@@ -417,7 +415,7 @@
"Notice how the script gets data and saves models:\n",
"\n",
"+ The training script reads an argument to find the directory containing the data. When you submit the job later, you point to the datastore for this argument:\n",
"`parser.add_argument('--data-folder', type = str, dest = 'data_folder', help = 'data directory mounting point')`"
"`parser.add_argument('--data-folder', type=str, dest='data_folder', help='data directory mounting point')`"
]
},
{
@@ -426,7 +424,7 @@
"source": [
"\n",
"+ The training script saves your model into a directory named outputs. <br/>\n",
"`joblib.dump(value = clf, filename = 'outputs/sklearn_mnist_model.pkl')`<br/>\n",
"`joblib.dump(value=clf, filename='outputs/sklearn_mnist_model.pkl')`<br/>\n",
"Anything written in this directory is automatically uploaded into your workspace. You'll access your model from this directory later in the tutorial."
]
},
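Since everything under outputs is captured in the run record, the model file can be pulled back down once the run finishes; a minimal sketch, assuming the completed `run` object created later in this notebook:

    # list the artifacts captured from the outputs folder, then download the model
    print(run.get_file_names())
    run.download_file(name='outputs/sklearn_mnist_model.pkl',
                      output_file_path='./sklearn_mnist_model.pkl')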
@@ -477,11 +475,11 @@
" '--regularization': 0.8\n",
"}\n",
"\n",
"est = Estimator(source_directory = script_folder,\n",
" script_params = script_params,\n",
" compute_target = compute_target,\n",
" entry_script = 'train.py',\n",
" conda_packages = ['scikit-learn'])"
"est = Estimator(source_directory=script_folder,\n",
" script_params=script_params,\n",
" compute_target=compute_target,\n",
" entry_script='train.py',\n",
" conda_packages=['scikit-learn'])"
]
},
{
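With the estimator configured, submitting it to the experiment is a one-liner; a sketch using the `exp` handle from the start of the notebook:

    # submit the estimator; returns immediately with a Run handle
    run = exp.submit(config=est)
    print(run.get_portal_url())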
@@ -562,7 +560,7 @@
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True) # specify True for a verbose log"
"run.wait_for_completion(show_output=True) # specify True for a verbose log"
]
},
{
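Once the run completes, the values logged in train.py via run.log come back as a plain dictionary; a minimal sketch (the keys match the names used in the training script):

    metrics = run.get_metrics()
    print(metrics['regularization rate'], metrics['accuracy'])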
@@ -623,7 +621,7 @@
"outputs": [],
"source": [
"# register model \n",
"model = run.register_model(model_name = 'sklearn_mnist', model_path = 'outputs/sklearn_mnist_model.pkl')\n",
"model = run.register_model(model_name='sklearn_mnist', model_path='outputs/sklearn_mnist_model.pkl')\n",
"print(model.name, model.id, model.version, sep = '\\t')"
]
},
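After registration the model is versioned in the workspace registry and can be retrieved by name from any other notebook; a sketch:

    from azureml.core.model import Model

    # fetches the latest registered version under this name
    model = Model(workspace=ws, name='sklearn_mnist')
    print(model.name, model.version)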

View File

@@ -34,7 +34,45 @@
"\n",
"Complete the model training in the [Tutorial #1: Train an image classification model with Azure Machine Learning](01.train-models.ipynb) notebook. \n",
"\n",
"If you did NOT complete the tutorial, you can instead run this cell to create a model and download the data needed for this tutorial:"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# These prerequisites are created in the training tutorial\n",
+ "# Feel free to skip this cell if you completed the training tutorial \n",
+ "\n",
+ "# register a model\n",
+ "from azureml.core import Workspace\n",
+ "ws = Workspace.from_config()\n",
+ "\n",
+ "from azureml.core.model import Model\n",
+ "\n",
+ "model_name = \"sklearn_mnist\"\n",
+ "model = Model.register(model_path=\"sklearn_mnist_model.pkl\",\n",
+ "                       model_name=model_name,\n",
+ "                       tags={\"data\": \"mnist\", \"model\": \"classification\"},\n",
+ "                       description=\"Mnist handwriting recognition\",\n",
+ "                       workspace=ws)\n",
+ "\n",
+ "# download test data\n",
+ "import os\n",
+ "import urllib.request\n",
+ "\n",
+ "os.makedirs('./data', exist_ok=True)\n",
+ "\n",
+ "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename='./data/test-images.gz')\n",
+ "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename='./data/test-labels.gz')"
+ ]
+ },
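One assumption the new cell makes worth spelling out: Model.register uploads a local sklearn_mnist_model.pkl, so that file must sit next to this notebook (it is produced by the training tutorial). A quick guard before running the cell:

    import os
    assert os.path.isfile('sklearn_mnist_model.pkl'), \
        'copy sklearn_mnist_model.pkl from the training tutorial first'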
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set up the environment\n",
"\n",
"Start by setting up a testing environment.\n",
@@ -113,9 +151,8 @@
"from utils import load_data\n",
"\n",
"# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the neural network converge faster\n",
"\n",
"X_test = load_data('./data/test-images.gz', False) / 255.0\n",
"y_test = load_data('./data/test-labels.gz', True).reshape(-1)\n"
"y_test = load_data('./data/test-labels.gz', True).reshape(-1)"
]
},
{
@@ -175,13 +212,14 @@
"metadata": {},
"outputs": [],
"source": [
"row_sums = conf_mx.sum(axis = 1, keepdims = True)\n",
"# normalize the diagnal cells so that they don't overpower the rest of the cells when visualized\n",
"row_sums = conf_mx.sum(axis=1, keepdims=True)\n",
"norm_conf_mx = conf_mx / row_sums\n",
"np.fill_diagonal(norm_conf_mx, 0)\n",
"\n",
"fig = plt.figure(figsize = (8,5))\n",
"fig = plt.figure(figsize=(8,5))\n",
"ax = fig.add_subplot(111)\n",
"cax = ax.matshow(norm_conf_mx, cmap = plt.cm.bone)\n",
"cax = ax.matshow(norm_conf_mx, cmap=plt.cm.bone)\n",
"ticks = np.arange(0, 10, 1)\n",
"ax.set_xticks(ticks)\n",
"ax.set_yticks(ticks)\n",
@@ -232,12 +270,11 @@
"from sklearn.externals import joblib\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"#from azureml.assets.persistence.persistence import get_model_path\n",
"from azureml.core.model import Model\n",
"\n",
"def init():\n",
" global model\n",
" # retreive the local path to the model using the model name\n",
" # retreive the path to the model file using the model name\n",
" model_path = Model.get_model_path('sklearn_mnist')\n",
" model = joblib.load(model_path)\n",
"\n",
@@ -263,16 +300,29 @@
"metadata": {},
"outputs": [],
"source": [
"%%writefile myenv.yml\n",
"name: myenv\n",
"channels:\n",
" - defaults\n",
"dependencies:\n",
" - scikit-learn\n",
" - pip:\n",
" # Required packages for AzureML execution, history, and data preparation.\n",
" - --extra-index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1\n",
" - azureml-core"
"from azureml.core.conda_dependencies import CondaDependencies \n",
"\n",
"myenv = CondaDependencies()\n",
"myenv.add_conda_package(\"scikit-learn\")\n",
"\n",
"with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Review the content of the file"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pfile myenv.yml"
+ ]
+ },
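CondaDependencies can carry pip packages alongside conda ones, which matters as soon as the scoring script needs something not on the conda channels; a minimal sketch of extending the same object before serializing it (method names from the azureml-core API of this era):

    # conda and pip dependencies can be mixed in one specification
    myenv.add_conda_package("numpy")
    myenv.add_pip_package("azureml-sdk")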
{
@@ -281,7 +331,7 @@
"source": [
"### Create configuration file\n",
"\n",
"Create a deployment configuration file and specify the number of CPUs and gigabyte of RAM needed for your ACI container. While it depends on your model, the default of 1 core and 1 gigabyte of RAM is usually sufficient for many models. If you feel you need more later, you would have to recreate the image and redeploy the service."
"Create a deployment configuration file and specify the number of CPUs and gigabyte of RAM needed for your ACI container. While it depends on your model, the default of 1 core and 1 gigabyte of RAM is usually sufficient for many models. If you feel you need more later, you can always modify the configuration and redeploy the service."
]
},
{
@@ -292,10 +342,10 @@
"source": [
"from azureml.core.webservice import AciWebservice\n",
"\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
" memory_gb = 1, \n",
" tags = {\"data\": \"MNIST\", \"method\" : \"sklearn\"}, \n",
" description = 'Predict MNIST with sklearn')"
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
" memory_gb=1, \n",
" tags={\"data\": \"MNIST\", \"method\" : \"sklearn\"}, \n",
" description='Predict MNIST with sklearn')"
]
},
{
@@ -328,17 +378,17 @@
"from azureml.core.image import ContainerImage\n",
"\n",
"# configure the image\n",
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\", \n",
" runtime = \"python\", \n",
" conda_file = \"myenv.yml\")\n",
"image_config = ContainerImage.image_configuration(execution_script=\"score.py\", \n",
" runtime=\"python\", \n",
" conda_file=\"myenv.yml\")\n",
"\n",
"service = Webservice.deploy_from_model(workspace = ws,\n",
" name = 'sklearn-mnist-model',\n",
" deployment_config = aciconfig,\n",
" models = [model],\n",
" image_config = image_config)\n",
"service = Webservice.deploy_from_model(workspace=ws,\n",
" name='sklearn-mnist-model',\n",
" deployment_config=aciconfig,\n",
" models=[model],\n",
" image_config=image_config)\n",
"\n",
"service.wait_for_deployment(show_output = True)"
"service.wait_for_deployment(show_output=True)"
]
},
{
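After wait_for_deployment returns, the service object exposes both the HTTP endpoint and, when a deployment misbehaves, the container logs; a short sketch:

    # REST endpoint that accepts POSTed JSON
    print(service.scoring_uri)

    # first place to look if deployment or scoring fails
    print(service.get_logs())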
@@ -391,7 +441,7 @@
"test_samples = bytes(test_samples, encoding = 'utf8')\n",
"\n",
"# predict using the deployed model\n",
"result = json.loads(service.run(input_data = test_samples))\n",
"result = json.loads(service.run(input_data=test_samples))\n",
"\n",
"# compare actual value vs. the predicted values:\n",
"i = 0\n",
@@ -406,8 +456,8 @@
" font_color = 'red' if y_test[s] != result[i] else 'black'\n",
" clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n",
" \n",
" plt.text(x = 10, y = -10, s = result[i], fontsize = 18, color = font_color)\n",
" plt.imshow(X_test[s].reshape(28, 28), cmap = clr_map)\n",
" plt.text(x=10, y =-10, s=result[i], fontsize=18, color=font_color)\n",
" plt.imshow(X_test[s].reshape(28, 28), cmap=clr_map)\n",
" \n",
" i = i + 1\n",
"plt.show()"

Binary file not shown.