mirror of https://github.com/Azure/MachineLearningNotebooks.git (synced 2025-12-22 10:35:12 -05:00)
version 1.0.17
@@ -94,7 +94,7 @@
    "source": [
     "# load workspace configuration from the config.json file in the current folder.\n",
     "ws = Workspace.from_config()\n",
-    "print(ws.name, ws.location, ws.resource_group, ws.location, sep = '\\t')"
+    "print(ws.name, ws.location, ws.resource_group, ws.location, sep='\\t')"
    ]
   },
   {
@@ -205,7 +205,7 @@
     "import urllib.request\n",
     "\n",
     "data_folder = os.path.join(os.getcwd(), 'data')\n",
-    "os.makedirs(data_folder, exist_ok = True)\n",
+    "os.makedirs(data_folder, exist_ok=True)\n",
     "\n",
     "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename=os.path.join(data_folder, 'train-images.gz'))\n",
     "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename=os.path.join(data_folder, 'train-labels.gz'))\n",
@@ -304,7 +304,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "script_folder = os.path.join(os.getcwd(), \"sklearn-mnist\")\n",
+    "script_folder = os.path.join(os.getcwd(), \"sklearn-mnist\")\n",
     "os.makedirs(script_folder, exist_ok=True)"
    ]
   },
@@ -341,7 +341,7 @@
     "parser.add_argument('--regularization', type=float, dest='reg', default=0.01, help='regularization rate')\n",
     "args = parser.parse_args()\n",
     "\n",
-    "data_folder = os.path.join(args.data_folder, 'mnist')\n",
+    "data_folder = args.data_folder\n",
     "print('Data folder:', data_folder)\n",
     "\n",
     "# load train and test set into numpy arrays\n",
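For context, a minimal sketch of how train.py reads its arguments after this change; the '--data-folder' argument definition is assumed from the tutorial's training script and is not shown in this hunk. The mounted datastore path now points directly at the mnist folder, so the extra os.path.join(..., 'mnist') is no longer needed.

```python
# Sketch only: argument handling in train.py after this change.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--data-folder', type=str, dest='data_folder',
                    help='data folder mounting point')  # assumed from the tutorial
parser.add_argument('--regularization', type=float, dest='reg', default=0.01,
                    help='regularization rate')
args = parser.parse_args()

# ds.path('mnist').as_mount() already resolves to the mnist folder,
# so the script uses the argument value directly.
data_folder = args.data_folder
print('Data folder:', data_folder)
```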
@@ -426,7 +426,7 @@
     "* Parameters required from the training script \n",
     "* Python packages needed for training\n",
     "\n",
-    "In this tutorial, this target is AmlCompute. All files in the script folder are uploaded into the cluster nodes for execution. The data_folder is set to use the datastore (`ds.as_mount()`)."
+    "In this tutorial, this target is AmlCompute. All files in the script folder are uploaded into the cluster nodes for execution. The data_folder is set to use the datastore (`ds.path('mnist').as_mount()`)."
    ]
   },
   {
@@ -442,8 +442,8 @@
     "from azureml.train.estimator import Estimator\n",
     "\n",
     "script_params = {\n",
-    " '--data-folder': ds.as_mount(),\n",
-    " '--regularization': 0.8\n",
+    " '--data-folder': ds.path('mnist').as_mount(),\n",
+    " '--regularization': 0.05\n",
     "}\n",
     "\n",
     "est = Estimator(source_directory=script_folder,\n",
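Read together with the unchanged lines between this hunk and the next, the updated estimator cell plausibly looks as sketched below; the compute_target and entry_script arguments are assumptions based on the rest of the tutorial and do not appear in this diff.

```python
# Hedged reconstruction of the full estimator cell after this change.
from azureml.train.estimator import Estimator

script_params = {
    '--data-folder': ds.path('mnist').as_mount(),  # mount the datastore's mnist folder
    '--regularization': 0.05                       # regularization rate passed to train.py
}

est = Estimator(source_directory=script_folder,
                script_params=script_params,
                compute_target=compute_target,     # assumed: the AmlCompute target created earlier
                entry_script='train.py',           # assumed: the tutorial's training script
                conda_packages=['scikit-learn'])
```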
@@ -453,13 +453,29 @@
     " conda_packages=['scikit-learn'])"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "This is what the mounting point looks like:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(ds.path('mnist').as_mount())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
     "### Submit the job to the cluster\n",
     "\n",
-    "Run the experiment by submitting the estimator object."
+    "Run the experiment by submitting the estimator object. And you can navigate to Azure portal to monitor the run."
    ]
   },
   {
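The submit step itself is not part of this hunk; as a rough sketch, and assuming the experiment object `exp` created earlier in the notebook, submitting looks like this:

```python
# Sketch only: submit the estimator to the experiment and get a Run handle.
run = exp.submit(config=est)
print(run.get_portal_url())  # link for monitoring the run in the Azure portal
```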
@@ -486,17 +502,17 @@
     "\n",
     "## Monitor a remote run\n",
     "\n",
-    "In total, the first run takes **approximately 10 minutes**. But for subsequent runs, as long as the script dependencies don't change, the same image is reused and hence the container start up time is much faster.\n",
+    "In total, the first run takes **approximately 10 minutes**. But for subsequent runs, as long as the dependencies (`conda_packages` parameter in the above estimator constructor) don't change, the same image is reused and hence the container start up time is much faster.\n",
     "\n",
     "Here is what's happening while you wait:\n",
     "\n",
-    "- **Image creation**: A Docker image is created matching the Python environment specified by the estimator. The image is uploaded to the workspace. Image creation and uploading takes **about 5 minutes**. \n",
+    "- **Image creation**: A Docker image is created matching the Python environment specified by the estimator. The image is built and stored in the ACR (Azure Container Registry) associated with your workspace. Image creation and uploading takes **about 5 minutes**. \n",
     "\n",
     " This stage happens once for each Python environment since the container is cached for subsequent runs. During image creation, logs are streamed to the run history. You can monitor the image creation progress using these logs.\n",
     "\n",
     "- **Scaling**: If the remote cluster requires more nodes to execute the run than currently available, additional nodes are added automatically. Scaling typically takes **about 5 minutes.**\n",
     "\n",
-    "- **Running**: In this stage, the necessary scripts and files are sent to the compute target, then data stores are mounted/copied, then the entry_script is run. While the job is running, stdout and the ./logs directory are streamed to the run history. You can monitor the run's progress using these logs.\n",
+    "- **Running**: In this stage, the necessary scripts and files are sent to the compute target, then data stores are mounted/copied, then the entry_script is run. While the job is running, stdout and the files in the ./logs directory are streamed to the run history. You can monitor the run's progress using these logs.\n",
     "\n",
     "- **Post-Processing**: The ./outputs directory of the run is copied over to the run history in your workspace so you can access these results.\n",
     "\n",
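These stages can also be watched from inside the notebook; a minimal sketch, assuming the azureml-widgets extra that the Azure ML notebooks typically use is installed:

```python
# Sketch only: live-updating view of the run's status and streamed logs.
from azureml.widgets import RunDetails

RunDetails(run).show()
```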
@@ -526,7 +542,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "If you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
+    "By the way, if you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
    ]
   },
   {
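For reference, cancelling from the SDK is a one-liner on the run object (the Run.cancel method; not part of this diff):

```python
# Sketch only: request cancellation of a queued or running job.
run.cancel()
```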
@@ -535,7 +551,7 @@
    "source": [
     "### Get log results upon completion\n",
     "\n",
-    "Model training and monitoring happen in the background. Wait until the model has completed training before running more code. Use `wait_for_completion` to show when the model training is complete."
+    "Model training happens in the background. You can use `wait_for_completion` to block and wait until the model has completed training before running more code. "
    ]
   },
   {
@@ -550,7 +566,8 @@
    },
    "outputs": [],
    "source": [
-    "run.wait_for_completion(show_output=False) # specify True for a verbose log"
+    "# specify show_output to True for a verbose log\n",
+    "run.wait_for_completion(show_output=False) "
    ]
   },
   {
@@ -559,7 +576,7 @@
    "source": [
     "### Display run results\n",
     "\n",
-    "You now have a model trained on a remote cluster. Retrieve the accuracy of the model:"
+    "You now have a model trained on a remote cluster. Retrieve all the metrics logged during the run, including the accuracy of the model:"
    ]
   },
   {
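The retrieval itself is the standard Run.get_metrics call; a minimal sketch (not part of this hunk):

```python
# Sketch only: fetch every metric the training script logged to this run.
print(run.get_metrics())
```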
@@ -620,7 +637,7 @@
    "source": [
     "# register model \n",
     "model = run.register_model(model_name='sklearn_mnist', model_path='outputs/sklearn_mnist_model.pkl')\n",
-    "print(model.name, model.id, model.version, sep = '\\t')"
+    "print(model.name, model.id, model.version, sep='\\t')"
    ]
   },
   {
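Once registered, the model can later be looked up by name from the workspace; a short sketch using the SDK's Model class (assumed usage, not part of this diff):

```python
# Sketch only: retrieve the registered model by name from the workspace.
from azureml.core.model import Model

model = Model(ws, name='sklearn_mnist')
print(model.name, model.id, model.version, sep='\t')
```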
@@ -663,9 +680,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.2"
+   "version": "3.6.8"
   },
-  "msauthor": "sgilley"
+  "msauthor": "haining"
  },
  "nbformat": 4,
  "nbformat_minor": 2