diff --git a/01.getting-started/02.train-on-local/.ipynb_checkpoints/02.train-on-local-checkpoint.ipynb b/01.getting-started/02.train-on-local/.ipynb_checkpoints/02.train-on-local-checkpoint.ipynb deleted file mode 100644 index 1864013e..00000000 --- a/01.getting-started/02.train-on-local/.ipynb_checkpoints/02.train-on-local-checkpoint.ipynb +++ /dev/null @@ -1,477 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright (c) Microsoft Corporation. All rights reserved.\n", - "\n", - "Licensed under the MIT License." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 02. Train locally\n", - "* Create or load workspace.\n", - "* Create scripts locally.\n", - "* Create `train.py` in a folder, along with a `my.lib` file.\n", - "* Configure & execute a local run in a user-managed Python environment.\n", - "* Configure & execute a local run in a system-managed Python environment.\n", - "* Configure & execute a local run in a Docker environment.\n", - "* Query run metrics to find the best model\n", - "* Register model for operationalization." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prerequisites\n", - "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Check core SDK version number\n", - "import azureml.core\n", - "\n", - "print(\"SDK version:\", azureml.core.VERSION)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Initialize Workspace\n", - "\n", - "Initialize a workspace object from persisted configuration." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core.workspace import Workspace\n", - "\n", - "ws = Workspace.from_config()\n", - "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create An Experiment\n", - "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core import Experiment\n", - "experiment_name = 'train-on-local'\n", - "exp = Experiment(workspace=ws, name=experiment_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## View `train.py`\n", - "\n", - "`train.py` is already created for you." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with open('./train.py', 'r') as f:\n", - " print(f.read())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note `train.py` also references a `mylib.py` file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with open('./mylib.py', 'r') as f:\n", - " print(f.read())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Configure & Run\n", - "### User-managed environment\n", - "Below, we use a user-managed run, which means you are responsible to ensure all the necessary packages are available in the Python environment you choose to run the script." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core.runconfig import RunConfiguration\n", - "\n", - "# Editing a run configuration property on-fly.\n", - "run_config_user_managed = RunConfiguration()\n", - "\n", - "run_config_user_managed.environment.python.user_managed_dependencies = True\n", - "\n", - "# You can choose a specific Python environment by pointing to a Python path \n", - "#run_config.environment.python.interpreter_path = '/home/johndoe/miniconda3/envs/sdk2/bin/python'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Submit script to run in the user-managed environment\n", - "Note whole script folder is submitted for execution, including the `mylib.py` file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core import ScriptRunConfig\n", - "\n", - "src = ScriptRunConfig(source_directory='./', script='train.py', run_config=run_config_user_managed)\n", - "run = exp.submit(src)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get run history details" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Block to wait till run finishes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run.wait_for_completion(show_output=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### System-managed environment\n", - "You can also ask the system to build a new conda environment and execute your scripts in it. The environment is built once and will be reused in subsequent executions as long as the conda dependencies remain unchanged. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core.runconfig import RunConfiguration\n", - "from azureml.core.conda_dependencies import CondaDependencies\n", - "\n", - "run_config_system_managed = RunConfiguration()\n", - "\n", - "run_config_system_managed.environment.python.user_managed_dependencies = False\n", - "run_config_system_managed.auto_prepare_environment = True\n", - "\n", - "# Specify conda dependencies with scikit-learn\n", - "cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n", - "run_config_system_managed.environment.python.conda_dependencies = cd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Submit script to run in the system-managed environment\n", - "A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 mninutes. But this conda environment is reused so long as you don't change the conda dependencies." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_system_managed)\n", - "run = exp.submit(src)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get run history details" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Block and wait till run finishes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run.wait_for_completion(show_output = True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Docker-based execution\n", - "**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n", - "\n", - "You can also ask the system to pull down a Docker image and execute your scripts in it." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run_config_docker = RunConfiguration()\n", - "run_config_docker.environment.python.user_managed_dependencies = False\n", - "run_config_docker.auto_prepare_environment = True\n", - "run_config_docker.environment.docker.enabled = True\n", - "run_config_docker.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", - "\n", - "# Specify conda dependencies with scikit-learn\n", - "cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n", - "run_config_docker.environment.python.conda_dependencies = cd\n", - "\n", - "src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit script to run in the system-managed environment\n", - "A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 mninutes. But this conda environment is reused so long as you don't change the conda dependencies.\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import subprocess\n", - "\n", - "# Check if Docker is installed and Linux containers are enables\n", - "if subprocess.run(\"docker -v\", shell=True) == 0:\n", - " out = subprocess.check_output(\"docker system info\", shell=True, encoding=\"ascii\").split(\"\\n\")\n", - " if not \"OSType: linux\" in out:\n", - " print(\"Switch Docker engine to use Linux containers.\")\n", - " else:\n", - " run = exp.submit(src)\n", - "else:\n", - " print(\"Docker engine not installed.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Get run history details\n", - "run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run.wait_for_completion(show_output=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Query run metrics" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "query history", - "get metrics" - ] - }, - "outputs": [], - "source": [ - "# get all metris logged in the run\n", - "run.get_metrics()\n", - "metrics = run.get_metrics()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's find the model that has the lowest MSE value logged." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "best_alpha = metrics['alpha'][np.argmin(metrics['mse'])]\n", - "\n", - "print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n", - " min(metrics['mse']), \n", - " best_alpha\n", - "))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also list all the files that are associated with this run record" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run.get_file_names()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We know the model `ridge_0.40.pkl` is the best performing model from the eariler queries. So let's register it with the workspace." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# supply a model name, and the full path to the serialized model file.\n", - "model = run.register_model(model_name='best_ridge_model', model_path='./outputs/ridge_0.40.pkl')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(model.name, model.version, model.url)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now you can deploy this model following the example in the 01 notebook." - ] - } - ], - "metadata": { - "authors": [ - { - "name": "roastala" - } - ], - "kernelspec": { - "display_name": "Python 3.6", - "language": "python", - "name": "python36" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}