Update 04.train-on-remote-vm.ipynb

2018-09-26 14:01:39 -04:00
parent 5598e07729
commit c85e7e52af
1 changed files with 209 additions and 39 deletions
--- a/01.getting-started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb
+++ b/01.getting-started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb
@@ -13,12 +13,15 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "# 04. Train in a remote VM (MLC managed DSVM)\n",
+    "# 04. Train in a remote Linux VM\n",
    "* Create Workspace\n",
-    "* Create Project\n",
    "* Create `train.py` file\n",
-    "* Create DSVM as Machine Learning Compute (MLC) resource\n",
-    "* Configure & execute a run in a conda environment in the default miniconda Docker container on DSVM"
+    "* Create (or attach) DSVM as compute resource.\n",
+    "* Configure & execute a run in a few different ways\n",
+    "    - Use system-built conda\n",
+    "    - Use existing Python environment\n",
+    "    - Use Docker \n",
+    "* Find the best model in the run"
   ]
  },
  {
@@ -59,7 +62,7 @@
    "from azureml.core import Workspace\n",
    "\n",
    "ws = Workspace.from_config()\n",
-    "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
+    "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
   ]
  },
  {
@@ -80,8 +83,7 @@
    "experiment_name = 'train-on-remote-vm'\n",
    "\n",
    "from azureml.core import Experiment\n",
-    "\n",
-    "exp = Experiment(workspace = ws, name = experiment_name)"
+    "exp = Experiment(workspace=ws, name=experiment_name)"
   ]
  },
  {
@@ -123,23 +125,24 @@
    "from azureml.core.compute import DsvmCompute\n",
    "from azureml.core.compute_target import ComputeTargetException\n",
    "\n",
-    "compute_target_name = 'mydsvm'\n",
+    "compute_target_name = 'mysupervm'\n",
    "\n",
    "try:\n",
-    "    dsvm_compute = DsvmCompute(workspace = ws, name = compute_target_name)\n",
+    "    dsvm_compute = DsvmCompute(workspace=ws, name=compute_target_name)\n",
    "    print('found existing:', dsvm_compute.name)\n",
    "except ComputeTargetException:\n",
    "    print('creating new.')\n",
-    "    dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
-    "    dsvm_compute = DsvmCompute.create(ws, name = compute_target_name, provisioning_configuration = dsvm_config)\n",
-    "    dsvm_compute.wait_for_completion(show_output = True)"
+    "    dsvm_config = DsvmCompute.provisioning_configuration(vm_size=\"Standard_D2_v2\", ssh_port=\"5022\")\n",
+    "    dsvm_compute = DsvmCompute.create(ws, name=compute_target_name, provisioning_configuration=dsvm_config)\n",
+    "    dsvm_compute.wait_for_completion(show_output=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Attach an existing Linux DSVM as a compute target\n"
+    "## Attach an existing Linux DSVM\n",
+    "You can also attach an existing Linux VM as a compute target. The default port is 22, but below we are setting to 5022."
   ]
  },
  {
@@ -148,18 +151,178 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "'''\n",
-    " from azureml.core.compute import RemoteCompute \n",
-    " # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase \n",
-    " dsvm_compute = RemoteCompute.attach(ws,name=\"attach-from-sdk6\",username=<username>,address=<ipaddress>,ssh_port=22,password=<password>)\n",
-    "'''"
+    "from azureml.core.compute import RemoteCompute \n",
+    "# if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase \n",
+    "attached_dsvm_compute = RemoteCompute.attach(workspace=ws,\n",
+    "                                             name=\"attached_vm\",\n",
+    "                                             username='<usename>',\n",
+    "                                             address='<ip_adress>',\n",
+    "                                             ssh_port=5022,\n",
+    "                                             password='<ip_address>')\n",
+    "attached_dsvm_compute.wait_for_completion(show_output=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Configure & Run"
+    "## Configure & Run\n",
+    "There are many ways to execute script on a remote VM."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Conda run\n",
+    "You can ask the system to build a conda environment based on your dependency specification, and submit your script to run there. Once the environment is built, and if you don't change your dependencies, it will be reused in subsequent runs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azureml.core.runconfig import RunConfiguration\n",
+    "from azureml.core.conda_dependencies import CondaDependencies\n",
+    "\n",
+    "# create a new RunConfig object\n",
+    "conda_run_config = RunConfiguration(framework=\"python\")\n",
+    "\n",
+    "# Set compute target to the Linux DSVM\n",
+    "conda_run_config.target = dsvm_compute.name\n",
+    "\n",
+    "# specify CondaDependencies obj\n",
+    "conda_run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azureml.core import Run\n",
+    "from azureml.core import ScriptRunConfig\n",
+    "\n",
+    "src = ScriptRunConfig(source_directory='.', script='train.py', run_config=conda_run_config)\n",
+    "run = exp.submit(config=src)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run.wait_for_completion(show_output=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Native VM run\n",
+    "You can also configure to use an exiting Python environment in the VM to execute the script without asking the system to create a conda environment for you."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a new RunConfig object\n",
+    "vm_run_config = RunConfiguration(framework=\"python\")\n",
+    "\n",
+    "# Set compute target to the Linux DSVM\n",
+    "vm_run_config.target = dsvm_compute.name\n",
+    "\n",
+    "# Let system know that you will configure the Python environment yourself.\n",
+    "vm_run_config.environment.python.user_managed_dependencies = True"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The below run will likely fail because `train.py` needs dependency `azureml`, `scikit-learn` and others, which are not found in that Python environment. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azureml.core import Run\n",
+    "from azureml.core import ScriptRunConfig\n",
+    "\n",
+    "src = ScriptRunConfig(source_directory='.', script='train.py', run_config=vm_run_config)\n",
+    "run = exp.submit(config=src)\n",
+    "run.wait_for_completion(show_output=True)"
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "metadata": {},
+   "source": [
+    "You can choose to SSH into the VM and install Azure ML SDK, and any other missing dependencies, in that Python environment. For demonstration purposes, we simply are going to create another script `train2.py` that doesn't have azureml dependencies, and submit it instead."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%writefile ./train2.py\n",
+    "\n",
+    "print('Hello World (without Azure ML SDK)!')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now let's try again. And this time it should work fine."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "src = ScriptRunConfig(source_directory='.', script='train2.py', run_config=vm_run_config)\n",
+    "run = exp.submit(config=src)\n",
+    "run.wait_for_completion(show_output=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note even in this case you get a run record with some basic statistics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run"
   ]
  },
  {
@@ -167,7 +330,7 @@
   "metadata": {},
   "source": [
    "### Configure a Docker run with new conda environment on the VM\n",
-    "You can execute in a Docker container in the VM. If you choose this route, you don't need to install anything on the VM yourself. Azure ML execution service will take care of it for you."
+    "You can execute in a Docker container in the VM. If you choose this option, the system will pull down a base Docker image, build a new conda environment in it if you ask for (you can also skip this if you are using a customer Docker image when a preconfigured Python environment), start a container, and run your script in there. This image is also uploaded into your ACR (Azure Container Registry) assoicated with your workspace, an reused if your dependencies don't change in the subsequent runs."
   ]
  },
  {
@@ -181,26 +344,26 @@
    "\n",
    "\n",
    "# Load the \"cpu-dsvm.runconfig\" file (created by the above attach operation) in memory\n",
-    "run_config = RunConfiguration(framework = \"python\")\n",
+    "docker_run_config = RunConfiguration(framework=\"python\")\n",
    "\n",
    "# Set compute target to the Linux DSVM\n",
-    "run_config.target = compute_target_name\n",
+    "docker_run_config.target = dsvm_compute.name\n",
    "\n",
    "# Use Docker in the remote VM\n",
-    "run_config.environment.docker.enabled = True\n",
+    "docker_run_config.environment.docker.enabled = True\n",
    "\n",
    "# Use CPU base image from DockerHub\n",
-    "run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
-    "print('Base Docker image is:', run_config.environment.docker.base_image)\n",
+    "docker_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
+    "print('Base Docker image is:', docker_run_config.environment.docker.base_image)\n",
    "\n",
    "# Ask system to provision a new one based on the conda_dependencies.yml file\n",
-    "run_config.environment.python.user_managed_dependencies = False\n",
+    "docker_run_config.environment.python.user_managed_dependencies = False\n",
    "\n",
    "# Prepare the Docker and conda environment automatically when executingfor the first time.\n",
-    "run_config.prepare_environment = True\n",
+    "docker_run_config.prepare_environment = True\n",
    "\n",
    "# specify CondaDependencies obj\n",
-    "run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
+    "docker_run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
   ]
  },
  {
@@ -217,11 +380,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from azureml.core import Run\n",
-    "from azureml.core import ScriptRunConfig\n",
-    "\n",
-    "src = ScriptRunConfig(source_directory = '.', script = 'train.py', run_config = run_config)\n",
-    "run = exp.submit(src)"
+    "src = ScriptRunConfig(source_directory='.', script='train.py', run_config=docker_run_config)\n",
+    "run = exp.submit(config=src)"
   ]
  },
  {
@@ -246,14 +406,21 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "run.wait_for_completion(show_output = True)"
+    "run.wait_for_completion(show_output=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "### Find the best run"
+    "### Find the best model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we have tried various execution modes, we can find the best model from the last run."
   ]
  },
  {
@@ -273,10 +440,13 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "import numpy as np\n",
+    "# find the index where MSE is the smallest\n",
+    "indices = list(range(0, len(metrics['mse'])))\n",
+    "min_mse_index = min(indices, key=lambda x: metrics['mse'][x])\n",
+    "\n",
    "print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
-    "    min(metrics['mse']), \n",
-    "    metrics['alpha'][np.argmin(metrics['mse'])]\n",
+    "    metrics['mse'][min_mse_index], \n",
+    "    metrics['alpha'][min_mse_index]\n",
    "))"
   ]
  },
@@ -313,7 +483,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.6.6"
  }
 },
 "nbformat": 4,