diff --git a/01.getting-started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb b/01.getting-started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb index 5ee02180..45af3acc 100644 --- a/01.getting-started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb +++ b/01.getting-started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb @@ -17,6 +17,7 @@ "* Create Workspace\n", "* Create `train.py` file\n", "* Create (or attach) DSVM as compute resource.\n", + "* Upload data files into default datastore\n", "* Configure & execute a run in a few different ways\n", " - Use system-built conda\n", " - Use existing Python environment\n", @@ -90,9 +91,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## View `train.py`\n", - "\n", - "For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train.py` in a cell to show the file." ] }, { @@ -101,7 +100,87 @@ "metadata": {}, "outputs": [], "source": [ - "with open('./train.py', 'r') as training_script:\n", + "import os\n", + "script_folder = './vm-run'\n", + "os.makedirs(script_folder, exist_ok=True)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Upload data files into datastore\n", + "Every workspace comes with a default datastore (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and access it from the compute target." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get the default datastore\n", + "ds = ws.get_default_datastore()\n", + "print(ds.name, ds.datastore_type, ds.account_name, ds.container_name)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load diabetes data from `scikit-learn` and save it as 2 local files." 
+ ] }, { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_diabetes\n", + "import numpy as np\n", + "\n", + "training_data = load_diabetes()\n", + "np.save(file='./features.npy', arr=training_data['data'])\n", + "np.save(file='./labels.npy', arr=training_data['target'])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's upload the 2 files into the default datastore under a path named `diabetes`:" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds.upload_files(['./features.npy', './labels.npy'], target_path='diabetes', overwrite=True)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## View `train.py`\n", + "\n", + "For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train.py` in a cell to show the file. Please pay special attention to how we are loading the features and labels from files in the `data_folder` path, which is passed in as an argument of the training script (shown later)." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# copy train.py into the script folder\n", + "import shutil\n", + "shutil.copy('./train.py', os.path.join(script_folder, 'train.py'))\n", + "\n", + "with open(os.path.join(script_folder, './train.py'), 'r') as training_script:\n", " print(training_script.read())" ] }, @@ -113,7 +192,7 @@ "\n", "**Note**: If creation fails with a message about Marketplace purchase eligibilty, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating VM.\n", " \n", - "**Note**: By default SSH runs on port 22 and you don't need to specify it. 
But if for security reasons you switch to a different port (such as 5022), you can append the port number to the address like the example below." + "**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you switch to a different port (such as 5022), you can specify the port number in the provisioning configuration object." ] }, { @@ -125,14 +204,14 @@ "from azureml.core.compute import DsvmCompute\n", "from azureml.core.compute_target import ComputeTargetException\n", "\n", - "compute_target_name = 'mysupervm'\n", + "compute_target_name = 'mydsvm'\n", "\n", "try:\n", " dsvm_compute = DsvmCompute(workspace=ws, name=compute_target_name)\n", " print('found existing:', dsvm_compute.name)\n", "except ComputeTargetException:\n", " print('creating new.')\n", - " dsvm_config = DsvmCompute.provisioning_configuration(vm_size=\"Standard_D2_v2\", ssh_port=\"5022\")\n", + " dsvm_config = DsvmCompute.provisioning_configuration(vm_size=\"Standard_D2_v2\")\n", " dsvm_compute = DsvmCompute.create(ws, name=compute_target_name, provisioning_configuration=dsvm_config)\n", " dsvm_compute.wait_for_completion(show_output=True)" ] @@ -142,7 +221,7 @@ "metadata": {}, "source": [ "## Attach an existing Linux DSVM\n", - "You can also attach an existing Linux VM as a compute target. The default port is 22, but below we are setting to 5022." + "You can also attach an existing Linux VM as a compute target. The default port is 22." 
] }, { @@ -155,9 +234,9 @@ "# if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase \n", "attached_dsvm_compute = RemoteCompute.attach(workspace=ws,\n", " name=\"attached_vm\",\n", - " username='',\n", - " address='',\n", - " ssh_port=5022,\n", + " username='',\n", + " address='',\n", + " ssh_port=22,\n", " password='')\n", "attached_dsvm_compute.wait_for_completion(show_output=True)" ] }, @@ -167,7 +246,27 @@ "metadata": {}, "source": [ "## Configure & Run\n", - "There are many ways to execute script on a remote VM." + "First let's create a `DataReferenceConfiguration` object to inform the system what data folder to download to the compute target." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import DataReferenceConfiguration\n", + "dr = DataReferenceConfiguration(datastore_name=ds.name, \n", + " path_on_datastore='diabetes', \n", + " mode='download', # download files from datastore to compute target\n", + " overwrite=True)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can try a few different ways to run the training script in the VM." 
] }, { @@ -193,6 +292,9 @@ "# Set compute target to the Linux DSVM\n", "conda_run_config.target = dsvm_compute.name\n", "\n", + "# set the data reference of the run configuration\n", + "conda_run_config.data_references = {ds.name: dr}\n", + "\n", "# specify CondaDependencies obj\n", "conda_run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])" ] }, @@ -206,7 +308,12 @@ "from azureml.core import Run\n", "from azureml.core import ScriptRunConfig\n", "\n", - "src = ScriptRunConfig(source_directory='.', script='train.py', run_config=conda_run_config)\n", + "src = ScriptRunConfig(source_directory=script_folder, \n", + " script='train.py', \n", + " run_config=conda_run_config, \n", + " # pass the datastore reference as a parameter to the training script\n", + " arguments=['--data-folder', str(ds.as_download())] \n", + " ) \n", "run = exp.submit(config=src)" ] }, @@ -216,7 +323,14 @@ "metadata": {}, "outputs": [], "source": [ - "run" + "run.wait_for_completion(show_output=True)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show the run object. You can navigate to the Azure portal to see detailed information about the run." 
] }, { @@ -225,7 +339,7 @@ "metadata": {}, "outputs": [], "source": [ - "run.wait_for_completion(show_output=True)" + "run" ] }, { @@ -248,6 +362,9 @@ "# Set compute target to the Linux DSVM\n", "vm_run_config.target = dsvm_compute.name\n", "\n", + "# set the data reference of the run configuration\n", + "vm_run_config.data_references = {ds.name: dr}\n", + "\n", "# Let system know that you will configure the Python environment yourself.\n", "vm_run_config.environment.python.user_managed_dependencies = True" ] }, @@ -265,10 +382,11 @@ "metadata": {}, "outputs": [], "source": [ - "from azureml.core import Run\n", - "from azureml.core import ScriptRunConfig\n", - "\n", - "src = ScriptRunConfig(source_directory='.', script='train.py', run_config=vm_run_config)\n", + "src = ScriptRunConfig(source_directory=script_folder, \n", + " script='train.py', \n", + " run_config=vm_run_config,\n", + " # pass the datastore reference as a parameter to the training script\n", + " arguments=['--data-folder', str(ds.as_download())])\n", "run = exp.submit(config=src)\n", "run.wait_for_completion(show_output=True)" ] }, @@ -286,9 +404,10 @@ "metadata": {}, "outputs": [], "source": [ - "%%writefile ./train2.py\n", - "\n", - "print('Hello World (without Azure ML SDK)!')" + "%%writefile $script_folder/train2.py\n", + "print('####################################')\n", + "print('Hello World (without Azure ML SDK)!')\n", + "print('####################################')" ] }, { @@ -304,7 +423,9 @@ "metadata": {}, "outputs": [], "source": [ - "src = ScriptRunConfig(source_directory='.', script='train2.py', run_config=vm_run_config)\n", + "src = ScriptRunConfig(source_directory=script_folder, \n", + " script='train2.py', \n", + " run_config=vm_run_config)\n", "run = exp.submit(config=src)\n", "run.wait_for_completion(show_output=True)" ] }, @@ -356,11 +477,8 @@ "docker_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", "print('Base Docker image is:', 
docker_run_config.environment.docker.base_image)\n", "\n", - "# Ask system to provision a new one based on the conda_dependencies.yml file\n", - "docker_run_config.environment.python.user_managed_dependencies = False\n", - "\n", - "# Prepare the Docker and conda environment automatically when executingfor the first time.\n", - "docker_run_config.prepare_environment = True\n", + "# set the data reference of the run configuration\n", + "docker_run_config.data_references = {ds.name: dr}\n", "\n", "# specify CondaDependencies obj\n", "docker_run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])" ] }, @@ -380,10 +498,23 @@ "metadata": {}, "outputs": [], "source": [ - "src = ScriptRunConfig(source_directory='.', script='train.py', run_config=docker_run_config)\n", + "src = ScriptRunConfig(source_directory=script_folder, \n", + " script='train.py', \n", + " run_config=docker_run_config,\n", + " # pass the datastore reference as a parameter to the training script\n", + " arguments=['--data-folder', str(ds.as_download())])\n", "run = exp.submit(config=src)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output=True)" ] + }, { "cell_type": "markdown", "metadata": {}, @@ -400,15 +531,6 @@ "run" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run.wait_for_completion(show_output=True)" ] - }, { "cell_type": "markdown", "metadata": {},