hdi run config code

2019-01-07 11:29:40 -06:00
parent e3a64b1f16
commit 53dbd0afcf
18 changed files with 691 additions and 1 deletions
--- a/.amlignore
+++ b/.amlignore
@@ -0,0 +1,7 @@
+.ipynb_checkpoints
+azureml-logs
+.azureml
+.git
+outputs
+azureml-setup
+docs
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.pythonPath": "C:\\Users\\sgilley\\.azureml\\envs\\jan3\\python.exe"
+}
--- a/aml_config/conda_dependencies.yml
+++ b/aml_config/conda_dependencies.yml
@@ -0,0 +1,15 @@
+# Conda environment specification. The dependencies defined in this file will
+
+# be automatically provisioned for runs with userManagedDependencies=False.
+
+
+# Details about the Conda environment file format:
+
+# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually
+
+
+name: project_environment
+dependencies:
+  # The python interpreter version.
+
+  # Currently Azure ML only supports 3.5.2 and later.
--- a/aml_config/docker.runconfig
+++ b/aml_config/docker.runconfig
@@ -0,0 +1,115 @@
+# The script to run.
+script: train.py
+# The arguments to the script file.
+arguments: []
+# The name of the compute target to use for this run.
+target: local
+# Framework to execute inside. Allowed values are "Python" ,  "PySpark", "CNTK",  "TensorFlow", and "PyTorch".
+framework: PySpark
+# Communicator for the given framework. Allowed values are "None" ,  "ParameterServer", "OpenMpi", and "IntelMpi".
+communicator: None
+# Automatically prepare the run environment as part of the run itself.
+autoPrepareEnvironment: true
+# Maximum allowed duration for the run.
+maxRunDurationSeconds:
+# Number of nodes to use for running job.
+nodeCount: 1
+# Environment details.
+environment:
+# Environment variables set for the run.
+  environmentVariables:
+    EXAMPLE_ENV_VAR: EXAMPLE_VALUE
+# Python details
+  python:
+# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
+    userManagedDependencies: false
+# The python interpreter path
+    interpreterPath: python
+# Path to the conda dependencies file to use for this run. If a project
+# contains multiple programs with different sets of dependencies, it may be
+# convenient to manage those environments with separate files.
+    condaDependenciesFile: aml_config/conda_dependencies.yml
+# Docker details
+  docker:
+# Set True to perform this run inside a Docker container.
+    enabled: true
+# Base image used for Docker-based runs.
+    baseImage: mcr.microsoft.com/azureml/base:0.2.0
+# Set False if necessary to work around shared volume bugs.
+    sharedVolumes: true
+# Run with NVidia Docker extension to support GPUs.
+    gpuSupport: false
+# Extra arguments to the Docker run command.
+    arguments: []
+# Image registry that contains the base image.
+    baseImageRegistry:
+# DNS name or IP address of azure container registry(ACR)
+      address:
+# The username for ACR
+      username:
+# The password for ACR
+      password:
+# Spark details
+  spark:
+# List of spark repositories.
+    repositories:
+    - https://mmlspark.azureedge.net/maven
+    packages:
+    - group: com.microsoft.ml.spark
+      artifact: mmlspark_2.11
+      version: '0.12'
+    precachePackages: true
+# Databricks details
+  databricks:
+# List of maven libraries.
+    mavenLibraries: []
+# List of PyPi libraries
+    pypiLibraries: []
+# List of RCran libraries
+    rcranLibraries: []
+# List of JAR libraries
+    jarLibraries: []
+# List of Egg libraries
+    eggLibraries: []
+# History details.
+history:
+# Enable history tracking -- this allows status, logs, metrics, and outputs
+# to be collected for a run.
+  outputCollection: true
+# whether to take snapshots for history.
+  snapshotProject: true
+# Spark configuration details.
+spark:
+  configuration:
+    spark.app.name: Azure ML Experiment
+    spark.yarn.maxAppAttempts: 1
+# HDI details.
+hdi:
+# Yarn deploy mode. Options are cluster and client.
+  yarnDeployMode: cluster
+# Tensorflow details.
+tensorflow:
+# The number of worker tasks.
+  workerCount: 1
+# The number of parameter server tasks.
+  parameterServerCount: 1
+# Mpi details.
+mpi:
+# When using MPI, number of processes per node.
+  processCountPerNode: 1
+# data reference configuration details
+dataReferences: {}
+# Project share datastore reference.
+sourceDirectoryDataStore:
+# AmlCompute details.
+amlcompute:
+# VM size of the Cluster to be created. Allowed values are Azure vm sizes. The list of vm sizes is available in 'https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs'.
+  vmSize:
+# VM priority of the Cluster to be created. Allowed values are "dedicated" and "lowpriority".
+  vmPriority:
+# A bool that indicates if the cluster has to be retained after job completion.
+  retainCluster: false
+# Name of the cluster to be created. If not specified, runId will be used as cluster name.
+  name:
+# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
+  clusterMaxNodeCount: 1
--- a/aml_config/local.runconfig
+++ b/aml_config/local.runconfig
@@ -0,0 +1,115 @@
+# The script to run.
+script: train.py
+# The arguments to the script file.
+arguments: []
+# The name of the compute target to use for this run.
+target: local
+# Framework to execute inside. Allowed values are "Python" ,  "PySpark", "CNTK",  "TensorFlow", and "PyTorch".
+framework: Python
+# Communicator for the given framework. Allowed values are "None" ,  "ParameterServer", "OpenMpi", and "IntelMpi".
+communicator: None
+# Automatically prepare the run environment as part of the run itself.
+autoPrepareEnvironment: true
+# Maximum allowed duration for the run.
+maxRunDurationSeconds:
+# Number of nodes to use for running job.
+nodeCount: 1
+# Environment details.
+environment:
+# Environment variables set for the run.
+  environmentVariables:
+    EXAMPLE_ENV_VAR: EXAMPLE_VALUE
+# Python details
+  python:
+# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
+    userManagedDependencies: false
+# The python interpreter path
+    interpreterPath: python
+# Path to the conda dependencies file to use for this run. If a project
+# contains multiple programs with different sets of dependencies, it may be
+# convenient to manage those environments with separate files.
+    condaDependenciesFile: aml_config/conda_dependencies.yml
+# Docker details
+  docker:
+# Set True to perform this run inside a Docker container.
+    enabled: false
+# Base image used for Docker-based runs.
+    baseImage: mcr.microsoft.com/azureml/base:0.2.0
+# Set False if necessary to work around shared volume bugs.
+    sharedVolumes: true
+# Run with NVidia Docker extension to support GPUs.
+    gpuSupport: false
+# Extra arguments to the Docker run command.
+    arguments: []
+# Image registry that contains the base image.
+    baseImageRegistry:
+# DNS name or IP address of azure container registry(ACR)
+      address:
+# The username for ACR
+      username:
+# The password for ACR
+      password:
+# Spark details
+  spark:
+# List of spark repositories.
+    repositories:
+    - https://mmlspark.azureedge.net/maven
+    packages:
+    - group: com.microsoft.ml.spark
+      artifact: mmlspark_2.11
+      version: '0.12'
+    precachePackages: true
+# Databricks details
+  databricks:
+# List of maven libraries.
+    mavenLibraries: []
+# List of PyPi libraries
+    pypiLibraries: []
+# List of RCran libraries
+    rcranLibraries: []
+# List of JAR libraries
+    jarLibraries: []
+# List of Egg libraries
+    eggLibraries: []
+# History details.
+history:
+# Enable history tracking -- this allows status, logs, metrics, and outputs
+# to be collected for a run.
+  outputCollection: true
+# whether to take snapshots for history.
+  snapshotProject: true
+# Spark configuration details.
+spark:
+  configuration:
+    spark.app.name: Azure ML Experiment
+    spark.yarn.maxAppAttempts: 1
+# HDI details.
+hdi:
+# Yarn deploy mode. Options are cluster and client.
+  yarnDeployMode: cluster
+# Tensorflow details.
+tensorflow:
+# The number of worker tasks.
+  workerCount: 1
+# The number of parameter server tasks.
+  parameterServerCount: 1
+# Mpi details.
+mpi:
+# When using MPI, number of processes per node.
+  processCountPerNode: 1
+# data reference configuration details
+dataReferences: {}
+# Project share datastore reference.
+sourceDirectoryDataStore:
+# AmlCompute details.
+amlcompute:
+# VM size of the Cluster to be created. Allowed values are Azure vm sizes. The list of vm sizes is available in 'https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs'.
+  vmSize:
+# VM priority of the Cluster to be created. Allowed values are "dedicated" and "lowpriority".
+  vmPriority:
+# A bool that indicates if the cluster has to be retained after job completion.
+  retainCluster: false
+# Name of the cluster to be created. If not specified, runId will be used as cluster name.
+  name:
+# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
+  clusterMaxNodeCount: 1
--- a/aml_config/project.json
+++ b/aml_config/project.json
@@ -0,0 +1 @@
+{"Id": "local-compute", "Scope": "/subscriptions/65a1016d-0f67-45d2-b838-b8f373d6d52e/resourceGroups/sheri/providers/Microsoft.MachineLearningServices/workspaces/sheritestqs3/projects/local-compute"}
--- a/ignore/doc-qa/how-to-set-up-training-targets/.amlignore
+++ b/ignore/doc-qa/how-to-set-up-training-targets/.amlignore
@@ -0,0 +1,7 @@
+.ipynb_checkpoints
+azureml-logs
+.azureml
+.git
+outputs
+azureml-setup
+docs
--- a/ignore/doc-qa/how-to-set-up-training-targets/aml_config/conda_dependencies.yml
+++ b/ignore/doc-qa/how-to-set-up-training-targets/aml_config/conda_dependencies.yml
@@ -0,0 +1,15 @@
+# Conda environment specification. The dependencies defined in this file will
+
+# be automatically provisioned for runs with userManagedDependencies=False.
+
+
+# Details about the Conda environment file format:
+
+# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually
+
+
+name: project_environment
+dependencies:
+  # The python interpreter version.
+
+  # Currently Azure ML only supports 3.5.2 and later.
--- a/ignore/doc-qa/how-to-set-up-training-targets/aml_config/docker.runconfig
+++ b/ignore/doc-qa/how-to-set-up-training-targets/aml_config/docker.runconfig
@@ -0,0 +1,115 @@
+# The script to run.
+script: train.py
+# The arguments to the script file.
+arguments: []
+# The name of the compute target to use for this run.
+target: local
+# Framework to execute inside. Allowed values are "Python" ,  "PySpark", "CNTK",  "TensorFlow", and "PyTorch".
+framework: PySpark
+# Communicator for the given framework. Allowed values are "None" ,  "ParameterServer", "OpenMpi", and "IntelMpi".
+communicator: None
+# Automatically prepare the run environment as part of the run itself.
+autoPrepareEnvironment: true
+# Maximum allowed duration for the run.
+maxRunDurationSeconds:
+# Number of nodes to use for running job.
+nodeCount: 1
+# Environment details.
+environment:
+# Environment variables set for the run.
+  environmentVariables:
+    EXAMPLE_ENV_VAR: EXAMPLE_VALUE
+# Python details
+  python:
+# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
+    userManagedDependencies: false
+# The python interpreter path
+    interpreterPath: python
+# Path to the conda dependencies file to use for this run. If a project
+# contains multiple programs with different sets of dependencies, it may be
+# convenient to manage those environments with separate files.
+    condaDependenciesFile: aml_config/conda_dependencies.yml
+# Docker details
+  docker:
+# Set True to perform this run inside a Docker container.
+    enabled: true
+# Base image used for Docker-based runs.
+    baseImage: mcr.microsoft.com/azureml/base:0.2.0
+# Set False if necessary to work around shared volume bugs.
+    sharedVolumes: true
+# Run with NVidia Docker extension to support GPUs.
+    gpuSupport: false
+# Extra arguments to the Docker run command.
+    arguments: []
+# Image registry that contains the base image.
+    baseImageRegistry:
+# DNS name or IP address of azure container registry(ACR)
+      address:
+# The username for ACR
+      username:
+# The password for ACR
+      password:
+# Spark details
+  spark:
+# List of spark repositories.
+    repositories:
+    - https://mmlspark.azureedge.net/maven
+    packages:
+    - group: com.microsoft.ml.spark
+      artifact: mmlspark_2.11
+      version: '0.12'
+    precachePackages: true
+# Databricks details
+  databricks:
+# List of maven libraries.
+    mavenLibraries: []
+# List of PyPi libraries
+    pypiLibraries: []
+# List of RCran libraries
+    rcranLibraries: []
+# List of JAR libraries
+    jarLibraries: []
+# List of Egg libraries
+    eggLibraries: []
+# History details.
+history:
+# Enable history tracking -- this allows status, logs, metrics, and outputs
+# to be collected for a run.
+  outputCollection: true
+# whether to take snapshots for history.
+  snapshotProject: true
+# Spark configuration details.
+spark:
+  configuration:
+    spark.app.name: Azure ML Experiment
+    spark.yarn.maxAppAttempts: 1
+# HDI details.
+hdi:
+# Yarn deploy mode. Options are cluster and client.
+  yarnDeployMode: cluster
+# Tensorflow details.
+tensorflow:
+# The number of worker tasks.
+  workerCount: 1
+# The number of parameter server tasks.
+  parameterServerCount: 1
+# Mpi details.
+mpi:
+# When using MPI, number of processes per node.
+  processCountPerNode: 1
+# data reference configuration details
+dataReferences: {}
+# Project share datastore reference.
+sourceDirectoryDataStore:
+# AmlCompute details.
+amlcompute:
+# VM size of the Cluster to be created. Allowed values are Azure vm sizes. The list of vm sizes is available in 'https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs'.
+  vmSize:
+# VM priority of the Cluster to be created. Allowed values are "dedicated" and "lowpriority".
+  vmPriority:
+# A bool that indicates if the cluster has to be retained after job completion.
+  retainCluster: false
+# Name of the cluster to be created. If not specified, runId will be used as cluster name.
+  name:
+# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
+  clusterMaxNodeCount: 1
--- a/ignore/doc-qa/how-to-set-up-training-targets/aml_config/local.runconfig
+++ b/ignore/doc-qa/how-to-set-up-training-targets/aml_config/local.runconfig
@@ -0,0 +1,115 @@
+# The script to run.
+script: train.py
+# The arguments to the script file.
+arguments: []
+# The name of the compute target to use for this run.
+target: local
+# Framework to execute inside. Allowed values are "Python" ,  "PySpark", "CNTK",  "TensorFlow", and "PyTorch".
+framework: Python
+# Communicator for the given framework. Allowed values are "None" ,  "ParameterServer", "OpenMpi", and "IntelMpi".
+communicator: None
+# Automatically prepare the run environment as part of the run itself.
+autoPrepareEnvironment: true
+# Maximum allowed duration for the run.
+maxRunDurationSeconds:
+# Number of nodes to use for running job.
+nodeCount: 1
+# Environment details.
+environment:
+# Environment variables set for the run.
+  environmentVariables:
+    EXAMPLE_ENV_VAR: EXAMPLE_VALUE
+# Python details
+  python:
+# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
+    userManagedDependencies: false
+# The python interpreter path
+    interpreterPath: python
+# Path to the conda dependencies file to use for this run. If a project
+# contains multiple programs with different sets of dependencies, it may be
+# convenient to manage those environments with separate files.
+    condaDependenciesFile: aml_config/conda_dependencies.yml
+# Docker details
+  docker:
+# Set True to perform this run inside a Docker container.
+    enabled: false
+# Base image used for Docker-based runs.
+    baseImage: mcr.microsoft.com/azureml/base:0.2.0
+# Set False if necessary to work around shared volume bugs.
+    sharedVolumes: true
+# Run with NVidia Docker extension to support GPUs.
+    gpuSupport: false
+# Extra arguments to the Docker run command.
+    arguments: []
+# Image registry that contains the base image.
+    baseImageRegistry:
+# DNS name or IP address of azure container registry(ACR)
+      address:
+# The username for ACR
+      username:
+# The password for ACR
+      password:
+# Spark details
+  spark:
+# List of spark repositories.
+    repositories:
+    - https://mmlspark.azureedge.net/maven
+    packages:
+    - group: com.microsoft.ml.spark
+      artifact: mmlspark_2.11
+      version: '0.12'
+    precachePackages: true
+# Databricks details
+  databricks:
+# List of maven libraries.
+    mavenLibraries: []
+# List of PyPi libraries
+    pypiLibraries: []
+# List of RCran libraries
+    rcranLibraries: []
+# List of JAR libraries
+    jarLibraries: []
+# List of Egg libraries
+    eggLibraries: []
+# History details.
+history:
+# Enable history tracking -- this allows status, logs, metrics, and outputs
+# to be collected for a run.
+  outputCollection: true
+# whether to take snapshots for history.
+  snapshotProject: true
+# Spark configuration details.
+spark:
+  configuration:
+    spark.app.name: Azure ML Experiment
+    spark.yarn.maxAppAttempts: 1
+# HDI details.
+hdi:
+# Yarn deploy mode. Options are cluster and client.
+  yarnDeployMode: cluster
+# Tensorflow details.
+tensorflow:
+# The number of worker tasks.
+  workerCount: 1
+# The number of parameter server tasks.
+  parameterServerCount: 1
+# Mpi details.
+mpi:
+# When using MPI, number of processes per node.
+  processCountPerNode: 1
+# data reference configuration details
+dataReferences: {}
+# Project share datastore reference.
+sourceDirectoryDataStore:
+# AmlCompute details.
+amlcompute:
+# VM size of the Cluster to be created. Allowed values are Azure vm sizes. The list of vm sizes is available in 'https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs'.
+  vmSize:
+# VM priority of the Cluster to be created. Allowed values are "dedicated" and "lowpriority".
+  vmPriority:
+# A bool that indicates if the cluster has to be retained after job completion.
+  retainCluster: false
+# Name of the cluster to be created. If not specified, runId will be used as cluster name.
+  name:
+# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
+  clusterMaxNodeCount: 1
--- a/ignore/doc-qa/how-to-set-up-training-targets/aml_config/project.json
+++ b/ignore/doc-qa/how-to-set-up-training-targets/aml_config/project.json
@@ -0,0 +1 @@
+{"Id": "my-experiment", "Scope": "/subscriptions/65a1016d-0f67-45d2-b838-b8f373d6d52e/resourceGroups/sheri/providers/Microsoft.MachineLearningServices/workspaces/sheritestqs3/projects/my-experiment"}
--- a/ignore/doc-qa/how-to-set-up-training-targets/donotupload.py
+++ b/ignore/doc-qa/how-to-set-up-training-targets/donotupload.py
@@ -0,0 +1,40 @@
+
+from azureml.core import Workspace
+ws = Workspace.from_config()
+
+from azureml.core.compute import ComputeTarget, HDInsightCompute
+from azureml.exceptions import ComputeTargetException
+
+try:
+    # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase
+    attach_config = HDInsightCompute.attach_configuration(address='sheri2-ssh.azurehdinsight.net', 
+                                                          ssh_port=22, 
+                                                          username='sshuser', 
+                                                          password='ChangePassw)rd12')
+    hdi_compute = ComputeTarget.attach(workspace=ws, 
+                                       name='sherihdi2', 
+                                       attach_configuration=attach_config)
+
+except ComputeTargetException as e:
+    print("Caught = {}".format(e.message))
+    hdi_compute = ComputeTarget(workspace=ws, name='sherihdi')
+    
+        
+hdi_compute.wait_for_completion(show_output=True)
+
+#<run_hdi>
+from azureml.core.runconfig import RunConfiguration
+from azureml.core.conda_dependencies import CondaDependencies
+
+
+# use pyspark framework
+run_hdi = RunConfiguration(framework="pyspark")
+
+# Set compute target to the HDI cluster
+run_hdi.target = hdi_compute.name
+
+# specify CondaDependencies object to ask system installing numpy
+cd = CondaDependencies()
+cd.add_conda_package('numpy')
+run_hdi.environment.python.conda_dependencies = cd
+#</run_hdi>
--- a/ignore/doc-qa/how-to-set-up-training-targets/dsvm.py
+++ b/ignore/doc-qa/how-to-set-up-training-targets/dsvm.py
@@ -1,6 +1,6 @@
 # Code for Remote virtual machines

-compute_target_name = "attach-dsvm"
+compute_target_name = "sheri-linuxvm"

 #<run_dsvm>  
 import azureml.core
--- a/ignore/doc-qa/how-to-set-up-training-targets/hdi.py
+++ b/ignore/doc-qa/how-to-set-up-training-targets/hdi.py
@@ -0,0 +1,27 @@
+
+# Build a PySpark run configuration that targets the existing 'sherihdi'
+# compute target and print the resulting configuration.
+from azureml.core import Workspace
+ws = Workspace.from_config()
+
+from azureml.core.compute import ComputeTarget
+
+# Refers to an existing compute resource that is already attached to the workspace.
+hdi_compute = ComputeTarget(workspace=ws, name='sherihdi')
+    
+        
+#<run_hdi>
+from azureml.core.runconfig import RunConfiguration
+from azureml.core.conda_dependencies import CondaDependencies
+
+
+# Use the PySpark framework
+run_hdi = RunConfiguration(framework="pyspark")
+
+# Set compute target to the HDI cluster
+run_hdi.target = hdi_compute.name
+
+# Specify a CondaDependencies object so that numpy is installed for the run
+cd = CondaDependencies()
+cd.add_conda_package('numpy')
+run_hdi.environment.python.conda_dependencies = cd
+#</run_hdi>
+print(run_hdi)
--- a/ignore/doc-qa/how-to-set-up-training-targets/mylib.py
+++ b/ignore/doc-qa/how-to-set-up-training-targets/mylib.py
@@ -0,0 +1,9 @@
+# Copyright (c) Microsoft. All rights reserved.
+# Licensed under the MIT license.
+
+import numpy as np
+
+
+def get_alphas():
+    # list of numbers from 0.0 up to (but not including) 1.0 with a 0.05 interval
+    return np.arange(0.0, 1.0, 0.05)
--- a/ignore/doc-qa/how-to-set-up-training-targets/remote.py
+++ b/ignore/doc-qa/how-to-set-up-training-targets/remote.py
@@ -0,0 +1,52 @@
+# Code for Remote virtual machines
+
+compute_target_name = "attach-dsvm"
+
+#<run_dsvm>  
+import azureml.core
+from azureml.core.runconfig import RunConfiguration, DEFAULT_CPU_IMAGE
+from azureml.core.conda_dependencies import CondaDependencies
+
+run_dsvm = RunConfiguration(framework = "python")
+
+# Set the compute target to the Linux DSVM
+run_dsvm.target = compute_target_name 
+
+# Use Docker in the remote VM
+run_dsvm.environment.docker.enabled = True
+
+# Use the CPU base image 
+# To use GPU in DSVM, you must also use the GPU base Docker image "azureml.core.runconfig.DEFAULT_GPU_IMAGE"
+run_dsvm.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
+print('Base Docker image is:', run_dsvm.environment.docker.base_image)
+
+# Prepare the Docker and conda environment automatically when they're used for the first time 
+run_dsvm.prepare_environment = True
+
+# Specify the CondaDependencies object
+run_dsvm.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
+#</run_dsvm>
+hdi_compute.name = "blah"
+from azureml.core.runconfig import RunConfiguration
+from azureml.core.conda_dependencies import CondaDependencies
+
+
+# use pyspark framework
+hdi_run_config = RunConfiguration(framework="pyspark")
+
+# Set compute target to the HDI cluster
+hdi_run_config.target = hdi_compute.name
+
+# specify CondaDependencies object to ask system installing numpy
+cd = CondaDependencies()
+cd.add_conda_package('numpy')
+hdi_run_config.environment.python.conda_dependencies = cd
+
+#<run_hdi>
+from azureml.core.runconfig import RunConfiguration
+# Configure the HDInsight run 
+# Load the runconfig object from the myhdi.runconfig file generated in the previous attach operation
+run_hdi = RunConfiguration.load(project_object = project, run_name = 'myhdi')
+
+# Ask the system to prepare the conda environment automatically when it's used for the first time
+run_hdi.auto_prepare_environment = True>
--- a/ignore/doc-qa/how-to-set-up-training-targets/temp.py
+++ b/ignore/doc-qa/how-to-set-up-training-targets/temp.py
@@ -0,0 +1,8 @@
+# Print the VM sizes that AmlCompute reports as supported for this workspace.
+from azureml.core import Workspace
+ws = Workspace.from_config()
+
+#<amlcompute_temp>
+from azureml.core.compute import ComputeTarget, AmlCompute
+
+# First, list the supported VM families for Azure Machine Learning Compute
+print(AmlCompute.supported_vmsizes(workspace=ws))
+#</amlcompute_temp>
--- a/ignore/doc-qa/how-to-set-up-training-targets/train.py
+++ b/ignore/doc-qa/how-to-set-up-training-targets/train.py
@@ -0,0 +1,45 @@
+# Copyright (c) Microsoft. All rights reserved.
+# Licensed under the MIT license.
+
+from sklearn.datasets import load_diabetes
+from sklearn.linear_model import Ridge
+from sklearn.metrics import mean_squared_error
+from sklearn.model_selection import train_test_split
+from azureml.core.run import Run
+from sklearn.externals import joblib
+import os
+import numpy as np
+import mylib
+
+os.makedirs('./outputs', exist_ok=True)
+
+X, y = load_diabetes(return_X_y=True)
+
+run = Run.get_context()
+
+X_train, X_test, y_train, y_test = train_test_split(X, y,
+                                                    test_size=0.2,
+                                                    random_state=0)
+data = {"train": {"X": X_train, "y": y_train},
+        "test": {"X": X_test, "y": y_test}}
+
+# list of numbers from 0.0 to 1.0 with a 0.05 interval
+alphas = mylib.get_alphas()
+
+for alpha in alphas:
+    # Use Ridge algorithm to create a regression model
+    reg = Ridge(alpha=alpha)
+    reg.fit(data["train"]["X"], data["train"]["y"])
+
+    preds = reg.predict(data["test"]["X"])
+    mse = mean_squared_error(preds, data["test"]["y"])
+    run.log('alpha', alpha)
+    run.log('mse', mse)
+
+    model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
+    # save model in the outputs folder so it automatically get uploaded
+    with open(model_file_name, "wb") as file:
+        joblib.dump(value=reg, filename=os.path.join('./outputs/',
+                                                     model_file_name))
+
+    print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))
				`@@ -0,0 +1 @@`
				`{"Id": "local-compute", "Scope": "/subscriptions/65a1016d-0f67-45d2-b838-b8f373d6d52e/resourceGroups/sheri/providers/Microsoft.MachineLearningServices/workspaces/sheritestqs3/projects/local-compute"}`
				`@@ -0,0 +1 @@`
				`{"Id": "my-experiment", "Scope": "/subscriptions/65a1016d-0f67-45d2-b838-b8f373d6d52e/resourceGroups/sheri/providers/Microsoft.MachineLearningServices/workspaces/sheritestqs3/projects/my-experiment"}`