hdi run config code
This commit is contained in:
7
.amlignore
Normal file
7
.amlignore
Normal file
@@ -0,0 +1,7 @@
|
||||
.ipynb_checkpoints
|
||||
azureml-logs
|
||||
.azureml
|
||||
.git
|
||||
outputs
|
||||
azureml-setup
|
||||
docs
|
||||
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"python.pythonPath": "C:\\Users\\sgilley\\.azureml\\envs\\jan3\\python.exe"
|
||||
}
|
||||
15
aml_config/conda_dependencies.yml
Normal file
15
aml_config/conda_dependencies.yml
Normal file
@@ -0,0 +1,15 @@
|
||||
# Conda environment specification. The dependencies defined in this file will
|
||||
|
||||
# be automatically provisioned for runs with userManagedDependencies=False.
|
||||
|
||||
|
||||
# Details about the Conda environment file format:
|
||||
|
||||
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually
|
||||
|
||||
|
||||
name: project_environment
|
||||
dependencies:
|
||||
# The python interpreter version.
|
||||
|
||||
# Currently Azure ML only supports 3.5.2 and later.
|
||||
115
aml_config/docker.runconfig
Normal file
115
aml_config/docker.runconfig
Normal file
@@ -0,0 +1,115 @@
|
||||
# The script to run.
|
||||
script: train.py
|
||||
# The arguments to the script file.
|
||||
arguments: []
|
||||
# The name of the compute target to use for this run.
|
||||
target: local
|
||||
# Framework to execute inside. Allowed values are "Python" , "PySpark", "CNTK", "TensorFlow", and "PyTorch".
|
||||
framework: PySpark
|
||||
# Communicator for the given framework. Allowed values are "None" , "ParameterServer", "OpenMpi", and "IntelMpi".
|
||||
communicator: None
|
||||
# Automatically prepare the run environment as part of the run itself.
|
||||
autoPrepareEnvironment: true
|
||||
# Maximum allowed duration for the run.
|
||||
maxRunDurationSeconds:
|
||||
# Number of nodes to use for running job.
|
||||
nodeCount: 1
|
||||
# Environment details.
|
||||
environment:
|
||||
# Environment variables set for the run.
|
||||
environmentVariables:
|
||||
EXAMPLE_ENV_VAR: EXAMPLE_VALUE
|
||||
# Python details
|
||||
python:
|
||||
# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
|
||||
userManagedDependencies: false
|
||||
# The python interpreter path
|
||||
interpreterPath: python
|
||||
# Path to the conda dependencies file to use for this run. If a project
|
||||
# contains multiple programs with different sets of dependencies, it may be
|
||||
# convenient to manage those environments with separate files.
|
||||
condaDependenciesFile: aml_config/conda_dependencies.yml
|
||||
# Docker details
|
||||
docker:
|
||||
# Set True to perform this run inside a Docker container.
|
||||
enabled: true
|
||||
# Base image used for Docker-based runs.
|
||||
baseImage: mcr.microsoft.com/azureml/base:0.2.0
|
||||
# Set False if necessary to work around shared volume bugs.
|
||||
sharedVolumes: true
|
||||
# Run with NVidia Docker extension to support GPUs.
|
||||
gpuSupport: false
|
||||
# Extra arguments to the Docker run command.
|
||||
arguments: []
|
||||
# Image registry that contains the base image.
|
||||
baseImageRegistry:
|
||||
# DNS name or IP address of azure container registry(ACR)
|
||||
address:
|
||||
# The username for ACR
|
||||
username:
|
||||
# The password for ACR
|
||||
password:
|
||||
# Spark details
|
||||
spark:
|
||||
# List of spark repositories.
|
||||
repositories:
|
||||
- https://mmlspark.azureedge.net/maven
|
||||
packages:
|
||||
- group: com.microsoft.ml.spark
|
||||
artifact: mmlspark_2.11
|
||||
version: '0.12'
|
||||
precachePackages: true
|
||||
# Databricks details
|
||||
databricks:
|
||||
# List of maven libraries.
|
||||
mavenLibraries: []
|
||||
# List of PyPi libraries
|
||||
pypiLibraries: []
|
||||
# List of RCran libraries
|
||||
rcranLibraries: []
|
||||
# List of JAR libraries
|
||||
jarLibraries: []
|
||||
# List of Egg libraries
|
||||
eggLibraries: []
|
||||
# History details.
|
||||
history:
|
||||
# Enable history tracking -- this allows status, logs, metrics, and outputs
|
||||
# to be collected for a run.
|
||||
outputCollection: true
|
||||
# whether to take snapshots for history.
|
||||
snapshotProject: true
|
||||
# Spark configuration details.
|
||||
spark:
|
||||
configuration:
|
||||
spark.app.name: Azure ML Experiment
|
||||
spark.yarn.maxAppAttempts: 1
|
||||
# HDI details.
|
||||
hdi:
|
||||
# Yarn deploy mode. Options are cluster and client.
|
||||
yarnDeployMode: cluster
|
||||
# Tensorflow details.
|
||||
tensorflow:
|
||||
# The number of worker tasks.
|
||||
workerCount: 1
|
||||
# The number of parameter server tasks.
|
||||
parameterServerCount: 1
|
||||
# Mpi details.
|
||||
mpi:
|
||||
# When using MPI, number of processes per node.
|
||||
processCountPerNode: 1
|
||||
# data reference configuration details
|
||||
dataReferences: {}
|
||||
# Project share datastore reference.
|
||||
sourceDirectoryDataStore:
|
||||
# AmlCompute details.
|
||||
amlcompute:
|
||||
# VM size of the cluster to be created. Allowed values are Azure VM sizes. The list of VM sizes is available at https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
|
||||
vmSize:
|
||||
# VM priority of the Cluster to be created.Allowed values are "dedicated" , "lowpriority".
|
||||
vmPriority:
|
||||
# A bool that indicates if the cluster has to be retained after job completion.
|
||||
retainCluster: false
|
||||
# Name of the cluster to be created. If not specified, runId will be used as cluster name.
|
||||
name:
|
||||
# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
|
||||
clusterMaxNodeCount: 1
|
||||
115
aml_config/local.runconfig
Normal file
115
aml_config/local.runconfig
Normal file
@@ -0,0 +1,115 @@
|
||||
# The script to run.
|
||||
script: train.py
|
||||
# The arguments to the script file.
|
||||
arguments: []
|
||||
# The name of the compute target to use for this run.
|
||||
target: local
|
||||
# Framework to execute inside. Allowed values are "Python" , "PySpark", "CNTK", "TensorFlow", and "PyTorch".
|
||||
framework: Python
|
||||
# Communicator for the given framework. Allowed values are "None" , "ParameterServer", "OpenMpi", and "IntelMpi".
|
||||
communicator: None
|
||||
# Automatically prepare the run environment as part of the run itself.
|
||||
autoPrepareEnvironment: true
|
||||
# Maximum allowed duration for the run.
|
||||
maxRunDurationSeconds:
|
||||
# Number of nodes to use for running job.
|
||||
nodeCount: 1
|
||||
# Environment details.
|
||||
environment:
|
||||
# Environment variables set for the run.
|
||||
environmentVariables:
|
||||
EXAMPLE_ENV_VAR: EXAMPLE_VALUE
|
||||
# Python details
|
||||
python:
|
||||
# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
|
||||
userManagedDependencies: false
|
||||
# The python interpreter path
|
||||
interpreterPath: python
|
||||
# Path to the conda dependencies file to use for this run. If a project
|
||||
# contains multiple programs with different sets of dependencies, it may be
|
||||
# convenient to manage those environments with separate files.
|
||||
condaDependenciesFile: aml_config/conda_dependencies.yml
|
||||
# Docker details
|
||||
docker:
|
||||
# Set True to perform this run inside a Docker container.
|
||||
enabled: false
|
||||
# Base image used for Docker-based runs.
|
||||
baseImage: mcr.microsoft.com/azureml/base:0.2.0
|
||||
# Set False if necessary to work around shared volume bugs.
|
||||
sharedVolumes: true
|
||||
# Run with NVidia Docker extension to support GPUs.
|
||||
gpuSupport: false
|
||||
# Extra arguments to the Docker run command.
|
||||
arguments: []
|
||||
# Image registry that contains the base image.
|
||||
baseImageRegistry:
|
||||
# DNS name or IP address of azure container registry(ACR)
|
||||
address:
|
||||
# The username for ACR
|
||||
username:
|
||||
# The password for ACR
|
||||
password:
|
||||
# Spark details
|
||||
spark:
|
||||
# List of spark repositories.
|
||||
repositories:
|
||||
- https://mmlspark.azureedge.net/maven
|
||||
packages:
|
||||
- group: com.microsoft.ml.spark
|
||||
artifact: mmlspark_2.11
|
||||
version: '0.12'
|
||||
precachePackages: true
|
||||
# Databricks details
|
||||
databricks:
|
||||
# List of maven libraries.
|
||||
mavenLibraries: []
|
||||
# List of PyPi libraries
|
||||
pypiLibraries: []
|
||||
# List of RCran libraries
|
||||
rcranLibraries: []
|
||||
# List of JAR libraries
|
||||
jarLibraries: []
|
||||
# List of Egg libraries
|
||||
eggLibraries: []
|
||||
# History details.
|
||||
history:
|
||||
# Enable history tracking -- this allows status, logs, metrics, and outputs
|
||||
# to be collected for a run.
|
||||
outputCollection: true
|
||||
# whether to take snapshots for history.
|
||||
snapshotProject: true
|
||||
# Spark configuration details.
|
||||
spark:
|
||||
configuration:
|
||||
spark.app.name: Azure ML Experiment
|
||||
spark.yarn.maxAppAttempts: 1
|
||||
# HDI details.
|
||||
hdi:
|
||||
# Yarn deploy mode. Options are cluster and client.
|
||||
yarnDeployMode: cluster
|
||||
# Tensorflow details.
|
||||
tensorflow:
|
||||
# The number of worker tasks.
|
||||
workerCount: 1
|
||||
# The number of parameter server tasks.
|
||||
parameterServerCount: 1
|
||||
# Mpi details.
|
||||
mpi:
|
||||
# When using MPI, number of processes per node.
|
||||
processCountPerNode: 1
|
||||
# data reference configuration details
|
||||
dataReferences: {}
|
||||
# Project share datastore reference.
|
||||
sourceDirectoryDataStore:
|
||||
# AmlCompute details.
|
||||
amlcompute:
|
||||
# VM size of the cluster to be created. Allowed values are Azure VM sizes. The list of VM sizes is available at https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
|
||||
vmSize:
|
||||
# VM priority of the Cluster to be created.Allowed values are "dedicated" , "lowpriority".
|
||||
vmPriority:
|
||||
# A bool that indicates if the cluster has to be retained after job completion.
|
||||
retainCluster: false
|
||||
# Name of the cluster to be created. If not specified, runId will be used as cluster name.
|
||||
name:
|
||||
# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
|
||||
clusterMaxNodeCount: 1
|
||||
1
aml_config/project.json
Normal file
1
aml_config/project.json
Normal file
@@ -0,0 +1 @@
|
||||
{"Id": "local-compute", "Scope": "/subscriptions/65a1016d-0f67-45d2-b838-b8f373d6d52e/resourceGroups/sheri/providers/Microsoft.MachineLearningServices/workspaces/sheritestqs3/projects/local-compute"}
|
||||
7
ignore/doc-qa/how-to-set-up-training-targets/.amlignore
Normal file
7
ignore/doc-qa/how-to-set-up-training-targets/.amlignore
Normal file
@@ -0,0 +1,7 @@
|
||||
.ipynb_checkpoints
|
||||
azureml-logs
|
||||
.azureml
|
||||
.git
|
||||
outputs
|
||||
azureml-setup
|
||||
docs
|
||||
@@ -0,0 +1,15 @@
|
||||
# Conda environment specification. The dependencies defined in this file will
|
||||
|
||||
# be automatically provisioned for runs with userManagedDependencies=False.
|
||||
|
||||
|
||||
# Details about the Conda environment file format:
|
||||
|
||||
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually
|
||||
|
||||
|
||||
name: project_environment
|
||||
dependencies:
|
||||
# The python interpreter version.
|
||||
|
||||
# Currently Azure ML only supports 3.5.2 and later.
|
||||
@@ -0,0 +1,115 @@
|
||||
# The script to run.
|
||||
script: train.py
|
||||
# The arguments to the script file.
|
||||
arguments: []
|
||||
# The name of the compute target to use for this run.
|
||||
target: local
|
||||
# Framework to execute inside. Allowed values are "Python" , "PySpark", "CNTK", "TensorFlow", and "PyTorch".
|
||||
framework: PySpark
|
||||
# Communicator for the given framework. Allowed values are "None" , "ParameterServer", "OpenMpi", and "IntelMpi".
|
||||
communicator: None
|
||||
# Automatically prepare the run environment as part of the run itself.
|
||||
autoPrepareEnvironment: true
|
||||
# Maximum allowed duration for the run.
|
||||
maxRunDurationSeconds:
|
||||
# Number of nodes to use for running job.
|
||||
nodeCount: 1
|
||||
# Environment details.
|
||||
environment:
|
||||
# Environment variables set for the run.
|
||||
environmentVariables:
|
||||
EXAMPLE_ENV_VAR: EXAMPLE_VALUE
|
||||
# Python details
|
||||
python:
|
||||
# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
|
||||
userManagedDependencies: false
|
||||
# The python interpreter path
|
||||
interpreterPath: python
|
||||
# Path to the conda dependencies file to use for this run. If a project
|
||||
# contains multiple programs with different sets of dependencies, it may be
|
||||
# convenient to manage those environments with separate files.
|
||||
condaDependenciesFile: aml_config/conda_dependencies.yml
|
||||
# Docker details
|
||||
docker:
|
||||
# Set True to perform this run inside a Docker container.
|
||||
enabled: true
|
||||
# Base image used for Docker-based runs.
|
||||
baseImage: mcr.microsoft.com/azureml/base:0.2.0
|
||||
# Set False if necessary to work around shared volume bugs.
|
||||
sharedVolumes: true
|
||||
# Run with NVidia Docker extension to support GPUs.
|
||||
gpuSupport: false
|
||||
# Extra arguments to the Docker run command.
|
||||
arguments: []
|
||||
# Image registry that contains the base image.
|
||||
baseImageRegistry:
|
||||
# DNS name or IP address of azure container registry(ACR)
|
||||
address:
|
||||
# The username for ACR
|
||||
username:
|
||||
# The password for ACR
|
||||
password:
|
||||
# Spark details
|
||||
spark:
|
||||
# List of spark repositories.
|
||||
repositories:
|
||||
- https://mmlspark.azureedge.net/maven
|
||||
packages:
|
||||
- group: com.microsoft.ml.spark
|
||||
artifact: mmlspark_2.11
|
||||
version: '0.12'
|
||||
precachePackages: true
|
||||
# Databricks details
|
||||
databricks:
|
||||
# List of maven libraries.
|
||||
mavenLibraries: []
|
||||
# List of PyPi libraries
|
||||
pypiLibraries: []
|
||||
# List of RCran libraries
|
||||
rcranLibraries: []
|
||||
# List of JAR libraries
|
||||
jarLibraries: []
|
||||
# List of Egg libraries
|
||||
eggLibraries: []
|
||||
# History details.
|
||||
history:
|
||||
# Enable history tracking -- this allows status, logs, metrics, and outputs
|
||||
# to be collected for a run.
|
||||
outputCollection: true
|
||||
# whether to take snapshots for history.
|
||||
snapshotProject: true
|
||||
# Spark configuration details.
|
||||
spark:
|
||||
configuration:
|
||||
spark.app.name: Azure ML Experiment
|
||||
spark.yarn.maxAppAttempts: 1
|
||||
# HDI details.
|
||||
hdi:
|
||||
# Yarn deploy mode. Options are cluster and client.
|
||||
yarnDeployMode: cluster
|
||||
# Tensorflow details.
|
||||
tensorflow:
|
||||
# The number of worker tasks.
|
||||
workerCount: 1
|
||||
# The number of parameter server tasks.
|
||||
parameterServerCount: 1
|
||||
# Mpi details.
|
||||
mpi:
|
||||
# When using MPI, number of processes per node.
|
||||
processCountPerNode: 1
|
||||
# data reference configuration details
|
||||
dataReferences: {}
|
||||
# Project share datastore reference.
|
||||
sourceDirectoryDataStore:
|
||||
# AmlCompute details.
|
||||
amlcompute:
|
||||
# VM size of the cluster to be created. Allowed values are Azure VM sizes. The list of VM sizes is available at https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
|
||||
vmSize:
|
||||
# VM priority of the Cluster to be created.Allowed values are "dedicated" , "lowpriority".
|
||||
vmPriority:
|
||||
# A bool that indicates if the cluster has to be retained after job completion.
|
||||
retainCluster: false
|
||||
# Name of the cluster to be created. If not specified, runId will be used as cluster name.
|
||||
name:
|
||||
# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
|
||||
clusterMaxNodeCount: 1
|
||||
@@ -0,0 +1,115 @@
|
||||
# The script to run.
|
||||
script: train.py
|
||||
# The arguments to the script file.
|
||||
arguments: []
|
||||
# The name of the compute target to use for this run.
|
||||
target: local
|
||||
# Framework to execute inside. Allowed values are "Python" , "PySpark", "CNTK", "TensorFlow", and "PyTorch".
|
||||
framework: Python
|
||||
# Communicator for the given framework. Allowed values are "None" , "ParameterServer", "OpenMpi", and "IntelMpi".
|
||||
communicator: None
|
||||
# Automatically prepare the run environment as part of the run itself.
|
||||
autoPrepareEnvironment: true
|
||||
# Maximum allowed duration for the run.
|
||||
maxRunDurationSeconds:
|
||||
# Number of nodes to use for running job.
|
||||
nodeCount: 1
|
||||
# Environment details.
|
||||
environment:
|
||||
# Environment variables set for the run.
|
||||
environmentVariables:
|
||||
EXAMPLE_ENV_VAR: EXAMPLE_VALUE
|
||||
# Python details
|
||||
python:
|
||||
# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
|
||||
userManagedDependencies: false
|
||||
# The python interpreter path
|
||||
interpreterPath: python
|
||||
# Path to the conda dependencies file to use for this run. If a project
|
||||
# contains multiple programs with different sets of dependencies, it may be
|
||||
# convenient to manage those environments with separate files.
|
||||
condaDependenciesFile: aml_config/conda_dependencies.yml
|
||||
# Docker details
|
||||
docker:
|
||||
# Set True to perform this run inside a Docker container.
|
||||
enabled: false
|
||||
# Base image used for Docker-based runs.
|
||||
baseImage: mcr.microsoft.com/azureml/base:0.2.0
|
||||
# Set False if necessary to work around shared volume bugs.
|
||||
sharedVolumes: true
|
||||
# Run with NVidia Docker extension to support GPUs.
|
||||
gpuSupport: false
|
||||
# Extra arguments to the Docker run command.
|
||||
arguments: []
|
||||
# Image registry that contains the base image.
|
||||
baseImageRegistry:
|
||||
# DNS name or IP address of azure container registry(ACR)
|
||||
address:
|
||||
# The username for ACR
|
||||
username:
|
||||
# The password for ACR
|
||||
password:
|
||||
# Spark details
|
||||
spark:
|
||||
# List of spark repositories.
|
||||
repositories:
|
||||
- https://mmlspark.azureedge.net/maven
|
||||
packages:
|
||||
- group: com.microsoft.ml.spark
|
||||
artifact: mmlspark_2.11
|
||||
version: '0.12'
|
||||
precachePackages: true
|
||||
# Databricks details
|
||||
databricks:
|
||||
# List of maven libraries.
|
||||
mavenLibraries: []
|
||||
# List of PyPi libraries
|
||||
pypiLibraries: []
|
||||
# List of RCran libraries
|
||||
rcranLibraries: []
|
||||
# List of JAR libraries
|
||||
jarLibraries: []
|
||||
# List of Egg libraries
|
||||
eggLibraries: []
|
||||
# History details.
|
||||
history:
|
||||
# Enable history tracking -- this allows status, logs, metrics, and outputs
|
||||
# to be collected for a run.
|
||||
outputCollection: true
|
||||
# whether to take snapshots for history.
|
||||
snapshotProject: true
|
||||
# Spark configuration details.
|
||||
spark:
|
||||
configuration:
|
||||
spark.app.name: Azure ML Experiment
|
||||
spark.yarn.maxAppAttempts: 1
|
||||
# HDI details.
|
||||
hdi:
|
||||
# Yarn deploy mode. Options are cluster and client.
|
||||
yarnDeployMode: cluster
|
||||
# Tensorflow details.
|
||||
tensorflow:
|
||||
# The number of worker tasks.
|
||||
workerCount: 1
|
||||
# The number of parameter server tasks.
|
||||
parameterServerCount: 1
|
||||
# Mpi details.
|
||||
mpi:
|
||||
# When using MPI, number of processes per node.
|
||||
processCountPerNode: 1
|
||||
# data reference configuration details
|
||||
dataReferences: {}
|
||||
# Project share datastore reference.
|
||||
sourceDirectoryDataStore:
|
||||
# AmlCompute details.
|
||||
amlcompute:
|
||||
# VM size of the cluster to be created. Allowed values are Azure VM sizes. The list of VM sizes is available at https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
|
||||
vmSize:
|
||||
# VM priority of the Cluster to be created.Allowed values are "dedicated" , "lowpriority".
|
||||
vmPriority:
|
||||
# A bool that indicates if the cluster has to be retained after job completion.
|
||||
retainCluster: false
|
||||
# Name of the cluster to be created. If not specified, runId will be used as cluster name.
|
||||
name:
|
||||
# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
|
||||
clusterMaxNodeCount: 1
|
||||
@@ -0,0 +1 @@
|
||||
{"Id": "my-experiment", "Scope": "/subscriptions/65a1016d-0f67-45d2-b838-b8f373d6d52e/resourceGroups/sheri/providers/Microsoft.MachineLearningServices/workspaces/sheritestqs3/projects/my-experiment"}
|
||||
40
ignore/doc-qa/how-to-set-up-training-targets/donotupload.py
Normal file
40
ignore/doc-qa/how-to-set-up-training-targets/donotupload.py
Normal file
@@ -0,0 +1,40 @@
|
||||
|
||||
# Attach an HDInsight cluster to the workspace as a compute target, then build
# a PySpark run configuration that targets it.
from azureml.core import Workspace
ws = Workspace.from_config()

from azureml.core.compute import ComputeTarget, HDInsightCompute
from azureml.exceptions import ComputeTargetException

try:
    # if you want to connect using SSH key instead of username/password you can
    # provide parameters private_key_file and private_key_passphrase
    # NOTE(review): the SSH password below is hard-coded in source; it should be
    # rotated and supplied from outside the repository before this is shared.
    attach_config = HDInsightCompute.attach_configuration(address='sheri2-ssh.azurehdinsight.net',
                                                          ssh_port=22,
                                                          username='sshuser',
                                                          password='ChangePassw)rd12')
    hdi_compute = ComputeTarget.attach(workspace=ws,
                                       name='sherihdi2',
                                       attach_configuration=attach_config)

except ComputeTargetException as e:
    print("Caught = {}".format(e.message))
    # Fall back to a compute target that is already attached to the workspace.
    # NOTE(review): this looks up 'sherihdi' while the attach above uses
    # 'sherihdi2' -- confirm the differing names are intentional.
    hdi_compute = ComputeTarget(workspace=ws, name='sherihdi')


hdi_compute.wait_for_completion(show_output=True)

#<run_hdi>
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies


# use pyspark framework
run_hdi = RunConfiguration(framework="pyspark")

# Set compute target to the HDI cluster
run_hdi.target = hdi_compute.name

# specify a CondaDependencies object to ask the system to install numpy
cd = CondaDependencies()
cd.add_conda_package('numpy')
run_hdi.environment.python.conda_dependencies = cd
#</run_hdi>
|
||||
@@ -1,6 +1,6 @@
|
||||
# Code for Remote virtual machines
|
||||
|
||||
compute_target_name = "attach-dsvm"
|
||||
compute_target_name = "sheri-linuxvm"
|
||||
|
||||
#<run_dsvm>
|
||||
import azureml.core
|
||||
|
||||
27
ignore/doc-qa/how-to-set-up-training-targets/hdi.py
Normal file
27
ignore/doc-qa/how-to-set-up-training-targets/hdi.py
Normal file
@@ -0,0 +1,27 @@
|
||||
|
||||
# Build a PySpark run configuration for an HDInsight cluster that is already
# attached to the workspace, then print it.
from azureml.core import Workspace
ws = Workspace.from_config()

from azureml.core.compute import ComputeTarget

# refers to an existing compute resource attached to the workspace!
hdi_compute = ComputeTarget(workspace=ws, name='sherihdi')


#<run_hdi>
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies


# use pyspark framework
run_hdi = RunConfiguration(framework="pyspark")

# Set compute target to the HDI cluster
run_hdi.target = hdi_compute.name

# specify a CondaDependencies object to ask the system to install numpy
cd = CondaDependencies()
cd.add_conda_package('numpy')
run_hdi.environment.python.conda_dependencies = cd
#</run_hdi>
print(run_hdi)
|
||||
9
ignore/doc-qa/how-to-set-up-training-targets/mylib.py
Normal file
9
ignore/doc-qa/how-to-set-up-training-targets/mylib.py
Normal file
@@ -0,0 +1,9 @@
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def get_alphas():
    """Return the candidate alpha values as a NumPy array.

    The values start at 0.0 and advance in steps of 0.05. ``np.arange`` uses a
    half-open interval, so the stop value 1.0 itself is not part of the result.
    """
    # numbers from 0.0 up to (but not including) 1.0 with a 0.05 interval
    return np.arange(0.0, 1.0, 0.05)
|
||||
52
ignore/doc-qa/how-to-set-up-training-targets/remote.py
Normal file
52
ignore/doc-qa/how-to-set-up-training-targets/remote.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# Code for Remote virtual machines
# Builds a Docker-based Python run configuration for a remote Linux DSVM.

compute_target_name = "attach-dsvm"

#<run_dsvm>
import azureml.core
from azureml.core.runconfig import RunConfiguration, DEFAULT_CPU_IMAGE
from azureml.core.conda_dependencies import CondaDependencies

run_dsvm = RunConfiguration(framework = "python")

# Set the compute target to the Linux DSVM
run_dsvm.target = compute_target_name

# Use Docker in the remote VM
run_dsvm.environment.docker.enabled = True

# Use the CPU base image
# To use GPU in DSVM, you must also use the GPU base Docker image "azureml.core.runconfig.DEFAULT_GPU_IMAGE"
run_dsvm.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
print('Base Docker image is:', run_dsvm.environment.docker.base_image)

# Prepare the Docker and conda environment automatically when they're used for the first time
# (attribute name matches the `autoPrepareEnvironment` runconfig key and the
# `auto_prepare_environment` assignment used for the HDI run later in this file)
run_dsvm.auto_prepare_environment = True

# Specify the CondaDependencies object
run_dsvm.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
#</run_dsvm>
|
||||
# Build a PySpark run configuration that targets an HDInsight cluster.
# NOTE(review): `hdi_compute` is not defined anywhere in the visible part of
# this file, so the assignment below will raise NameError unless it is created
# earlier -- confirm where it is supposed to come from.
hdi_compute.name = "blah"
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies


# use pyspark framework
hdi_run_config = RunConfiguration(framework="pyspark")

# Set compute target to the HDI cluster
hdi_run_config.target = hdi_compute.name

# specify a CondaDependencies object to ask the system to install numpy
cd = CondaDependencies()
cd.add_conda_package('numpy')
hdi_run_config.environment.python.conda_dependencies = cd
|
||||
|
||||
#<run_hdi>
from azureml.core.runconfig import RunConfiguration
# Configure the HDInsight run
# Load the runconfig object from the myhdi.runconfig file generated in the previous attach operation
# NOTE(review): `project` is not defined in the visible part of this file --
# confirm it is created before this snippet runs.
run_hdi = RunConfiguration.load(project_object = project, run_name = 'myhdi')

# Ask the system to prepare the conda environment automatically when it's used for the first time
run_hdi.auto_prepare_environment = True
#</run_hdi>
|
||||
8
ignore/doc-qa/how-to-set-up-training-targets/temp.py
Normal file
8
ignore/doc-qa/how-to-set-up-training-targets/temp.py
Normal file
@@ -0,0 +1,8 @@
|
||||
# Print the VM sizes that AmlCompute.supported_vmsizes reports for the workspace.
from azureml.core import Workspace
ws = Workspace.from_config()

# NOTE(review): this snippet tag has no closing `#</amlcompute_temp>` marker in
# the visible part of the file -- confirm whether the snippet continues.
#<amlcompute_temp>
from azureml.core.compute import ComputeTarget, AmlCompute

# First, list the supported VM families for Azure Machine Learning Compute
print(AmlCompute.supported_vmsizes(workspace=ws))
|
||||
45
ignore/doc-qa/how-to-set-up-training-targets/train.py
Normal file
45
ignore/doc-qa/how-to-set-up-training-targets/train.py
Normal file
@@ -0,0 +1,45 @@
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
from sklearn.datasets import load_diabetes
|
||||
from sklearn.linear_model import Ridge
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.model_selection import train_test_split
|
||||
from azureml.core.run import Run
|
||||
from sklearn.externals import joblib
|
||||
import os
|
||||
import numpy as np
|
||||
import mylib
|
||||
|
||||
# Train one Ridge regression model per alpha value on the diabetes dataset,
# log alpha and the test-set MSE to the run, and save each model under ./outputs.
os.makedirs('./outputs', exist_ok=True)

X, y = load_diabetes(return_X_y=True)

run = Run.get_context()

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=0)
data = {"train": {"X": X_train, "y": y_train},
        "test": {"X": X_test, "y": y_test}}

# candidate alpha values supplied by mylib.get_alphas()
alphas = mylib.get_alphas()

for alpha in alphas:
    # Use Ridge algorithm to create a regression model
    reg = Ridge(alpha=alpha)
    reg.fit(data["train"]["X"], data["train"]["y"])

    preds = reg.predict(data["test"]["X"])
    # arguments in (y_true, y_pred) order; the squared error is symmetric, so
    # the value is the same as with the arguments swapped
    mse = mean_squared_error(data["test"]["y"], preds)
    run.log('alpha', alpha)
    run.log('mse', mse)

    model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
    # save the model in the outputs folder so it automatically gets uploaded.
    # joblib.dump opens the target path itself; wrapping it in
    # open(model_file_name, "wb") only left an empty, unused file in the
    # working directory on every iteration.
    joblib.dump(value=reg, filename=os.path.join('./outputs/',
                                                 model_file_name))

    print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))
|
||||
Reference in New Issue
Block a user