diff --git a/configuration.ipynb b/configuration.ipynb index cd34ebf9..555b0c73 100644 --- a/configuration.ipynb +++ b/configuration.ipynb @@ -103,7 +103,7 @@ "source": [ "import azureml.core\n", "\n", - "print(\"This notebook was created using version 1.0.60 of the Azure ML SDK\")\n", + "print(\"This notebook was created using version 1.0.62 of the Azure ML SDK\")\n", "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")" ] }, diff --git a/how-to-use-azureml/automated-machine-learning/automl_env.yml b/how-to-use-azureml/automated-machine-learning/automl_env.yml index 5e280f0c..8114c9d8 100644 --- a/how-to-use-azureml/automated-machine-learning/automl_env.yml +++ b/how-to-use-azureml/automated-machine-learning/automl_env.yml @@ -21,5 +21,6 @@ dependencies: - azureml-train-automl - azureml-widgets - azureml-explain-model + - azureml-contrib-explain-model - pandas_ml diff --git a/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml b/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml index 3a2c2498..36114400 100644 --- a/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml +++ b/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml @@ -22,5 +22,6 @@ dependencies: - azureml-train-automl - azureml-widgets - azureml-explain-model + - azureml-contrib-explain-model - pandas_ml diff --git a/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb b/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb index 64750a56..d13b3bb2 100644 --- a/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb @@ -92,8 +92,6 @@ "\n", "# choose a name for experiment\n", "experiment_name = 'automl-classification-bmarketing'\n", - "# project folder\n", - "project_folder = './sample_projects/automl-classification-bankmarketing'\n", "\n", "experiment=Experiment(ws, experiment_name)\n", "\n", @@ -103,7 +101,6 @@ "output['Workspace'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Experiment Name'] = experiment.name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -164,20 +161,7 @@ "source": [ "# Data\n", "\n", - "Here load the data in the get_data() script to be utilized in azure compute. To do this first load all the necessary libraries and dependencies to set up paths for the data and to create the conda_Run_config." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if not os.path.isdir('data'):\n", - " os.mkdir('data')\n", - " \n", - "if not os.path.exists(project_folder):\n", - " os.makedirs(project_folder)" + "Create a run configuration for the remote run." ] }, { @@ -207,7 +191,7 @@ "source": [ "### Load Data\n", "\n", - "Here we create the script to be run in azure comput for loading the data, we load the bank marketing dataset into X_train and y_train. Next X_train and y_train is returned for training the model." + "Load the bank marketing dataset into X_train and y_train. X_train contains the training features, which are inputs to the model. 
y_train contains the training labels, which are the expected output of the model." ] }, { @@ -240,7 +224,6 @@ "|**n_cross_validations**|Number of cross validation splits.|\n", "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", "|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n", - "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n", "\n", "**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)" ] @@ -263,7 +246,6 @@ "\n", "automl_config = AutoMLConfig(task = 'classification',\n", " debug_log = 'automl_errors.log',\n", - " path = project_folder,\n", " run_configuration=conda_run_config,\n", " X = X_train,\n", " y = y_train,\n", diff --git a/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb b/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb index 952e9de4..ffcf6261 100644 --- a/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb @@ -92,8 +92,6 @@ "\n", "# choose a name for experiment\n", "experiment_name = 'automl-classification-ccard'\n", - "# project folder\n", - "project_folder = './sample_projects/automl-classification-creditcard'\n", "\n", "experiment=Experiment(ws, experiment_name)\n", "\n", @@ -103,7 +101,6 @@ "output['Workspace'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Experiment Name'] = experiment.name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -164,20 +161,7 @@ "source": [ "# Data\n", "\n", - "Here load the data in the get_data script to be utilized in azure compute. To do this, first load all the necessary libraries and dependencies to set up paths for the data and to create the conda_run_config." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if not os.path.isdir('data'):\n", - " os.mkdir('data')\n", - " \n", - "if not os.path.exists(project_folder):\n", - " os.makedirs(project_folder)" + "Create a run configuration for the remote run." ] }, { @@ -207,7 +191,7 @@ "source": [ "### Load Data\n", "\n", - "Here create the script to be run in azure compute for loading the data, load the credit card dataset into cards and store the Class column (y) in the y variable and store the remaining data in the x variable. Next split the data using random_split and return X_train and y_train for training the model." + "Load the credit card dataset into X and y. X contains the features, which are inputs to the model. y contains the labels, which are the expected output of the model. Next split the data using random_split and return X_train and y_train for training the model." 
] }, { @@ -241,7 +225,6 @@ "|**n_cross_validations**|Number of cross validation splits.|\n", "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", "|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n", - "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n", "\n", "**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)" ] @@ -270,8 +253,7 @@ "}\n", "\n", "automl_config = AutoMLConfig(task = 'classification',\n", - " debug_log = 'automl_errors_20190417.log',\n", - " path = project_folder,\n", + " debug_log = 'automl_errors.log',\n", " run_configuration=conda_run_config,\n", " X = X_train,\n", " y = y_train,\n", diff --git a/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb b/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb index 3dd3b13f..930fb4f1 100644 --- a/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb @@ -92,8 +92,6 @@ "\n", "# choose a name for experiment\n", "experiment_name = 'automl-classification-deployment'\n", - "# project folder\n", - "project_folder = './sample_projects/automl-classification-deployment'\n", "\n", "experiment=Experiment(ws, experiment_name)\n", "\n", @@ -103,7 +101,6 @@ "output['Workspace'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Experiment Name'] = experiment.name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -126,8 +123,7 @@ "|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n", "|**n_cross_validations**|Number of cross validation splits.|\n", "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", - "|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n", - "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. 
You can specify a new empty folder.|" + "|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|" ] }, { @@ -148,8 +144,7 @@ " iterations = 10,\n", " verbosity = logging.INFO,\n", " X = X_train, \n", - " y = y_train,\n", - " path = project_folder)" + " y = y_train)" ] }, { diff --git a/how-to-use-azureml/automated-machine-learning/classification-with-onnx/auto-ml-classification-with-onnx.ipynb b/how-to-use-azureml/automated-machine-learning/classification-with-onnx/auto-ml-classification-with-onnx.ipynb index 63b71c31..464e4e9d 100644 --- a/how-to-use-azureml/automated-machine-learning/classification-with-onnx/auto-ml-classification-with-onnx.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification-with-onnx/auto-ml-classification-with-onnx.ipynb @@ -89,9 +89,8 @@ "source": [ "ws = Workspace.from_config()\n", "\n", - "# Choose a name for the experiment and specify the project folder.\n", + "# Choose a name for the experiment.\n", "experiment_name = 'automl-classification-onnx'\n", - "project_folder = './sample_projects/automl-classification-onnx'\n", "\n", "experiment = Experiment(ws, experiment_name)\n", "\n", @@ -101,7 +100,6 @@ "output['Workspace Name'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Experiment Name'] = experiment.name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -127,9 +125,7 @@ "X_train, X_test, y_train, y_test = train_test_split(iris.data, \n", " iris.target, \n", " test_size=0.2, \n", - " random_state=0)\n", - "\n", - "\n" + " random_state=0)" ] }, { @@ -170,8 +166,7 @@ "|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n", "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", "|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n", - "|**enable_onnx_compatible_models**|Enable the ONNX compatible models in the experiment.|\n", - "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. 
You can specify a new empty folder.|" + "|**enable_onnx_compatible_models**|Enable the ONNX compatible models in the experiment.|" ] }, { @@ -196,8 +191,7 @@ " X = X_train, \n", " y = y_train,\n", " preprocess=True,\n", - " enable_onnx_compatible_models=True,\n", - " path = project_folder)" + " enable_onnx_compatible_models=True)" ] }, { diff --git a/how-to-use-azureml/automated-machine-learning/classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb b/how-to-use-azureml/automated-machine-learning/classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb index 59d6d57e..2f841c92 100644 --- a/how-to-use-azureml/automated-machine-learning/classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb @@ -100,9 +100,8 @@ "source": [ "ws = Workspace.from_config()\n", "\n", - "# Choose a name for the experiment and specify the project folder.\n", + "# Choose a name for the experiment.\n", "experiment_name = 'automl-local-whitelist'\n", - "project_folder = './sample_projects/automl-local-whitelist'\n", "\n", "experiment = Experiment(ws, experiment_name)\n", "\n", @@ -112,7 +111,6 @@ "output['Workspace Name'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Experiment Name'] = experiment.name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -158,7 +156,6 @@ "|**n_cross_validations**|Number of cross validation splits.|\n", "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", "|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n", - "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n", "|**whitelist_models**|List of models that AutoML should use. 
The possible values are listed [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings).|" ] }, @@ -177,8 +174,7 @@ " X = X_train, \n", " y = y_train,\n", " enable_tf=True,\n", - " whitelist_models=whitelist_models,\n", - " path = project_folder)" + " whitelist_models=whitelist_models)" ] }, { diff --git a/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb b/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb index 38a02b26..34a6bf37 100644 --- a/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb @@ -113,9 +113,8 @@ "source": [ "ws = Workspace.from_config()\n", "\n", - "# Choose a name for the experiment and specify the project folder.\n", + "# Choose a name for the experiment.\n", "experiment_name = 'automl-classification'\n", - "project_folder = './sample_projects/automl-classification'\n", "\n", "experiment = Experiment(ws, experiment_name)\n", "\n", @@ -125,7 +124,6 @@ "output['Workspace Name'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Experiment Name'] = experiment.name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", diff --git a/how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.ipynb b/how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.ipynb index 39742e9b..2dd27e1f 100644 --- a/how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.ipynb +++ b/how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.ipynb @@ -87,8 +87,6 @@ "\n", "# choose a name for experiment\n", "experiment_name = 'automl-dataset-remote-bai'\n", - "# project folder\n", - "project_folder = './sample_projects/automl-dataprep-remote-bai'\n", " \n", "experiment = Experiment(ws, experiment_name)\n", " \n", @@ -98,7 +96,6 @@ "output['Workspace Name'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Experiment Name'] = experiment.name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -253,7 +250,6 @@ "source": [ "automl_config = AutoMLConfig(task = 'classification',\n", " debug_log = 'automl_errors.log',\n", - " path = project_folder,\n", " run_configuration=conda_run_config,\n", " X = X,\n", " y = y,\n", diff --git a/how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.ipynb b/how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.ipynb index 03499dad..89ac30d8 100644 --- a/how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.ipynb +++ b/how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.ipynb @@ -87,8 +87,6 @@ " \n", "# choose a name for experiment\n", "experiment_name = 'automl-dataset-local'\n", - "# project folder\n", - "project_folder = './sample_projects/automl-dataset-local'\n", " \n", "experiment = Experiment(ws, experiment_name)\n", " \n", @@ -98,7 +96,6 @@ "output['Workspace Name'] = ws.name\n", 
"output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Experiment Name'] = experiment.name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", diff --git a/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb b/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb index 46d8fdbc..e15f87cd 100644 --- a/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb +++ b/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb @@ -97,8 +97,6 @@ "\n", "# choose a name for the run history container in the workspace\n", "experiment_name = 'automl-bikeshareforecasting'\n", - "# project folder\n", - "project_folder = './sample_projects/automl-local-bikeshareforecasting'\n", "\n", "experiment = Experiment(ws, experiment_name)\n", "\n", @@ -108,7 +106,6 @@ "output['Workspace'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Run History Name'] = experiment_name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -225,7 +222,6 @@ "|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n", "|**n_cross_validations**|Number of cross validation splits.|\n", "|**country_or_region**|The country/region used to generate holiday features. These should be ISO 3166 two-letter country/region codes (i.e. 'US', 'GB').|\n", - "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. \n", "\n", "This notebook uses the blacklist_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blacklist_models list but you may need to increase the iteration_timeout_minutes parameter value to get results." 
] @@ -253,8 +249,7 @@ " iteration_timeout_minutes=5,\n", " X=X_train,\n", " y=y_train,\n", - " n_cross_validations=3, \n", - " path=project_folder,\n", + " n_cross_validations=3,\n", " verbosity=logging.INFO,\n", " **automl_settings)" ] diff --git a/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb b/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb index 958bbd96..bf7764e5 100644 --- a/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb +++ b/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb @@ -93,8 +93,6 @@ "\n", "# choose a name for the run history container in the workspace\n", "experiment_name = 'automl-energydemandforecasting'\n", - "# project folder\n", - "project_folder = './sample_projects/automl-local-energydemandforecasting'\n", "\n", "experiment = Experiment(ws, experiment_name)\n", "\n", @@ -104,7 +102,6 @@ "output['Workspace'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Run History Name'] = experiment_name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -213,8 +210,7 @@ "|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n", "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", "|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n", - "|**n_cross_validations**|Number of cross validation splits. Rolling Origin Validation is used to split time-series in a temporally consistent way.|\n", - "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. " + "|**n_cross_validations**|Number of cross validation splits. Rolling Origin Validation is used to split time-series in a temporally consistent way.|" ] }, { @@ -237,7 +233,6 @@ " X=X_train,\n", " y=y_train,\n", " n_cross_validations=3,\n", - " path=project_folder,\n", " verbosity = logging.INFO,\n", " **time_series_settings)" ] @@ -490,7 +485,6 @@ " X=X_train,\n", " y=y_train,\n", " n_cross_validations=3,\n", - " path=project_folder,\n", " verbosity=logging.INFO,\n", " **time_series_settings_with_lags)" ] @@ -558,7 +552,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### What features matter for the forecast?" + "### What features matter for the forecast?\n", + "The following steps will allow you to compute and visualize engineered feature importance based on your test data for forecasting. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Setup the model explanations for AutoML models\n", + "The *fitted_model* can generate the following which will be used for getting the engineered and raw feature explanations using *automl_setup_model_explanations*:-\n", + "1. Featurized data from train samples/test samples \n", + "2. Gather engineered and raw feature name lists\n", + "3. Find the classes in your labeled column in classification scenarios\n", + "\n", + "The *automl_explainer_setup_obj* contains all the structures from above list. 
" ] }, { @@ -567,14 +575,74 @@ "metadata": {}, "outputs": [], "source": [ - "from azureml.train.automl.automlexplainer import explain_model\n", - "\n", - "# feature names are everything in the transformed data except the target\n", - "features = X_trans_lags.columns[:-1]\n", - "expl = explain_model(fitted_model_lags, X_train.copy(), X_test.copy(), features=features, best_run=best_run_lags, y_train=y_train)\n", - "# unpack the tuple\n", - "shap_values, expected_values, feat_overall_imp, feat_names, per_class_summary, per_class_imp = expl\n", - "best_run_lags" + "from azureml.train.automl.automl_explain_utilities import AutoMLExplainerSetupClass, automl_setup_model_explanations\n", + "automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, X=X_train.copy(), \n", + " X_test=X_test.copy(), y=y_train, \n", + " task='forecasting')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Initialize the Mimic Explainer for feature importance\n", + "For explaining the AutoML models, use the *MimicWrapper* from *azureml.explain.model* package. The *MimicWrapper* can be initialized with fields in *automl_explainer_setup_obj*, your workspace and a LightGBM model which acts as a surrogate model to explain the AutoML model (*fitted_model* here). The *MimicWrapper* also takes the *best_run* object where the raw and engineered explanations will be uploaded." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel\n", + "from azureml.explain.model.mimic_wrapper import MimicWrapper\n", + "explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator, LGBMExplainableModel, \n", + " init_dataset=automl_explainer_setup_obj.X_transform, run=best_run,\n", + " features=automl_explainer_setup_obj.engineered_feature_names, \n", + " feature_maps=[automl_explainer_setup_obj.feature_map])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Use Mimic Explainer for computing and visualizing engineered feature importance\n", + "The *explain()* method in *MimicWrapper* can be called with the transformed test samples to get the feature importance for the generated engineered features. You can also use *ExplanationDashboard* to view the dash board visualization of the feature importance values of the generated engineered features by AutoML featurizers." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "engineered_explanations = explainer.explain(['local', 'global'], eval_dataset=automl_explainer_setup_obj.X_test_transform)\n", + "print(engineered_explanations.get_feature_importance_dict())\n", + "from azureml.contrib.explain.model.visualize import ExplanationDashboard\n", + "ExplanationDashboard(engineered_explanations, automl_explainer_setup_obj.automl_estimator, automl_explainer_setup_obj.X_test_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Use Mimic Explainer for computing and visualizing raw feature importance\n", + "The *explain()* method in *MimicWrapper* can be again called with the transformed test samples and setting *get_raw* to *True* to get the feature importance for the raw features. You can also use *ExplanationDashboard* to view the dash board visualization of the feature importance values of the raw features." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "raw_explanations = explainer.explain(['local', 'global'], get_raw=True, \n", + " raw_feature_names=automl_explainer_setup_obj.raw_feature_names,\n", + " eval_dataset=automl_explainer_setup_obj.X_test_transform)\n", + "print(raw_explanations.get_feature_importance_dict())\n", + "from azureml.contrib.explain.model.visualize import ExplanationDashboard\n", + "ExplanationDashboard(raw_explanations, automl_explainer_setup_obj.automl_pipeline, automl_explainer_setup_obj.X_test_raw)" ] }, { diff --git a/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.yml b/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.yml index 5a2fda3d..693b5f4d 100644 --- a/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.yml +++ b/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.yml @@ -8,3 +8,4 @@ dependencies: - pandas_ml - statsmodels - azureml-explain-model + - azureml-contrib-explain-model diff --git a/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb b/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb index 23c13fc9..eec96f7f 100644 --- a/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb +++ b/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb @@ -89,8 +89,6 @@ "\n", "# choose a name for the run history container in the workspace\n", "experiment_name = 'automl-ojforecasting'\n", - "# project folder\n", - "project_folder = './sample_projects/automl-local-ojforecasting'\n", "\n", "experiment = Experiment(ws, experiment_name)\n", "\n", @@ -100,7 +98,6 @@ "output['Workspace'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Run History Name'] = experiment_name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -247,7 +244,6 @@ "|**enable_voting_ensemble**|Allow AutoML to create a Voting ensemble of the best performing models\n", "|**enable_stack_ensemble**|Allow AutoML to create a Stack ensemble of the best performing models\n", "|**debug_log**|Log file path for writing debugging information\n", - "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. 
You can specify a new empty folder.|\n", "|**time_column_name**|Name of the datetime column in the input data|\n", "|**grain_column_names**|Name(s) of the columns defining individual series in the input data|\n", "|**drop_column_names**|Name(s) of columns to drop prior to modeling|\n", @@ -276,7 +272,6 @@ " n_cross_validations=3,\n", " enable_voting_ensemble=False,\n", " enable_stack_ensemble=False,\n", - " path=project_folder,\n", " verbosity=logging.INFO,\n", " **time_series_settings)" ] diff --git a/how-to-use-azureml/automated-machine-learning/missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb b/how-to-use-azureml/automated-machine-learning/missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb index d407d90f..2fee05c3 100644 --- a/how-to-use-azureml/automated-machine-learning/missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb +++ b/how-to-use-azureml/automated-machine-learning/missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb @@ -93,7 +93,6 @@ "\n", "# Choose a name for the experiment.\n", "experiment_name = 'automl-local-missing-data'\n", - "project_folder = './sample_projects/automl-local-missing-data'\n", "\n", "experiment = Experiment(ws, experiment_name)\n", "\n", @@ -103,7 +102,6 @@ "output['Workspace'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Experiment Name'] = experiment.name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -166,8 +164,7 @@ "|**experiment_exit_score**|*double* value indicating the target for *primary_metric*.
Once the target is surpassed the run terminates.|\n", "|**blacklist_models**|*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.<br><br>Allowed values for **Classification**<br>LogisticRegression<br>SGD<br>MultinomialNaiveBayes<br>BernoulliNaiveBayes<br>SVM<br>LinearSVM<br>KNN<br>DecisionTree<br>RandomForest<br>ExtremeRandomTrees<br>LightGBM<br>GradientBoosting<br>TensorFlowDNN<br>TensorFlowLinearClassifier<br><br>Allowed values for **Regression**<br>ElasticNet<br>GradientBoosting<br>DecisionTree<br>KNN<br>LassoLars<br>SGD<br>RandomForest<br>ExtremeRandomTrees<br>LightGBM<br>TensorFlowLinearRegressor<br>
TensorFlowDNN|\n", "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", - "|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n", - "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|" + "|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|" ] }, { @@ -186,8 +183,7 @@ " blacklist_models = ['KNN','LinearSVM'],\n", " verbosity = logging.INFO,\n", " X = X_train, \n", - " y = y_train,\n", - " path = project_folder)" + " y = y_train)" ] }, { diff --git a/how-to-use-azureml/automated-machine-learning/model-explanation/auto-ml-model-explanation.ipynb b/how-to-use-azureml/automated-machine-learning/model-explanation/auto-ml-model-explanation.ipynb index fff6cc0d..58d00ff6 100644 --- a/how-to-use-azureml/automated-machine-learning/model-explanation/auto-ml-model-explanation.ipynb +++ b/how-to-use-azureml/automated-machine-learning/model-explanation/auto-ml-model-explanation.ipynb @@ -69,7 +69,8 @@ "import azureml.core\n", "from azureml.core.experiment import Experiment\n", "from azureml.core.workspace import Workspace\n", - "from azureml.train.automl import AutoMLConfig" + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.core.dataset import Dataset" ] }, { @@ -107,29 +108,42 @@ "## Data" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training Data" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "from sklearn import datasets\n", - "\n", - "iris = datasets.load_iris()\n", - "y = iris.target\n", - "X = iris.data\n", - "\n", - "features = iris.feature_names\n", - "\n", - "from sklearn.model_selection import train_test_split\n", - "X_train, X_test, y_train, y_test = train_test_split(X,\n", - " y,\n", - " test_size=0.1,\n", - " random_state=100,\n", - " stratify=y)\n", - "\n", - "X_train = pd.DataFrame(X_train, columns=features)\n", - "X_test = pd.DataFrame(X_test, columns=features)" + "train_data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv\"\n", + "train_dataset = Dataset.Tabular.from_delimited_files(train_data)\n", + "X_train = train_dataset.drop_columns(columns=['y']).to_pandas_dataframe()\n", + "y_train = train_dataset.keep_columns(columns=['y'], validate=True).to_pandas_dataframe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_validate.csv\"\n", + "test_dataset = Dataset.Tabular.from_delimited_files(test_data)\n", + "X_test = test_dataset.drop_columns(columns=['y']).to_pandas_dataframe()\n", + "y_test = test_dataset.keep_columns(columns=['y'], validate=True).to_pandas_dataframe()" ] }, { @@ -148,8 +162,6 @@ "|**iterations**|Number of iterations. 
In each iteration Auto ML trains the data with a specific pipeline|\n", "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", "|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n", - "|**X_valid**|(sparse) array-like, shape = [n_samples, n_features]|\n", - "|**y_valid**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n", "|**model_explainability**|Indicate to explain each trained pipeline or not |\n", "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. |" ] @@ -166,10 +178,10 @@ " iteration_timeout_minutes = 200,\n", " iterations = 10,\n", " verbosity = logging.INFO,\n", + " preprocess = True,\n", " X = X_train, \n", " y = y_train,\n", - " X_valid = X_test,\n", - " y_valid = y_test,\n", + " n_cross_validations = 5,\n", " model_explainability=True,\n", " path=project_folder)" ] @@ -197,7 +209,7 @@ "metadata": {}, "outputs": [], "source": [ - "local_run" + "best_run, fitted_model = local_run.get_output()" ] }, { @@ -302,19 +314,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Beside retrieve the existed model explanation information, explain the model with different train/test data" + "### Computing model explanations and visualizing the explanations using azureml-explain-model package\n", + "Beside retrieve the existed model explanation information, explain the model with different train/test data. The following steps will allow you to compute and visualize engineered feature importance and raw feature importance based on your test data. " ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "from azureml.train.automl.automlexplainer import explain_model\n", + "#### Setup the model explanations for AutoML models\n", + "The *fitted_model* can generate the following which will be used for getting the engineered and raw feature explanations using *automl_setup_model_explanations*:-\n", + "1. Featurized data from train samples/test samples \n", + "2. Gather engineered and raw feature name lists\n", + "3. Find the classes in your labeled column in classification scenarios\n", "\n", - "shap_values, expected_values, overall_summary, overall_imp, per_class_summary, per_class_imp = \\\n", - " explain_model(fitted_model, X_train, X_test, features=features)" + "The *automl_explainer_setup_obj* contains all the structures from above list. " ] }, { @@ -323,8 +337,116 @@ "metadata": {}, "outputs": [], "source": [ - "print(overall_summary)\n", - "print(overall_imp)" + "from azureml.train.automl.automl_explain_utilities import AutoMLExplainerSetupClass, automl_setup_model_explanations\n", + "\n", + "automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, X=X_train, \n", + " X_test=X_test, y=y_train, \n", + " task='classification')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Initialize the Mimic Explainer for feature importance\n", + "For explaining the AutoML models, use the *MimicWrapper* from *azureml.explain.model* package. The *MimicWrapper* can be initialized with fields in *automl_explainer_setup_obj*, your workspace and a LightGBM model which acts as a surrogate model to explain the AutoML model (*fitted_model* here). The *MimicWrapper* also takes the *best_run* object where the raw and engineered explanations will be uploaded." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel\n", + "from azureml.explain.model.mimic_wrapper import MimicWrapper\n", + "explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator, LGBMExplainableModel, \n", + " init_dataset=automl_explainer_setup_obj.X_transform, run=best_run,\n", + " features=automl_explainer_setup_obj.engineered_feature_names, \n", + " feature_maps=[automl_explainer_setup_obj.feature_map],\n", + " classes=automl_explainer_setup_obj.classes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Use Mimic Explainer for computing and visualizing engineered feature importance\n", + "The *explain()* method in *MimicWrapper* can be called with the transformed test samples to get the feature importance for the generated engineered features. You can also use *ExplanationDashboard* to view the dash board visualization of the feature importance values of the generated engineered features by AutoML featurizers." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "engineered_explanations = explainer.explain(['local', 'global'], eval_dataset=automl_explainer_setup_obj.X_test_transform)\n", + "print(engineered_explanations.get_feature_importance_dict())\n", + "from azureml.contrib.explain.model.visualize import ExplanationDashboard\n", + "ExplanationDashboard(engineered_explanations, automl_explainer_setup_obj.automl_estimator, automl_explainer_setup_obj.X_test_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Use Mimic Explainer for computing and visualizing raw feature importance\n", + "The *explain()* method in *MimicWrapper* can be again called with the transformed test samples and setting *get_raw* to *True* to get the feature importance for the raw features. You can also use *ExplanationDashboard* to view the dash board visualization of the feature importance values of the raw features." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "raw_explanations = explainer.explain(['local', 'global'], get_raw=True, \n", + " raw_feature_names=automl_explainer_setup_obj.raw_feature_names,\n", + " eval_dataset=automl_explainer_setup_obj.X_test_transform)\n", + "print(raw_explanations.get_feature_importance_dict())\n", + "from azureml.contrib.explain.model.visualize import ExplanationDashboard\n", + "ExplanationDashboard(raw_explanations, automl_explainer_setup_obj.automl_pipeline, automl_explainer_setup_obj.X_test_raw)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download engineered feature importance from artifact store\n", + "You can use *ExplanationClient* to download the engineered feature explanations from the artifact store of the *best_run*." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.explain.model._internal.explanation_client import ExplanationClient\n", + "client = ExplanationClient.from_run(best_run)\n", + "engineered_explanations = client.download_model_explanation(raw=False)\n", + "print(engineered_explanations.get_feature_importance_dict())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download raw feature importance from artifact store\n", + "You can use *ExplanationClient* to download the raw feature explanations from the artifact store of the *best_run*." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.explain.model._internal.explanation_client import ExplanationClient\n", + "client = ExplanationClient.from_run(best_run)\n", + "raw_explanations = client.download_model_explanation(raw=True)\n", + "print(raw_explanations.get_feature_importance_dict())" ] } ], diff --git a/how-to-use-azureml/automated-machine-learning/model-explanation/auto-ml-model-explanation.yml b/how-to-use-azureml/automated-machine-learning/model-explanation/auto-ml-model-explanation.yml index 1c4e89af..2d0c7623 100644 --- a/how-to-use-azureml/automated-machine-learning/model-explanation/auto-ml-model-explanation.yml +++ b/how-to-use-azureml/automated-machine-learning/model-explanation/auto-ml-model-explanation.yml @@ -7,3 +7,4 @@ dependencies: - matplotlib - pandas_ml - azureml-explain-model + - azureml-contrib-explain-model diff --git a/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb b/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb index bdf37d20..832902ae 100644 --- a/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb +++ b/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb @@ -87,9 +87,8 @@ "source": [ "ws = Workspace.from_config()\n", "\n", - "# Choose a name for the experiment and specify the project folder.\n", + "# Choose a name for the experiment.\n", "experiment_name = 'automl-regression-concrete'\n", - "project_folder = './sample_projects/automl-regression-concrete'\n", "\n", "experiment = Experiment(ws, experiment_name)\n", "\n", @@ -99,7 +98,6 @@ "output['Workspace Name'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Experiment Name'] = experiment.name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -160,20 +158,7 @@ "source": [ "# Data\n", "\n", - "Here load the data in the get_data script to be utilized in azure compute. To do this, first load all the necessary libraries and dependencies to set up paths for the data and to create the conda_run_config." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if not os.path.isdir('data'):\n", - " os.mkdir('data')\n", - " \n", - "if not os.path.exists(project_folder):\n", - " os.makedirs(project_folder)" + "Create a run configuration for the remote run." 
] }, { @@ -203,7 +188,7 @@ "source": [ "### Load Data\n", "\n", - "Here create the script to be run in azure compute for loading the data, load the concrete strength dataset into the X and y variables. Next, split the data using random_split and return X_train and y_train for training the model. Finally, return X_train and y_train for training the model." + "Load the concrete strength dataset into X and y. X contains the training features, which are inputs to the model. y contains the training labels, which are the expected output of the model." ] }, { @@ -238,7 +223,6 @@ "|**n_cross_validations**|Number of cross validation splits.|\n", "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", "|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n", - "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n", "\n", "**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)" ] @@ -268,7 +252,6 @@ "\n", "automl_config = AutoMLConfig(task = 'regression',\n", " debug_log = 'automl.log',\n", - " path = project_folder,\n", " run_configuration=conda_run_config,\n", " X = X_train,\n", " y = y_train,\n", diff --git a/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb b/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb index 84d88ed4..13d7581a 100644 --- a/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb +++ b/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb @@ -87,9 +87,8 @@ "source": [ "ws = Workspace.from_config()\n", "\n", - "# Choose a name for the experiment and specify the project folder.\n", + "# Choose a name for the experiment.\n", "experiment_name = 'automl-regression-hardware'\n", - "project_folder = './sample_projects/automl-remote-regression'\n", "\n", "experiment = Experiment(ws, experiment_name)\n", "\n", @@ -99,7 +98,6 @@ "output['Workspace Name'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Experiment Name'] = experiment.name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -160,20 +158,7 @@ "source": [ "# Data\n", "\n", - "Here load the data in the get_data script to be utilized in azure compute. To do this, first load all the necessary libraries and dependencies to set up paths for the data and to create the conda_run_config." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if not os.path.isdir('data'):\n", - " os.mkdir('data')\n", - " \n", - "if not os.path.exists(project_folder):\n", - " os.makedirs(project_folder)" + "Create a run configuration for the remote run." ] }, { @@ -203,7 +188,7 @@ "source": [ "### Load Data\n", "\n", - "Here create the script to be run in azure compute for loading the data, load the hardware dataset into the X and y variables. Next split the data using random_split and return X_train and y_train for training the model." + "Load the hardware performance dataset into X and y. 
X contains the training features, which are inputs to the model. y contains the training labels, which are the expected output of the model." ] }, { @@ -239,7 +224,6 @@ "|**n_cross_validations**|Number of cross validation splits.|\n", "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", "|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n", - "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n", "\n", "**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)" ] @@ -268,8 +252,7 @@ "}\n", "\n", "automl_config = AutoMLConfig(task = 'regression',\n", - " debug_log = 'automl_errors_20190417.log',\n", - " path = project_folder,\n", + " debug_log = 'automl_errors.log',\n", " run_configuration=conda_run_config,\n", " X = X_train,\n", " y = y_train,\n", diff --git a/how-to-use-azureml/automated-machine-learning/regression/auto-ml-regression.ipynb b/how-to-use-azureml/automated-machine-learning/regression/auto-ml-regression.ipynb index 804e8ff7..56b14d9e 100644 --- a/how-to-use-azureml/automated-machine-learning/regression/auto-ml-regression.ipynb +++ b/how-to-use-azureml/automated-machine-learning/regression/auto-ml-regression.ipynb @@ -84,9 +84,8 @@ "source": [ "ws = Workspace.from_config()\n", "\n", - "# Choose a name for the experiment and specify the project folder.\n", + "# Choose a name for the experiment.\n", "experiment_name = 'automl-local-regression'\n", - "project_folder = './sample_projects/automl-local-regression'\n", "\n", "experiment = Experiment(ws, experiment_name)\n", "\n", @@ -96,7 +95,6 @@ "output['Workspace Name'] = ws.name\n", "output['Resource Group'] = ws.resource_group\n", "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", "output['Experiment Name'] = experiment.name\n", "pd.set_option('display.max_colwidth', -1)\n", "outputDf = pd.DataFrame(data = output, index = [''])\n", @@ -144,8 +142,7 @@ "|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n", "|**n_cross_validations**|Number of cross validation splits.|\n", "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", - "|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n", - "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. 
You can specify a new empty folder.|" + "|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|" ] }, { @@ -162,8 +159,7 @@ " debug_log = 'automl.log',\n", " verbosity = logging.INFO,\n", " X = X_train, \n", - " y = y_train,\n", - " path = project_folder)" + " y = y_train)" ] }, { diff --git a/how-to-use-azureml/deployment/accelerated-models/accelerated-models-quickstart.ipynb b/how-to-use-azureml/deployment/accelerated-models/accelerated-models-quickstart.ipynb index 0f7f20b4..fdac2adf 100644 --- a/how-to-use-azureml/deployment/accelerated-models/accelerated-models-quickstart.ipynb +++ b/how-to-use-azureml/deployment/accelerated-models/accelerated-models-quickstart.ipynb @@ -543,7 +543,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.6" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb index 2760c29f..fbec37b1 100644 --- a/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb +++ b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb @@ -70,23 +70,11 @@ "If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration notebook](../../../configuration.ipynb) first if you haven't.\n", "\n", "\n", - "You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n", - "```\n", - "(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "```\n", - "Or\n", - "\n", - "```\n", - "(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n", - "(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n", - "```\n", - "\n", - "If you are using Jupyter Labs run the following commands instead:\n", + "If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n", + "If you are using Jupyter Labs run the following command:\n", "```\n", "(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n", - "(myenv) $ jupyter labextension install microsoft-mli-widget\n", - "```" + "```\n" ] }, { @@ -634,7 +622,7 @@ "# retrieve model for visualization and deployment\n", "from azureml.core.model import Model\n", "from sklearn.externals import joblib\n", - "original_model = Model(ws, 'original_model')\n", + "original_model = Model(ws, 'model_explain_model_on_amlcomp')\n", "model_path = original_model.download(exist_ok=True)\n", "original_model = joblib.load(model_path)" ] diff --git a/how-to-use-azureml/explain-model/azure-integration/remote-explanation/train_explain.py b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/train_explain.py index beefff8e..c38839cc 100644 --- a/how-to-use-azureml/explain-model/azure-integration/remote-explanation/train_explain.py +++ b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/train_explain.py @@ -46,7 +46,8 @@ with open(model_file_name, 'wb') as file: # register the model run.upload_file('original_model.pkl', os.path.join('./outputs/', model_file_name)) -original_model = 
run.register_model(model_name='original_model', model_path='original_model.pkl') +original_model = run.register_model(model_name='model_explain_model_on_amlcomp', + model_path='original_model.pkl') # Explain predictions on your local machine tabular_explainer = TabularExplainer(model, X_train, features=boston_data.feature_names) diff --git a/how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.ipynb b/how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.ipynb index 485f8e71..d8fbce8a 100644 --- a/how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.ipynb +++ b/how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.ipynb @@ -60,25 +60,11 @@ "2. Run 'explain_model' with AML Run History, which leverages run history service to store and manage the explanation data\n", "---\n", "\n", - "## Setup\n", - "\n", - "You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n", - "```\n", - "(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "```\n", - "Or\n", - "\n", - "```\n", - "(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n", - "(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n", - "```\n", - "\n", - "If you are using Jupyter Labs run the following commands instead:\n", + "Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n", + "If you are using Jupyter Labs run the following command:\n", "```\n", "(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n", - "(myenv) $ jupyter labextension install microsoft-mli-widget\n", - "```" + "```\n" ] }, { diff --git a/how-to-use-azureml/explain-model/azure-integration/scoring-time/score_local_explain.py b/how-to-use-azureml/explain-model/azure-integration/scoring-time/score_local_explain.py new file mode 100644 index 00000000..c102f909 --- /dev/null +++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/score_local_explain.py @@ -0,0 +1,33 @@ +import json +import numpy as np +import pandas as pd +import os +import pickle +from sklearn.externals import joblib +from sklearn.linear_model import LogisticRegression +from azureml.core.model import Model + + +def init(): + + global original_model + global scoring_explainer + + # Retrieve the path to the model file using the model name + # Assume original model is named original_prediction_model + original_model_path = Model.get_model_path('local_deploy_model') + scoring_explainer_path = Model.get_model_path('IBM_attrition_explainer') + + original_model = joblib.load(original_model_path) + scoring_explainer = joblib.load(scoring_explainer_path) + + +def run(raw_data): + # Get predictions and explanations for each data point + data = pd.read_json(raw_data) + # Make prediction + predictions = original_model.predict(data) + # Retrieve model explanations + local_importance_values = scoring_explainer.explain(data) + # You can return any data type as long as it is JSON-serializable + return {'predictions': predictions.tolist(), 'local_importance_values': local_importance_values} diff --git 
a/how-to-use-azureml/explain-model/azure-integration/scoring-time/score_remote_explain.py b/how-to-use-azureml/explain-model/azure-integration/scoring-time/score_remote_explain.py new file mode 100644 index 00000000..7ffc21b3 --- /dev/null +++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/score_remote_explain.py @@ -0,0 +1,33 @@ +import json +import numpy as np +import pandas as pd +import os +import pickle +from sklearn.externals import joblib +from sklearn.linear_model import LogisticRegression +from azureml.core.model import Model + + +def init(): + + global original_model + global scoring_explainer + + # Retrieve the path to the model file using the model name + # Assume original model is named original_prediction_model + original_model_path = Model.get_model_path('amlcompute_deploy_model') + scoring_explainer_path = Model.get_model_path('IBM_attrition_explainer') + + original_model = joblib.load(original_model_path) + scoring_explainer = joblib.load(scoring_explainer_path) + + +def run(raw_data): + # Get predictions and explanations for each data point + data = pd.read_json(raw_data) + # Make prediction + predictions = original_model.predict(data) + # Retrieve model explanations + local_importance_values = scoring_explainer.explain(data) + # You can return any data type as long as it is JSON-serializable + return {'predictions': predictions.tolist(), 'local_importance_values': local_importance_values} diff --git a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb index 4c7bdd53..37f41b7f 100644 --- a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb +++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb @@ -268,7 +268,8 @@ "\n", "# Register original model\n", "run.upload_file('original_model.pkl', os.path.join('./outputs/', model_file_name))\n", - "original_model = run.register_model(model_name='original_model', model_path='original_model.pkl')\n", + "original_model = run.register_model(model_name='local_deploy_model', \n", + " model_path='original_model.pkl')\n", "\n", "# Register scoring explainer\n", "run.upload_file('IBM_attrition_explainer.pkl', 'scoring_explainer.pkl')\n", @@ -383,7 +384,7 @@ "from azureml.core.image import ContainerImage\n", "\n", "# Use the custom scoring, docker, and conda files we created above\n", - "image_config = ContainerImage.image_configuration(execution_script=\"score.py\",\n", + "image_config = ContainerImage.image_configuration(execution_script=\"score_local_explain.py\",\n", " docker_file=\"dockerfile\", \n", " runtime=\"python\", \n", " conda_file=\"myenv.yml\")\n", diff --git a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb index 4432d4f4..33e5d191 100644 --- a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb +++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb @@ -309,7 +309,7 @@ "# retrieve model for visualization and deployment\n", "from azureml.core.model import Model\n", "from sklearn.externals import joblib\n", - 
"original_model = Model(ws, 'original_model')\n", + "original_model = Model(ws, 'amlcompute_deploy_model')\n", "model_path = original_model.download(exist_ok=True)\n", "original_svm_model = joblib.load(model_path)" ] @@ -447,7 +447,7 @@ "from azureml.core.image import ContainerImage\n", "\n", "# Use the custom scoring, docker, and conda files we created above\n", - "image_config = ContainerImage.image_configuration(execution_script=\"score.py\",\n", + "image_config = ContainerImage.image_configuration(execution_script=\"score_remote_explain.py\",\n", " docker_file=\"dockerfile\", \n", " runtime=\"python\", \n", " conda_file=\"myenv.yml\")\n", diff --git a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train_explain.py b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train_explain.py index f46e5ee7..b8fb1bd8 100644 --- a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train_explain.py +++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train_explain.py @@ -99,7 +99,8 @@ with open(model_file_name, 'wb') as file: # register the model with the model management service for later use run.upload_file('original_model.pkl', os.path.join(OUTPUT_DIR, model_file_name)) -original_model = run.register_model(model_name='original_model', model_path='original_model.pkl') +original_model = run.register_model(model_name='amlcompute_deploy_model', + model_path='original_model.pkl') # create an explainer to validate or debug the model tabular_explainer = TabularExplainer(model, diff --git a/how-to-use-azureml/explain-model/tabular-data/advanced-feature-transformations-explain-local.ipynb b/how-to-use-azureml/explain-model/tabular-data/advanced-feature-transformations-explain-local.ipynb index 3f3e7469..32364bdb 100644 --- a/how-to-use-azureml/explain-model/tabular-data/advanced-feature-transformations-explain-local.ipynb +++ b/how-to-use-azureml/explain-model/tabular-data/advanced-feature-transformations-explain-local.ipynb @@ -62,24 +62,10 @@ "4. 
Visualize the global and local explanations with the visualization dashboard.\n", "---\n", "\n", - "## Setup\n", - "\n", - "You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n", - "```\n", - "(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "```\n", - "Or\n", - "\n", - "```\n", - "(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n", - "(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n", - "```\n", - "\n", - "If you are using Jupyter Labs run the following commands instead:\n", + "Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n", + "If you are using Jupyter Labs run the following command:\n", "```\n", "(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n", - "(myenv) $ jupyter labextension install microsoft-mli-widget\n", "```\n" ] }, diff --git a/how-to-use-azureml/explain-model/tabular-data/explain-binary-classification-local.ipynb b/how-to-use-azureml/explain-model/tabular-data/explain-binary-classification-local.ipynb index 782e348c..ef7003fa 100644 --- a/how-to-use-azureml/explain-model/tabular-data/explain-binary-classification-local.ipynb +++ b/how-to-use-azureml/explain-model/tabular-data/explain-binary-classification-local.ipynb @@ -59,24 +59,10 @@ "3. Visualize the global and local explanations with the visualization dashboard.\n", "---\n", "\n", - "## Setup\n", - "\n", - "You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n", - "```\n", - "(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "```\n", - "Or\n", - "\n", - "```\n", - "(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n", - "(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n", - "```\n", - "\n", - "If you are using Jupyter Labs run the following commands instead:\n", + "Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n", + "If you are using Jupyter Labs run the following command:\n", "```\n", "(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n", - "(myenv) $ jupyter labextension install microsoft-mli-widget\n", "```\n" ] }, diff --git a/how-to-use-azureml/explain-model/tabular-data/explain-multiclass-classification-local.ipynb b/how-to-use-azureml/explain-model/tabular-data/explain-multiclass-classification-local.ipynb index 51f13324..a5d7e7f9 100644 --- a/how-to-use-azureml/explain-model/tabular-data/explain-multiclass-classification-local.ipynb +++ b/how-to-use-azureml/explain-model/tabular-data/explain-multiclass-classification-local.ipynb @@ -60,24 +60,10 @@ "3. 
Visualize the global and local explanations with the visualization dashboard.\n", "---\n", "\n", - "## Setup\n", - "\n", - "You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n", - "```\n", - "(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "```\n", - "Or\n", - "\n", - "```\n", - "(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n", - "(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n", - "```\n", - "\n", - "If you are using Jupyter Labs run the following commands instead:\n", + "Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n", + "If you are using Jupyter Labs run the following command:\n", "```\n", "(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n", - "(myenv) $ jupyter labextension install microsoft-mli-widget\n", "```\n" ] }, diff --git a/how-to-use-azureml/explain-model/tabular-data/explain-regression-local.ipynb b/how-to-use-azureml/explain-model/tabular-data/explain-regression-local.ipynb index da78126f..655c21fe 100644 --- a/how-to-use-azureml/explain-model/tabular-data/explain-regression-local.ipynb +++ b/how-to-use-azureml/explain-model/tabular-data/explain-regression-local.ipynb @@ -59,24 +59,10 @@ "3. Visualize the global and local explanations with the visualization dashboard.\n", "---\n", "\n", - "## Setup\n", - "\n", - "You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n", - "```\n", - "(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "```\n", - "Or\n", - "\n", - "```\n", - "(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n", - "(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n", - "```\n", - "\n", - "If you are using Jupyter Labs run the following commands instead:\n", + "Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n", + "If you are using Jupyter Labs run the following command:\n", "```\n", "(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n", - "(myenv) $ jupyter labextension install microsoft-mli-widget\n", "```\n" ] }, diff --git a/how-to-use-azureml/explain-model/tabular-data/simple-feature-transformations-explain-local.ipynb b/how-to-use-azureml/explain-model/tabular-data/simple-feature-transformations-explain-local.ipynb index a7ddecdd..6e4b280f 100644 --- a/how-to-use-azureml/explain-model/tabular-data/simple-feature-transformations-explain-local.ipynb +++ b/how-to-use-azureml/explain-model/tabular-data/simple-feature-transformations-explain-local.ipynb @@ -61,24 +61,10 @@ "4. 
Visualize the global and local explanations with the visualization dashboard.\n", "---\n", "\n", - "## Setup\n", - "\n", - "You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n", - "```\n", - "(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n", - "```\n", - "Or\n", - "\n", - "```\n", - "(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n", - "(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n", - "```\n", - "\n", - "If you are using Jupyter Labs run the following commands instead:\n", + "Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n", + "If you are using Jupyter Labs run the following command:\n", "```\n", "(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n", - "(myenv) $ jupyter labextension install microsoft-mli-widget\n", "```\n" ] }, diff --git a/how-to-use-azureml/machine-learning-pipelines/README.md b/how-to-use-azureml/machine-learning-pipelines/README.md index 2caedc3c..094b21a1 100644 --- a/how-to-use-azureml/machine-learning-pipelines/README.md +++ b/how-to-use-azureml/machine-learning-pipelines/README.md @@ -36,8 +36,7 @@ Azure Machine Learning Pipelines optimize for simplicity, speed, and efficiency. In this directory, there are two types of notebooks: -* The first type of notebooks will introduce you to core Azure Machine Learning Pipelines features. These notebooks below belong in this category, and are designed to go in sequence; they're all located in the "intro-to-pipelines" folder: -Take a look at [intro-to-pipelines](./intro-to-pipelines/) for the list of notebooks that introduce Azure Machine Learning concepts for you. +* The first type of notebooks will introduce you to core Azure Machine Learning Pipelines features. Notebooks in this category are designed to go in sequence; they're all located in the [intro-to-pipelines](./intro-to-pipelines/) folder. * The second type of notebooks illustrate more sophisticated scenarios, and are independent of each other. These notebooks include: diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/README.md b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/README.md index 8bb46a69..6437e363 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/README.md +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/README.md @@ -15,6 +15,7 @@ These notebooks below are designed to go in sequence. 10. [aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb](https://aka.ms/pl-schedule): Once you publish a Pipeline, you can schedule it to trigger based on an interval or on data change in a defined datastore. 11. [aml-pipelines-with-automated-machine-learning-step.ipynb](https://aka.ms/pl-automl): AutoMLStep in Pipelines shows how you can do automated machine learning using Pipelines. 12. [aml-pipelines-setup-versioned-pipeline-endpoints.ipynb](https://aka.ms/pl-ver-endpoint): This notebook shows how you can setup PipelineEndpoint and submit a Pipeline using the PipelineEndpoint. - +13. [aml-pipelines-showcasing-datapath-and-pipelineparameter.ipynb](https://aka.ms/pl-datapath): This notebook showcases how to use DataPath and PipelineParameter in AML Pipeline. +14. 
[aml-pipelines-how-to-use-pipeline-drafts.ipynb](http://aka.ms/pl-pl-draft): This notebook shows how to use Pipeline Drafts. Pipeline Drafts are mutable pipelines which can be used to submit runs and create Published Pipelines. ![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/README.png) diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb index a0f413dc..2b5139e0 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb @@ -333,7 +333,7 @@ " policy=early_termination_policy,\n", " primary_metric_name='validation_acc', \n", " primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, \n", - " max_total_runs=10,\n", + " max_total_runs=4,\n", " max_concurrent_runs=4)" ] }, @@ -441,8 +441,7 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# pipeline_run.wait_for_completion()" + "pipeline_run.wait_for_completion()" ] }, { @@ -459,9 +458,8 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)\n", - "# num_file_downloaded = metrics_output.download('.', show_progress=True)" + "metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)\n", + "num_file_downloaded = metrics_output.download('.', show_progress=True)" ] }, { @@ -470,15 +468,14 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# import pandas as pd\n", - "# import json\n", - "# with open(metrics_output._path_on_datastore) as f: \n", - "# metrics_output_result = f.read()\n", + "import pandas as pd\n", + "import json\n", + "with open(metrics_output._path_on_datastore) as f: \n", + " metrics_output_result = f.read()\n", " \n", - "# deserialized_metrics_output = json.loads(metrics_output_result)\n", - "# df = pd.DataFrame(deserialized_metrics_output)\n", - "# df" + "deserialized_metrics_output = json.loads(metrics_output_result)\n", + "df = pd.DataFrame(deserialized_metrics_output)\n", + "df" ] }, { @@ -495,10 +492,9 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# hd_step_run = HyperDriveStepRun(step_run=pipeline_run.find_step_run(hd_step_name)[0])\n", - "# best_run = hd_step_run.get_best_run_by_primary_metric()\n", - "# best_run" + "hd_step_run = HyperDriveStepRun(step_run=pipeline_run.find_step_run(hd_step_name)[0])\n", + "best_run = hd_step_run.get_best_run_by_primary_metric()\n", + "best_run" ] }, { @@ -514,8 +510,7 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# print(best_run.get_file_names())" + "print(best_run.get_file_names())" ] }, { @@ -531,8 +526,7 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# model = best_run.register_model(model_name='tf-dnn-mnist', model_path='outputs/model')" + "model = best_run.register_model(model_name='tf-dnn-mnist', model_path='outputs/model')" ] }, { @@ -596,15 +590,14 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# from azureml.core.runconfig import CondaDependencies\n", + "from azureml.core.runconfig import CondaDependencies\n", "\n", - "# cd = 
CondaDependencies.create()\n", - "# cd.add_conda_package('numpy')\n", - "# cd.add_tensorflow_conda_package()\n", - "# cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n", + "cd = CondaDependencies.create()\n", + "cd.add_conda_package('numpy')\n", + "cd.add_tensorflow_conda_package()\n", + "cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n", "\n", - "# print(cd.serialize_to_string())" + "print(cd.serialize_to_string())" ] }, { @@ -621,13 +614,12 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# from azureml.core.webservice import AciWebservice\n", + "from azureml.core.webservice import AciWebservice\n", "\n", - "# aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n", - "# memory_gb=1, \n", - "# tags={'name':'mnist', 'framework': 'TensorFlow DNN'},\n", - "# description='Tensorflow DNN on MNIST')" + "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n", + " memory_gb=1, \n", + " tags={'name':'mnist', 'framework': 'TensorFlow DNN'},\n", + " description='Tensorflow DNN on MNIST')" ] }, { @@ -652,12 +644,11 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# from azureml.core.image import ContainerImage\n", + "from azureml.core.image import ContainerImage\n", "\n", - "# imgconfig = ContainerImage.image_configuration(execution_script=\"score.py\", \n", - "# runtime=\"python\", \n", - "# conda_file=\"myenv.yml\")" + "imgconfig = ContainerImage.image_configuration(execution_script=\"score.py\", \n", + " runtime=\"python\", \n", + " conda_file=\"myenv.yml\")" ] }, { @@ -666,17 +657,16 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# %%time\n", - "# from azureml.core.webservice import Webservice\n", + "%%time\n", + "from azureml.core.webservice import Webservice\n", "\n", - "# service = Webservice.deploy_from_model(workspace=ws,\n", - "# name='tf-mnist-svc',\n", - "# deployment_config=aciconfig,\n", - "# models=[model],\n", - "# image_config=imgconfig)\n", + "service = Webservice.deploy_from_model(workspace=ws,\n", + " name='tf-mnist-svc',\n", + " deployment_config=aciconfig,\n", + " models=[model],\n", + " image_config=imgconfig)\n", "\n", - "# service.wait_for_deployment(show_output=True)" + "service.wait_for_deployment(show_output=True)" ] }, { @@ -692,8 +682,7 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# print(service.get_logs())" + "print(service.get_logs())" ] }, { @@ -709,8 +698,7 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# print(service.scoring_uri)" + "print(service.scoring_uri)" ] }, { @@ -729,37 +717,36 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# import json\n", + "import json\n", "\n", - "# # find 30 random samples from test set\n", - "# n = 30\n", - "# sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n", + "# find 30 random samples from test set\n", + "n = 30\n", + "sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n", "\n", - "# test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n", - "# test_samples = bytes(test_samples, encoding='utf8')\n", + "test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n", + "test_samples = bytes(test_samples, encoding='utf8')\n", "\n", - "# # predict using the deployed model\n", - "# result = service.run(input_data=test_samples)\n", + "# predict using the deployed model\n", + "result = service.run(input_data=test_samples)\n", "\n", - "# # compare actual value vs. 
the predicted values:\n", - "# i = 0\n", - "# plt.figure(figsize = (20, 1))\n", + "# compare actual value vs. the predicted values:\n", + "i = 0\n", + "plt.figure(figsize = (20, 1))\n", "\n", - "# for s in sample_indices:\n", - "# plt.subplot(1, n, i + 1)\n", - "# plt.axhline('')\n", - "# plt.axvline('')\n", + "for s in sample_indices:\n", + " plt.subplot(1, n, i + 1)\n", + " plt.axhline('')\n", + " plt.axvline('')\n", " \n", - "# # use different color for misclassified sample\n", - "# font_color = 'red' if y_test[s] != result[i] else 'black'\n", - "# clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n", + " # use different color for misclassified sample\n", + " font_color = 'red' if y_test[s] != result[i] else 'black'\n", + " clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n", " \n", - "# plt.text(x=10, y=-10, s=y_hat[s], fontsize=18, color=font_color)\n", - "# plt.imshow(X_test[s].reshape(28, 28), cmap=clr_map)\n", + " plt.text(x=10, y=-10, s=y_hat[s], fontsize=18, color=font_color)\n", + " plt.imshow(X_test[s].reshape(28, 28), cmap=clr_map)\n", " \n", - "# i = i + 1\n", - "# plt.show()" + " i = i + 1\n", + "plt.show()" ] }, { @@ -775,21 +762,20 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# import requests\n", + "import requests\n", "\n", - "# # send a random row from the test set to score\n", - "# random_index = np.random.randint(0, len(X_test)-1)\n", - "# input_data = \"{\\\"data\\\": [\" + str(list(X_test[random_index])) + \"]}\"\n", + "# send a random row from the test set to score\n", + "random_index = np.random.randint(0, len(X_test)-1)\n", + "input_data = \"{\\\"data\\\": [\" + str(list(X_test[random_index])) + \"]}\"\n", "\n", - "# headers = {'Content-Type':'application/json'}\n", + "headers = {'Content-Type':'application/json'}\n", "\n", - "# resp = requests.post(service.scoring_uri, input_data, headers=headers)\n", + "resp = requests.post(service.scoring_uri, input_data, headers=headers)\n", "\n", - "# print(\"POST to url\", service.scoring_uri)\n", - "# print(\"input data:\", input_data)\n", - "# print(\"label:\", y_test[random_index])\n", - "# print(\"prediction:\", resp.text)" + "print(\"POST to url\", service.scoring_uri)\n", + "print(\"input data:\", input_data)\n", + "print(\"label:\", y_test[random_index])\n", + "print(\"prediction:\", resp.text)" ] }, { @@ -808,18 +794,17 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# models = ws.models\n", - "# for name, model in models.items():\n", - "# print(\"Model: {}, ID: {}\".format(name, model.id))\n", + "models = ws.models\n", + "for name, model in models.items():\n", + " print(\"Model: {}, ID: {}\".format(name, model.id))\n", " \n", - "# images = ws.images\n", - "# for name, image in images.items():\n", - "# print(\"Image: {}, location: {}\".format(name, image.image_location))\n", + "images = ws.images\n", + "for name, image in images.items():\n", + " print(\"Image: {}, location: {}\".format(name, image.image_location))\n", " \n", - "# webservices = ws.webservices\n", - "# for name, webservice in webservices.items():\n", - "# print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))" + "webservices = ws.webservices\n", + "for name, webservice in webservices.items():\n", + " print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))" ] }, { @@ -836,15 +821,14 @@ "metadata": {}, "outputs": [], "source": [ - "# PUBLISHONLY\n", - "# service.delete()" + "service.delete()" ] } ], "metadata": { "authors": [ 
{ - "name": "sonnyp" + "name": "sanpil" } ], "kernelspec": { diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.yml b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.yml new file mode 100644 index 00000000..95c1bc95 --- /dev/null +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.yml @@ -0,0 +1,8 @@ +name: aml-pipelines-parameter-tuning-with-hyperdrive +dependencies: +- pip: + - azureml-sdk + - azureml-widgets + - matplotlib + - numpy + - pandas_ml diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-schedule-for-a-published-pipeline.yml b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-schedule-for-a-published-pipeline.yml new file mode 100644 index 00000000..f35bb648 --- /dev/null +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-schedule-for-a-published-pipeline.yml @@ -0,0 +1,5 @@ +name: aml-pipelines-setup-schedule-for-a-published-pipeline +dependencies: +- pip: + - azureml-sdk + - azureml-widgets diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-versioned-pipeline-endpoints.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-versioned-pipeline-endpoints.ipynb index ee0bbb8d..4b956e86 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-versioned-pipeline-endpoints.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-versioned-pipeline-endpoints.ipynb @@ -21,10 +21,10 @@ "source": [ "\n", "# How to Setup a PipelineEndpoint and Submit a Pipeline Using the PipelineEndpoint.\n", - "In this notebook, we will see how to setup a PipelineEndpoint and run specific pipeline version.\n", + "In this notebook, we will see how to setup a PipelineEndpoint and run a specific pipeline version.\n", "\n", - "PipelineEndpoint can be used to update a published pipeline while maintaining same endpoint.\n", - "PipelineEndpoint, provides a way to keep track of [PublishedPipelines](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.publishedpipeline) using versions. PipelineEndpoint uses endpoint with version information to trigger underlying published pipeline. Pipeline endpoints are uniquely named within a workspace. \n" + "PipelineEndpoint can be used to update a published pipeline while maintaining the same endpoint.\n", + "PipelineEndpoint provides a way to keep track of [PublishedPipelines](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.publishedpipeline) using versions. PipelineEndpoint uses endpoint with version information to trigger an underlying published pipeline. Pipeline endpoints are uniquely named within a workspace. 
\n" ] }, { @@ -433,7 +433,7 @@ "metadata": {}, "outputs": [], "source": [ - "pipeline_endpoint_by_name = PipelineEndpoint.get(workspace=ws, name=\"PipelineEndpointTest\")\n", + "pipeline_endpoint_by_name = PipelineEndpoint.get(workspace=ws, name=\"NewName\")\n", "\n", "# endpoint with id \n", "rest_endpoint_id = pipeline_endpoint_by_name.endpoint\n", @@ -515,11 +515,11 @@ "outputs": [], "source": [ "# submit pipeline with specific version\n", - "run_id = pipeline_endpoint_by_name.submit(\"TestPipelineEndpoint\", pipeline_version=\"0\")\n", + "run_id = pipeline_endpoint_by_name.submit(\"NewName\", pipeline_version=\"0\")\n", "print(run_id)\n", "\n", "# submit pipeline with default version\n", - "run_id = pipeline_endpoint_by_name.submit(\"TestPipelineEndpoint\")\n", + "run_id = pipeline_endpoint_by_name.submit(\"NewName\")\n", "print(run_id)" ] } diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-versioned-pipeline-endpoints.yml b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-versioned-pipeline-endpoints.yml new file mode 100644 index 00000000..aae504eb --- /dev/null +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-versioned-pipeline-endpoints.yml @@ -0,0 +1,6 @@ +name: aml-pipelines-setup-versioned-pipeline-endpoints +dependencies: +- pip: + - azureml-sdk + - azureml-widgets + - requests diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-showcasing-datapath-and-pipelineparameter.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-showcasing-datapath-and-pipelineparameter.ipynb new file mode 100644 index 00000000..903362cf --- /dev/null +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-showcasing-datapath-and-pipelineparameter.ipynb @@ -0,0 +1,479 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved. \n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Showcasing DataPath and PipelineParameter\n", + "\n", + "This notebook demonstrateas the use of [**DataPath**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.datapath.datapath?view=azure-ml-py) and [**PipelineParameters**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelineparameter?view=azure-ml-py) in AML Pipeline. 
You will learn how strings and [**DataPath**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.datapath.datapath?view=azure-ml-py) can be parameterized and submitted to AML Pipelines via [**PipelineParameters**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelineparameter?view=azure-ml-py).\n", + "To see more about how parameters work between steps, please refer [aml-pipelines-with-data-dependency-steps](https://aka.ms/pl-data-dep).\n", + "\n", + "* [How to create a Pipeline with a DataPath PipelineParameter](#index1)\n", + "* [How to submit a Pipeline with a DataPath PipelineParameter](#index2)\n", + "* [How to submit a Pipeline and change the DataPath PipelineParameter value from the sdk](#index3)\n", + "* [How to submit a Pipeline and change the DataPath PipelineParameter value using a REST call](#index4)\n", + "* [How to create a datastore trigger schedule and use the data_path_parameter_name to get the path of the changed blob in the Pipeline](#index5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Azure Machine Learning and Pipeline SDK-specific imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import azureml.core\n", + "from azureml.core import Workspace, Experiment\n", + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.data.datapath import DataPath, DataPathComputeBinding\n", + "from azureml.widgets import RunDetails\n", + "\n", + "from azureml.pipeline.core import PipelineParameter\n", + "from azureml.pipeline.core import Pipeline, PipelineRun\n", + "from azureml.pipeline.steps import PythonScriptStep\n", + "\n", + "# Check core SDK version number\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration. If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure the config file is present at .\\config.json\n", + "\n", + "If you don't have a config.json file, please go through the configuration Notebook first.\n", + "\n", + "This sets you up with a working config file that has information on your workspace, subscription id, etc." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create an Azure ML experiment\n", + "\n", + "Let's create an experiment named \"automl-classification\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Choose a name for the run history container in the workspace.\n", + "experiment_name = 'showcasing-datapath'\n", + "source_directory = '.'\n", + "\n", + "experiment = Experiment(ws, experiment_name)\n", + "experiment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create or Attach an AmlCompute cluster\n", + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your AutoML run. 
In this tutorial, you get the default `AmlCompute` as your training compute resource." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Choose a name for your cluster.\n", + "amlcompute_cluster_name = \"cpu-cluster\"\n", + "\n", + "found = False\n", + "# Check if this compute target already exists in the workspace.\n", + "cts = ws.compute_targets\n", + "if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n", + " found = True\n", + " print('Found existing compute target.')\n", + " compute_target = cts[amlcompute_cluster_name]\n", + " \n", + "if not found:\n", + " print('Creating a new compute target...')\n", + " provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n", + " #vm_priority = 'lowpriority', # optional\n", + " max_nodes = 4)\n", + "\n", + " # Create the cluster.\n", + " compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n", + " \n", + " # Can poll for a minimum number of nodes and for a specific timeout.\n", + " # If no min_node_count is provided, it will use the scale settings for the cluster.\n", + " compute_target.wait_for_completion(show_output = True, timeout_in_minutes = 10)\n", + " \n", + " # For a more detailed view of current AmlCompute status, use get_status()." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data and arguments setup \n", + "\n", + "We will setup a trining script to run and its arguments to be used. The sample training script below will print the two arguments to show what has been passed to pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile train_with_datapath.py\n", + "import argparse\n", + "import os\n", + "\n", + "parser = argparse.ArgumentParser(\"train\")\n", + "parser.add_argument(\"--arg1\", type=str, help=\"sample string argument\")\n", + "parser.add_argument(\"--arg2\", type=str, help=\"sample datapath argument\")\n", + "args = parser.parse_args()\n", + "\n", + "print(\"Sample string argument : %s\" % args.arg1)\n", + "print(\"Sample datapath argument: %s\" % args.arg2)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's setup string and DataPath arguments using PipelineParameter. \n", + "\n", + "Note that Pipeline accepts a tuple of the form ([**PipelineParameters**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelineparameter?view=azure-ml-py) , [**DataPathComputeBinding**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.datapath.datapathcomputebinding?view=azure-ml-py)) as an input. DataPath defines the location of input data. DataPathComputeBinding defines how the data is consumed during step execution. The DataPath can be modified at pipeline submission time with a DataPath parameter, while the compute binding does not change. For static data inputs, we use [**DataReference**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.data_reference.datareference?view=azure-ml-py) which defines both the data location and compute binding." 
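For contrast with the parameterized DataPath input described above, here is a minimal sketch of the static-input alternative the paragraph mentions, using `DataReference` (which bundles the data location and the compute binding in one object). This cell is not part of the notebook in this diff; it assumes the default datastore handle `def_blob_store` created in the following cell, and the path `sample_static_data` is an illustrative placeholder.

```python
# Hedged sketch: a static pipeline input via DataReference.
# 'sample_static_data' is an assumed path used only for illustration.
from azureml.data.data_reference import DataReference

static_input = DataReference(datastore=def_blob_store,
                             data_reference_name="static_input",
                             path_on_datastore="sample_static_data",
                             mode="mount")
# Unlike the (PipelineParameter, DataPathComputeBinding) tuple above, this
# location cannot be changed at pipeline submission time.
```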
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def_blob_store = ws.get_default_datastore()\n", + "print(\"Default datastore's name: {}\".format(def_blob_store.name))\n", + "\n", + "data_path = DataPath(datastore=def_blob_store, path_on_datastore='sample_datapath1')\n", + "datapath1_pipeline_param = PipelineParameter(name=\"input_datapath\", default_value=data_path)\n", + "datapath_input = (datapath1_pipeline_param, DataPathComputeBinding(mode='mount'))\n", + "\n", + "string_pipeline_param = PipelineParameter(name=\"input_string\", default_value='sample_string1')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a Pipeline with a DataPath PipelineParameter\n", + "\n", + "Note that the ```datapath_input``` is specified on both arguments and inputs to create a step." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_step = PythonScriptStep(\n", + " name='train_step',\n", + " script_name=\"train_with_datapath.py\",\n", + " arguments=[\"--arg1\", string_pipeline_param, \"--arg2\", datapath_input],\n", + " inputs=[datapath_input],\n", + " compute_target=compute_target, \n", + " source_directory=source_directory)\n", + "print(\"train_step created\")\n", + "\n", + "pipeline = Pipeline(workspace=ws, steps=[train_step])\n", + "print(\"pipeline with the train_step created\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit a Pipeline with a DataPath PipelineParameter\n", + "\n", + "Pipelines can be submitted with default values of PipelineParameters by not specifying any parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_run = experiment.submit(pipeline)\n", + "print(\"Pipeline is submitted for execution\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "RunDetails(pipeline_run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_run.wait_for_completion()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit a Pipeline and change the DataPath PipelineParameter value from the sdk\n", + "\n", + "Or Pipelines can be submitted with values other than default ones by using pipeline_parameters. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_run_with_params = experiment.submit(pipeline, \\\n", + " pipeline_parameters={'input_datapath': DataPath(datastore=def_blob_store, path_on_datastore='sample_datapath2'),\n", + " 'input_string': 'sample_string2'}) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "RunDetails(pipeline_run_with_params).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_run_with_params.wait_for_completion()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit a Pipeline and change the DataPath PipelineParameter value using a REST call\n", + "\n", + "Let's published the pipeline to use the rest endpoint of the published pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "published_pipeline = pipeline.publish(name=\"DataPath_Pipeline\", description=\"Pipeline to test Datapath\", continue_on_step_failure=True)\n", + "published_pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.authentication import InteractiveLoginAuthentication\n", + "import requests\n", + "\n", + "auth = InteractiveLoginAuthentication()\n", + "aad_token = auth.get_authentication_header()\n", + "\n", + "rest_endpoint = published_pipeline.endpoint\n", + "\n", + "print(\"You can perform HTTP POST on URL {} to trigger this pipeline\".format(rest_endpoint))\n", + "\n", + "# specify the param when running the pipeline\n", + "response = requests.post(rest_endpoint, \n", + " headers=aad_token, \n", + " json={\"ExperimentName\": \"MyRestPipeline\",\n", + " \"RunSource\": \"SDK\",\n", + " \"DataPathAssignments\": {\n", + " \"input_datapath\": { \n", + " \"DataStoreName\": def_blob_store.name,\n", + " \"RelativePath\": 'sample_datapath3'\n", + " }\n", + " },\n", + " \"ParameterAssignments\": {\"input_string\": \"sample_string3\"}\n", + " }\n", + " )\n", + "\n", + "run_id = response.json()[\"Id\"]\n", + "print('Submitted pipeline run: ', run_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "published_pipeline_run_via_rest = PipelineRun(ws.experiments[\"MyRestPipeline\"], run_id)\n", + "RunDetails(published_pipeline_run_via_rest).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "published_pipeline_run_via_rest.wait_for_completion()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a Datastore trigger schedule and use data path parameter\n", + "\n", + "When the Pipeline is scheduled with DataPath parameter, it will be triggered by the modified or added data in the DataPath. ```path_on_datastore``` should be a folder and the value of the DataPath will be replaced by the path of the modified data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.core import Schedule\n", + "\n", + "schedule = Schedule.create(workspace=ws, \n", + " name=\"Datastore_trigger_schedule\",\n", + " pipeline_id=published_pipeline.id, \n", + " experiment_name='Scheduled_Pipeline',\n", + " datastore=def_blob_store,\n", + " wait_for_provisioning=True,\n", + " description=\"Datastore trigger schedule demo\",\n", + " path_on_datastore=\"sample_datapath_for_folder\",\n", + " data_path_parameter_name=\"input_datapath\") #Same name as used above to create PipelineParameter\n", + "\n", + "print(\"Created schedule with id: {}\".format(schedule.id))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "schedule.disable()\n", + "schedule" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "sanpil" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-showcasing-datapath-and-pipelineparameter.yml b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-showcasing-datapath-and-pipelineparameter.yml new file mode 100644 index 00000000..0463f025 --- /dev/null +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-showcasing-datapath-and-pipelineparameter.yml @@ -0,0 +1,5 @@ +name: aml-pipelines-showcasing-datapath-and-pipelineparameter +dependencies: +- pip: + - azureml-sdk + - azureml-widgets diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb index 8131b6e0..3a32e59a 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb @@ -20,7 +20,7 @@ "metadata": {}, "source": [ "# Azure Machine Learning Pipelines with Data Dependency\n", - "In this notebook, we will see how we can build a pipeline with implicit data dependancy." + "In this notebook, we will see how we can build a pipeline with implicit data dependency." 
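The body of that data-dependency notebook is not shown in this diff. Purely as an illustration of what an implicit data dependency means, the sketch below connects two hypothetical steps through a `PipelineData` object; the script names, datastore handle, and compute target are assumptions, not content from the notebook.

```python
# Illustrative sketch only: step2 consumes step1's PipelineData output, so the
# pipeline runs step1 first without any explicit ordering being declared.
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep

processed_data = PipelineData("processed_data", datastore=def_blob_store)

step1 = PythonScriptStep(name="prepare",
                         script_name="prepare.py",            # assumed script
                         arguments=["--output", processed_data],
                         outputs=[processed_data],
                         compute_target=compute_target,
                         source_directory=".")

step2 = PythonScriptStep(name="train",
                         script_name="train.py",              # assumed script
                         arguments=["--input", processed_data],
                         inputs=[processed_data],
                         compute_target=compute_target,
                         source_directory=".")

# step1 is pulled in implicitly through the data dependency.
pipeline = Pipeline(workspace=ws, steps=[step2])
```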
] }, { diff --git a/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb b/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb index fa5d1121..371b8411 100644 --- a/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb @@ -248,7 +248,7 @@ "# Specify CondaDependencies obj, add necessary packages\n", "aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(\n", " conda_packages=['pandas','scikit-learn'], \n", - " pip_packages=['azureml-sdk', 'azureml-dataprep', 'azureml-train-automl==1.0.33'], \n", + " pip_packages=['azureml-sdk', 'azureml-dataprep', 'azureml-train-automl'], \n", " pin_sdk_version=False)\n", "\n", "print (\"Run configuration created.\")" diff --git a/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/chainer_mnist.py b/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/chainer_mnist.py new file mode 100644 index 00000000..df2d6a6e --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/chainer_mnist.py @@ -0,0 +1,139 @@ + +import argparse +import os + +import numpy as np + +import chainer +from chainer import backend +from chainer import backends +from chainer.backends import cuda +from chainer import Function, gradient_check, report, training, utils, Variable +from chainer import datasets, iterators, optimizers, serializers +from chainer import Link, Chain, ChainList +import chainer.functions as F +import chainer.links as L +from chainer.training import extensions +from chainer.dataset import concat_examples +from chainer.backends.cuda import to_cpu + +from azureml.core.run import Run +run = Run.get_context() + + +class MyNetwork(Chain): + + def __init__(self, n_mid_units=100, n_out=10): + super(MyNetwork, self).__init__() + with self.init_scope(): + self.l1 = L.Linear(None, n_mid_units) + self.l2 = L.Linear(n_mid_units, n_mid_units) + self.l3 = L.Linear(n_mid_units, n_out) + + def forward(self, x): + h = F.relu(self.l1(x)) + h = F.relu(self.l2(h)) + return self.l3(h) + + +def main(): + parser = argparse.ArgumentParser(description='Chainer example: MNIST') + parser.add_argument('--batchsize', '-b', type=int, default=100, + help='Number of images in each mini-batch') + parser.add_argument('--epochs', '-e', type=int, default=20, + help='Number of sweeps over the dataset to train') + parser.add_argument('--output_dir', '-o', default='./outputs', + help='Directory to output the result') + parser.add_argument('--gpu_id', '-g', default=0, + help='ID of the GPU to be used. 
Set to -1 if you use CPU') + args = parser.parse_args() + + # Download the MNIST data if you haven't downloaded it yet + train, test = datasets.mnist.get_mnist(withlabel=True, ndim=1) + + gpu_id = args.gpu_id + batchsize = args.batchsize + epochs = args.epochs + run.log('Batch size', np.int(batchsize)) + run.log('Epochs', np.int(epochs)) + + train_iter = iterators.SerialIterator(train, batchsize) + test_iter = iterators.SerialIterator(test, batchsize, + repeat=False, shuffle=False) + + model = MyNetwork() + + if gpu_id >= 0: + # Make a specified GPU current + chainer.backends.cuda.get_device_from_id(0).use() + model.to_gpu() # Copy the model to the GPU + + # Choose an optimizer algorithm + optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9) + + # Give the optimizer a reference to the model so that it + # can locate the model's parameters. + optimizer.setup(model) + + while train_iter.epoch < epochs: + # ---------- One iteration of the training loop ---------- + train_batch = train_iter.next() + image_train, target_train = concat_examples(train_batch, gpu_id) + + # Calculate the prediction of the network + prediction_train = model(image_train) + + # Calculate the loss with softmax_cross_entropy + loss = F.softmax_cross_entropy(prediction_train, target_train) + + # Calculate the gradients in the network + model.cleargrads() + loss.backward() + + # Update all the trainable parameters + optimizer.update() + # --------------------- until here --------------------- + + # Check the validation accuracy of prediction after every epoch + if train_iter.is_new_epoch: # If this iteration is the final iteration of the current epoch + + # Display the training loss + print('epoch:{:02d} train_loss:{:.04f} '.format( + train_iter.epoch, float(to_cpu(loss.array))), end='') + + test_losses = [] + test_accuracies = [] + while True: + test_batch = test_iter.next() + image_test, target_test = concat_examples(test_batch, gpu_id) + + # Forward the test data + prediction_test = model(image_test) + + # Calculate the loss + loss_test = F.softmax_cross_entropy(prediction_test, target_test) + test_losses.append(to_cpu(loss_test.array)) + + # Calculate the accuracy + accuracy = F.accuracy(prediction_test, target_test) + accuracy.to_cpu() + test_accuracies.append(accuracy.array) + + if test_iter.is_new_epoch: + test_iter.epoch = 0 + test_iter.current_position = 0 + test_iter.is_new_epoch = False + test_iter._pushed_position = None + break + + val_accuracy = np.mean(test_accuracies) + print('val_loss:{:.04f} val_accuracy:{:.04f}'.format( + np.mean(test_losses), val_accuracy)) + + run.log("Accuracy", np.float(val_accuracy)) + + serializers.save_npz(os.path.join(args.output_dir, 'model.npz'), model) + + +if __name__ == '__main__': + main() diff --git a/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/chainer_score.py b/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/chainer_score.py new file mode 100644 index 00000000..f6ec3a6c --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/chainer_score.py @@ -0,0 +1,45 @@ +import numpy as np +import os +import json + +from chainer import serializers, using_config, Variable, datasets +import chainer.functions as F +import chainer.links as L +from chainer import Chain + +from azureml.core.model import Model + + +class MyNetwork(Chain): + + def __init__(self, n_mid_units=100, n_out=10): + super(MyNetwork, 
self).__init__() + with self.init_scope(): + self.l1 = L.Linear(None, n_mid_units) + self.l2 = L.Linear(n_mid_units, n_mid_units) + self.l3 = L.Linear(n_mid_units, n_out) + + def forward(self, x): + h = F.relu(self.l1(x)) + h = F.relu(self.l2(h)) + return self.l3(h) + + +def init(): + global model + + model_root = Model.get_model_path('chainer-dnn-mnist') + + # Load our saved artifacts + model = MyNetwork() + serializers.load_npz(model_root, model) + + +def run(input_data): + i = np.array(json.loads(input_data)['data']) + + _, test = datasets.get_mnist() + x = Variable(np.asarray([test[i][0]])) + y = model(x) + + return np.ndarray.tolist(y.data.argmax(axis=1)) diff --git a/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.ipynb b/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.ipynb new file mode 100644 index 00000000..db24ffee --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.ipynb @@ -0,0 +1,725 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved. \n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Train and hyperparameter tune with Chainer\n", + "\n", + "In this tutorial, we demonstrate how to use the Azure ML Python SDK to train a Convolutional Neural Network (CNN) on a single-node GPU with Chainer to perform handwritten digit recognition on the popular MNIST dataset. We will also demonstrate how to perform hyperparameter tuning of the model using Azure ML's HyperDrive service." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!jupyter nbextension install --py --user azureml.widgets\n", + "!jupyter nbextension enable --py --user azureml.widgets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "Opt-in diagnostics for better experience, quality, and security of future releases." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Diagnostics" + ] + }, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace\n", + "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create or Attach existing AmlCompute\n", + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource.\n", + "\n", + "**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n", + "\n", + "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# choose a name for your cluster\n", + "cluster_name = \"gpu-cluster\"\n", + "\n", + "try:\n", + " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", + " print('Found existing compute target.')\n", + "except ComputeTargetException:\n", + " print('Creating a new compute target...')\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n", + " min_nodes=2,\n", + " max_nodes=4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", + "\n", + " compute_target.wait_for_completion(show_output=True)\n", + "\n", + "# use get_status() to get a detailed status for the current cluster. \n", + "print(compute_target.get_status().serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above code creates a GPU cluster. If you instead want to create a CPU cluster, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`." 
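For reference, a CPU variant of the same pattern might look like the sketch below; the cluster name `cpu-cluster` and the node count are illustrative placeholders, not part of the original notebook.

```python
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cpu_cluster_name = "cpu-cluster"  # hypothetical name

try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing CPU compute target.')
except ComputeTargetException:
    print('Creating a new CPU compute target...')
    cpu_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                       max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, cpu_config)
    cpu_cluster.wait_for_completion(show_output=True)
```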
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train model on the remote compute\n", + "Now that you have your data and training script prepared, you are ready to train on your remote compute cluster. You can take advantage of Azure compute to leverage GPUs to cut down your training time. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a project directory\n", + "Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "project_folder = './chainer-mnist'\n", + "os.makedirs(project_folder, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare training script\n", + "Now you will need to create your training script. In this tutorial, the training script is already provided for you at `chainer_mnist.py`. In practice, you should be able to take any custom training script as is and run it with Azure ML without having to modify your code.\n", + "\n", + "However, if you would like to use Azure ML's [tracking and metrics](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#metrics) capabilities, you will have to add a small amount of Azure ML code inside your training script. \n", + "\n", + "In `chainer_mnist.py`, we will log some metrics to our Azure ML run. To do so, we will access the Azure ML `Run` object within the script:\n", + "```Python\n", + "from azureml.core.run import Run\n", + "run = Run.get_context()\n", + "```\n", + "Further within `chainer_mnist.py`, we log the batchsize and epochs parameters, and the highest accuracy the model achieves:\n", + "```Python\n", + "run.log('Batch size', np.int(args.batchsize))\n", + "run.log('Epochs', np.int(args.epochs))\n", + "\n", + "run.log('Accuracy', np.float(val_accuracy))\n", + "```\n", + "These run metrics will become particularly important when we begin hyperparameter tuning our model in the \"Tune model hyperparameters\" section." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once your script is ready, copy the training script `chainer_mnist.py` into your project directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "\n", + "shutil.copy('chainer_mnist.py', project_folder)\n", + "shutil.copy('chainer_score.py', project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an experiment\n", + "Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this Chainer tutorial. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "\n", + "experiment_name = 'chainer-mnist'\n", + "experiment = Experiment(ws, name=experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a Chainer estimator\n", + "The Azure ML SDK's Chainer estimator enables you to easily submit Chainer training jobs for both single-node and distributed runs. 
The following code will define a single-node Chainer job." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "dnn-chainer-remarks-sample" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.dnn import Chainer\n", + "\n", + "script_params = {\n", + " '--epochs': 10,\n", + " '--batchsize': 128,\n", + " '--output_dir': './outputs'\n", + "}\n", + "\n", + "estimator = Chainer(source_directory=project_folder, \n", + " script_params=script_params,\n", + " compute_target=compute_target,\n", + " pip_packages=['numpy', 'pytest'],\n", + " entry_script='chainer_mnist.py',\n", + " use_gpu=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `script_params` parameter is a dictionary containing the command-line arguments to your training script `entry_script`. To leverage the Azure VM's GPU for training, we set `use_gpu=True`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit job\n", + "Run your experiment by submitting your estimator object. Note that this call is asynchronous." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = experiment.submit(estimator)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitor your run\n", + "You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# to get more details of your run\n", + "print(run.get_details())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tune model hyperparameters\n", + "Now that we've seen how to do a simple Chainer training run using the SDK, let's see if we can further improve the accuracy of our model. We can optimize our model's hyperparameters using Azure Machine Learning's hyperparameter tuning capabilities." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Start a hyperparameter sweep\n", + "First, we will define the hyperparameter space to sweep over. Let's tune the batch size and epochs parameters. In this example we will use random sampling to try different configuration sets of hyperparameters to maximize our primary metric, accuracy.\n", + "\n", + "Then, we specify the early termination policy to use to early terminate poorly performing runs. Here we use the `BanditPolicy`, which will terminate any run that doesn't fall within the slack factor of our primary evaluation metric. In this tutorial, we will apply this policy every epoch (since we report our `Accuracy` metric every epoch and `evaluation_interval=1`). Notice we will delay the first policy evaluation until after the first `3` epochs (`delay_evaluation=3`).\n", + "Refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-tune-hyperparameters#specify-an-early-termination-policy) for more information on the BanditPolicy and other policies available." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.hyperdrive.runconfig import HyperDriveConfig\n", + "from azureml.train.hyperdrive.sampling import RandomParameterSampling\n", + "from azureml.train.hyperdrive.policy import BanditPolicy\n", + "from azureml.train.hyperdrive.run import PrimaryMetricGoal\n", + "from azureml.train.hyperdrive.parameter_expressions import choice\n", + " \n", + "\n", + "param_sampling = RandomParameterSampling( {\n", + " \"--batchsize\": choice(128, 256),\n", + " \"--epochs\": choice(5, 10, 20, 40)\n", + " }\n", + ")\n", + "\n", + "hyperdrive_config = HyperDriveConfig(estimator=estimator,\n", + " hyperparameter_sampling=param_sampling, \n", + " primary_metric_name='Accuracy',\n", + " policy=BanditPolicy(evaluation_interval=1, slack_factor=0.1, delay_evaluation=3),\n", + " primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,\n", + " max_total_runs=8,\n", + " max_concurrent_runs=4)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, launch the hyperparameter tuning job." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# start the HyperDrive run\n", + "hyperdrive_run = experiment.submit(hyperdrive_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitor HyperDrive runs\n", + "You can monitor the progress of the runs with the following Jupyter widget. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "RunDetails(hyperdrive_run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hyperdrive_run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find and register best model\n", + "When all jobs finish, we can find the run that achieved the highest accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run = hyperdrive_run.get_best_run_by_primary_metric()\n", + "print(best_run.get_details()['runDefinition']['arguments'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's list the model files uploaded during the run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(best_run.get_file_names())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can then register the best run's `model.npz` file as a model named `chainer-dnn-mnist` in the workspace for deployment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = best_run.register_model(model_name='chainer-dnn-mnist', model_path='outputs/model.npz')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Deploy the model in ACI\n", + "Now, we are ready to deploy the model as a web service running in Azure Container Instance, [ACI](https://azure.microsoft.com/en-us/services/container-instances/). 
Azure Machine Learning accomplishes this by constructing a Docker image with the scoring logic and model baked in.\n", + "\n", + "### Create scoring script\n", + "First, we will create a scoring script that will be invoked by the web service call.\n", + "+ Note that the scoring script must have two required functions, `init()` and `run(input_data)`.\n", + " + In `init()`, you typically load the model into a global object. This function is executed only once when the Docker container is started.\n", + " + In `run(input_data)`, the model is used to predict a value based on the input data. The input and output of `run` use JSON for serialization and de-serialization in this tutorial, but you are not limited to that format.\n", + " \n", + "Refer to the scoring script `chainer_score.py` for this tutorial. Our web service will use this file to make predictions. When writing your own scoring script, don't forget to test it locally before you deploy the web service." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "shutil.copy('chainer_score.py', project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create myenv.yml\n", + "We also need to create an environment file so that Azure Machine Learning can install the necessary packages in the Docker image which are required by your scoring script. In this case, we need to specify conda packages `numpy` and `chainer`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import CondaDependencies\n", + "\n", + "cd = CondaDependencies.create()\n", + "cd.add_conda_package('numpy')\n", + "cd.add_conda_package('chainer')\n", + "cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n", + "\n", + "print(cd.serialize_to_string())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Deploy to ACI\n", + "We are almost ready to deploy. Create the inference configuration and deployment configuration and deploy to ACI. This cell will run for about 7-8 minutes."
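Before running the deployment cell below, you can follow the notebook's advice and exercise the scoring functions locally. A rough sketch, assuming the registered `model.npz` is pulled down next to the notebook first (the local path and the use of the module-level `model` global are assumptions of this sketch):

```python
import json
from chainer import serializers

import chainer_score  # the scoring module copied into the project folder

# Pull the trained weights down from the best run (name matches the register step above)
best_run.download_file(name='outputs/model.npz', output_file_path='model.npz')

# Bypass init(), which resolves the model from the registry, and load the local copy instead
local_model = chainer_score.MyNetwork()
serializers.load_npz('model.npz', local_model)
chainer_score.model = local_model

# run() expects a JSON payload whose 'data' field indexes into the MNIST test set
print(chainer_score.run(json.dumps({'data': [0]})))
```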
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.webservice import AciWebservice\n", + "from azureml.core.model import InferenceConfig\n", + "from azureml.core.webservice import Webservice\n", + "from azureml.core.model import Model\n", + "\n", + "inference_config = InferenceConfig(runtime= \"python\", \n", + " entry_script=\"chainer_score.py\",\n", + " conda_file=\"myenv.yml\")\n", + "\n", + "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n", + " auth_enabled=True, # this flag generates API keys to secure access\n", + " memory_gb=1,\n", + " tags={'name': 'mnist', 'framework': 'Chainer'},\n", + " description='Chainer DNN with MNIST')\n", + "\n", + "service = Model.deploy(workspace=ws, \n", + " name='chainer-mnist-1', \n", + " models=[model], \n", + " inference_config=inference_config, \n", + " deployment_config=aciconfig)\n", + "service.wait_for_deployment(True)\n", + "print(service.state)\n", + "print(service.scoring_uri)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Tip: If something goes wrong with the deployment, the first thing to look at is the logs from the service by running the following command:** `print(service.get_logs())`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is the scoring web service endpoint: `print(service.scoring_uri)`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test the deployed model\n", + "Let's test the deployed model. Pick a random sample from the test set, and send it to the web service hosted in ACI for a prediction. Note that here we are using an HTTP request to invoke the service.\n", + "\n", + "We can retrieve the API keys used for accessing the HTTP endpoint and construct a raw HTTP request to send to the service. Don't forget to add the key to the HTTP header." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# retrieve the API keys. 
two keys were generated.\n", + "key1, Key2 = service.get_keys()\n", + "print(key1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import urllib\n", + "import gzip\n", + "import numpy as np\n", + "import struct\n", + "import requests\n", + "\n", + "\n", + "# load compressed MNIST gz files and return numpy arrays\n", + "def load_data(filename, label=False):\n", + " with gzip.open(filename) as gz:\n", + " struct.unpack('I', gz.read(4))\n", + " n_items = struct.unpack('>I', gz.read(4))\n", + " if not label:\n", + " n_rows = struct.unpack('>I', gz.read(4))[0]\n", + " n_cols = struct.unpack('>I', gz.read(4))[0]\n", + " res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)\n", + " res = res.reshape(n_items[0], n_rows * n_cols)\n", + " else:\n", + " res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)\n", + " res = res.reshape(n_items[0], 1)\n", + " return res\n", + "\n", + "os.makedirs('./data/mnist', exist_ok=True)\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')\n", + "\n", + "X_test = load_data('./data/mnist/test-images.gz', False)\n", + "y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n", + "\n", + "\n", + "# send a random row from the test set to score\n", + "random_index = np.random.randint(0, len(X_test)-1)\n", + "input_data = \"{\\\"data\\\": [\" + str(random_index) + \"]}\"\n", + "\n", + "headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n", + "\n", + "# send sample to service for scoring\n", + "resp = requests.post(service.scoring_uri, input_data, headers=headers)\n", + "\n", + "print(\"label:\", y_test[random_index])\n", + "print(\"prediction:\", resp.text[1])\n", + "\n", + "plt.imshow(X_test[random_index].reshape((28,28)), cmap='gray')\n", + "plt.axis('off')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's look at the workspace after the web service was deployed. You should see\n", + "\n", + " + a registered model named 'chainer-dnn-mnist' and with the id 'chainer-dnn-mnist:1'\n", + " + a webservice called 'chainer-mnist-svc' with some scoring URL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "models = ws.models\n", + "for name, model in models.items():\n", + " print(\"Model: {}, ID: {}\".format(name, model.id))\n", + " \n", + "webservices = ws.webservices\n", + "for name, webservice in webservices.items():\n", + " print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can delete the ACI deployment with a simple delete API call." 
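If you also want to remove the other resources created in this tutorial, the registered model and the training cluster expose similar delete calls; a short sketch (only run these if you no longer need the resources):

```python
# Remove the registered model and the AmlCompute cluster created earlier
model.delete()
compute_target.delete()
```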
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "service.delete()" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "dipeck" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + }, + "msauthor": "dipeck" + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.yml b/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.yml new file mode 100644 index 00000000..6024bba0 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.yml @@ -0,0 +1,12 @@ +name: train-hyperparameter-tune-deploy-with-chainer +dependencies: +- pip: + - azureml-sdk + - azureml-widgets + - numpy + - matplotlib + - json + - urllib + - gzip + - struct + - requests diff --git a/how-to-use-azureml/ml-frameworks/chainer/training/distributed-chainer/distributed-chainer.ipynb b/how-to-use-azureml/ml-frameworks/chainer/training/distributed-chainer/distributed-chainer.ipynb new file mode 100644 index 00000000..012cc9d2 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/chainer/training/distributed-chainer/distributed-chainer.ipynb @@ -0,0 +1,320 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/ml-frameworks/chainer/training/distributed-chainer/distributed-chainer.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Distributed Chainer\n", + "In this tutorial, you will run a Chainer training example on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using ChainerMN distributed training across a GPU cluster." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "Opt-in diagnostics for better experience, quality, and security of future releases." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Diagnostics" + ] + }, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace\n", + "\n", + "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create or attach existing AmlCompute\n", + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n", + "\n", + "**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n", + "\n", + "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# choose a name for your cluster\n", + "cluster_name = \"gpu-cluster\"\n", + "\n", + "try:\n", + " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", + " print('Found existing compute target.')\n", + "except ComputeTargetException:\n", + " print('Creating a new compute target...')\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n", + " max_nodes=4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", + "\n", + " compute_target.wait_for_completion(show_output=True)\n", + "\n", + "# use get_status() to get a detailed status for the current AmlCompute. \n", + "print(compute_target.get_status().serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`." 
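The 0-to-4-node autoscaling above relies on the provisioning defaults for the minimum node count and idle timeout. If you prefer to make those explicit, the same call accepts them directly; a sketch with illustrative values:

```python
# Explicit autoscale settings (values are examples, not requirements)
compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',
                                                       min_nodes=0,
                                                       max_nodes=4,
                                                       idle_seconds_before_scaledown=1200)
```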
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train model on the remote compute\n", + "Now that we have the AmlCompute ready to go, let's run our distributed training job." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a project directory\n", + "Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "project_folder = './chainer-distr'\n", + "os.makedirs(project_folder, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare training script\n", + "Now you will need to create your training script. In this tutorial, the script for distributed training of MNIST is already provided for you at `train_mnist.py`. In practice, you should be able to take any custom Chainer training script as is and run it with Azure ML without having to modify your code." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once your script is ready, copy the training script `train_mnist.py` into the project directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "\n", + "shutil.copy('train_mnist.py', project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an experiment\n", + "Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed Chainer tutorial. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "\n", + "experiment_name = 'chainer-distr'\n", + "experiment = Experiment(ws, name=experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a Chainer estimator\n", + "The Azure ML SDK's Chainer estimator enables you to easily submit Chainer training jobs for both single-node and distributed runs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import Chainer, Mpi\n", + "\n", + "estimator = Chainer(source_directory=project_folder,\n", + " compute_target=compute_target,\n", + " entry_script='train_mnist.py',\n", + " node_count=2,\n", + " distributed_training=Mpi(),\n", + " use_gpu=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using MPI, you must provide the argument `distributed_training=Mpi()`. To specify `i` workers per node, you must provide the argument `distributed_training=Mpi(process_count_per_node=i)`. Using this estimator with these settings, Chainer and its dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `Chainer` constructor's `pip_packages` or `conda_packages` parameters." 
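Putting those two remarks together, a variant of the estimator with two MPI processes per node and an extra pip dependency might look like the following sketch (the extra package is purely illustrative):

```python
from azureml.train.dnn import Chainer, Mpi

estimator = Chainer(source_directory=project_folder,
                    compute_target=compute_target,
                    entry_script='train_mnist.py',
                    node_count=2,
                    distributed_training=Mpi(process_count_per_node=2),
                    pip_packages=['matplotlib'],  # example extra dependency
                    use_gpu=True)
```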
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit job\n", + "Run your experiment by submitting your estimator object. Note that this call is asynchronous." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = experiment.submit(estimator)\n", + "print(run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitor your run\n", + "You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes. You can see that the widget automatically plots and visualizes the loss metric that we logged to the Azure ML run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output=True)" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "ninhu" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/ml-frameworks/chainer/training/distributed-chainer/distributed-chainer.yml b/how-to-use-azureml/ml-frameworks/chainer/training/distributed-chainer/distributed-chainer.yml new file mode 100644 index 00000000..0c2ef761 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/chainer/training/distributed-chainer/distributed-chainer.yml @@ -0,0 +1,5 @@ +name: distributed-chainer +dependencies: +- pip: + - azureml-sdk + - azureml-widgets diff --git a/how-to-use-azureml/ml-frameworks/chainer/training/distributed-chainer/train_mnist.py b/how-to-use-azureml/ml-frameworks/chainer/training/distributed-chainer/train_mnist.py new file mode 100644 index 00000000..29c77f2d --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/chainer/training/distributed-chainer/train_mnist.py @@ -0,0 +1,125 @@ +# Official ChainerMN example taken from +# https://github.com/chainer/chainer/blob/master/examples/chainermn/mnist/train_mnist.py + +from __future__ import print_function + +import argparse + +import chainer +import chainer.functions as F +import chainer.links as L +from chainer import training +from chainer.training import extensions + +import chainermn + + +class MLP(chainer.Chain): + + def __init__(self, n_units, n_out): + super(MLP, self).__init__( + # the size of the inputs to each layer will be inferred + l1=L.Linear(784, n_units), # n_in -> n_units + l2=L.Linear(n_units, n_units), # n_units -> n_units + l3=L.Linear(n_units, n_out), # n_units -> n_out + ) + + def __call__(self, x): + h1 = F.relu(self.l1(x)) + h2 = F.relu(self.l2(h1)) + return self.l3(h2) + + +def main(): + parser = argparse.ArgumentParser(description='ChainerMN example: MNIST') + parser.add_argument('--batchsize', '-b', type=int, default=100, + help='Number of images in each mini-batch') + parser.add_argument('--communicator', type=str, + default='non_cuda_aware', help='Type of communicator') + 
parser.add_argument('--epoch', '-e', type=int, default=20, + help='Number of sweeps over the dataset to train') + parser.add_argument('--gpu', '-g', default=True, + help='Use GPU') + parser.add_argument('--out', '-o', default='result', + help='Directory to output the result') + parser.add_argument('--resume', '-r', default='', + help='Resume the training from snapshot') + parser.add_argument('--unit', '-u', type=int, default=1000, + help='Number of units') + args = parser.parse_args() + + # Prepare ChainerMN communicator. + + if args.gpu: + if args.communicator == 'naive': + print("Error: 'naive' communicator does not support GPU.\n") + exit(-1) + comm = chainermn.create_communicator(args.communicator) + device = comm.intra_rank + else: + if args.communicator != 'naive': + print('Warning: using naive communicator ' + 'because only naive supports CPU-only execution') + comm = chainermn.create_communicator('naive') + device = -1 + + if comm.rank == 0: + print('==========================================') + print('Num process (COMM_WORLD): {}'.format(comm.size)) + if args.gpu: + print('Using GPUs') + print('Using {} communicator'.format(args.communicator)) + print('Num unit: {}'.format(args.unit)) + print('Num Minibatch-size: {}'.format(args.batchsize)) + print('Num epoch: {}'.format(args.epoch)) + print('==========================================') + + model = L.Classifier(MLP(args.unit, 10)) + if device >= 0: + chainer.cuda.get_device_from_id(device).use() + model.to_gpu() + + # Create a multi node optimizer from a standard Chainer optimizer. + optimizer = chainermn.create_multi_node_optimizer( + chainer.optimizers.Adam(), comm) + optimizer.setup(model) + + # Split and distribute the dataset. Only worker 0 loads the whole dataset. + # Datasets of worker 0 are evenly split and distributed to all workers. + if comm.rank == 0: + train, test = chainer.datasets.get_mnist() + else: + train, test = None, None + train = chainermn.scatter_dataset(train, comm, shuffle=True) + test = chainermn.scatter_dataset(test, comm, shuffle=True) + + train_iter = chainer.iterators.SerialIterator(train, args.batchsize) + test_iter = chainer.iterators.SerialIterator(test, args.batchsize, + repeat=False, shuffle=False) + + updater = training.StandardUpdater(train_iter, optimizer, device=device) + trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) + + # Create a multi node evaluator from a standard Chainer evaluator. + evaluator = extensions.Evaluator(test_iter, model, device=device) + evaluator = chainermn.create_multi_node_evaluator(evaluator, comm) + trainer.extend(evaluator) + + # Some display and output extensions are necessary only for one worker. + # (Otherwise, there would just be repeated outputs.) 
+ if comm.rank == 0: + trainer.extend(extensions.dump_graph('main/loss')) + trainer.extend(extensions.LogReport()) + trainer.extend(extensions.PrintReport( + ['epoch', 'main/loss', 'validation/main/loss', + 'main/accuracy', 'validation/main/accuracy', 'elapsed_time'])) + trainer.extend(extensions.ProgressBar()) + + if args.resume: + chainer.serializers.load_npz(args.resume, trainer) + + trainer.run() + + +if __name__ == '__main__': + main() diff --git a/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/pytorch_score.py b/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/pytorch_score.py new file mode 100644 index 00000000..5df2d8dc --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/pytorch_score.py @@ -0,0 +1,31 @@ +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. + +import torch +import torch.nn as nn +from torchvision import transforms +import json + +from azureml.core.model import Model + + +def init(): + global model + model_path = Model.get_model_path('pytorch-birds') + model = torch.load(model_path, map_location=lambda storage, loc: storage) + model.eval() + + +def run(input_data): + input_data = torch.tensor(json.loads(input_data)['data']) + + # get prediction + with torch.no_grad(): + output = model(input_data) + classes = ['chicken', 'turkey'] + softmax = nn.Softmax(dim=1) + pred_probs = softmax(output).numpy()[0] + index = torch.argmax(output, 1) + + result = {"label": classes[index], "probability": str(pred_probs[index])} + return result diff --git a/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/pytorch_train.py b/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/pytorch_train.py new file mode 100644 index 00000000..733c9a22 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/pytorch_train.py @@ -0,0 +1,206 @@ +# Copyright (c) 2017, PyTorch contributors +# Modifications copyright (C) Microsoft Corporation +# Licensed under the BSD license +# Adapted from https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html + +from __future__ import print_function, division +import torch +import torch.nn as nn +import torch.optim as optim +from torch.optim import lr_scheduler +from torchvision import datasets, models, transforms +import numpy as np +import time +import os +import copy +import argparse + +from azureml.core.run import Run +# get the Azure ML run object +run = Run.get_context() + + +def load_data(data_dir): + """Load the train/val data.""" + + # Data augmentation and normalization for training + # Just normalization for validation + data_transforms = { + 'train': transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]), + 'val': transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]), + } + + image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), + data_transforms[x]) + for x in ['train', 'val']} + dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, + shuffle=True, num_workers=4) + for x in ['train', 'val']} + 
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']} + class_names = image_datasets['train'].classes + + return dataloaders, dataset_sizes, class_names + + +def train_model(model, criterion, optimizer, scheduler, num_epochs, data_dir): + """Train the model.""" + + # load training/validation data + dataloaders, dataset_sizes, class_names = load_data(data_dir) + + device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + + since = time.time() + + best_model_wts = copy.deepcopy(model.state_dict()) + best_acc = 0.0 + + for epoch in range(num_epochs): + print('Epoch {}/{}'.format(epoch, num_epochs - 1)) + print('-' * 10) + + # Each epoch has a training and validation phase + for phase in ['train', 'val']: + if phase == 'train': + scheduler.step() + model.train() # Set model to training mode + else: + model.eval() # Set model to evaluate mode + + running_loss = 0.0 + running_corrects = 0 + + # Iterate over data. + for inputs, labels in dataloaders[phase]: + inputs = inputs.to(device) + labels = labels.to(device) + + # zero the parameter gradients + optimizer.zero_grad() + + # forward + # track history if only in train + with torch.set_grad_enabled(phase == 'train'): + outputs = model(inputs) + _, preds = torch.max(outputs, 1) + loss = criterion(outputs, labels) + + # backward + optimize only if in training phase + if phase == 'train': + loss.backward() + optimizer.step() + + # statistics + running_loss += loss.item() * inputs.size(0) + running_corrects += torch.sum(preds == labels.data) + + epoch_loss = running_loss / dataset_sizes[phase] + epoch_acc = running_corrects.double() / dataset_sizes[phase] + + print('{} Loss: {:.4f} Acc: {:.4f}'.format( + phase, epoch_loss, epoch_acc)) + + # deep copy the model + if phase == 'val' and epoch_acc > best_acc: + best_acc = epoch_acc + best_model_wts = copy.deepcopy(model.state_dict()) + + # log the best val accuracy to AML run + run.log('best_val_acc', np.float(best_acc)) + + print() + + time_elapsed = time.time() - since + print('Training complete in {:.0f}m {:.0f}s'.format( + time_elapsed // 60, time_elapsed % 60)) + print('Best val Acc: {:4f}'.format(best_acc)) + + # load best model weights + model.load_state_dict(best_model_wts) + return model + + +def fine_tune_model(num_epochs, data_dir, learning_rate, momentum): + """Load a pretrained model and reset the final fully connected layer.""" + + # log the hyperparameter metrics to the AML run + run.log('lr', np.float(learning_rate)) + run.log('momentum', np.float(momentum)) + + model_ft = models.resnet18(pretrained=True) + num_ftrs = model_ft.fc.in_features + model_ft.fc = nn.Linear(num_ftrs, 2) # only 2 classes to predict + + device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + model_ft = model_ft.to(device) + + criterion = nn.CrossEntropyLoss() + + # Observe that all parameters are being optimized + optimizer_ft = optim.SGD(model_ft.parameters(), + lr=learning_rate, momentum=momentum) + + # Decay LR by a factor of 0.1 every 7 epochs + exp_lr_scheduler = lr_scheduler.StepLR( + optimizer_ft, step_size=7, gamma=0.1) + + model = train_model(model_ft, criterion, optimizer_ft, + exp_lr_scheduler, num_epochs, data_dir) + + return model + + +def download_data(): + """Download and extract the training data.""" + import urllib + from zipfile import ZipFile + # download data + data_file = './fowl_data.zip' + download_url = 'https://msdocsdatasets.blob.core.windows.net/pytorchfowl/fowl_data.zip' + urllib.request.urlretrieve(download_url, filename=data_file) + + 
# extract files + with ZipFile(data_file, 'r') as zip: + print('extracting files...') + zip.extractall() + print('finished extracting') + data_dir = zip.namelist()[0] + + # delete zip file + os.remove(data_file) + return data_dir + + +def main(): + print("Torch version:", torch.__version__) + + # get command-line arguments + parser = argparse.ArgumentParser() + parser.add_argument('--num_epochs', type=int, default=25, + help='number of epochs to train') + parser.add_argument('--output_dir', type=str, help='output directory') + parser.add_argument('--learning_rate', type=float, + default=0.001, help='learning rate') + parser.add_argument('--momentum', type=float, default=0.9, help='momentum') + args = parser.parse_args() + + data_dir = download_data() + print("data directory is: " + data_dir) + model = fine_tune_model(args.num_epochs, data_dir, + args.learning_rate, args.momentum) + os.makedirs(args.output_dir, exist_ok=True) + torch.save(model, os.path.join(args.output_dir, 'model.pt')) + + +if __name__ == "__main__": + main() diff --git a/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/test_img.jpg b/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/test_img.jpg new file mode 100644 index 00000000..f2878b48 Binary files /dev/null and b/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/test_img.jpg differ diff --git a/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb b/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb new file mode 100644 index 00000000..821381ac --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb @@ -0,0 +1,715 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved. \n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Train, hyperparameter tune, and deploy with PyTorch\n", + "\n", + "In this tutorial, you will train, hyperparameter tune, and deploy a PyTorch model using the Azure Machine Learning (Azure ML) Python SDK.\n", + "\n", + "This tutorial will train an image classification model using transfer learning, based on PyTorch's [Transfer Learning tutorial](https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html). The model is trained to classify chickens and turkeys by first using a pretrained ResNet18 model that has been trained on the [ImageNet](http://image-net.org/index) dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "* If you are using an Azure Machine Learning Notebook VM, you are all set. 
Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "Opt-in diagnostics for better experience, quality, and security of future releases." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Diagnostics" + ] + }, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace\n", + "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep='\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create or Attach existing AmlCompute\n", + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource.\n", + "\n", + "**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n", + "\n", + "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." 
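If you are unsure which VM sizes are available in your workspace's region before requesting quota, the SDK can list them; a quick sketch (the structure of the returned entries may vary by SDK version):

```python
from azureml.core.compute import AmlCompute

# Print a few of the VM sizes supported in the workspace's region
for vm_size in AmlCompute.supported_vmsizes(workspace=ws)[:5]:
    print(vm_size)
```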
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# choose a name for your cluster\n", + "cluster_name = \"gpu-cluster\"\n", + "\n", + "try:\n", + " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", + " print('Found existing compute target.')\n", + "except ComputeTargetException:\n", + " print('Creating a new compute target...')\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n", + " max_nodes=4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", + "\n", + " compute_target.wait_for_completion(show_output=True)\n", + "\n", + "# use get_status() to get a detailed status for the current cluster. \n", + "print(compute_target.get_status().serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above code creates a GPU cluster. If you instead want to create a CPU cluster, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train model on the remote compute\n", + "Now that you have your data and training script prepared, you are ready to train on your remote compute cluster. You can take advantage of Azure compute to leverage GPUs to cut down your training time. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a project directory\n", + "Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "project_folder = './pytorch-birds'\n", + "os.makedirs(project_folder, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download training data\n", + "The dataset we will use (located on a public blob [here](https://msdocsdatasets.blob.core.windows.net/pytorchfowl/fowl_data.zip) as a zip file) consists of about 120 training images each for turkeys and chickens, with 100 validation images for each class. The images are a subset of the [Open Images v5 Dataset](https://storage.googleapis.com/openimages/web/index.html). We will download and extract the dataset as part of our training script `pytorch_train.py`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare training script\n", + "Now you will need to create your training script. In this tutorial, the training script is already provided for you at `pytorch_train.py`. In practice, you should be able to take any custom training script as is and run it with Azure ML without having to modify your code.\n", + "\n", + "However, if you would like to use Azure ML's [tracking and metrics](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#metrics) capabilities, you will have to add a small amount of Azure ML code inside your training script. \n", + "\n", + "In `pytorch_train.py`, we will log some metrics to our Azure ML run. 
To do so, we will access the Azure ML `Run` object within the script:\n", + "```Python\n", + "from azureml.core.run import Run\n", + "run = Run.get_context()\n", + "```\n", + "Further within `pytorch_train.py`, we log the learning rate and momentum parameters, and the best validation accuracy the model achieves:\n", + "```Python\n", + "run.log('lr', np.float(learning_rate))\n", + "run.log('momentum', np.float(momentum))\n", + "\n", + "run.log('best_val_acc', np.float(best_acc))\n", + "```\n", + "These run metrics will become particularly important when we begin hyperparameter tuning our model in the \"Tune model hyperparameters\" section." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once your script is ready, copy the training script `pytorch_train.py` into your project directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "\n", + "shutil.copy('pytorch_train.py', project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an experiment\n", + "Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this transfer learning PyTorch tutorial. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "\n", + "experiment_name = 'pytorch-birds'\n", + "experiment = Experiment(ws, name=experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a PyTorch estimator\n", + "The Azure ML SDK's PyTorch estimator enables you to easily submit PyTorch training jobs for both single-node and distributed runs. For more information on the PyTorch estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-pytorch). The following code will define a single-node PyTorch job." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "dnn-pytorch-remarks-sample" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.dnn import PyTorch\n", + "\n", + "script_params = {\n", + " '--num_epochs': 30,\n", + " '--output_dir': './outputs'\n", + "}\n", + "\n", + "estimator = PyTorch(source_directory=project_folder, \n", + " script_params=script_params,\n", + " compute_target=compute_target,\n", + " entry_script='pytorch_train.py',\n", + " use_gpu=True,\n", + " pip_packages=['pillow==5.4.1'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `script_params` parameter is a dictionary containing the command-line arguments to your training script `entry_script`. Please note the following:\n", + "- The training data is downloaded and extracted by `pytorch_train.py` itself (see the \"Download training data\" section above), so no data path argument needs to be passed here.\n", + "- We specified the output directory as `./outputs`. The `outputs` directory is specially treated by Azure ML in that all the content in this directory gets uploaded to your workspace as part of your run history. The files written to this directory are therefore accessible even once your remote run is over. In this tutorial, we will save our trained model to this output directory.\n", + "\n", + "To leverage the Azure VM's GPU for training, we set `use_gpu=True`."
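As an aside, `pytorch_train.py` also exposes `--learning_rate` and `--momentum` arguments (with defaults of 0.001 and 0.9, as seen in the script above); a sketch of passing them explicitly instead of relying on the defaults:

```python
script_params = {
    '--num_epochs': 30,
    '--output_dir': './outputs',
    '--learning_rate': 0.001,  # same as the script default, shown for illustration
    '--momentum': 0.9
}
```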
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit job\n", + "Run your experiment by submitting your estimator object. Note that this call is asynchronous." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = experiment.submit(estimator)\n", + "print(run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# to get more details of your run\n", + "print(run.get_details())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitor your run\n", + "You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, you can block until the script has completed training before running more code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tune model hyperparameters\n", + "Now that we've seen how to do a simple PyTorch training run using the SDK, let's see if we can further improve the accuracy of our model. We can optimize our model's hyperparameters using Azure Machine Learning's hyperparameter tuning capabilities." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Start a hyperparameter sweep\n", + "First, we will define the hyperparameter space to sweep over. Since our training script uses a learning rate schedule to decay the learning rate every several epochs, let's tune the initial learning rate and the momentum parameters. In this example we will use random sampling to try different configuration sets of hyperparameters to maximize our primary metric, the best validation accuracy (`best_val_acc`).\n", + "\n", + "Then, we specify the early termination policy to use to early terminate poorly performing runs. Here we use the `BanditPolicy`, which will terminate any run that doesn't fall within the slack factor of our primary evaluation metric. In this tutorial, we will apply this policy every epoch (since we report our `best_val_acc` metric every epoch and `evaluation_interval=1`). Notice we will delay the first policy evaluation until after the first `10` epochs (`delay_evaluation=10`).\n", + "Refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-tune-hyperparameters#specify-an-early-termination-policy) for more information on the BanditPolicy and other policies available." 
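To make the slack factor concrete, here is a purely illustrative calculation (not the service's internal code) of the cutoff that `slack_factor=0.15` roughly implies when the goal is to maximize `best_val_acc`:

```python
# Illustrative arithmetic only: what a slack_factor of 0.15 roughly means
# when the goal is to maximize the primary metric.
slack_factor = 0.15
best_metric_at_interval = 0.90   # hypothetical best validation accuracy among runs so far

cutoff = best_metric_at_interval / (1 + slack_factor)
print(f"Runs reporting less than {cutoff:.3f} at this evaluation interval "
      "would be candidates for early termination.")
# -> about 0.783, i.e. runs trailing the leader by more than the slack are stopped
```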
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, uniform, PrimaryMetricGoal\n", + "\n", + "param_sampling = RandomParameterSampling( {\n", + " 'learning_rate': uniform(0.0005, 0.005),\n", + " 'momentum': uniform(0.9, 0.99)\n", + " }\n", + ")\n", + "\n", + "early_termination_policy = BanditPolicy(slack_factor=0.15, evaluation_interval=1, delay_evaluation=10)\n", + "\n", + "hyperdrive_config = HyperDriveConfig(estimator=estimator,\n", + " hyperparameter_sampling=param_sampling, \n", + " policy=early_termination_policy,\n", + " primary_metric_name='best_val_acc',\n", + " primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,\n", + " max_total_runs=8,\n", + " max_concurrent_runs=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, lauch the hyperparameter tuning job." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# start the HyperDrive run\n", + "hyperdrive_run = experiment.submit(hyperdrive_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitor HyperDrive runs\n", + "You can monitor the progress of the runs with the following Jupyter widget. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "RunDetails(hyperdrive_run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Or block until the HyperDrive sweep has completed:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hyperdrive_run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find and register the best model\n", + "Once all the runs complete, we can find the run that produced the model with the highest accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run = hyperdrive_run.get_best_run_by_primary_metric()\n", + "best_run_metrics = best_run.get_metrics()\n", + "print(best_run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('Best Run is:\\n Validation accuracy: {0:.5f} \\n Learning rate: {1:.5f} \\n Momentum: {2:.5f}'.format(\n", + " best_run_metrics['best_val_acc'][-1],\n", + " best_run_metrics['lr'],\n", + " best_run_metrics['momentum'])\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, register the model from your best-performing run to your workspace. The `model_path` parameter takes in the relative path on the remote VM to the model file in your `outputs` directory. In the next section, we will deploy this registered model as a web service." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = best_run.register_model(model_name = 'pytorch-birds', model_path = 'outputs/model.pt')\n", + "print(model.name, model.id, model.version, sep = '\\t')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Deploy model as web service\n", + "Once you have your trained model, you can deploy the model on Azure. 
In this tutorial, we will deploy the model as a web service in [Azure Container Instances](https://docs.microsoft.com/en-us/azure/container-instances/) (ACI). For more information on deploying models using Azure ML, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-deploy-and-where)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create scoring script\n", + "\n", + "First, we will create a scoring script that will be invoked by the web service call. Note that the scoring script must have two required functions:\n", + "* `init()`: In this function, you typically load the model into a `global` object. This function is executed only once when the Docker container is started. \n", + "* `run(input_data)`: In this function, the model is used to predict a value based on the input data. The input and output typically use JSON as serialization and deserialization format, but you are not limited to that.\n", + "\n", + "Refer to the scoring script `pytorch_score.py` for this tutorial. Our web service will use this file to predict whether an image is a chicken or a turkey. When writing your own scoring script, be sure to test it locally before you deploy the web service." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create environment file\n", + "Then, we will need to create an environment file (`myenv.yml`) that specifies all of the scoring script's package dependencies. This file is used to ensure that all of those dependencies are installed in the Docker image by Azure ML. In this case, we need to specify `azureml-defaults`, `torch` and `torchvision`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.conda_dependencies import CondaDependencies \n", + "\n", + "myenv = CondaDependencies.create(pip_packages=['azureml-defaults', 'torch', 'torchvision'])\n", + "\n", + "with open(\"myenv.yml\",\"w\") as f:\n", + " f.write(myenv.serialize_to_string())\n", + " \n", + "print(myenv.serialize_to_string())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Deploy to ACI container\n", + "We are ready to deploy. Create an inference configuration that specifies the inferencing environment and scripts. Create a deployment configuration to specify the number of CPUs and gigabytes of RAM needed for your ACI container. While it depends on your model, the default of `1` core and `1` gigabyte of RAM is usually sufficient for many models. This cell will run for about 7-8 minutes."
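For reference, the scoring script described above must expose `init()` and `run()`. The actual `pytorch_score.py` is provided with the tutorial; the following is only a rough, hypothetical sketch of that structure (the model name, label order, and preprocessing are illustrative):

```python
# Rough, hypothetical sketch of an init()/run() scoring script.
# The real file used by this tutorial is pytorch_score.py.
import json

import torch
from azureml.core.model import Model


def init():
    # Runs once when the container starts: load the registered model into a global.
    global model
    model_path = Model.get_model_path('pytorch-birds')  # name used at registration time
    model = torch.load(model_path, map_location='cpu')
    model.eval()


def run(input_data):
    # Runs per request: deserialize the JSON payload, score it, return a JSON-friendly result.
    data = torch.tensor(json.loads(input_data)['data'])
    with torch.no_grad():
        index = model(data).argmax(dim=1).item()
    classes = ['chicken', 'turkey']  # illustrative label order
    return {'label': classes[index]}
```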
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.webservice import AciWebservice\n", + "from azureml.core.model import InferenceConfig\n", + "from azureml.core.webservice import Webservice\n", + "from azureml.core.model import Model\n", + "\n", + "inference_config = InferenceConfig(runtime= \"python\", \n", + " entry_script=\"pytorch_score.py\",\n", + " conda_file=\"myenv.yml\")\n", + "\n", + "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n", + " memory_gb=1, \n", + " tags={'data': 'birds', 'method':'transfer learning', 'framework':'pytorch'},\n", + " description='Classify turkey/chickens using transfer learning with PyTorch')\n", + "\n", + "service = Model.deploy(workspace=ws, \n", + " name='aci-birds', \n", + " models=[model], \n", + " inference_config=inference_config, \n", + " deployment_config=aciconfig)\n", + "service.wait_for_deployment(True)\n", + "print(service.state)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your deployment fails for any reason and you need to redeploy, make sure to delete the service before you do so: `service.delete()`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Tip: If something goes wrong with the deployment, the first thing to look at is the logs from the service by running the following command:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "service.get_logs()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get the web service's HTTP endpoint, which accepts REST client calls. This endpoint can be shared with anyone who wants to test the web service or integrate it into an application." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(service.scoring_uri)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test the web service\n", + "Finally, let's test our deployed web service. We will send the data as a JSON string to the web service hosted in ACI and use the SDK's `run` API to invoke the service. Here we will take an image from our validation data to predict on." 
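The next cells call the service through the SDK's `run` API; the same scoring URI can also be invoked as a plain REST call. A minimal sketch with the `requests` library, assuming key-based authentication is not enabled on the ACI service (the default here) and using a dummy payload shaped like the preprocessed image produced below:

```python
# Minimal sketch of calling the deployed service over plain HTTP instead of the SDK.
# Assumes key-based auth is not enabled on the ACI service (the default configuration here).
import json

import requests
import torch

headers = {'Content-Type': 'application/json'}
# Dummy 1x3x224x224 input matching the preprocessing shown below; in practice send the
# preprocessed test image, e.g. json.dumps({'data': input_data.tolist()}).
payload = json.dumps({'data': torch.zeros(1, 3, 224, 224).tolist()})

response = requests.post(service.scoring_uri, data=payload, headers=headers)
print(response.status_code, response.text)
```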
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from PIL import Image\n", + "import matplotlib.pyplot as plt\n", + "\n", + "%matplotlib inline\n", + "plt.imshow(Image.open('test_img.jpg'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from torchvision import transforms\n", + " \n", + "def preprocess(image_file):\n", + " \"\"\"Preprocess the input image.\"\"\"\n", + " data_transforms = transforms.Compose([\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])\n", + " ])\n", + "\n", + " image = Image.open(image_file)\n", + " image = data_transforms(image).float()\n", + " image = torch.tensor(image)\n", + " image = image.unsqueeze(0)\n", + " return image.numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "input_data = preprocess('test_img.jpg')\n", + "result = service.run(input_data=json.dumps({'data': input_data.tolist()}))\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up\n", + "Once you no longer need the web service, you can delete it with a simple API call." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "service.delete()" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "ninhu" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.yml b/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.yml new file mode 100644 index 00000000..09f8d5a9 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.yml @@ -0,0 +1,9 @@ +name: train-hyperparameter-tune-deploy-with-pytorch +dependencies: +- pip: + - azureml-sdk + - azureml-widgets + - pillow==5.4.1 + - matplotlib + - https://download.pytorch.org/whl/cpu/torch-1.1.0-cp35-cp35m-win_amd64.whl + - https://download.pytorch.org/whl/cpu/torchvision-0.3.0-cp35-cp35m-win_amd64.whl diff --git a/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.ipynb b/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.ipynb new file mode 100644 index 00000000..2aaf0d8c --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.ipynb @@ -0,0 +1,340 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Distributed PyTorch with Horovod\n", + "In this tutorial, you will train a PyTorch model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using distributed training via [Horovod](https://github.com/uber/horovod) across a GPU cluster." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`\n", + "* Review the [tutorial](../train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb) on single-node PyTorch training using Azure Machine Learning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "Opt-in diagnostics for better experience, quality, and security of future releases." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Diagnostics" + ] + }, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace\n", + "\n", + "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep='\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create or attach existing AmlCompute\n", + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. 
Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n", + "\n", + "**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n", + "\n", + "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# choose a name for your cluster\n", + "cluster_name = \"gpu-cluster\"\n", + "\n", + "try:\n", + " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", + " print('Found existing compute target.')\n", + "except ComputeTargetException:\n", + " print('Creating a new compute target...')\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n", + " max_nodes=4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", + "\n", + " compute_target.wait_for_completion(show_output=True)\n", + "\n", + "# use get_status() to get a detailed status for the current AmlCompute. \n", + "print(compute_target.get_status().serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train model on the remote compute\n", + "Now that we have the AmlCompute ready to go, let's run our distributed training job." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a project directory\n", + "Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "project_folder = './pytorch-distr-hvd'\n", + "os.makedirs(project_folder, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare training script\n", + "Now you will need to create your training script. In this tutorial, the script for distributed training of MNIST is already provided for you at `pytorch_horovod_mnist.py`. In practice, you should be able to take any custom PyTorch training script as is and run it with Azure ML without having to modify your code.\n", + "\n", + "However, if you would like to use Azure ML's [metric logging](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#logging) capabilities, you will have to add a small amount of Azure ML logic inside your training script. 
In this example, at each logging interval, we will log the loss for that minibatch to our Azure ML run.\n", + "\n", + "To do so, in `pytorch_horovod_mnist.py`, we will first access the Azure ML `Run` object within the script:\n", + "```Python\n", + "from azureml.core.run import Run\n", + "run = Run.get_context()\n", + "```\n", + "Later within the script, we log the loss metric to our run:\n", + "```Python\n", + "run.log('loss', loss.item())\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once your script is ready, copy the training script `pytorch_horovod_mnist.py` into the project directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "\n", + "shutil.copy('pytorch_horovod_mnist.py', project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an experiment\n", + "Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed PyTorch tutorial. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "\n", + "experiment_name = 'pytorch-distr-hvd'\n", + "experiment = Experiment(ws, name=experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a PyTorch estimator\n", + "The Azure ML SDK's PyTorch estimator enables you to easily submit PyTorch training jobs for both single-node and distributed runs. For more information on the PyTorch estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-pytorch)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import PyTorch, Mpi\n", + "\n", + "estimator = PyTorch(source_directory=project_folder,\n", + " compute_target=compute_target,\n", + " entry_script='pytorch_horovod_mnist.py',\n", + " node_count=2,\n", + " distributed_training=Mpi(),\n", + " use_gpu=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using MPI/Horovod, you must provide the argument `distributed_training=Mpi()`. To specify `i` workers per node, you must provide the argument `distributed_training=Mpi(process_count_per_node=i)`. Using this estimator with these settings, PyTorch, Horovod and their dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `PyTorch` constructor's `pip_packages` or `conda_packages` parameters." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit job\n", + "Run your experiment by submitting your estimator object. Note that this call is asynchronous." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = experiment.submit(estimator)\n", + "print(run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitor your run\n", + "You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes. 
You can see that the widget automatically plots and visualizes the loss metric that we logged to the Azure ML run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, you can block until the script has completed training before running more code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output=True) # this provides a verbose log" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "ninhu" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.yml b/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.yml new file mode 100644 index 00000000..58bb77d8 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.yml @@ -0,0 +1,5 @@ +name: distributed-pytorch-with-horovod +dependencies: +- pip: + - azureml-sdk + - azureml-widgets diff --git a/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-horovod/pytorch_horovod_mnist.py b/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-horovod/pytorch_horovod_mnist.py new file mode 100644 index 00000000..83562526 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-horovod/pytorch_horovod_mnist.py @@ -0,0 +1,170 @@ +# Copyright (c) 2017, PyTorch contributors +# Modifications copyright (C) Microsoft Corporation +# Licensed under the BSD license +# Adapted from https://github.com/uber/horovod/blob/master/examples/pytorch_mnist.py + +from __future__ import print_function +import argparse +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torchvision import datasets, transforms +import torch.utils.data.distributed +import horovod.torch as hvd + +from azureml.core.run import Run +# get the Azure ML run object +run = Run.get_context() + +print("Torch version:", torch.__version__) + +# Training settings +parser = argparse.ArgumentParser(description='PyTorch MNIST Example') +parser.add_argument('--batch-size', type=int, default=64, metavar='N', + help='input batch size for training (default: 64)') +parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', + help='input batch size for testing (default: 1000)') +parser.add_argument('--epochs', type=int, default=10, metavar='N', + help='number of epochs to train (default: 10)') +parser.add_argument('--lr', type=float, default=0.01, metavar='LR', + help='learning rate (default: 0.01)') +parser.add_argument('--momentum', type=float, default=0.5, metavar='M', + help='SGD momentum (default: 0.5)') +parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables CUDA 
training') +parser.add_argument('--seed', type=int, default=42, metavar='S', + help='random seed (default: 42)') +parser.add_argument('--log-interval', type=int, default=10, metavar='N', + help='how many batches to wait before logging training status') +parser.add_argument('--fp16-allreduce', action='store_true', default=False, + help='use fp16 compression during allreduce') +args = parser.parse_args() +args.cuda = not args.no_cuda and torch.cuda.is_available() + +hvd.init() +torch.manual_seed(args.seed) + +if args.cuda: + # Horovod: pin GPU to local rank. + torch.cuda.set_device(hvd.local_rank()) + torch.cuda.manual_seed(args.seed) + + +kwargs = {} +train_dataset = \ + datasets.MNIST('data-%d' % hvd.rank(), train=True, download=True, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) +train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset, num_replicas=hvd.size(), rank=hvd.rank()) +train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, sampler=train_sampler, **kwargs) + +test_dataset = \ + datasets.MNIST('data-%d' % hvd.rank(), train=False, transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) +test_sampler = torch.utils.data.distributed.DistributedSampler( + test_dataset, num_replicas=hvd.size(), rank=hvd.rank()) +test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.test_batch_size, + sampler=test_sampler, **kwargs) + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 10, kernel_size=5) + self.conv2 = nn.Conv2d(10, 20, kernel_size=5) + self.conv2_drop = nn.Dropout2d() + self.fc1 = nn.Linear(320, 50) + self.fc2 = nn.Linear(50, 10) + + def forward(self, x): + x = F.relu(F.max_pool2d(self.conv1(x), 2)) + x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) + x = x.view(-1, 320) + x = F.relu(self.fc1(x)) + x = F.dropout(x, training=self.training) + x = self.fc2(x) + return F.log_softmax(x) + + +model = Net() + +if args.cuda: + # Move model to GPU. + model.cuda() + +# Horovod: broadcast parameters. +hvd.broadcast_parameters(model.state_dict(), root_rank=0) + +# Horovod: scale learning rate by the number of GPUs. +optimizer = optim.SGD(model.parameters(), lr=args.lr * hvd.size(), + momentum=args.momentum) + +# Horovod: (optional) compression algorithm. +compression = hvd.Compression.fp16 if args.fp16_allreduce else hvd.Compression.none + +# Horovod: wrap optimizer with DistributedOptimizer. +optimizer = hvd.DistributedOptimizer(optimizer, + named_parameters=model.named_parameters(), + compression=compression) + + +def train(epoch): + model.train() + train_sampler.set_epoch(epoch) + for batch_idx, (data, target) in enumerate(train_loader): + if args.cuda: + data, target = data.cuda(), target.cuda() + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_interval == 0: + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_sampler), + 100. * batch_idx / len(train_loader), loss.item())) + + # log the loss to the Azure ML run + run.log('loss', loss.item()) + + +def metric_average(val, name): + tensor = torch.tensor(val) + avg_tensor = hvd.allreduce(tensor, name=name) + return avg_tensor.item() + + +def test(): + model.eval() + test_loss = 0. + test_accuracy = 0. 
+ for data, target in test_loader: + if args.cuda: + data, target = data.cuda(), target.cuda() + output = model(data) + # sum up batch loss + test_loss += F.nll_loss(output, target, size_average=False).item() + # get the index of the max log-probability + pred = output.data.max(1, keepdim=True)[1] + test_accuracy += pred.eq(target.data.view_as(pred)).cpu().float().sum() + + test_loss /= len(test_sampler) + test_accuracy /= len(test_sampler) + + test_loss = metric_average(test_loss, 'avg_loss') + test_accuracy = metric_average(test_accuracy, 'avg_accuracy') + + if hvd.rank() == 0: + print('\nTest set: Average loss: {:.4f}, Accuracy: {:.2f}%\n'.format( + test_loss, 100. * test_accuracy)) + + +for epoch in range(1, args.epochs + 1): + train(epoch) + test() diff --git a/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-nccl-gloo/distributed-pytorch-with-nccl-gloo.ipynb b/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-nccl-gloo/distributed-pytorch-with-nccl-gloo.ipynb new file mode 100644 index 00000000..151cce38 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-nccl-gloo/distributed-pytorch-with-nccl-gloo.ipynb @@ -0,0 +1,382 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/training-with-deep-learning/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Distributed PyTorch \n", + "In this tutorial, you will train a PyTorch model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using distributed training via Nccl/Gloo across a GPU cluster. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "Opt-in diagnostics for better experience, quality, and security of future releases." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Diagnostics" + ] + }, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace\n", + "\n", + "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep='\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create or attach existing AmlCompute\n", + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n", + "\n", + "**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n", + "\n", + "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# choose a name for your cluster\n", + "cluster_name = \"gpu-cluster\"\n", + "\n", + "try:\n", + " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", + " print('Found existing compute target.')\n", + "except ComputeTargetException:\n", + " print('Creating a new compute target...')\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n", + " max_nodes=4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", + "\n", + " compute_target.wait_for_completion(show_output=True)\n", + "\n", + "# use get_status() to get a detailed status for the current AmlCompute. \n", + "print(compute_target.get_status().serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train model on the remote compute\n", + "Now that we have the AmlCompute ready to go, let's run our distributed training job." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a project directory\n", + "Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "project_folder = './pytorch-distr'\n", + "os.makedirs(project_folder, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare training script\n", + "Now you will need to create your training script. In this tutorial, the script for distributed training of MNIST is already provided for you at `pytorch_mnist.py`. In practice, you should be able to take any custom PyTorch training script as is and run it with Azure ML without having to modify your code.\n", + "\n", + "However, if you would like to use Azure ML's [metric logging](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#logging) capabilities, you will have to add a small amount of Azure ML logic inside your training script. In this example, at each logging interval, we will log the loss for that minibatch to our Azure ML run.\n", + "\n", + "To do so, in `pytorch_mnist.py`, we will first access the Azure ML `Run` object within the script:\n", + "```Python\n", + "from azureml.core.run import Run\n", + "run = Run.get_context()\n", + "```\n", + "Later within the script, we log the loss metric to our run:\n", + "```Python\n", + "run.log('loss', losses.avg)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once your script is ready, copy the training script `pytorch_mnist.py` into the project directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "\n", + "shutil.copy('pytorch_mnist.py', project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an experiment\n", + "Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed PyTorch tutorial. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "\n", + "experiment_name = 'pytorch-distr'\n", + "experiment = Experiment(ws, name=experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a PyTorch estimator(Nccl Backend)\n", + "The Azure ML SDK's PyTorch estimator enables you to easily submit PyTorch training jobs for both single-node and distributed runs. For more information on the PyTorch estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-pytorch)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import PyTorch, Nccl\n", + "\n", + "estimator = PyTorch(source_directory=project_folder,\n", + " script_params={\"--dist-backend\" : \"nccl\",\n", + " \"--dist-url\": \"$AZ_BATCHAI_PYTORCH_INIT_METHOD\",\n", + " \"--rank\": \"$AZ_BATCHAI_TASK_INDEX\",\n", + " \"--world-size\": 2},\n", + " compute_target=compute_target,\n", + " entry_script='pytorch_mnist.py',\n", + " node_count=2,\n", + " distributed_training=Nccl(),\n", + " use_gpu=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the above code, `script_params` uses Azure ML generated `AZ_BATCHAI_PYTORCH_INIT_METHOD` for shared file-system initialization and `AZ_BATCHAI_TASK_INDEX` as rank of each worker process.\n", + "The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using Nccl, you must provide the argument `distributed_training=Nccl()`. Using this estimator with these settings, PyTorch and dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `PyTorch` constructor's `pip_packages` or `conda_packages` parameters." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit job\n", + "Run your experiment by submitting your estimator object. Note that this call is asynchronous." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = experiment.submit(estimator)\n", + "print(run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitor your run\n", + "You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes. You can see that the widget automatically plots and visualizes the loss metric that we logged to the Azure ML run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, you can block until the script has completed training before running more code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output=True) # this provides a verbose log" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a PyTorch estimator(Gloo Backend)\n", + "The Azure ML SDK's PyTorch estimator enables you to easily submit PyTorch training jobs for both single-node and distributed runs. For more information on the PyTorch estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-pytorch)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import PyTorch, Gloo\n", + "\n", + "estimator = PyTorch(source_directory=project_folder,\n", + " script_params={\"--dist-backend\" : \"gloo\",\n", + " \"--dist-url\": \"$AZ_BATCHAI_PYTORCH_INIT_METHOD\",\n", + " \"--rank\": \"$AZ_BATCHAI_TASK_INDEX\",\n", + " \"--world-size\": 2},\n", + " compute_target=compute_target,\n", + " entry_script='pytorch_mnist.py',\n", + " node_count=2,\n", + " distributed_training=Gloo(),\n", + " use_gpu=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the above code, `script_params` uses Azure ML generated `AZ_BATCHAI_PYTORCH_INIT_METHOD` for shared file-system initialization and `AZ_BATCHAI_TASK_INDEX` as rank of each worker process.\n", + "The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using Gloo, you must provide the argument `distributed_training=Gloo()`. Using this estimator with these settings, PyTorch and dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `PyTorch` constructor's `pip_packages` or `conda_packages` parameters.\n", + "\n", + "Once you create the estimaotr you can follow the submit steps as shown above to submit a PyTorch run with `Gloo` backend. " + ] + } + ], + "metadata": { + "authors": [ + { + "name": "ninhu" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-nccl-gloo/distributed-pytorch-with-nccl-gloo.yml b/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-nccl-gloo/distributed-pytorch-with-nccl-gloo.yml new file mode 100644 index 00000000..a960ad7e --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-nccl-gloo/distributed-pytorch-with-nccl-gloo.yml @@ -0,0 +1,5 @@ +name: distributed-pytorch-with-nccl-gloo +dependencies: +- pip: + - azureml-sdk + - azureml-widgets diff --git a/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-nccl-gloo/pytorch_mnist.py b/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-nccl-gloo/pytorch_mnist.py new file mode 100644 index 00000000..e2b982d2 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-nccl-gloo/pytorch_mnist.py @@ -0,0 +1,209 @@ +# Copyright (c) 2017, PyTorch contributors +# Modifications copyright (C) Microsoft Corporation +# Licensed under the BSD license +# Adapted from https://github.com/Azure/BatchAI/tree/master/recipes/PyTorch/PyTorch-GPU-Distributed-Gloo + +from __future__ import print_function +import argparse +import os +import shutil +import time +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torchvision import datasets, transforms +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.utils.data +import 
torch.utils.data.distributed +import torchvision.models as models + +from azureml.core.run import Run +# get the Azure ML run object +run = Run.get_context() + +# Training settings +parser = argparse.ArgumentParser(description='PyTorch MNIST Example') +parser.add_argument('--batch-size', type=int, default=64, metavar='N', + help='input batch size for training (default: 64)') +parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', + help='input batch size for testing (default: 1000)') +parser.add_argument('--epochs', type=int, default=10, metavar='N', + help='number of epochs to train (default: 10)') +parser.add_argument('--lr', type=float, default=0.01, metavar='LR', + help='learning rate (default: 0.01)') +parser.add_argument('--momentum', type=float, default=0.5, metavar='M', + help='SGD momentum (default: 0.5)') +parser.add_argument('--seed', type=int, default=1, metavar='S', + help='random seed (default: 1)') +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--log-interval', type=int, default=10, metavar='N', + help='how many batches to wait before logging training status') +parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)') +parser.add_argument('--world-size', default=1, type=int, + help='number of distributed processes') +parser.add_argument('--dist-url', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') +parser.add_argument('--rank', default=-1, type=int, + help='rank of the worker') + +best_prec1 = 0 +args = parser.parse_args() + +args.distributed = args.world_size >= 2 + +if args.distributed: + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + +train_dataset = datasets.MNIST('data', train=True, download=True, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + +if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) +else: + train_sampler = None + +train_loader = torch.utils.data.DataLoader( + train_dataset, + batch_size=args.batch_size, shuffle=(train_sampler is None), + num_workers=args.workers, pin_memory=True, sampler=train_sampler) + + +test_loader = torch.utils.data.DataLoader( + train_dataset, + batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True) + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 10, kernel_size=5) + self.conv2 = nn.Conv2d(10, 20, kernel_size=5) + self.conv2_drop = nn.Dropout2d() + self.fc1 = nn.Linear(320, 50) + self.fc2 = nn.Linear(50, 10) + + def forward(self, x): + x = F.relu(F.max_pool2d(self.conv1(x), 2)) + x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) + x = x.view(-1, 320) + x = F.relu(self.fc1(x)) + x = F.dropout(x, training=self.training) + x = self.fc2(x) + return F.log_softmax(x) + + +model = Net() + +if not args.distributed: + model = torch.nn.DataParallel(model).cuda() +else: + model.cuda() + model = torch.nn.parallel.DistributedDataParallel(model) + +# define loss function (criterion) and optimizer +criterion = nn.CrossEntropyLoss().cuda() + +optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + + +def 
train(epoch): + batch_time = AverageMeter() + data_time = AverageMeter() + losses = AverageMeter() + top1 = AverageMeter() + top5 = AverageMeter() + + # switch to train mode + model.train() + end = time.time() + for i, (input, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + input, target = input.cuda(), target.cuda() + + # compute output + try: + output = model(input) + loss = criterion(output, target) + + # measure accuracy and record loss + prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) + losses.update(loss.item(), input.size(0)) + top1.update(prec1[0], input.size(0)) + top5.update(prec5[0], input.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % 10 == 0: + run.log("loss", losses.avg) + run.log("prec@1", "{0:.3f}".format(top1.avg)) + run.log("prec@5", "{0:.3f}".format(top5.avg)) + print('Epoch: [{0}][{1}/{2}]\t' + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' + 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' + 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' + 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' + 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(epoch, i, len(train_loader), + batch_time=batch_time, data_time=data_time, + loss=losses, top1=top1, top5=top5)) + except: + import sys + print("Unexpected error:", sys.exc_info()[0]) + + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +for epoch in range(1, args.epochs + 1): + train(epoch) diff --git a/how-to-use-azureml/ml-frameworks/scikit-learn/training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.ipynb b/how-to-use-azureml/ml-frameworks/scikit-learn/training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.ipynb new file mode 100644 index 00000000..7b274376 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/scikit-learn/training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.ipynb @@ -0,0 +1,568 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/ml-frameworks/scikit-learn/training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Train and hyperparameter tune on Iris Dataset with Scikit-learn\n", + "In this tutorial, we demonstrate how to use the Azure ML Python SDK to train a support vector machine (SVM) on a single-node CPU with Scikit-learn to perform classification on the popular [Iris dataset](https://archive.ics.uci.edu/ml/datasets/iris). We will also demonstrate how to perform hyperparameter tuning of the model using Azure ML's HyperDrive service." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML Workspace" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Opt-in diagnostics for better experience, quality, and security of future releases." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create AmlCompute" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource.\n", + "\n", + "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. 
Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_D2_V2` CPU VMs. This process is broken down into 3 steps:\n", + "1. create the configuration (this step is local and only takes a second)\n", + "2. create the cluster (this step will take about **20 seconds**)\n", + "3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# choose a name for your cluster\n", + "cluster_name = \"cpu-cluster\"\n", + "\n", + "try:\n", + " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", + " print('Found existing compute target')\n", + "except ComputeTargetException:\n", + " print('Creating a new compute target...')\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', \n", + " max_nodes=4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", + "\n", + " # can poll for a minimum number of nodes and for a specific timeout. \n", + " # if no min node count is provided it uses the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + "\n", + "# use get_status() to get a detailed status for the current cluster. \n", + "print(compute_target.get_status().serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above code retrieves a CPU compute target. Scikit-learn does not support GPU computing." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train model on the remote compute" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that you have your data and training script prepared, you are ready to train on your remote compute. You can take advantage of Azure compute to leverage a CPU cluster." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a project directory" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "project_folder = './sklearn-iris'\n", + "os.makedirs(project_folder, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare training script" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now you will need to create your training script. In this tutorial, the training script is already provided for you at `train_iris`.py. 
In practice, you should be able to take any custom training script as is and run it with Azure ML without having to modify your code.\n", + "\n", + "However, if you would like to use Azure ML's [tracking and metrics](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#metrics) capabilities, you will have to add a small amount of Azure ML code inside your training script.\n", + "\n", + "In `train_iris.py`, we will log some metrics to our Azure ML run. To do so, we will access the Azure ML Run object within the script:\n", + "\n", + "```python\n", + "from azureml.core.run import Run\n", + "run = Run.get_context()\n", + "```\n", + "\n", + "Further within `train_iris.py`, we log the kernel and penalty parameters, and the highest accuracy the model achieves:\n", + "\n", + "```python\n", + "run.log('Kernel type', np.string(args.kernel))\n", + "run.log('Penalty', np.float(args.penalty))\n", + "\n", + "run.log('Accuracy', np.float(accuracy))\n", + "```\n", + "\n", + "These run metrics will become particularly important when we begin hyperparameter tuning our model in the \"Tune model hyperparameters\" section.\n", + "\n", + "Once your script is ready, copy the training script `train_iris.py` into your project directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "\n", + "shutil.copy('train_iris.py', project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an experiment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this Scikit-learn tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "\n", + "experiment_name = 'train_iris'\n", + "experiment = Experiment(ws, name=experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a Scikit-learn estimator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Azure ML SDK's Scikit-learn estimator enables you to easily submit Scikit-learn training jobs for single-node runs. The following code will define a single-node Scikit-learn job." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "sklearn-remarks-sample" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.sklearn import SKLearn\n", + "\n", + "script_params = {\n", + " '--kernel': 'linear',\n", + " '--penalty': 1.0,\n", + "}\n", + "\n", + "estimator = SKLearn(source_directory=project_folder, \n", + " script_params=script_params,\n", + " compute_target=compute_target,\n", + " entry_script='train_iris.py',\n", + " pip_packages=['joblib==0.13.2']\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `script_params` parameter is a dictionary containing the command-line arguments to your training script `entry_script`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit job" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run your experiment by submitting your estimator object. Note that this call is asynchronous." 
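Since the call is asynchronous, the notebook keeps running while the job executes remotely. As a minimal sketch (assuming the `experiment` and `estimator` objects defined above), you could instead block until the run finishes:

```python
# Submit the estimator and wait for the remote run to complete,
# streaming the driver log into the notebook output as it arrives.
run = experiment.submit(estimator)
run.wait_for_completion(show_output=True)
```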
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = experiment.submit(estimator)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Monitor your run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.cancel()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tune model hyperparameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we've seen how to do a simple Scikit-learn training run using the SDK, let's see if we can further improve the accuracy of our model. We can optimize our model's hyperparameters using Azure Machine Learning's hyperparameter tuning capabilities." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Start a hyperparameter sweep" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we will define the hyperparameter space to sweep over. Let's tune the `kernel` and `penalty` parameters. In this example we will use random sampling to try different configuration sets of hyperparameters to maximize our primary metric, `Accuracy`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.hyperdrive.runconfig import HyperDriveRunConfig\n", + "from azureml.train.hyperdrive.sampling import RandomParameterSampling\n", + "from azureml.train.hyperdrive.run import PrimaryMetricGoal\n", + "from azureml.train.hyperdrive.parameter_expressions import choice\n", + " \n", + "\n", + "param_sampling = RandomParameterSampling( {\n", + " \"--kernel\": choice('linear', 'rbf', 'poly', 'sigmoid'),\n", + " \"--penalty\": choice(0.5, 1, 1.5)\n", + " }\n", + ")\n", + "\n", + "hyperdrive_run_config = HyperDriveRunConfig(estimator=estimator,\n", + " hyperparameter_sampling=param_sampling, \n", + " primary_metric_name='Accuracy',\n", + " primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,\n", + " max_total_runs=12,\n", + " max_concurrent_runs=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, lauch the hyperparameter tuning job." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# start the HyperDrive run\n", + "hyperdrive_run = experiment.submit(hyperdrive_run_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Monitor HyperDrive runs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can monitor the progress of the runs with the following Jupyter widget." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "RunDetails(hyperdrive_run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hyperdrive_run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find and register best model\n", + "When all jobs finish, we can find out the one that has the highest accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run = hyperdrive_run.get_best_run_by_primary_metric()\n", + "print(best_run.get_details()['runDefinition']['arguments'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's list the model files uploaded during the run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(best_run.get_file_names())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can then register the folder (and all files in it) as a model named `sklearn-iris` under the workspace for deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = best_run.register_model(model_name='sklearn-iris', model_path='outputs/model.joblib')" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "dipeck" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + }, + "msauthor": "dipeck" + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/ml-frameworks/scikit-learn/training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.yml b/how-to-use-azureml/ml-frameworks/scikit-learn/training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.yml new file mode 100644 index 00000000..2691a849 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/scikit-learn/training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.yml @@ -0,0 +1,6 @@ +name: train-hyperparameter-tune-deploy-with-sklearn +dependencies: +- pip: + - azureml-sdk + - azureml-widgets + - numpy diff --git a/how-to-use-azureml/ml-frameworks/scikit-learn/training/train-hyperparameter-tune-deploy-with-sklearn/train_iris.py b/how-to-use-azureml/ml-frameworks/scikit-learn/training/train-hyperparameter-tune-deploy-with-sklearn/train_iris.py new file mode 100644 index 00000000..bc9099d8 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/scikit-learn/training/train-hyperparameter-tune-deploy-with-sklearn/train_iris.py @@ -0,0 +1,60 @@ +# Modified from https://www.geeksforgeeks.org/multiclass-classification-using-scikit-learn/ + +import argparse +import os + +# importing necessary libraries +import numpy as np + +from sklearn import datasets +from sklearn.metrics import confusion_matrix +from sklearn.model_selection import train_test_split + +import joblib + +from azureml.core.run import Run +run = Run.get_context() + + +def main(): + parser = argparse.ArgumentParser() + + 
parser.add_argument('--kernel', type=str, default='linear', + help='Kernel type to be used in the algorithm') + parser.add_argument('--penalty', type=float, default=1.0, + help='Penalty parameter of the error term') + + args = parser.parse_args() + run.log('Kernel type', np.str(args.kernel)) + run.log('Penalty', np.float(args.penalty)) + + # loading the iris dataset + iris = datasets.load_iris() + + # X -> features, y -> label + X = iris.data + y = iris.target + + # dividing X, y into train and test data + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + + # training a linear SVM classifier + from sklearn.svm import SVC + svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train) + svm_predictions = svm_model_linear.predict(X_test) + + # model accuracy for X_test + accuracy = svm_model_linear.score(X_test, y_test) + print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy)) + run.log('Accuracy', np.float(accuracy)) + # creating a confusion matrix + cm = confusion_matrix(y_test, svm_predictions) + print(cm) + + os.makedirs('outputs', exist_ok=True) + # files saved in the "outputs" folder are automatically uploaded into run history + joblib.dump(svm_model_linear, 'outputs/model.joblib') + + +if __name__ == '__main__': + main() diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/nn.png b/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/nn.png new file mode 100644 index 00000000..8910281e Binary files /dev/null and b/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/nn.png differ diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/tf_mnist.py b/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/tf_mnist.py new file mode 100644 index 00000000..f5ab7099 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/tf_mnist.py @@ -0,0 +1,106 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
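+# This script builds a simple DNN (two hidden layers) on MNIST with TensorFlow 1.x, logs training and validation accuracy to the Azure ML run, and saves the trained model under ./outputs/model.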
+ +import numpy as np +import argparse +import os +import tensorflow as tf + +from azureml.core import Run +from utils import load_data + +print("TensorFlow version:", tf.VERSION) + +parser = argparse.ArgumentParser() +parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point') +parser.add_argument('--batch-size', type=int, dest='batch_size', default=50, help='mini batch size for training') +parser.add_argument('--first-layer-neurons', type=int, dest='n_hidden_1', default=100, + help='# of neurons in the first layer') +parser.add_argument('--second-layer-neurons', type=int, dest='n_hidden_2', default=100, + help='# of neurons in the second layer') +parser.add_argument('--learning-rate', type=float, dest='learning_rate', default=0.01, help='learning rate') +args = parser.parse_args() + +data_folder = os.path.join(args.data_folder, 'mnist') + +print('training dataset is stored here:', data_folder) + +X_train = load_data(os.path.join(data_folder, 'train-images.gz'), False) / 255.0 +X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0 + +y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True).reshape(-1) +y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True).reshape(-1) + +print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n') +training_set_size = X_train.shape[0] + +n_inputs = 28 * 28 +n_h1 = args.n_hidden_1 +n_h2 = args.n_hidden_2 +n_outputs = 10 +learning_rate = args.learning_rate +n_epochs = 20 +batch_size = args.batch_size + +with tf.name_scope('network'): + # construct the DNN + X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X') + y = tf.placeholder(tf.int64, shape=(None), name='y') + h1 = tf.layers.dense(X, n_h1, activation=tf.nn.relu, name='h1') + h2 = tf.layers.dense(h1, n_h2, activation=tf.nn.relu, name='h2') + output = tf.layers.dense(h2, n_outputs, name='output') + +with tf.name_scope('train'): + cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=output) + loss = tf.reduce_mean(cross_entropy, name='loss') + optimizer = tf.train.GradientDescentOptimizer(learning_rate) + train_op = optimizer.minimize(loss) + +with tf.name_scope('eval'): + correct = tf.nn.in_top_k(output, y, 1) + acc_op = tf.reduce_mean(tf.cast(correct, tf.float32)) + +init = tf.global_variables_initializer() +saver = tf.train.Saver() + +# start an Azure ML run +run = Run.get_context() + +with tf.Session() as sess: + init.run() + for epoch in range(n_epochs): + + # randomly shuffle training set + indices = np.random.permutation(training_set_size) + X_train = X_train[indices] + y_train = y_train[indices] + + # batch index + b_start = 0 + b_end = b_start + batch_size + for _ in range(training_set_size // batch_size): + # get a batch + X_batch, y_batch = X_train[b_start: b_end], y_train[b_start: b_end] + + # update batch index for the next batch + b_start = b_start + batch_size + b_end = min(b_start + batch_size, training_set_size) + + # train + sess.run(train_op, feed_dict={X: X_batch, y: y_batch}) + # evaluate training set + acc_train = acc_op.eval(feed_dict={X: X_batch, y: y_batch}) + # evaluate validation set + acc_val = acc_op.eval(feed_dict={X: X_test, y: y_test}) + + # log accuracies + run.log('training_acc', np.float(acc_train)) + run.log('validation_acc', np.float(acc_val)) + print(epoch, '-- Training accuracy:', acc_train, '\b Validation accuracy:', acc_val) + y_hat = np.argmax(output.eval(feed_dict={X: X_test}), axis=1) + + run.log('final_acc', 
np.float(acc_val)) + + os.makedirs('./outputs/model', exist_ok=True) + # files saved in the "./outputs" folder are automatically uploaded into run history + saver.save(sess, './outputs/model/mnist-tf.model') diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb b/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb new file mode 100644 index 00000000..ff9786c0 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb @@ -0,0 +1,1145 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "nbpresent": { + "id": "bf74d2e9-2708-49b1-934b-e0ede342f475" + } + }, + "source": [ + "# Training, hyperparameter tune, and deploy with TensorFlow\n", + "\n", + "## Introduction\n", + "This tutorial shows how to train a simple deep neural network using the MNIST dataset and TensorFlow on Azure Machine Learning. MNIST is a popular dataset consisting of 70,000 grayscale images. Each image is a handwritten digit of `28x28` pixels, representing number from 0 to 9. The goal is to create a multi-class classifier to identify the digit each image represents, and deploy it as a web service in Azure.\n", + "\n", + "For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/).\n", + "\n", + "## Prerequisite:\n", + "* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n", + "* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n", + " * install the AML SDK\n", + " * create a workspace and its configuration file (`config.json`)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's get started. First let's import some Python libraries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbpresent": { + "id": "c377ea0c-0cd9-4345-9be2-e20fb29c94c3" + } + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import numpy as np\n", + "import os\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbpresent": { + "id": "edaa7f2f-2439-4148-b57a-8c794c0945ec" + } + }, + "outputs": [], + "source": [ + "import azureml\n", + "from azureml.core import Workspace\n", + "\n", + "# check core SDK version number\n", + "print(\"Azure ML SDK Version: \", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "Opt-in diagnostics for better experience, quality, and security of future releases." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Diagnostics" + ] + }, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace\n", + "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "nbpresent": { + "id": "59f52294-4a25-4c92-bab8-3b07f0f44d15" + } + }, + "source": [ + "## Create an Azure ML experiment\n", + "Let's create an experiment named \"tf-mnist\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbpresent": { + "id": "bc70f780-c240-4779-96f3-bc5ef9a37d59" + } + }, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "\n", + "script_folder = './tf-mnist'\n", + "os.makedirs(script_folder, exist_ok=True)\n", + "\n", + "exp = Experiment(workspace=ws, name='tf-mnist')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "nbpresent": { + "id": "defe921f-8097-44c3-8336-8af6700804a7" + } + }, + "source": [ + "## Download MNIST dataset\n", + "In order to train on the MNIST dataset we will first need to download it from Yan LeCun's web site directly and save them in a `data` folder locally." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import urllib\n", + "\n", + "os.makedirs('./data/mnist', exist_ok=True)\n", + "\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename = './data/mnist/train-images.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename = './data/mnist/train-labels.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "nbpresent": { + "id": "c3f2f57c-7454-4d3e-b38d-b0946cf066ea" + } + }, + "source": [ + "## Show some sample images\n", + "Let's load the downloaded compressed file into numpy arrays using some utility functions included in the `utils.py` library file from the current folder. Then we use `matplotlib` to plot 30 random images from the dataset along with their labels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbpresent": { + "id": "396d478b-34aa-4afa-9898-cdce8222a516" + } + }, + "outputs": [], + "source": [ + "from utils import load_data\n", + "\n", + "# note we also shrink the intensity values (X) from 0-255 to 0-1. 
This helps the neural network converge faster.\n", + "X_train = load_data('./data/mnist/train-images.gz', False) / 255.0\n", + "y_train = load_data('./data/mnist/train-labels.gz', True).reshape(-1)\n", + "\n", + "X_test = load_data('./data/mnist/test-images.gz', False) / 255.0\n", + "y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n", + "\n", + "count = 0\n", + "sample_size = 30\n", + "plt.figure(figsize = (16, 6))\n", + "for i in np.random.permutation(X_train.shape[0])[:sample_size]:\n", + " count = count + 1\n", + " plt.subplot(1, sample_size, count)\n", + " plt.axhline('')\n", + " plt.axvline('')\n", + " plt.text(x = 10, y = -10, s = y_train[i], fontsize = 18)\n", + " plt.imshow(X_train[i].reshape(28, 28), cmap = plt.cm.Greys)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Upload MNIST dataset to default datastore \n", + "A [datastore](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data) is a place where data can be stored that is then made accessible to a Run either by means of mounting or copying the data to the compute target. A datastore can be backed by either an Azure Blob Storage or an Azure File Share (ADLS will be supported in the future). For simple data handling, each workspace provides a default datastore that can be used, in case the data is not already in Blob Storage or File Share." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = ws.get_default_datastore()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this next step, we will upload the training and test set into the workspace's default datastore, which we will later mount on an `AmlCompute` cluster for training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds.upload(src_dir='./data/mnist', target_path='mnist', overwrite=True, show_progress=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create or Attach existing AmlCompute\n", + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you create `AmlCompute` as your training compute resource." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n", + "1. create the configuration (this step is local and only takes a second)\n", + "2. create the cluster (this step will take about **20 seconds**)\n", + "3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process.
Please make sure to wait until the call returns before moving to the next cell" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# choose a name for your cluster\n", + "cluster_name = \"gpu-cluster\"\n", + "\n", + "try:\n", + " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", + " print('Found existing compute target')\n", + "except ComputeTargetException:\n", + " print('Creating a new compute target...')\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n", + " max_nodes=4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", + "\n", + " # can poll for a minimum number of nodes and for a specific timeout. \n", + " # if no min node count is provided it uses the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + "\n", + "# use get_status() to get a detailed status for the current cluster. \n", + "print(compute_target.get_status().serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that you have created the compute target, let's see what the workspace's `compute_targets` property returns. You should now see one entry named 'gpu-cluster' of type `AmlCompute`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "compute_targets = ws.compute_targets\n", + "for name, ct in compute_targets.items():\n", + " print(name, ct.type, ct.provisioning_state)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Copy the training files into the script folder\n", + "The TensorFlow training script is already created for you. You can simply copy it into the script folder, together with the utility library used to load compressed data file into numpy array." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "\n", + "# the training logic is in the tf_mnist.py file.\n", + "shutil.copy('./tf_mnist.py', script_folder)\n", + "\n", + "# the utils.py just helps loading data from the downloaded MNIST dataset into numpy arrays.\n", + "shutil.copy('./utils.py', script_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "nbpresent": { + "id": "2039d2d5-aca6-4f25-a12f-df9ae6529cae" + } + }, + "source": [ + "## Construct neural network in TensorFlow\n", + "In the training script `tf_mnist.py`, it creates a very simple DNN (deep neural network), with just 2 hidden layers. The input layer has 28 * 28 = 784 neurons, each representing a pixel in an image. The first hidden layer has 300 neurons, and the second hidden layer has 100 neurons. The output layer has 10 neurons, each representing a targeted label from 0 to 9.\n", + "\n", + "![DNN](nn.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Azure ML concepts \n", + "Please note the following three things in the code below:\n", + "1. The script accepts arguments using the argparse package. 
In this case there is one argument `--data_folder` which specifies the file system folder in which the script can find the MNIST data\n", + "```\n", + " parser = argparse.ArgumentParser()\n", + " parser.add_argument('--data_folder')\n", + "```\n", + "2. The script is accessing the Azure ML `Run` object by executing `run = Run.get_context()`. Further down the script is using the `run` to report the training accuracy and the validation accuracy as training progresses.\n", + "```\n", + " run.log('training_acc', np.float(acc_train))\n", + " run.log('validation_acc', np.float(acc_val))\n", + "```\n", + "3. When running the script on Azure ML, you can write files out to a folder `./outputs` that is relative to the root directory. This folder is specially tracked by Azure ML in the sense that any files written to that folder during script execution on the remote target will be picked up by Run History; these files (known as artifacts) will be available as part of the run history record." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The next cell will print out the training code for you to inspect it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(os.path.join(script_folder, './tf_mnist.py'), 'r') as f:\n", + " print(f.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create TensorFlow estimator\n", + "Next, we construct an `azureml.train.dnn.TensorFlow` estimator object, use the Batch AI cluster as compute target, and pass the mount-point of the datastore to the training code as a parameter.\n", + "\n", + "The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker.\n", + "\n", + "The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "dnn-tensorflow-remarks-sample" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.dnn import TensorFlow\n", + "\n", + "script_params = {\n", + " '--data-folder': ws.get_default_datastore().as_mount(),\n", + " '--batch-size': 50,\n", + " '--first-layer-neurons': 300,\n", + " '--second-layer-neurons': 100,\n", + " '--learning-rate': 0.01\n", + "}\n", + "\n", + "est = TensorFlow(source_directory=script_folder,\n", + " script_params=script_params,\n", + " compute_target=compute_target,\n", + " entry_script='tf_mnist.py', \n", + " use_gpu=True, \n", + " framework_version='1.13')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit job to run\n", + "Submit the estimator to an Azure ML experiment to kick off the execution." 
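Before submitting, you can optionally confirm which TensorFlow versions the installed SDK supports, as mentioned in the estimator section above; a minimal sketch:

```python
from azureml.train.dnn import TensorFlow

# List the TensorFlow versions this SDK release can provision,
# useful when choosing the framework_version pinned above.
print(TensorFlow.get_supported_versions())
```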
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = exp.submit(est)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitor the Run \n", + "As the Run is executed, it will go through the following stages:\n", + "1. Preparing: A docker image is created matching the Python environment specified by the TensorFlow estimator and it will be uploaded to the workspace's Azure Container Registry. This step will only happen once for each Python environment -- the container will then be cached for subsequent runs. Creating and uploading the image takes about **5 minutes**. While the job is preparing, logs are streamed to the run history and can be viewed to monitor the progress of the image creation.\n", + "\n", + "2. Scaling: If the compute needs to be scaled up (i.e. the Batch AI cluster requires more nodes to execute the run than currently available), the cluster will attempt to scale up in order to make the required amount of nodes available. Scaling typically takes about **5 minutes**.\n", + "\n", + "3. Running: All scripts in the script folder are uploaded to the compute target, data stores are mounted/copied and the `entry_script` is executed. While the job is running, stdout and the `./logs` folder are streamed to the run history and can be viewed to monitor the progress of the run.\n", + "\n", + "4. Post-Processing: The `./outputs` folder of the run is copied over to the run history\n", + "\n", + "There are multiple ways to check the progress of a running job. We can use a Jupyter notebook widget. \n", + "\n", + "**Note: The widget will automatically update ever 10-15 seconds, always showing you the most up-to-date information about the run**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also periodically check the status of the run object, and navigate to Azure portal to monitor the run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The Run object \n", + "The Run object provides the interface to the run history -- both to the job and to the control plane (this notebook), and both while the job is running and after it has completed. It provides a number of interesting features for instance:\n", + "* `run.get_details()`: Provides a rich set of properties of the run\n", + "* `run.get_metrics()`: Provides a dictionary with all the metrics that were reported for the Run\n", + "* `run.get_file_names()`: List all the files that were uploaded to the run history for this Run. This will include the `outputs` and `logs` folder, azureml-logs and other logs, as well as files that were explicitly uploaded to the run using `run.upload_file()`\n", + "\n", + "Below are some examples -- please run through them and inspect their output. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.get_details()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.get_metrics()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.get_file_names()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot accuracy over epochs\n", + "Since we can retrieve the metrics from the run, we can easily make plots using `matplotlib` in the notebook. Then we can add the plotted image to the run using `run.log_image()`, so all information about the run is kept together." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "os.makedirs('./imgs', exist_ok=True)\n", + "metrics = run.get_metrics()\n", + "\n", + "plt.figure(figsize = (13,5))\n", + "plt.plot(metrics['validation_acc'], 'r-', lw=4, alpha=.6)\n", + "plt.plot(metrics['training_acc'], 'b--', alpha=0.5)\n", + "plt.legend(['Full evaluation set', 'Training set mini-batch'])\n", + "plt.xlabel('epochs', fontsize=14)\n", + "plt.ylabel('accuracy', fontsize=14)\n", + "plt.title('Accuracy over Epochs', fontsize=16)\n", + "run.log_image(name='acc_over_epochs.png', plot=plt)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download the saved model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the training script, a TensorFlow `saver` object is used to persist the model in a local folder (local to the compute target). The model was saved to the `./outputs` folder on the disk of the Batch AI cluster node where the job is run. Azure ML automatically uploaded anything written in the `./outputs` folder into run history file store. Subsequently, we can use the `Run` object to download the model files the `saver` object saved. They are under the the `outputs/model` folder in the run history file store, and are downloaded into a local folder named `model`. Note the TensorFlow model consists of four files in binary format and they are not human-readable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create a model folder in the current directory\n", + "os.makedirs('./model', exist_ok=True)\n", + "\n", + "for f in run.get_file_names():\n", + " if f.startswith('outputs/model'):\n", + " output_file_path = os.path.join('./model', f.split('/')[-1])\n", + " print('Downloading from {} to {} ...'.format(f, output_file_path))\n", + " run.download_file(name=f, output_file_path=output_file_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Predict on the test set\n", + "Now load the saved TensorFlow graph, and list all operations under the `network` scope. This way we can discover the input tensor `network/X:0` and the output tensor `network/output/MatMul:0`, and use them in the scoring script in the next step.\n", + "\n", + "Note: if your local TensorFlow version is different than the version running in the cluster where the model is trained, you might see a \"compiletime version mismatch\" warning. You can ignore it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "saver = tf.train.import_meta_graph(\"./model/mnist-tf.model.meta\")\n", + "graph = tf.get_default_graph()\n", + "\n", + "for op in graph.get_operations():\n", + " if op.name.startswith('network'):\n", + " print(op.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Feed the test dataset to the persisted model to get predictions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# input tensor. this is an array of 784 elements, each representing the intensity of a pixel in the digit image.\n", + "X = tf.get_default_graph().get_tensor_by_name(\"network/X:0\")\n", + "# output tensor. this is an array of 10 elements, each representing the probability of predicted value of the digit.\n", + "output = tf.get_default_graph().get_tensor_by_name(\"network/output/MatMul:0\")\n", + "\n", + "with tf.Session() as sess:\n", + " saver.restore(sess, './model/mnist-tf.model')\n", + " k = output.eval(feed_dict={X : X_test})\n", + "# get the prediction, which is the index of the element that has the largest probability value.\n", + "y_hat = np.argmax(k, axis=1)\n", + "\n", + "# print the first 30 labels and predictions\n", + "print('labels: \\t', y_test[:30])\n", + "print('predictions:\\t', y_hat[:30])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculate the overall accuracy by comparing the predicted values against the test labels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Accuracy on the test set:\", np.average(y_hat == y_test))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Intelligent hyperparameter tuning\n", + "We have trained the model with one set of hyperparameters, now let's see how we can do hyperparameter tuning by launching multiple runs on the cluster. First let's define the parameter space using random sampling." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal\n", + "from azureml.train.hyperdrive import choice, loguniform\n", + "\n", + "ps = RandomParameterSampling(\n", + " {\n", + " '--batch-size': choice(25, 50, 100),\n", + " '--first-layer-neurons': choice(10, 50, 200, 300, 500),\n", + " '--second-layer-neurons': choice(10, 50, 200, 500),\n", + " '--learning-rate': loguniform(-6, -1)\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we will create a new estimator without the above parameters since they will be passed in later. Note we still need to keep the `data-folder` parameter since that's not a hyperparameter we will sweep." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "est = TensorFlow(source_directory=script_folder,\n", + " script_params={'--data-folder': ws.get_default_datastore().as_mount()},\n", + " compute_target=compute_target,\n", + " entry_script='tf_mnist.py', \n", + " use_gpu=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we will define an early termination policy.
The `BanditPolicy` tells the service to check the job every 2 iterations. If the primary metric (defined later) falls outside of the top 10% range (with `slack_factor=0.1`, below the best metric reported so far divided by 1.1), Azure ML terminates the job. This saves us from continuing to explore hyperparameters that don't show promise of helping reach our target metric." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we are ready to create the HyperDrive run configuration, and specify the primary metric `validation_acc` that's recorded in your training runs. If you go back to visit the training script, you will notice that this value is being logged after every epoch (a full pass over the training set). We also want to tell the service that we are looking to maximize this value. We also set the maximum total number of runs to 8, and the maximum number of concurrent runs to 4, which matches the maximum number of nodes in our compute cluster." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "htc = HyperDriveConfig(estimator=est, \n", + " hyperparameter_sampling=ps, \n", + " policy=policy, \n", + " primary_metric_name='validation_acc', \n", + " primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, \n", + " max_total_runs=8,\n", + " max_concurrent_runs=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, let's launch the hyperparameter tuning job." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "htr = exp.submit(config=htc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use a run history widget to show the progress. Be patient as this might take a while to complete." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "RunDetails(htr).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "htr.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Find and register best model \n", + "When all the jobs finish, we can find the one that has the highest accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run = htr.get_best_run_by_primary_metric()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's list the model files uploaded during the run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(best_run.get_file_names())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can then register the folder (and all files in it) as a model named `tf-dnn-mnist` under the workspace for deployment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = best_run.register_model(model_name='tf-dnn-mnist', model_path='outputs/model')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Deploy the model in ACI\n", + "Now we are ready to deploy the model as a web service running in Azure Container Instance [ACI](https://azure.microsoft.com/en-us/services/container-instances/).
Azure Machine Learning accomplishes this by constructing a Docker image with the scoring logic and model baked in.\n", + "### Create score.py\n", + "First, we will create a scoring script that will be invoked by the web service call. \n", + "\n", + "* Note that the scoring script must have two required functions, `init()` and `run(input_data)`. \n", + " * In `init()` function, you typically load the model into a global object. This function is executed only once when the Docker container is started. \n", + " * In `run(input_data)` function, the model is used to predict a value based on the input data. The input and output to `run` typically use JSON as serialization and de-serialization format but you are not limited to that." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile score.py\n", + "import json\n", + "import numpy as np\n", + "import os\n", + "import tensorflow as tf\n", + "\n", + "from azureml.core.model import Model\n", + "\n", + "def init():\n", + " global X, output, sess\n", + " tf.reset_default_graph()\n", + " model_root = Model.get_model_path('tf-dnn-mnist')\n", + " saver = tf.train.import_meta_graph(os.path.join(model_root, 'mnist-tf.model.meta'))\n", + " X = tf.get_default_graph().get_tensor_by_name(\"network/X:0\")\n", + " output = tf.get_default_graph().get_tensor_by_name(\"network/output/MatMul:0\")\n", + " \n", + " sess = tf.Session()\n", + " saver.restore(sess, os.path.join(model_root, 'mnist-tf.model'))\n", + "\n", + "def run(raw_data):\n", + " data = np.array(json.loads(raw_data)['data'])\n", + " # make prediction\n", + " out = output.eval(session=sess, feed_dict={X: data})\n", + " y_hat = np.argmax(out, axis=1)\n", + " return y_hat.tolist()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create myenv.yml\n", + "We also need to create an environment file so that Azure Machine Learning can install the necessary packages in the Docker image which are required by your scoring script. In this case, we need to specify packages `numpy`, `tensorflow`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import CondaDependencies\n", + "\n", + "cd = CondaDependencies.create()\n", + "cd.add_conda_package('numpy')\n", + "cd.add_tensorflow_conda_package()\n", + "cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n", + "\n", + "print(cd.serialize_to_string())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Deploy to ACI\n", + "We are almost ready to deploy. Create the inference configuration and deployment configuration and deploy to ACI. This cell will run for about 7-8 minutes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.webservice import AciWebservice\n", + "from azureml.core.model import InferenceConfig\n", + "from azureml.core.webservice import Webservice\n", + "from azureml.core.model import Model\n", + "\n", + "inference_config = InferenceConfig(runtime= \"python\", \n", + " entry_script=\"score.py\",\n", + " conda_file=\"myenv.yml\")\n", + "\n", + "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n", + " memory_gb=1, \n", + " tags={'name':'mnist', 'framework': 'TensorFlow DNN'},\n", + " description='Tensorflow DNN on MNIST')\n", + "\n", + "service = Model.deploy(workspace=ws, \n", + " name='tf-mnist-svc', \n", + " models=[model], \n", + " inference_config=inference_config, \n", + " deployment_config=aciconfig)\n", + "\n", + "service.wait_for_deployment(True)\n", + "print(service.state)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Tip: If something goes wrong with the deployment, the first thing to look at is the logs from the service by running the following command:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(service.get_logs())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is the scoring web service endpoint:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(service.scoring_uri)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test the deployed model\n", + "Let's test the deployed model. Pick 30 random samples from the test set, and send it to the web service hosted in ACI. Note here we are using the `run` API in the SDK to invoke the service. You can also make raw HTTP calls using any HTTP tool such as curl.\n", + "\n", + "After the invocation, we print the returned predictions and plot them along with the input images. Use red font color and inversed image (white on black) to highlight the misclassified samples. Note since the model accuracy is pretty high, you might have to run the below cell a few times before you can see a misclassified sample." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "# find 30 random samples from test set\n", + "n = 30\n", + "sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n", + "\n", + "test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n", + "test_samples = bytes(test_samples, encoding='utf8')\n", + "\n", + "# predict using the deployed model\n", + "result = service.run(input_data=test_samples)\n", + "\n", + "# compare actual value vs. the predicted values:\n", + "i = 0\n", + "plt.figure(figsize = (20, 1))\n", + "\n", + "for s in sample_indices:\n", + " plt.subplot(1, n, i + 1)\n", + " plt.axhline('')\n", + " plt.axvline('')\n", + " \n", + " # use different color for misclassified sample\n", + " font_color = 'red' if y_test[s] != result[i] else 'black'\n", + " clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n", + " \n", + " plt.text(x=10, y=-10, s=y_hat[s], fontsize=18, color=font_color)\n", + " plt.imshow(X_test[s].reshape(28, 28), cmap=clr_map)\n", + " \n", + " i = i + 1\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also send raw HTTP request to the service." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "\n", + "# send a random row from the test set to score\n", + "random_index = np.random.randint(0, len(X_test)-1)\n", + "input_data = \"{\\\"data\\\": [\" + str(list(X_test[random_index])) + \"]}\"\n", + "\n", + "headers = {'Content-Type':'application/json'}\n", + "\n", + "resp = requests.post(service.scoring_uri, input_data, headers=headers)\n", + "\n", + "print(\"POST to url\", service.scoring_uri)\n", + "#print(\"input data:\", input_data)\n", + "print(\"label:\", y_test[random_index])\n", + "print(\"prediction:\", resp.text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's look at the workspace after the web service was deployed. You should see \n", + "* a registered model named 'model' and with the id 'model:1'\n", + "* a webservice called 'tf-mnist' with some scoring URL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "models = ws.models\n", + "for name, model in models.items():\n", + " print(\"Model: {}, ID: {}\".format(name, model.id))\n", + " \n", + "webservices = ws.webservices\n", + "for name, webservice in webservices.items():\n", + " print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up\n", + "You can delete the ACI deployment with a simple delete API call." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "service.delete()" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "ninhu" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.yml b/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.yml new file mode 100644 index 00000000..4b9dd138 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.yml @@ -0,0 +1,8 @@ +name: train-hyperparameter-tune-deploy-with-tensorflow +dependencies: +- numpy +- tensorflow +- matplotlib +- pip: + - azureml-sdk + - azureml-widgets diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/utils.py b/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/utils.py new file mode 100644 index 00000000..98170ada --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/utils.py @@ -0,0 +1,27 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
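+# Shared helper for the MNIST notebooks above: load_data() reads the gzipped image/label files into numpy arrays, and one_hot_encode() converts integer labels to one-hot vectors.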
+ +import gzip +import numpy as np +import struct + + +# load compressed MNIST gz files and return numpy arrays +def load_data(filename, label=False): + with gzip.open(filename) as gz: + struct.unpack('I', gz.read(4)) + n_items = struct.unpack('>I', gz.read(4)) + if not label: + n_rows = struct.unpack('>I', gz.read(4))[0] + n_cols = struct.unpack('>I', gz.read(4))[0] + res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8) + res = res.reshape(n_items[0], n_rows * n_cols) + else: + res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8) + res = res.reshape(n_items[0], 1) + return res + + +# one-hot encode a 1-D array +def one_hot_encode(array, num_of_classes): + return np.eye(num_of_classes)[array.reshape(-1)] diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb b/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb new file mode 100644 index 00000000..568b7648 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb @@ -0,0 +1,409 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/training/manage-runs/manage-runs.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Distributed Tensorflow with Horovod\n", + "In this tutorial, you will train a word2vec model in TensorFlow using distributed training via [Horovod](https://github.com/uber/horovod)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning (AML)\n", + "* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n", + " * install the AML SDK\n", + " * create a workspace and its configuration file (`config.json`)\n", + "* Review the [tutorial](../train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) on single-node TensorFlow training using the SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "Opt-in diagnostics for better experience, quality, and security of future releases." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Diagnostics" + ] + }, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace\n", + "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep='\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create or Attach existing AmlCompute\n", + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you create `AmlCompute` as your training compute resource.\n", + "\n", + "**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n", + "\n", + "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# choose a name for your cluster\n", + "cluster_name = \"gpu-cluster\"\n", + "\n", + "try:\n", + " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", + " print('Found existing compute target')\n", + "except ComputeTargetException:\n", + " print('Creating a new compute target...')\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n", + " max_nodes=4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", + "\n", + " compute_target.wait_for_completion(show_output=True)\n", + "\n", + "# use get_status() to get a detailed status for the current cluster. \n", + "print(compute_target.get_status().serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above code creates a GPU cluster. If you instead want to create a CPU cluster, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Upload data to datastore\n", + "To make data accessible for remote training, AML provides a convenient way to do so via a [Datastore](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data). 
The datastore provides a mechanism for you to upload/download data to Azure Storage, and interact with it from your remote compute targets. \n", + "\n", + "If your data is already stored in Azure, or you download the data as part of your training script, you will not need to do this step. For this tutorial, although you can download the data in your training script, we will demonstrate how to upload the training data to a datastore and access it during training to illustrate the datastore functionality." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, download the training data from [here](http://mattmahoney.net/dc/text8.zip) to your local machine:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib\n", + "\n", + "os.makedirs('./data', exist_ok=True)\n", + "download_url = 'http://mattmahoney.net/dc/text8.zip'\n", + "urllib.request.urlretrieve(download_url, filename='./data/text8.zip')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each workspace is associated with a default datastore. In this tutorial, we will upload the training data to this default datastore." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = ws.get_default_datastore()\n", + "print(ds.datastore_type, ds.account_name, ds.container_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Upload the contents of the data directory to the path `./data` on the default datastore." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds.upload(src_dir='data', target_path='data', overwrite=True, show_progress=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For convenience, let's get a reference to the path on the datastore with the zip file of training data. We can do so using the `path` method. In the next section, we can then pass this reference to our training script's `--input_data` argument. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "path_on_datastore = 'data/text8.zip'\n", + "ds_data = ds.path(path_on_datastore)\n", + "print(ds_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train model on the remote compute" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a project directory\n", + "Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "project_folder = './tf-distr-hvd'\n", + "os.makedirs(project_folder, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copy the training script `tf_horovod_word2vec.py` into this project directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "\n", + "shutil.copy('tf_horovod_word2vec.py', project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an experiment\n", + "Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed TensorFlow tutorial. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "\n", + "experiment_name = 'tf-distr-hvd'\n", + "experiment = Experiment(ws, name=experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a TensorFlow estimator\n", + "The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow).\n", + "\n", + "The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import TensorFlow, Mpi\n", + "\n", + "script_params={\n", + " '--input_data': ds_data\n", + "}\n", + "\n", + "estimator= TensorFlow(source_directory=project_folder,\n", + " compute_target=compute_target,\n", + " script_params=script_params,\n", + " entry_script='tf_horovod_word2vec.py',\n", + " node_count=2,\n", + " distributed_training=Mpi(),\n", + " framework_version='1.13')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using MPI/Horovod, you must provide the argument `distributed_training=Mpi()`. To specify `i` workers per node, you must provide the argument `distributed_training=Mpi(process_count_per_node=i)`. Using this estimator with these settings, TensorFlow, Horovod and their dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `TensorFlow` constructor's `pip_packages` or `conda_packages` parameters.\n", + "\n", + "Note that we passed our training data reference `ds_data` to our script's `--input_data` argument. This will 1) mount our datastore on the remote compute and 2) provide the path to the data zip file on our datastore." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit job\n", + "Run your experiment by submitting your estimator object. Note that this call is asynchronous."
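Before submitting, here is an illustrative variation of the estimator above that requests two Horovod processes per node and adds an extra pip dependency. It is a sketch only and is not used in the rest of the tutorial; `process_count_per_node=2` and the `tqdm` package are assumptions for illustration.

```python
# illustrative sketch: 2 nodes x 2 Horovod processes per node, plus an extra pip package
estimator_2ppn = TensorFlow(source_directory=project_folder,
                            compute_target=compute_target,
                            script_params=script_params,
                            entry_script='tf_horovod_word2vec.py',
                            node_count=2,
                            distributed_training=Mpi(process_count_per_node=2),
                            pip_packages=['tqdm'],  # example extra dependency
                            framework_version='1.13')
```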
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = experiment.submit(estimator)\n", + "print(run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitor your run\n", + "You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, you can block until the script has completed training before running more code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output=True)" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "roastala" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.yml b/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.yml new file mode 100644 index 00000000..15d0a491 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.yml @@ -0,0 +1,5 @@ +name: distributed-tensorflow-with-horovod +dependencies: +- pip: + - azureml-sdk + - azureml-widgets diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-horovod/tf_horovod_word2vec.py b/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-horovod/tf_horovod_word2vec.py new file mode 100644 index 00000000..f29fb278 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-horovod/tf_horovod_word2vec.py @@ -0,0 +1,259 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Modifications copyright (C) 2017 Uber Technologies, Inc. +# Additional modifications copyright (C) Microsoft Corporation +# Licensed under the Apache License, Version 2.0 +# Script adapted from: https://github.com/uber/horovod/blob/master/examples/tensorflow_word2vec.py +# ====================================== +"""Basic word2vec example.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import math +import os +import random +import zipfile +import argparse + +import numpy as np +from six.moves import urllib +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf +import horovod.tensorflow as hvd +from azureml.core.run import Run + +# Horovod: initialize Horovod. 
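+# hvd.init() must be called before any other Horovod API. After it returns,
+# hvd.size() is the total number of training processes across all nodes,
+# hvd.rank() is this process's global rank, and hvd.local_rank() is its rank on
+# the local node (used further down to pin one GPU per process).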
+hvd.init() + +parser = argparse.ArgumentParser() +parser.add_argument('--input_data', type=str, help='training data') + +args = parser.parse_args() + +input_data = args.input_data +print("the input data is at %s" % input_data) + +# Step 1: Download the data. +url = 'http://mattmahoney.net/dc/text8.zip' + + +def maybe_download(filename, expected_bytes): + """Download a file if not present, and make sure it's the right size.""" + if not filename: + filename = "text8.zip" + if not os.path.exists(filename): + print("Downloading the data from http://mattmahoney.net/dc/text8.zip") + filename, _ = urllib.request.urlretrieve(url, filename) + else: + print("Use the data from %s" % input_data) + statinfo = os.stat(filename) + if statinfo.st_size == expected_bytes: + print('Found and verified', filename) + else: + print(statinfo.st_size) + raise Exception( + 'Failed to verify ' + url + '. Can you get to it with a browser?') + return filename + + +filename = maybe_download(input_data, 31344016) + + +# Read the data into a list of strings. +def read_data(filename): + """Extract the first file enclosed in a zip file as a list of words.""" + with zipfile.ZipFile(filename) as f: + data = tf.compat.as_str(f.read(f.namelist()[0])).split() + return data + + +vocabulary = read_data(filename) +print('Data size', len(vocabulary)) + +# Step 2: Build the dictionary and replace rare words with UNK token. +vocabulary_size = 50000 + + +def build_dataset(words, n_words): + """Process raw inputs into a dataset.""" + count = [['UNK', -1]] + count.extend(collections.Counter(words).most_common(n_words - 1)) + dictionary = dict() + for word, _ in count: + dictionary[word] = len(dictionary) + data = list() + unk_count = 0 + for word in words: + if word in dictionary: + index = dictionary[word] + else: + index = 0 # dictionary['UNK'] + unk_count += 1 + data.append(index) + count[0][1] = unk_count + reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys())) + return data, count, dictionary, reversed_dictionary + + +data, count, dictionary, reverse_dictionary = build_dataset(vocabulary, + vocabulary_size) +del vocabulary # Hint to reduce memory. +print('Most common words (+UNK)', count[:5]) +print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]]) + + +# Step 3: Function to generate a training batch for the skip-gram model. 
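+# For each center word, num_skips (center, context) training pairs are drawn
+# from a window of skip_window words on either side. For example, with
+# skip_window=1 and num_skips=2, the fragment "the quick brown" produces the
+# pairs (quick -> the) and (quick -> brown).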
+def generate_batch(batch_size, num_skips, skip_window): + assert num_skips <= 2 * skip_window + # Adjust batch_size to match num_skips + batch_size = batch_size // num_skips * num_skips + span = 2 * skip_window + 1 # [ skip_window target skip_window ] + # Backtrack a little bit to avoid skipping words in the end of a batch + data_index = random.randint(0, len(data) - span - 1) + batch = np.ndarray(shape=(batch_size), dtype=np.int32) + labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32) + buffer = collections.deque(maxlen=span) + for _ in range(span): + buffer.append(data[data_index]) + data_index = (data_index + 1) % len(data) + for i in range(batch_size // num_skips): + target = skip_window # target label at the center of the buffer + targets_to_avoid = [skip_window] + for j in range(num_skips): + while target in targets_to_avoid: + target = random.randint(0, span - 1) + targets_to_avoid.append(target) + batch[i * num_skips + j] = buffer[skip_window] + labels[i * num_skips + j, 0] = buffer[target] + buffer.append(data[data_index]) + data_index = (data_index + 1) % len(data) + return batch, labels + + +batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1) +for i in range(8): + print(batch[i], reverse_dictionary[batch[i]], + '->', labels[i, 0], reverse_dictionary[labels[i, 0]]) + +# Step 4: Build and train a skip-gram model. + +max_batch_size = 128 +embedding_size = 128 # Dimension of the embedding vector. +skip_window = 1 # How many words to consider left and right. +num_skips = 2 # How many times to reuse an input to generate a label. + +# We pick a random validation set to sample nearest neighbors. Here we limit the +# validation samples to the words that have a low numeric ID, which by +# construction are also the most frequent. +valid_size = 16 # Random set of words to evaluate similarity on. +valid_window = 100 # Only pick dev samples in the head of the distribution. +valid_examples = np.random.choice(valid_window, valid_size, replace=False) +num_sampled = 64 # Number of negative examples to sample. + +graph = tf.Graph() + +with graph.as_default(): + + # Input data. + train_inputs = tf.placeholder(tf.int32, shape=[None]) + train_labels = tf.placeholder(tf.int32, shape=[None, 1]) + valid_dataset = tf.constant(valid_examples, dtype=tf.int32) + + # Look up embeddings for inputs. + embeddings = tf.Variable( + tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)) + embed = tf.nn.embedding_lookup(embeddings, train_inputs) + + # Construct the variables for the NCE loss + nce_weights = tf.Variable( + tf.truncated_normal([vocabulary_size, embedding_size], + stddev=1.0 / math.sqrt(embedding_size))) + nce_biases = tf.Variable(tf.zeros([vocabulary_size])) + + # Compute the average NCE loss for the batch. + # tf.nce_loss automatically draws a new sample of the negative labels each + # time we evaluate the loss. + loss = tf.reduce_mean( + tf.nn.nce_loss(weights=nce_weights, + biases=nce_biases, + labels=train_labels, + inputs=embed, + num_sampled=num_sampled, + num_classes=vocabulary_size)) + + # Horovod: adjust learning rate based on number of GPUs. + optimizer = tf.train.GradientDescentOptimizer(1.0 * hvd.size()) + + # Horovod: add Horovod Distributed Optimizer. + optimizer = hvd.DistributedOptimizer(optimizer) + + train_op = optimizer.minimize(loss) + + # Compute the cosine similarity between minibatch examples and all embeddings. 
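+    # Each embedding row is divided by its L2 norm, so the matmul of the
+    # validation embeddings with all (transposed) embeddings below yields cosine
+    # similarities, which drive the nearest-neighbor report at the end of training.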
+ norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True)) + normalized_embeddings = embeddings / norm + valid_embeddings = tf.nn.embedding_lookup( + normalized_embeddings, valid_dataset) + similarity = tf.matmul( + valid_embeddings, normalized_embeddings, transpose_b=True) + + # Add variable initializer. + init = tf.global_variables_initializer() + + # Horovod: broadcast initial variable states from rank 0 to all other processes. + # This is necessary to ensure consistent initialization of all workers when + # training is started with random weights or restored from a checkpoint. + bcast = hvd.broadcast_global_variables(0) + +# Step 5: Begin training. + +# Horovod: adjust number of steps based on number of GPUs. +num_steps = 4000 // hvd.size() + 1 + +# Horovod: pin GPU to be used to process local rank (one GPU per process) +config = tf.ConfigProto() +config.gpu_options.allow_growth = True +config.gpu_options.visible_device_list = str(hvd.local_rank()) + +with tf.Session(graph=graph, config=config) as session: + # We must initialize all variables before we use them. + init.run() + bcast.run() + print('Initialized') + run = Run.get_context() + average_loss = 0 + for step in xrange(num_steps): + # simulate various sentence length by randomization + batch_size = random.randint(max_batch_size // 2, max_batch_size) + batch_inputs, batch_labels = generate_batch( + batch_size, num_skips, skip_window) + feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels} + + # We perform one update step by evaluating the optimizer op (including it + # in the list of returned values for session.run() + _, loss_val = session.run([train_op, loss], feed_dict=feed_dict) + average_loss += loss_val + + if step % 2000 == 0: + if step > 0: + average_loss /= 2000 + # The average loss is an estimate of the loss over the last 2000 batches. + print('Average loss at step ', step, ': ', average_loss) + run.log("Loss", average_loss) + average_loss = 0 + final_embeddings = normalized_embeddings.eval() + + # Evaluate similarity in the end on worker 0. + if hvd.rank() == 0: + sim = similarity.eval() + for i in xrange(valid_size): + valid_word = reverse_dictionary[valid_examples[i]] + top_k = 8 # number of nearest neighbors + nearest = (-sim[i, :]).argsort()[1:top_k + 1] + log_str = 'Nearest to %s:' % valid_word + for k in xrange(top_k): + close_word = reverse_dictionary[nearest[k]] + log_str = '%s %s,' % (log_str, close_word) + print(log_str) diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.ipynb b/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.ipynb new file mode 100644 index 00000000..a5e3e143 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.ipynb @@ -0,0 +1,321 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Distributed TensorFlow with parameter server\n", + "In this tutorial, you will train a TensorFlow model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using native [distributed TensorFlow](https://www.tensorflow.org/deploy/distributed)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning (AML)\n", + "* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n", + " * install the AML SDK\n", + " * create a workspace and its configuration file (`config.json`)\n", + "* Review the [tutorial](../train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) on single-node TensorFlow training using the SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "Opt-in diagnostics for better experience, quality, and security of future releases." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Diagnostics" + ] + }, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace\n", + "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create or Attach existing AmlCompute\n", + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. 
In this tutorial, you create `AmlCompute` as your training compute resource.\n", + "\n", + "**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n", + "\n", + "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# choose a name for your cluster\n", + "cluster_name = \"gpu-cluster\"\n", + "\n", + "try:\n", + " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", + " print('Found existing compute target.')\n", + "except ComputeTargetException:\n", + " print('Creating a new compute target...')\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n", + " max_nodes=4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", + "\n", + " compute_target.wait_for_completion(show_output=True)\n", + "\n", + "# use get_status() to get a detailed status for the current cluster. \n", + "print(compute_target.get_status().serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train model on the remote compute\n", + "Now that we have the cluster ready to go, let's run our distributed training job." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a project directory\n", + "Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "project_folder = './tf-distr-ps'\n", + "os.makedirs(project_folder, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copy the training script `tf_mnist_replica.py` into this project directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "\n", + "shutil.copy('tf_mnist_replica.py', project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an experiment\n", + "Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed TensorFlow tutorial. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "\n", + "experiment_name = 'tf-distr-ps'\n", + "experiment = Experiment(ws, name=experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a TensorFlow estimator\n", + "The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. 
For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import TensorFlow, ParameterServer\n", + "\n", + "script_params={\n", + " '--num_gpus': 1,\n", + " '--train_steps': 500\n", + "}\n", + "\n", + "estimator = TensorFlow(source_directory=project_folder,\n", + " compute_target=compute_target,\n", + " script_params=script_params,\n", + " entry_script='tf_mnist_replica.py',\n", + " node_count=2,\n", + " distributed_training=ParameterServer(worker_count=2),\n", + " use_gpu=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above code specifies that we will run our training script on `2` nodes, with two workers and one parameter server. In order to execute a native distributed TensorFlow run, you must provide the argument `distributed_training=ParameterServer()`. Using this estimator with these settings, TensorFlow and its dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `TensorFlow` constructor's `pip_packages` or `conda_packages` parameters." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit job\n", + "Run your experiment by submitting your estimator object. Note that this call is asynchronous." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = experiment.submit(estimator)\n", + "print(run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitor your run\n", + "You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, you can block until the script has completed training before running more code."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output=True) # this provides a verbose log" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "ninhu" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.yml b/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.yml new file mode 100644 index 00000000..bc5a30eb --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.yml @@ -0,0 +1,5 @@ +name: distributed-tensorflow-with-parameter-server +dependencies: +- pip: + - azureml-sdk + - azureml-widgets diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-parameter-server/tf_mnist_replica.py b/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-parameter-server/tf_mnist_replica.py new file mode 100644 index 00000000..96d40fed --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-parameter-server/tf_mnist_replica.py @@ -0,0 +1,271 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 +# Script adapted from: +# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/dist_test/python/mnist_replica.py +# ============================================================================== +"""Distributed MNIST training and validation, with model replicas. +A simple softmax model with one hidden layer is defined. The parameters +(weights and biases) are located on one parameter server (ps), while the ops +are executed on two worker nodes by default. The TF sessions also run on the +worker node. +Multiple invocations of this script can be done in parallel, with different +values for --task_index. There should be exactly one invocation with +--task_index, which will create a master session that carries out variable +initialization. The other, non-master, sessions will wait for the master +session to finish the initialization before proceeding to the training stage. +The coordination between the multiple worker invocations occurs due to +the definition of the parameters on the same ps devices. The parameter updates +from one worker is visible to all other workers. As such, the workers can +perform forward computation and gradient calculation in parallel, which +should lead to increased training speed for the simple model. 
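+When launched through the Azure ML ParameterServer configuration, the cluster
+layout and this process's role (ps, worker, or master) are read from the
+TF_CONFIG environment variable instead of command-line flags.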
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import math +import sys +import tempfile +import time +import json + +import tensorflow as tf +from tensorflow.examples.tutorials.mnist import input_data +from azureml.core.run import Run + +flags = tf.app.flags +flags.DEFINE_string("data_dir", "/tmp/mnist-data", + "Directory for storing mnist data") +flags.DEFINE_boolean("download_only", False, + "Only perform downloading of data; Do not proceed to " + "session preparation, model definition or training") +flags.DEFINE_integer("num_gpus", 0, "Total number of gpus for each machine." + "If you don't use GPU, please set it to '0'") +flags.DEFINE_integer("replicas_to_aggregate", None, + "Number of replicas to aggregate before parameter update " + "is applied (For sync_replicas mode only; default: " + "num_workers)") +flags.DEFINE_integer("hidden_units", 100, + "Number of units in the hidden layer of the NN") +flags.DEFINE_integer("train_steps", 200, + "Number of (global) training steps to perform") +flags.DEFINE_integer("batch_size", 100, "Training batch size") +flags.DEFINE_float("learning_rate", 0.01, "Learning rate") +flags.DEFINE_boolean( + "sync_replicas", False, + "Use the sync_replicas (synchronized replicas) mode, " + "wherein the parameter updates from workers are aggregated " + "before applied to avoid stale gradients") +flags.DEFINE_boolean( + "existing_servers", False, "Whether servers already exists. If True, " + "will use the worker hosts via their GRPC URLs (one client process " + "per worker host). Otherwise, will create an in-process TensorFlow " + "server.") + +FLAGS = flags.FLAGS + +IMAGE_PIXELS = 28 + + +def main(unused_argv): + data_root = os.path.join("outputs", "MNIST") + mnist = None + tf_config = os.environ.get("TF_CONFIG") + if not tf_config or tf_config == "": + raise ValueError("TF_CONFIG not found.") + tf_config_json = json.loads(tf_config) + cluster = tf_config_json.get('cluster') + job_name = tf_config_json.get('task', {}).get('type') + task_index = tf_config_json.get('task', {}).get('index') + job_name = "worker" if job_name == "master" else job_name + sentinel_path = os.path.join(data_root, "complete.txt") + if job_name == "worker" and task_index == 0: + mnist = input_data.read_data_sets(data_root, one_hot=True) + with open(sentinel_path, 'w+') as f: + f.write("download complete") + else: + while not os.path.exists(sentinel_path): + time.sleep(0.01) + mnist = input_data.read_data_sets(data_root, one_hot=True) + + if FLAGS.download_only: + sys.exit(0) + + print("job name = %s" % job_name) + print("task index = %d" % task_index) + print("number of GPUs = %d" % FLAGS.num_gpus) + + # Construct the cluster and start the server + cluster_spec = tf.train.ClusterSpec(cluster) + + # Get the number of workers. + num_workers = len(cluster_spec.task_indices("worker")) + + if not FLAGS.existing_servers: + # Not using existing servers. Create an in-process server. 
+ server = tf.train.Server( + cluster_spec, job_name=job_name, task_index=task_index) + if job_name == "ps": + server.join() + + is_chief = (task_index == 0) + if FLAGS.num_gpus > 0: + # Avoid gpu allocation conflict: now allocate task_num -> #gpu + # for each worker in the corresponding machine + gpu = (task_index % FLAGS.num_gpus) + worker_device = "/job:worker/task:%d/gpu:%d" % (task_index, gpu) + elif FLAGS.num_gpus == 0: + # Just allocate the CPU to worker server + cpu = 0 + worker_device = "/job:worker/task:%d/cpu:%d" % (task_index, cpu) + # The device setter will automatically place Variables ops on separate + # parameter servers (ps). The non-Variable ops will be placed on the workers. + # The ps use CPU and workers use corresponding GPU + with tf.device( + tf.train.replica_device_setter( + worker_device=worker_device, + ps_device="/job:ps/cpu:0", + cluster=cluster)): + global_step = tf.Variable(0, name="global_step", trainable=False) + + # Variables of the hidden layer + hid_w = tf.Variable( + tf.truncated_normal( + [IMAGE_PIXELS * IMAGE_PIXELS, FLAGS.hidden_units], + stddev=1.0 / IMAGE_PIXELS), + name="hid_w") + hid_b = tf.Variable(tf.zeros([FLAGS.hidden_units]), name="hid_b") + + # Variables of the softmax layer + sm_w = tf.Variable( + tf.truncated_normal( + [FLAGS.hidden_units, 10], + stddev=1.0 / math.sqrt(FLAGS.hidden_units)), + name="sm_w") + sm_b = tf.Variable(tf.zeros([10]), name="sm_b") + + # Ops: located on the worker specified with task_index + x = tf.placeholder(tf.float32, [None, IMAGE_PIXELS * IMAGE_PIXELS]) + y_ = tf.placeholder(tf.float32, [None, 10]) + + hid_lin = tf.nn.xw_plus_b(x, hid_w, hid_b) + hid = tf.nn.relu(hid_lin) + + y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b)) + cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0))) + + opt = tf.train.AdamOptimizer(FLAGS.learning_rate) + + if FLAGS.sync_replicas: + if FLAGS.replicas_to_aggregate is None: + replicas_to_aggregate = num_workers + else: + replicas_to_aggregate = FLAGS.replicas_to_aggregate + + opt = tf.train.SyncReplicasOptimizer( + opt, + replicas_to_aggregate=replicas_to_aggregate, + total_num_replicas=num_workers, + name="mnist_sync_replicas") + + train_step = opt.minimize(cross_entropy, global_step=global_step) + + if FLAGS.sync_replicas: + local_init_op = opt.local_step_init_op + if is_chief: + local_init_op = opt.chief_init_op + + ready_for_local_init_op = opt.ready_for_local_init_op + + # Initial token and chief queue runners required by the sync_replicas mode + chief_queue_runner = opt.get_chief_queue_runner() + sync_init_op = opt.get_init_tokens_op() + + init_op = tf.global_variables_initializer() + train_dir = tempfile.mkdtemp() + + if FLAGS.sync_replicas: + sv = tf.train.Supervisor( + is_chief=is_chief, + logdir=train_dir, + init_op=init_op, + local_init_op=local_init_op, + ready_for_local_init_op=ready_for_local_init_op, + recovery_wait_secs=1, + global_step=global_step) + else: + sv = tf.train.Supervisor( + is_chief=is_chief, + logdir=train_dir, + init_op=init_op, + recovery_wait_secs=1, + global_step=global_step) + + sess_config = tf.ConfigProto( + allow_soft_placement=True, + log_device_placement=False, + device_filters=["/job:ps", + "/job:worker/task:%d" % task_index]) + + # The chief worker (task_index==0) session will prepare the session, + # while the remaining workers will wait for the preparation to complete. + if is_chief: + print("Worker %d: Initializing session..." % task_index) + else: + print("Worker %d: Waiting for session to be initialized..." 
% + task_index) + + if FLAGS.existing_servers: + server_grpc_url = "grpc://" + task_index + print("Using existing server at: %s" % server_grpc_url) + + sess = sv.prepare_or_wait_for_session(server_grpc_url, config=sess_config) + else: + sess = sv.prepare_or_wait_for_session(server.target, config=sess_config) + + print("Worker %d: Session initialization complete." % task_index) + + if FLAGS.sync_replicas and is_chief: + # Chief worker will start the chief queue runner and call the init op. + sess.run(sync_init_op) + sv.start_queue_runners(sess, [chief_queue_runner]) + + # Perform training + time_begin = time.time() + print("Training begins @ %f" % time_begin) + + local_step = 0 + while True: + # Training feed + batch_xs, batch_ys = mnist.train.next_batch(FLAGS.batch_size) + train_feed = {x: batch_xs, y_: batch_ys} + + _, step = sess.run([train_step, global_step], feed_dict=train_feed) + local_step += 1 + + now = time.time() + print("%f: Worker %d: training step %d done (global step: %d)" % + (now, task_index, local_step, step)) + + if step >= FLAGS.train_steps: + break + + time_end = time.time() + print("Training ends @ %f" % time_end) + training_time = time_end - time_begin + print("Training elapsed time: %f s" % training_time) + + # Validation feed + val_feed = {x: mnist.validation.images, y_: mnist.validation.labels} + val_xent = sess.run(cross_entropy, feed_dict=val_feed) + print("After %d training step(s), validation cross entropy = %g" % + (FLAGS.train_steps, val_xent)) + if job_name == "worker" and task_index == 0: + run = Run.get_context() + run.log("CrossEntropy", val_xent) + + +if __name__ == "__main__": + tf.app.run() diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/tf_mnist_with_checkpoint.py b/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/tf_mnist_with_checkpoint.py new file mode 100644 index 00000000..85e80cbd --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/tf_mnist_with_checkpoint.py @@ -0,0 +1,123 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
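+#
+# Trains a small two-hidden-layer DNN on MNIST, logs training and validation
+# accuracy to the Azure ML run, saves a checkpoint to ./outputs every 5 epochs,
+# and, when --resume-from points at a previous run's outputs, restores the
+# latest checkpoint and resumes from the epoch encoded in its filename.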
+ +import numpy as np +import argparse +import os +import re +import tensorflow as tf + +from azureml.core import Run +from utils import load_data + +print("TensorFlow version:", tf.VERSION) + +parser = argparse.ArgumentParser() +parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point') + +parser.add_argument('--resume-from', type=str, default=None, + help='location of the model or checkpoint files from where to resume the training') +args = parser.parse_args() + + +previous_model_location = args.resume_from +# You can also use environment variable to get the model/checkpoint files location +# previous_model_location = os.path.expandvars(os.getenv("AZUREML_DATAREFERENCE_MODEL_LOCATION", None)) + +data_folder = os.path.join(args.data_folder, 'mnist') + +print('training dataset is stored here:', data_folder) + +X_train = load_data(os.path.join(data_folder, 'train-images.gz'), False) / 255.0 +X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0 + +y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True).reshape(-1) +y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True).reshape(-1) + +print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n') +training_set_size = X_train.shape[0] + +n_inputs = 28 * 28 +n_h1 = 100 +n_h2 = 100 +n_outputs = 10 +learning_rate = 0.01 +n_epochs = 20 +batch_size = 50 + +with tf.name_scope('network'): + # construct the DNN + X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X') + y = tf.placeholder(tf.int64, shape=(None), name='y') + h1 = tf.layers.dense(X, n_h1, activation=tf.nn.relu, name='h1') + h2 = tf.layers.dense(h1, n_h2, activation=tf.nn.relu, name='h2') + output = tf.layers.dense(h2, n_outputs, name='output') + +with tf.name_scope('train'): + cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=output) + loss = tf.reduce_mean(cross_entropy, name='loss') + optimizer = tf.train.GradientDescentOptimizer(learning_rate) + train_op = optimizer.minimize(loss) + +with tf.name_scope('eval'): + correct = tf.nn.in_top_k(output, y, 1) + acc_op = tf.reduce_mean(tf.cast(correct, tf.float32)) + +init = tf.global_variables_initializer() +saver = tf.train.Saver() + +# start an Azure ML run +run = Run.get_context() + +with tf.Session() as sess: + start_epoch = 0 + if previous_model_location: + checkpoint_file_path = tf.train.latest_checkpoint(previous_model_location) + saver.restore(sess, checkpoint_file_path) + checkpoint_filename = os.path.basename(checkpoint_file_path) + num_found = re.search(r'\d+', checkpoint_filename) + if num_found: + start_epoch = int(num_found.group(0)) + print("Resuming from epoch {}".format(str(start_epoch))) + else: + init.run() + + for epoch in range(start_epoch, n_epochs): + + # randomly shuffle training set + indices = np.random.permutation(training_set_size) + X_train = X_train[indices] + y_train = y_train[indices] + + # batch index + b_start = 0 + b_end = b_start + batch_size + for _ in range(training_set_size // batch_size): + # get a batch + X_batch, y_batch = X_train[b_start: b_end], y_train[b_start: b_end] + + # update batch index for the next batch + b_start = b_start + batch_size + b_end = min(b_start + batch_size, training_set_size) + + # train + sess.run(train_op, feed_dict={X: X_batch, y: y_batch}) + # evaluate training set + acc_train = acc_op.eval(feed_dict={X: X_batch, y: y_batch}) + # evaluate validation set + acc_val = acc_op.eval(feed_dict={X: X_test, y: y_test}) + + # log 
accuracies + run.log('training_acc', np.float(acc_train)) + run.log('validation_acc', np.float(acc_val)) + print(epoch, '-- Training accuracy:', acc_train, '\b Validation accuracy:', acc_val) + y_hat = np.argmax(output.eval(feed_dict={X: X_test}), axis=1) + + if epoch % 5 == 0: + saver.save(sess, './outputs/', global_step=epoch) + + # saving only half of the model and resuming again from same epoch + if not previous_model_location and epoch == 10: + break + + run.log('final_acc', np.float(acc_val)) diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/train-tensorflow-resume-training.ipynb b/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/train-tensorflow-resume-training.ipynb new file mode 100644 index 00000000..da294e7d --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/train-tensorflow-resume-training.ipynb @@ -0,0 +1,487 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/train-tensorflow-resume-training.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Resuming Tensorflow training from previous run\n", + "In this tutorial, you will resume a mnist model in TensorFlow from a previously submitted run." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning (AML)\n", + "* Go through the [configuration notebook](../../../configuration.ipynb) to:\n", + " * install the AML SDK\n", + " * create a workspace and its configuration file (`config.json`)\n", + "* Review the [tutorial](../train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) on single-node TensorFlow training using the SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "Opt-in diagnostics for better experience, quality, and security of future releases." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Diagnostics" + ] + }, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace\n", + "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep='\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create or Attach existing AmlCompute\n", + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you create `AmlCompute` as your training compute resource.\n", + "\n", + "**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n", + "\n", + "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# choose a name for your cluster\n", + "cluster_name = \"gpu-cluster\"\n", + "\n", + "try:\n", + " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", + " print('Found existing compute target.')\n", + "except ComputeTargetException:\n", + " print('Creating a new compute target...')\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n", + " max_nodes=4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", + "\n", + " compute_target.wait_for_completion(show_output=True)\n", + "\n", + "# use get_status() to get a detailed status for the current cluster. \n", + "print(compute_target.get_status().serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above code creates a GPU cluster. If you instead want to create a CPU cluster, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Upload data to datastore\n", + "To make data accessible for remote training, AML provides a convenient way to do so via a [Datastore](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data). The datastore provides a mechanism for you to upload/download data to Azure Storage, and interact with it from your remote compute targets. \n", + "\n", + "If your data is already stored in Azure, or you download the data as part of your training script, you will not need to do this step. For this tutorial, although you can download the data in your training script, we will demonstrate how to upload the training data to a datastore and access it during training to illustrate the datastore functionality." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First download the data from Yan LeCun's web site directly and save them in a data folder locally." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib\n", + "\n", + "os.makedirs('./data/mnist', exist_ok=True)\n", + "\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename = './data/mnist/train-images.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename = './data/mnist/train-labels.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each workspace is associated with a default datastore. In this tutorial, we will upload the training data to this default datastore." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = ws.get_default_datastore()\n", + "print(ds.datastore_type, ds.account_name, ds.container_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Upload MNIST data to the default datastore." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds.upload(src_dir='./data/mnist', target_path='mnist', overwrite=True, show_progress=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For convenience, let's get a reference to the datastore. In the next section, we can then pass this reference to our training script's `--data-folder` argument. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds_data = ds.as_mount()\n", + "print(ds_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train model on the remote compute" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a project directory\n", + "Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "script_folder = './tf-resume-training'\n", + "os.makedirs(script_folder, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copy the training script `tf_mnist_with_checkpoint.py` into this project directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "\n", + "# the training logic is in the tf_mnist_with_checkpoint.py file.\n", + "shutil.copy('./tf_mnist_with_checkpoint.py', script_folder)\n", + "\n", + "# the utils.py just helps loading data from the downloaded MNIST dataset into numpy arrays.\n", + "shutil.copy('./utils.py', script_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an experiment\n", + "Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed TensorFlow tutorial. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "\n", + "experiment_name = 'tf-resume-training'\n", + "experiment = Experiment(ws, name=experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a TensorFlow estimator\n", + "The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow).\n", + "\n", + "The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import TensorFlow\n", + "\n", + "script_params={\n", + " '--data-folder': ds_data\n", + "}\n", + "\n", + "estimator= TensorFlow(source_directory=script_folder,\n", + " compute_target=compute_target,\n", + " script_params=script_params,\n", + " entry_script='tf_mnist_with_checkpoint.py')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the above code, we passed our training data reference `ds_data` to our script's `--data-folder` argument. This will 1) mount our datastore on the remote compute and 2) provide the path to the data zip file on our datastore." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit job\n", + "### Run your experiment by submitting your estimator object. Note that this call is asynchronous." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = experiment.submit(estimator)\n", + "print(run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitor your run\n", + "You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, you can block until the script has completed training before running more code." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Now let's resume the training from the above run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we will get the DataPath to the outputs directory of the above run which\n", + "contains the checkpoint files and/or model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_location = run._get_outputs_datapath()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we will create a new TensorFlow estimator and pass in the model location. On passing 'resume_from' parameter, a new entry in script_params is created with key as 'resume_from' and value as the model/checkpoint files location and the location gets automatically mounted on the compute target." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import TensorFlow\n", + "\n", + "script_params={\n", + " '--data-folder': ds_data\n", + "}\n", + "\n", + "estimator2 = TensorFlow(source_directory=script_folder,\n", + " compute_target=compute_target,\n", + " script_params=script_params,\n", + " entry_script='tf_mnist_with_checkpoint.py',\n", + " resume_from=model_location)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now you can submit the experiment and it should resume from previous run's checkpoint files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run2 = experiment.submit(estimator2)\n", + "print(run2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run2.wait_for_completion(show_output=True)" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "hesuri" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + }, + "msauthor": "hesuri" + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/train-tensorflow-resume-training.yml b/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/train-tensorflow-resume-training.yml new file mode 100644 index 00000000..c814eef5 --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/train-tensorflow-resume-training.yml @@ -0,0 +1,5 @@ +name: train-tensorflow-resume-training +dependencies: +- pip: + - azureml-sdk + - azureml-widgets diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/utils.py b/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/utils.py new file mode 100644 index 00000000..98170ada --- /dev/null +++ b/how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/utils.py @@ -0,0 +1,27 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# Licensed under the MIT License. + +import gzip +import numpy as np +import struct + + +# load compressed MNIST gz files and return numpy arrays +def load_data(filename, label=False): + with gzip.open(filename) as gz: + struct.unpack('I', gz.read(4)) + n_items = struct.unpack('>I', gz.read(4)) + if not label: + n_rows = struct.unpack('>I', gz.read(4))[0] + n_cols = struct.unpack('>I', gz.read(4))[0] + res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8) + res = res.reshape(n_items[0], n_rows * n_cols) + else: + res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8) + res = res.reshape(n_items[0], 1) + return res + + +# one-hot encode a 1-D array +def one_hot_encode(array, num_of_classes): + return np.eye(num_of_classes)[array.reshape(-1)] diff --git a/how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb b/how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb index 83041a51..84264fa6 100644 --- a/how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb +++ b/how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb @@ -100,7 +100,7 @@ "\n", "# Check core SDK version number\n", "\n", - "print(\"This notebook was created using SDK version 1.0.60, you are currently running version\", azureml.core.VERSION)" + "print(\"This notebook was created using SDK version 1.0.62, you are currently running version\", azureml.core.VERSION)" ] }, { diff --git a/how-to-use-azureml/training-with-deep-learning/README.md b/how-to-use-azureml/training-with-deep-learning/README.md index 6a842f45..77a64b1b 100644 --- a/how-to-use-azureml/training-with-deep-learning/README.md +++ b/how-to-use-azureml/training-with-deep-learning/README.md @@ -3,18 +3,10 @@ These examples show you: 1. [How to use the Estimator pattern in Azure ML](how-to-use-estimator) -2. [Train using TensorFlow Estimator and tune hyperparameters using Hyperdrive](train-hyperparameter-tune-deploy-with-tensorflow) -3. [Train using Pytorch Estimator and tune hyperparameters using Hyperdrive](train-hyperparameter-tune-deploy-with-pytorch) -4. [Train using Keras and tune hyperparameters using Hyperdrive](train-hyperparameter-tune-deploy-with-keras) -5. [Train using Chainer Estimator and tune hyperparameters using Hyperdrive](train-hyperparameter-tune-deploy-with-chainer) -6. [Distributed training using TensorFlow and Parameter Server](distributed-tensorflow-with-parameter-server) -7. [Distributed training using TensorFlow and Horovod](distributed-tensorflow-with-horovod) -8. [Distributed training using Pytorch and Horovod](distributed-pytorch-with-horovod) -9. [Distributed training using CNTK and custom Docker image](distributed-cntk-with-custom-docker) -10. [Distributed training using Chainer](distributed-chainer) -11. [Export run history records to Tensorboard](export-run-history-to-tensorboard) -12. [Use TensorBoard to monitor training execution](tensorboard) -13. [Resuming training from previous run](train-tensorflow-resume-training) +2. [Train using Keras and tune hyperparameters using Hyperdrive](train-hyperparameter-tune-deploy-with-keras) +3. [Distributed training using CNTK and custom Docker image](distributed-cntk-with-custom-docker) +4. [Export run history records to Tensorboard](export-run-history-to-tensorboard) +5. 
[Use TensorBoard to monitor training execution](tensorboard) Learn more about how to use `Estimator` class to [train deep neural networks with Azure Machine Learning](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-ml-models). diff --git a/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/keras_mnist.py b/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/keras_mnist.py index 9f2529e6..e4d17706 100644 --- a/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/keras_mnist.py +++ b/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/keras_mnist.py @@ -4,6 +4,7 @@ import numpy as np import argparse import os +import glob import matplotlib.pyplot as plt @@ -36,11 +37,15 @@ data_folder = args.data_folder print('training dataset is stored here:', data_folder) -X_train = load_data(os.path.join(data_folder, 'train-images.gz'), False) / 255.0 -X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0 +X_train_path = glob.glob(os.path.join(data_folder, '**/train-images-idx3-ubyte.gz'), recursive=True)[0] +X_test_path = glob.glob(os.path.join(data_folder, '**/t10k-images-idx3-ubyte.gz'), recursive=True)[0] +y_train_path = glob.glob(os.path.join(data_folder, '**/train-labels-idx1-ubyte.gz'), recursive=True)[0] +y_test_path = glob.glob(os.path.join(data_folder, '**/t10k-labels-idx1-ubyte.gz'), recursive=True)[0] -y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True).reshape(-1) -y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True).reshape(-1) +X_train = load_data(X_train_path, False) / 255.0 +X_test = load_data(X_test_path, False) / 255.0 +y_train = load_data(y_train_path, True).reshape(-1) +y_test = load_data(y_test_path, True).reshape(-1) training_set_size = X_train.shape[0] diff --git a/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb b/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb index 913fb842..45ae4330 100644 --- a/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb +++ b/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb @@ -132,14 +132,18 @@ }, { "cell_type": "markdown", - "metadata": { - "nbpresent": { - "id": "defe921f-8097-44c3-8336-8af6700804a7" - } - }, + "metadata": {}, "source": [ - "## Download MNIST dataset\n", - "In order to train on the MNIST dataset we will first need to download it from Yan LeCun's web site directly and save them in a `data` folder locally." + "## Explore data\n", + "\n", + "Before you train a model, you need to understand the data that you are using to train it. In this section you learn how to:\n", + "\n", + "* Download the MNIST dataset\n", + "* Display some sample images\n", + "\n", + "### Download the MNIST dataset\n", + "\n", + "Download the MNIST dataset and save the files into a `data` directory locally. Images and labels for both training and testing are downloaded." 
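The `glob` change to `keras_mnist.py` above is what lets the script work against a mounted FileDataset, which keeps the original download names (such as `train-images-idx3-ubyte.gz`) somewhere under the mount point rather than the renamed `train-images.gz` files. A minimal illustration of that lookup; the folder path here is made up:

```python
import glob
import os

data_folder = '/tmp/mnist-mount'  # illustrative mount location, not a path used by the sample
pattern = os.path.join(data_folder, '**', 'train-images-idx3-ubyte.gz')

# recursive=True walks every subfolder under the mount until the file is found
matches = glob.glob(pattern, recursive=True)
print(matches[0] if matches else 'train images not found under {}'.format(data_folder))
```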
] }, { @@ -148,47 +152,42 @@ "metadata": {}, "outputs": [], "source": [ - "import urllib\n", + "import urllib.request\n", "\n", - "os.makedirs('./data/mnist', exist_ok=True)\n", + "data_folder = os.path.join(os.getcwd(), 'data')\n", + "os.makedirs(data_folder, exist_ok=True)\n", "\n", - "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename='./data/mnist/train-images.gz')\n", - "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename='./data/mnist/train-labels.gz')\n", - "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename='./data/mnist/test-images.gz')\n", - "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename='./data/mnist/test-labels.gz')" + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename=os.path.join(data_folder, 'train-images.gz'))\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename=os.path.join(data_folder, 'train-labels.gz'))\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename=os.path.join(data_folder, 'test-images.gz'))\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename=os.path.join(data_folder, 'test-labels.gz'))" ] }, { "cell_type": "markdown", - "metadata": { - "nbpresent": { - "id": "c3f2f57c-7454-4d3e-b38d-b0946cf066ea" - } - }, + "metadata": {}, "source": [ - "## Show some sample images\n", - "Let's load the downloaded compressed file into numpy arrays using some utility functions included in the `utils.py` library file from the current folder. Then we use `matplotlib` to plot 30 random images from the dataset along with their labels." + "### Display some sample images\n", + "\n", + "Load the compressed files into `numpy` arrays. Then use `matplotlib` to plot 30 random images from the dataset with their labels above them. Note this step requires a `load_data` function that's included in an `utils.py` file. This file is included in the sample folder. Please make sure it is placed in the same folder as this notebook. The `load_data` function simply parses the compressed files into numpy arrays." ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "nbpresent": { - "id": "396d478b-34aa-4afa-9898-cdce8222a516" - } - }, + "metadata": {}, "outputs": [], "source": [ + "# make sure utils.py is in the same directory as this code\n", "from utils import load_data, one_hot_encode\n", "\n", - "# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the neural network converge faster.\n", - "X_train = load_data('./data/mnist/train-images.gz', False) / 255.0\n", - "y_train = load_data('./data/mnist/train-labels.gz', True).reshape(-1)\n", - "\n", - "X_test = load_data('./data/mnist/test-images.gz', False) / 255.0\n", - "y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n", + "# note we also shrink the intensity values (X) from 0-255 to 0-1. 
This helps the model converge faster.\n", + "X_train = load_data(os.path.join(data_folder, 'train-images.gz'), False) / 255.0\n", + "X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0\n", + "y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True).reshape(-1)\n", + "y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True).reshape(-1)\n", "\n", + "# now let's show some randomly chosen images from the training set.\n", "count = 0\n", "sample_size = 30\n", "plt.figure(figsize = (16, 6))\n", @@ -197,8 +196,8 @@ " plt.subplot(1, sample_size, count)\n", " plt.axhline('')\n", " plt.axvline('')\n", - " plt.text(x = 10, y = -10, s = y_train[i], fontsize = 18)\n", - " plt.imshow(X_train[i].reshape(28, 28), cmap = plt.cm.Greys)\n", + " plt.text(x=10, y=-10, s=y_train[i], fontsize=18)\n", + " plt.imshow(X_train[i].reshape(28, 28), cmap=plt.cm.Greys)\n", "plt.show()" ] }, @@ -206,8 +205,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Upload MNIST dataset to default datastore \n", - "A [datastore](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data) is a place where data can be stored that is then made accessible to a Run either by means of mounting or copying the data to the compute target. A datastore can either be backed by an Azure Blob Storage or and Azure File Share (ADLS will be supported in the future). For simple data handling, each workspace provides a default datastore that can be used, in case the data is not already in Blob Storage or File Share." + "Now you have an idea of what these images look like and the expected prediction outcome." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "nbpresent": { + "id": "defe921f-8097-44c3-8336-8af6700804a7" + } + }, + "source": [ + "## Create a FileDataset\n", + "A FileDataset references one or multiple files in your datastores or public urls. The files can be of any format. FileDataset provides you with the ability to download or mount the files to your compute. By creating a dataset, you create a reference to the data source location. If you applied any subsetting transformations to the dataset, they will be stored in the dataset as well. The data remains in its existing location, so no extra storage cost is incurred. [Learn More](https://aka.ms/azureml/howto/createdatasets)" ] }, { @@ -216,14 +226,22 @@ "metadata": {}, "outputs": [], "source": [ - "ds = ws.get_default_datastore()" + "from azureml.core.dataset import Dataset\n", + "\n", + "web_paths = [\n", + " 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',\n", + " 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',\n", + " 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',\n", + " 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'\n", + " ]\n", + "dataset = Dataset.File.from_files(path = web_paths)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In this next step, we will upload the training and test set into the workspace's default datastore, which we will then later be mount on an `AmlCompute` cluster for training." + "Use the `register()` method to register datasets to your workspace so they can be shared with others, reused across various experiments, and referred to by name in your training script." 
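Once registered (next cell), the dataset can be retrieved anywhere in the workspace by name, optionally pinned to a specific version. A short sketch, assuming the `mnist dataset` name used in the registration below:

```python
from azureml.core import Dataset

# latest version by default; pass an explicit version number to pin it
mnist_ds = Dataset.get_by_name(ws, name='mnist dataset')
mnist_v1 = Dataset.get_by_name(ws, name='mnist dataset', version=1)
print(mnist_ds.name, mnist_ds.version)
```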
] }, { @@ -232,7 +250,10 @@ "metadata": {}, "outputs": [], "source": [ - "ds.upload(src_dir='./data/mnist', target_path='mnist', overwrite=True, show_progress=True)" + "dataset = dataset.register(workspace = ws,\n", + " name = 'mnist dataset',\n", + " description='training and test dataset',\n", + " create_new_version=True)" ] }, { @@ -345,7 +366,7 @@ "source": [ "### Azure ML concepts \n", "Please note the following three things in the code below:\n", - "1. The script accepts arguments using the argparse package. In this case there is one argument `--data_folder` which specifies the file system folder in which the script can find the MNIST data\n", + "1. The script accepts arguments using the argparse package. In this case there is one argument `--data_folder` which specifies the FileDataset in which the script can find the MNIST data\n", "```\n", " parser = argparse.ArgumentParser()\n", " parser.add_argument('--data_folder')\n", @@ -384,6 +405,36 @@ "The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed. In this case, we add `keras` package (for the Keras framework obviously), and `matplotlib` package for plotting a \"Loss vs. Accuracy\" chart and record it in run history." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.dataset import Dataset\n", + "\n", + "dataset = Dataset.get_by_name(ws, 'mnist dataset')\n", + "\n", + "# list the files referenced by mnist dataset\n", + "dataset.to_path()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.environment import Environment\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "# set up environment\n", + "env = Environment('my_env')\n", + "cd = CondaDependencies.create(pip_packages=['keras','azureml-sdk','tensorflow-gpu','matplotlib','azureml-dataprep[pandas,fuse]>=1.1.14'])\n", + "\n", + "env.python.conda_dependencies = cd" + ] + }, { "cell_type": "code", "execution_count": null, @@ -393,7 +444,7 @@ "from azureml.train.dnn import TensorFlow\n", "\n", "script_params = {\n", - " '--data-folder': ds.path('mnist').as_mount(),\n", + " '--data-folder': dataset.as_named_input('mnist').as_mount(),\n", " '--batch-size': 50,\n", " '--first-layer-neurons': 300,\n", " '--second-layer-neurons': 100,\n", @@ -403,25 +454,8 @@ "est = TensorFlow(source_directory=script_folder,\n", " script_params=script_params,\n", " compute_target=compute_target, \n", - " pip_packages=['keras', 'matplotlib'],\n", " entry_script='keras_mnist.py', \n", - " use_gpu=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And if you are curious, this is what the mounting point looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(ds.path('mnist').as_mount())" + " environment_definition= env)" ] }, { @@ -698,11 +732,10 @@ "outputs": [], "source": [ "est = TensorFlow(source_directory=script_folder,\n", - " script_params={'--data-folder': ds.path('mnist').as_mount()},\n", + " script_params={'--data-folder': dataset.as_named_input('mnist').as_mount()},\n", " compute_target=compute_target,\n", - " pip_packages=['keras', 'matplotlib'],\n", " entry_script='keras_mnist.py', \n", - " use_gpu=True)" + " environment_definition= env)" ] }, { @@ -911,7 +944,7 @@ 
"metadata": {}, "source": [ "### Deploy to ACI\n", - "We are almost ready to deploy. Create a deployment configuration and specify the number of CPUs and gigbyte of RAM needed for your ACI container. " + "We are almost ready to deploy. Create the inference configuration and deployment configuration and deploy to ACI. This cell will run for about 7-8 minutes." ] }, { @@ -921,73 +954,35 @@ "outputs": [], "source": [ "from azureml.core.webservice import AciWebservice\n", - "\n", - "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n", - " auth_enabled=True, # this flag generates API keys to secure access\n", - " memory_gb=1, \n", - " tags={'name':'mnist', 'framework': 'Keras'},\n", - " description='Keras MLP on MNIST')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Deployment Process\n", - "Now we can deploy. **This cell will run for about 7-8 minutes**. Behind the scene, it will do the following:\n", - "1. **Build Docker image** \n", - "Build a Docker image using the scoring file (`score.py`), the environment file (`myenv.yml`), and the `model` object. \n", - "2. **Register image** \n", - "Register that image under the workspace. \n", - "3. **Ship to ACI** \n", - "And finally ship the image to the ACI infrastructure, start up a container in ACI using that image, and expose an HTTP endpoint to accept REST client calls." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core.image import ContainerImage\n", - "\n", - "imgconfig = ContainerImage.image_configuration(execution_script=\"score.py\", \n", - " runtime=\"python\", \n", - " conda_file=\"myenv.yml\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", + "from azureml.core.model import InferenceConfig\n", "from azureml.core.webservice import Webservice\n", + "from azureml.core.model import Model\n", "\n", - "service = Webservice.deploy_from_model(workspace=ws,\n", - " name='keras-mnist-svc',\n", - " deployment_config=aciconfig,\n", - " models=[model],\n", - " image_config=imgconfig)\n", + "inference_config = InferenceConfig(runtime= \"python\", \n", + " entry_script=\"score.py\",\n", + " conda_file=\"myenv.yml\")\n", "\n", - "service.wait_for_deployment(show_output=True)" + "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n", + " auth_enabled=True, # this flag generates API keys to secure access\n", + " memory_gb=1,\n", + " tags={'name': 'mnist', 'framework': 'Keras'},\n", + " description='Keras MLP on MNIST')\n", + "\n", + "service = Model.deploy(workspace=ws, \n", + " name='keras-mnist-svc', \n", + " models=[model], \n", + " inference_config=inference_config, \n", + " deployment_config=aciconfig)\n", + "\n", + "service.wait_for_deployment(True)\n", + "print(service.state)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Tip: If something goes wrong with the deployment, the first thing to look at is the logs from the service by running the following command:**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(service.get_logs())" + "**Tip: If something goes wrong with the deployment, the first thing to look at is the logs from the service by running the following command:** `print(service.get_logs())`" ] }, { @@ -1047,7 +1042,7 @@ " font_color = 'red' if y_test[s] != result[i] else 'black'\n", " clr_map = plt.cm.gray if y_test[s] != result[i] 
else plt.cm.Greys\n", " \n", - " plt.text(x=10, y=-10, s=y_hat[s], fontsize=18, color=font_color)\n", + " plt.text(x=10, y=-10, s=y_test[s], fontsize=18, color=font_color)\n", " plt.imshow(X_test[s].reshape(28, 28), cmap=clr_map)\n", " \n", " i = i + 1\n", @@ -1106,8 +1101,7 @@ "metadata": {}, "source": [ "Let's look at the workspace after the web service was deployed. You should see \n", - "* a registered model named 'keras-mlp-mnist' and with the id 'model:1'\n", - "* an image called 'keras-mnist-svc' and with a docker image location pointing to your workspace's Azure Container Registry (ACR) \n", + "* a registered model named 'keras-mlp-mnist' and with the id 'model:1' \n", "* a webservice called 'keras-mnist-svc' with some scoring URL" ] }, @@ -1121,10 +1115,6 @@ "for name, model in models.items():\n", " print(\"Model: {}, ID: {}\".format(name, model.id))\n", " \n", - "images = ws.images\n", - "for name, image in images.items():\n", - " print(\"Image: {}, location: {}\".format(name, image.image_location))\n", - " \n", "webservices = ws.webservices\n", "for name, webservice in webservices.items():\n", " print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))" @@ -1169,7 +1159,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/how-to-use-azureml/training/README.md b/how-to-use-azureml/training/README.md index d32fd1b1..39d2d6b7 100644 --- a/how-to-use-azureml/training/README.md +++ b/how-to-use-azureml/training/README.md @@ -9,4 +9,4 @@ Follow these sample notebooks to learn: 5. [Train in an HDI Spark cluster](train-in-spark): train a Spark ML model using an HDInsight Spark cluster as compute target. 6. [Train and hyperparameter tune on Iris Dataset with Scikit-learn](train-hyperparameter-tune-deploy-with-sklearn): train a model using the Scikit-learn estimator and tune hyperparameters with Hyperdrive. 
- ![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/training/README.png) \ No newline at end of file + ![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/training/README.png) diff --git a/how-to-use-azureml/work-with-data/datasets/README.md b/how-to-use-azureml/work-with-data/datasets/README.md index a08adf6c..2a4adb53 100644 --- a/how-to-use-azureml/work-with-data/datasets/README.md +++ b/how-to-use-azureml/work-with-data/datasets/README.md @@ -12,9 +12,10 @@ With Azure Machine Learning datasets, you can: ## Learn how to use Azure Machine Learning datasets: * [Create and register datasets](https://aka.ms/azureml/howto/createdatasets) +* Use [Datasets in training](datasets-tutorial/train-with-datasets.ipynb) * Use TabularDatasets in [automated machine learning training](https://aka.ms/automl-dataset) -* Use TabularDatasets in [training](https://aka.ms/tabulardataset-samplenotebook) -* Use FileDatasets in [training](https://aka.ms/filedataset-samplenotebook) +* Use FileDatasets in [image classification](https://aka.ms/filedataset-samplenotebook) +* Use FileDatasets in [deep learning with hyperparameter tuning](https://aka.ms/filedataset-hyperdrive) * For existing Dataset users: [Dataset API change notice](dataset-api-change-notice.md) diff --git a/how-to-use-azureml/work-with-data/datasets/datasets-diff/datasets-diff.ipynb b/how-to-use-azureml/work-with-data/datasets/datasets-diff/datasets-diff.ipynb new file mode 100644 index 00000000..66b5d5f0 --- /dev/null +++ b/how-to-use-azureml/work-with-data/datasets/datasets-diff/datasets-diff.ipynb @@ -0,0 +1,796 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks//notebooks/work-with-data/datasets/datasets-tutorial/datasets-diff.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#
Detect drift using Dataset Diff API
\n", + "\n", + "
\n", + "\n", + " This notebook provides step by step instructions on how to compare two different datasets. It includes two parts\u00ef\u00bc\u0161\n", + "
    ☑ compare two datasets using local compute;\n", + "
    ☑ compare two datasets remotely using Azure ML compute.\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prerequisites and Setup\n", + "\n", + "This section is shared by both local and remote execution, you may need duplicate this section if splitting this notebook into separate local/remote notebooks.\n", + "\n", + "\n", + "## Prerequisites\n", + "\n", + "### Install Supporting Packages" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "scrolled": true + }, + "source": [ + "    pip install scipy
\n", + "    pip install tqdm
\n", + "    pip install pandas
\n", + "    pip install pyarrow
\n", + "    pip install ipywidgets
\n", + "    pip install lightgbm
\n", + "    pip install matplotlib
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install AzureML Packages" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "scrolled": true + }, + "source": [ + "    pip install --user azureml-core
\n", + "\n", + "    pip install --user azureml-opendatasets
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import warnings\n", + "import requests\n", + "import pandas as pd\n", + "import numpy as np\n", + "import ipywidgets as widgets\n", + "\n", + "import azureml.core\n", + "\n", + "from io import StringIO\n", + "from tqdm import tqdm\n", + "from IPython import display\n", + "from datetime import datetime, timedelta\n", + "from azureml.core import Datastore, Dataset\n", + "from azureml.opendatasets import NoaaIsdWeather\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Declare Variables For Demo\n", + "\n", + "Feel free to customize them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "year = 2016\n", + "month = 1\n", + "date = 1\n", + "b_days = 2 # for baseline\n", + "t_days = 7 # for target\n", + "\n", + "local_folder = \"demo\"\n", + "baseline_file = 'baseline.csv'\n", + "\n", + "feature_columns = ['usaf', 'wban', 'latitude', 'longitude', 'elevation', 'temperature', 'p_k']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare Datasets\n", + "\n", + "The diff calcualtion is always between two datasets, here for demo, we use \"baseline\" and \"target\" to present them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs(local_folder, exist_ok=True)\n", + "\n", + "local_baseline = os.path.join(local_folder, baseline_file)\n", + "\n", + "start_date = datetime(year, month, date)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare Baseline Dataset\n", + "Retrieve wether data from NOAA for declared days (b_days declared in above cell). It may takes 2 minutes for 2 days." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "start = start_date\n", + "isd = NoaaIsdWeather(start, start + timedelta(days=b_days))\n", + "\n", + "baseline_df = isd.to_pandas_dataframe()\n", + "baseline_df.head()\n", + "\n", + "baseline_df.to_csv(local_baseline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare Target Dataset(s)\n", + "\n", + "Retrieve wether data from NOAA for declared days (t_days declared in above cell). It may takes 5 minutes for 7 days." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for day in tqdm(range(0, t_days)):\n", + " start = start_date + timedelta(days=day)\n", + " isd = NoaaIsdWeather(start, start + timedelta(days=1))\n", + "\n", + " target_df = isd.to_pandas_dataframe()\n", + " target_df = target_df[feature_columns]\n", + " target_df.to_csv(os.path.join(local_folder, 'target_{}.csv'.format(day)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predefine Methods For Result Processing\n", + "\n", + "## Parse and Present Datasets' Diff Results\n", + "\n", + "Each diff result is a list of \"DiffMetric\" objects. Typically each objec present a detailed measurement output for a specific column.\n", + "

 Below is an example of a \"DiffMetric\" object:<br>
\n", + "\n", + "
    {  \n", + "
       'name':'percentage_difference_median',                        --> measurement name\n", + "
       'value':0.01270670472603889,                                  --> the result value a number to indicate how big the diff is for current measurement.\n", + "
       'extended_properties':{  \n", + "

          'action_id':'3d3da05d-0871-4cc9-93cb-f43859aae13b',        --> (remote calculation only) action id\n", + "
          'from_dataset_id':'12edc566-8803-4e0f-ba91-c2ee05eeddee',  --> (remote calculation only) baseline dataset\n", + "
          'from_dataset_version':'1',                                --> (remote calculation only) baseline version\n", + "
          'to_dataset_id':'9b85c9ba-50c2-4227-a9bc-91dee4a18228',    --> (remote calculation only) target dataset\n", + "
          'to_dataset_version':'1',                                  --> (remote calculation only) target version\n", + "

          'column_name':'elevation',                                 --> column name in dataset, 
                                                                         could be ['name':'datadrift_coefficient'] for dataset level diff\n", + "
          'metric_category':'profile_diff'                           --> category, could be :
                                                                             dataset_drift (dataset level)
                                                                             profile_diff (column level)
                                                                             statistical_distance (column level)\n", + "
       }\n", + "
    }\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def parse_result(rst, columns, measurements):\n", + " columnlist = list(columns)\n", + " columnlist.insert(0, \"measurements \\ columns\")\n", + " measurementlist = list(measurements)\n", + " \n", + " daily_result = []\n", + " daily_result.append(columnlist)\n", + " \n", + " drift = None\n", + " daily_contribution = {}\n", + " \n", + " for m in measurements:\n", + " emptylist = ([''] * len(columns))\n", + " emptylist.insert(0, m)\n", + " daily_result.append(emptylist)\n", + "\n", + " for r in rst:\n", + " # get dataset level diff (drift)\n", + " if r.name == \"datadrift_coefficient\":\n", + " drift = r.value\n", + " # get diff (drift) contribution for each column:\n", + " elif r.name == \"datadrift_contribution\":\n", + " daily_contribution[r.extended_properties[\"column_name\"]] = r.value\n", + " # get column level diff measurements\n", + " else:\n", + " if \"column_name\" in r.extended_properties:\n", + " col = r.extended_properties[\"column_name\"]\n", + " msm = r.name\n", + " val = r.value\n", + " cid = columnlist.index(col)\n", + " kid = measurementlist.index(msm) + 1\n", + " daily_result[kid][cid] = val\n", + "\n", + " return daily_result, drift, daily_contribution" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Present Dataset Level Diff (aka drift)\n", + "\n", + "This method will generate two graphs, the left graph presents dataset level difference for all compared baseline-target pairs, the right graph presents dataset level difference contribution for each column so that we know which column impacts more." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "\n", + "import matplotlib.dates as mdates\n", + "import matplotlib.pyplot as plt \n", + "import matplotlib as mpl\n", + "\n", + "def show_diff(drift_metrics, dates, columns, drift_contributions, summary_contribute, bottoms_contribute):\n", + " drifts = [drift_metrics[day] for day in drift_metrics]\n", + " daily_summary_contribution = list(summary_contribute.values())\n", + " xrange = pd.date_range(dates[0], dates[-1], freq='D')\n", + "\n", + " figure = plt.figure(figsize=(16, 4))\n", + " plt.tight_layout()\n", + "\n", + " # left graph\n", + " ax1 = plt.subplot(1, 2, 1)\n", + " ax1.grid()\n", + " plt.sca(ax1)\n", + " plt.title(\"Diff(Drift) Trend\\n\", fontsize=20)\n", + " plt.xticks(rotation=30)\n", + " plt.xlabel(\"Date\", fontsize=16)\n", + " plt.ylabel(\"Drift Coefficent\", fontsize=16)\n", + " plt.plot_date(dates, drifts, '-r', marker='.', linewidth=0.5, markersize=5)\n", + "\n", + " # right graph\n", + " ax2 = plt.subplot(1, 2, 2)\n", + " plt.sca(ax2)\n", + " plt.title(\"Drift Contribution of columns\\n\", fontsize=20)\n", + " plt.xticks(xrange, rotation=30)\n", + " plt.xlabel(\"Date\", fontsize=16)\n", + " plt.ylabel(\"Drift Contribution\", fontsize=16)\n", + "\n", + " yvals = ax2.get_yticks()\n", + " ax2.set_yticklabels(['{:,.2%}'.format(v) for v in yvals])\n", + " ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y%m-%d'))\n", + "\n", + " for c in columns:\n", + " contribution = []\n", + " for dt in drift_contributions:\n", + " contribution.append(drift_contributions[dt][c])\n", + " bar_ratio = [x / y for x, y in zip(contribution, daily_summary_contribution)]\n", + "\n", + " ax2.bar(dates, height=bar_ratio, bottom=bottoms_contribute)\n", + " bottoms_contribute = [x + y for x, y in 
zip(bottoms_contribute, bar_ratio)]\n", + "\n", + " plt.legend(columns)\n", + "\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Execute Datasets' Diff Calculation Locally\n", + "\n", + "Local execution let you to run in a Jupyter Notebook or Code editor in a local computer.\n", + "\n", + "## Calculate Dataset Diff At Local\n", + "\n", + "### Create Baseline Dataset\n", + "\n", + "Create baseline dataset object from the retrieved baseline data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Dataset\n", + "\n", + "baseline = Dataset.auto_read_files(local_baseline, include_path=True)\n", + "\n", + "# The baseline data is not filtered by feature columns list, thus all retrieved data columns will be listed below.\n", + "# You'll see \"Column1\" in the output, which is a default name added when the original column is not available.\n", + "baseline.get_profile()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Target Datasets\n", + "\n", + "Create target dataset objects from retrieved target data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "targets = {}\n", + "\n", + "for day in tqdm(range(0, t_days)):\n", + " target = Dataset.auto_read_files(os.path.join(local_folder, 'target_{}.csv'.format(day)))\n", + " targets[day] = target" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Calculate Diff Between Each Target Dataset And Baseline Dataset\n", + "\n", + "Compare each target dataset with baseline dataset to calculate diff between them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buf = {}\n", + "\n", + "columns = set()\n", + "measurements = set()\n", + "\n", + "for day in tqdm(range(0, t_days)):\n", + " diff_action = baseline.diff(rhs_dataset=targets[day])\n", + " diff_action.wait_for_completion()\n", + " \n", + " dt = (start_date + timedelta(days=day)).strftime(\"%Y-%m-%d\")\n", + " buf[dt] = diff_action._result\n", + " \n", + " for r in diff_action._result:\n", + " if r.name not in measurements:\n", + " measurements.add(r.name)\n", + " if \"column_name\" in r.extended_properties and r.extended_properties[\"column_name\"] not in columns:\n", + " columns.add(r.extended_properties[\"column_name\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Parse And Present Local Execution Results\n", + "\n", + "\n", + "
 The diff output usually contains two different levels of information:\n", + "<br>
     1. General diff, aka dataset level diff. The output is a number between 0 and 1 that indicates how large the diff is. This dataset level diff is also called the drift between the two datasets.\n", + "<br>
     2. Detailed diff, aka column level diff. The output is a set of metrics organized like a 2-D array. One dimension is the column names, which is why it is column level; the other dimension is the measurements. The diff calculation includes various measurements from different perspectives, and each measurement generates an index for each column to show how much that column contributed.\n", + "<br>
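Given that structure, one convenient way to eyeball a single day's result is to flatten the list of DiffMetric objects into a DataFrame. This is only a sketch built on the fields described above, and it assumes the `buf` dictionary produced by the local calculation earlier in the notebook:

```python
import pandas as pd

def diff_to_frame(diff_result):
    """Flatten a list of DiffMetric-like objects into one tidy table."""
    rows = []
    for metric in diff_result:
        props = getattr(metric, 'extended_properties', None) or {}
        rows.append({'metric': metric.name,
                     'value': metric.value,
                     'column': props.get('column_name'),      # empty for dataset level metrics
                     'category': props.get('metric_category')})
    return pd.DataFrame(rows)

# example: inspect the first day's diff result from the local run
first_day = sorted(buf)[0]
print(diff_to_frame(buf[first_day]).head())
```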
\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parse and List Column Level Diff Results\n", + "\n", + "Here will iteratively list all details per each measurement per column calculated." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas import DataFrame\n", + "\n", + "dates = []\n", + "drift_metrics = {}\n", + "drift_contributions = {}\n", + "summary_contribute = {}\n", + "bottoms_contribute = []\n", + "\n", + "for dt, rst in buf.items():\n", + " dates.append(dt)\n", + " print(\"\\n---------------------------------------- Result of {} ----------------------------------------\".format(dt))\n", + " \n", + " daily_result, drift, daily_contribution = parse_result(rst, columns, measurements)\n", + " drift_metrics[dt] = drift\n", + " drift_contributions[dt] = daily_contribution\n", + "\n", + " sum_contribution = 0\n", + " bottoms_contribute.append(0)\n", + " for col, val in daily_contribution.items():\n", + " sum_contribution += val\n", + " summary_contribute[dt] = sum_contribution\n", + "\n", + " \n", + " display.display(pd.DataFrame(daily_result))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Present Dataset Level Diff (aka drift) In Graphs\n", + "\n", + "The left graph presents dataset level difference for all compared baseline-target pairs, the right graph presents dataset level difference contribution for each column so that we know which column impacts more." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "show_diff(drift_metrics, dates, columns, drift_contributions, summary_contribute, bottoms_contribute)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Excute Datasets's Diff Calculation Remotely\n", + "\n", + "Remote execution let you to data compare on more powerful computes - Machine Learning Compute clusters." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare Remote Environment\n", + "### Get Workspace\n", + "\n", + "
 If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, if you haven't already, go through the configuration notebook first to establish your connection to the AzureML Workspace.\n", + "<br>
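If no `config.json` is available on the machine running this notebook, the workspace handle can also be constructed explicitly. A sketch; the subscription id, resource group, and workspace name are placeholders to fill in:

```python
from azureml.core import Workspace

# alternative to Workspace.from_config() when no config.json is present
ws = Workspace.get(name='<workspace-name>',
                   subscription_id='<subscription-id>',
                   resource_group='<resource-group>')
print(ws.name, ws.location, sep='\n')
```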
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "from azureml.core.authentication import InteractiveLoginAuthentication\n", + "\n", + "ws = Workspace.from_config()\n", + "\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep=\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Compute Resource For Calculation\n", + "Check if compute resouce exists and create a new one if not." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import AmlCompute, ComputeTarget\n", + "\n", + "existing = False\n", + "del_cmpt = False\n", + "cts = ws.compute_targets\n", + "\n", + "if (ws.DEFAULT_CPU_CLUSTER_NAME in cts and cts[ws.DEFAULT_CPU_CLUSTER_NAME].type == 'AmlCompute'):\n", + " existing = True\n", + " aml_compute = cts[ws.DEFAULT_CPU_CLUSTER_NAME]\n", + " \n", + "if not existing:\n", + " aml_compute = AmlCompute.create(ws,ws.DEFAULT_CPU_CLUSTER_NAME,ws.DEFAULT_CPU_CLUSTER_CONFIGURATION)\n", + " aml_compute.wait_for_completion(show_output=True)\n", + " del_cmpt = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Upload Sample Data To Datastore\n", + "\n", + "Upload data files to the blob storage in Azure ML workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Datastore, Dataset\n", + "import azureml.data\n", + "from azureml.data.azure_storage_datastore import AzureFileDatastore, AzureBlobDatastore\n", + "\n", + "remote_data_path ='demo'\n", + "\n", + "dstore = ws.get_default_datastore()\n", + "dstore.upload_files([local_baseline],\n", + " target_path=remote_data_path,\n", + " overwrite=True,\n", + " show_progress=True)\n", + "\n", + "for day in tqdm(range(0, t_days)):\n", + " target_file = os.path.join(local_folder, 'target_{}.csv'.format(day))\n", + " dstore.upload_files([target_file],\n", + " target_path=remote_data_path,\n", + " overwrite=True,\n", + " show_progress=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register DataSets\n", + "\n", + "Create and Register Datasets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Datastore, Dataset\n", + "dstore = ws.get_default_datastore()\n", + "\n", + "xpath = remote_data_path + '/' + baseline_file\n", + "toregister_baseline = Dataset.from_delimited_files(dstore.path(xpath))\n", + "registered_baseline = toregister_baseline.register(workspace = ws,\n", + " name = 'dataset baseline for diff demo',\n", + " description = 'dataset baseline for diff comparison',\n", + " exist_ok = True,\n", + " update_if_exist = True\n", + " )\n", + "\n", + "registered_targets = {}\n", + "for day in tqdm(range(0, t_days)):\n", + " target_file = 'target_{}.csv'.format(day)\n", + " toregister_target = Dataset.from_delimited_files(dstore.path(remote_data_path + '/' + target_file))\n", + " registered_target = toregister_target.register(workspace = ws,\n", + " name = 'dataset target-{} for diff demo'.format(day),\n", + " description = 'target target-{} for diff comparison'.format(day),\n", + " exist_ok = True,\n", + " update_if_exist = True\n", + " )\n", + " registered_targets[day] = registered_target" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calculate Dataset Diff Remotely\n", + "\n", + "Perform the calculation remotely. This may take 20 minutes.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remote_diffs = {}\n", + "\n", + "r_columns = set()\n", + "r_measurements = set()\n", + "\n", + "for day, registered_target in registered_targets.items():\n", + " dt = (start_date + timedelta(days=day)).strftime(\"%Y-%m-%d\")\n", + " remote_diff = registered_baseline.diff(registered_target, compute_target=ws.DEFAULT_CPU_CLUSTER_NAME)\n", + " remote_diff.wait_for_completion()\n", + " \n", + " remote_diffs[dt] = remote_diff.get_result()\n", + " \n", + " for r in remote_diff.get_result():\n", + " if r.name not in r_measurements:\n", + " r_measurements.add(r.name)\n", + " if \"column_name\" in r.extended_properties and r.extended_properties[\"column_name\"] not in r_columns:\n", + " r_columns.add(r.extended_properties[\"column_name\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Parse And Present Remote Execution Results\n", + "\n", + "### Parse And List Column Level Diff Results\n", + "\n", + "Here will iteratively list all details per each measurement per column calculated." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas import DataFrame\n", + "\n", + "r_dates = []\n", + "r_drift_metrics = {}\n", + "r_drift_contributions = {}\n", + "r_summary_contribute = {}\n", + "r_bottoms_contribute = []\n", + "\n", + "for dt, rst in remote_diffs.items():\n", + " r_dates.append(dt)\n", + " print(\"\\n---------------------------------------- Result of {} ----------------------------------------\".format(dt))\n", + " \n", + " daily_result, drift, daily_contribution = parse_result(rst, r_columns, r_measurements)\n", + " r_drift_metrics[dt] = drift\n", + " r_drift_contributions[dt] = daily_contribution\n", + "\n", + " sum_contribution = 0\n", + " r_bottoms_contribute.append(0)\n", + " for col, val in daily_contribution.items():\n", + " sum_contribution += val\n", + " r_summary_contribute[dt] = sum_contribution\n", + "\n", + " \n", + " display.display(pd.DataFrame(daily_result))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Present Dataset Level Diff (aka drift) In Graphs\n", + "\n", + "The left graph presents dataset level difference for all compared baseline-target pairs, the right graph presents dataset level difference contribution for each column so that we know which column impacts more." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "show_diff(r_drift_metrics, r_dates, r_columns, r_drift_contributions, r_summary_contribute, r_bottoms_contribute)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean Resources Created" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if del_cmpt == True:\n", + " try:\n", + " aml_compute.delete()\n", + " aml_compute.wait_for_completion()\n", + " except Exception as e:\n", + " if 'ComputeTargetNotFound' in e.message:\n", + " print(\"Compute target deleted.\")\n", + " del_cmpt = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Reference\n", + "\n", + "Detailed description of Dataset Diff attribute can be found at
\n", + "https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.dataset(class)?view=azure-ml-py#diff-rhs-dataset--compute-target-none--columns-none-" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "davx" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License." + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/file-dataset-img-classification.ipynb b/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/file-dataset-img-classification.ipynb index 3f066658..d2d99569 100644 --- a/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/file-dataset-img-classification.ipynb +++ b/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/file-dataset-img-classification.ipynb @@ -713,4 +713,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/tabular-timeseries-dataset-filtering.ipynb b/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/tabular-timeseries-dataset-filtering.ipynb index 3339e46f..0672e5d5 100644 --- a/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/tabular-timeseries-dataset-filtering.ipynb +++ b/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/tabular-timeseries-dataset-filtering.ipynb @@ -1,544 +1,544 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Tabular Time Series Related API Demo with NOAA Weather Data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright (c) Microsoft Corporation. All rights reserved.
\n", - "Licensed under the MIT License." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this notebook, you will learn how to use the Tabular Time Series related API to filter the data by time windows for sample data uploaded to Azure blob storage. \n", - "\n", - "The detailed APIs to be demoed in this script are:\n", - "- Create Tabular Dataset instance\n", - "- Assign fine timestamp column and coarse timestamp column for Tabular Dataset to activate Time Series related APIs\n", - "- Clear fine timestamp column and coarse timestamp column\n", - "- Filter in data before a specific time\n", - "- Filter in data after a specific time\n", - "- Filter in data in a specific time range\n", - "- Filter in data for recent time range\n", - "\n", - "Besides above APIs, you'll also see:\n", - "- Create and load a Workspace\n", - "- Load National Oceanic & Atmospheric (NOAA) weather data into Azure blob storage\n", - "- Create and register NOAA weather data as a Tabular dataset\n", - "- Re-load Tabular Dataset from your Workspace" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import Dependencies\n", - "\n", - "If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, run the cells below to install the Azure Machine Learning Python SDK and create an Azure ML Workspace that's required for this demo." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prepare Environment" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print out your version of the Azure ML Python SDK. Version 1.0.60 or above is required for TabularDataset with timeseries attribute. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import azureml.core\n", - "azureml.data.__version__" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import Packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# import packages\n", - "import os\n", - "\n", - "import pandas as pd\n", - "\n", - "from calendar import monthrange\n", - "from datetime import datetime, timedelta\n", - "\n", - "from azureml.core import Dataset, Datastore, Workspace, Run\n", - "from azureml.opendatasets import NoaaIsdWeather" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Set up Configuraton and Create Azure ML Workspace\n", - "\n", - "If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) first if you haven't already to establish your connection to the Azure ML Workspace." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ws = Workspace.from_config()\n", - "dstore = ws.get_default_datastore()\n", - "\n", - "dset_name = 'weather-data-florida'\n", - "\n", - "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, dstore.name, sep = '\\n')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load Data to Blob Storage\n", - "\n", - "This demo uses public NOAA weather data. You can replace this data with your own. The first cell below creates a Pandas Dataframe object with the first 6 months of 2019 NOAA weather data. 
The last cell saves the data to a CSV file and uploads the CSV file to Azure blob storage to the location specified in the datapath variable. Currently, the Dataset class only reads uploaded files from blob storage. \n", - "\n", - "**NOTE:** to reduce the size of data, we will only keep specific rows with a given stationName." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "target_years = [2019]\n", - "\n", - "for year in target_years:\n", - " for month in range(1, 12+1):\n", - " path = 'data/{}/{:02d}/'.format(year, month)\n", - " \n", - " try: \n", - " start = datetime(year, month, 1)\n", - " end = datetime(year, month, monthrange(year, month)[1]) + timedelta(days=1)\n", - " isd = NoaaIsdWeather(start, end).to_pandas_dataframe()\n", - " isd = isd[isd['stationName'].str.contains('FLORIDA', regex=True, na=False)]\n", - " \n", - " os.makedirs(path, exist_ok=True)\n", - " isd.to_parquet(path + 'data.parquet')\n", - " except Exception as e:\n", - " print('Month {} in year {} likely has no data.\\n'.format(month, year))\n", - " print('Exception: {}'.format(e))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Upload data to blob storage so it can be used as a Dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dstore.upload('data', dset_name, overwrite=True, show_progress=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create & Register Tabular Dataset with time-series trait from Blob\n", - "\n", - "The API on Tabular datasets with time-series trait is specially designed to handle Tabular time-series data and time related operations more efficiently. By registering your time-series dataset, you are publishing your dataset to your workspace so that it is accessible to anyone with the same subscription id. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create Tabular Dataset instance from blob storage datapath.\n", - "\n", - "**TIP:** you can set virtual columns in the partition_format. I.e. if you partition the weather data by state and city, the path can be '/{STATE}/{CITY}/{coarse_time:yyy/MM}/data.parquet'. STATE and CITY would then appear as virtual columns in the dataset, allowing for efficient filtering by these grains. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "datastore_path = [(dstore, dset_name + '/*/*/data.parquet')]\n", - "dataset = Dataset.Tabular.from_parquet_files(path=datastore_path, partition_format = dset_name + '/{coarse_time:yyyy/MM}/data.parquet')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Assign fine timestamp column for Tabular Dataset to activate Time Series related APIs. The column to be assigned should be a Date type, otherwise the assigning will fail." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# for this demo, leave out coarse_time so fine_grain_timestamp is used\n", - "tsd = dataset.with_timestamp_columns(fine_grain_timestamp='datetime') # , coarse_grain_timestamp='coarse_time')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Register the dataset for easy access from anywhere in Azure ML and to keep track of versions, lineage. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# register dataset to Workspace\n", - "registered_ds = tsd.register(ws, dset_name, create_new_version=True, description='Data for Tabular Dataset with time-series trait demo.', tags={ 'type': 'TabularDataset' })" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Reload the Dataset from Workspace" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# get dataset by dataset name\n", - "tsd = Dataset.get_by_name(ws, name=dset_name)\n", - "tsd.to_pandas_dataframe().head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Filter Data by Time Windows\n", - "\n", - "Once your data has been loaded into the notebook, you can query by time using the time_before(), time_after(), time_between(), and time_recent() functions. You can also choose to drop or keep certain columns. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Before Time Input" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# select data that occurs before a specified date\n", - "tsd2 = tsd.time_before(datetime(2019, 6, 12))\n", - "tsd2.to_pandas_dataframe().tail(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## After Time Input" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# select data that occurs after a specified date\n", - "tsd2 = tsd.time_after(datetime(2019, 5, 30))\n", - "tsd2.to_pandas_dataframe().head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Before & After Time Inputs\n", - "\n", - "You can chain time functions together." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**NOTE:** You must set the coarse_grain_timestamp to None to filter on the fine_grain_timestamp. The below cell will fail unless the second line is uncommented " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# select data that occurs within a given time range\n", - "#tsd = tsd.with_timestamp_columns(fine_grain_timestamp='datetime', coarse_grain_timestamp=None)\n", - "tsd2 = tsd.time_after(datetime(2019, 1, 2)).time_before(datetime(2019, 1, 10))\n", - "tsd2.to_pandas_dataframe().head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Time Range Input" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# another way to select data that occurs within a given time range\n", - "tsd2 = tsd.time_between(start_time=datetime(2019, 1, 31, 23, 59, 59), end_time=datetime(2019, 2, 7))\n", - "tsd2.to_pandas_dataframe().head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Time Recent Input" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function takes in a datetime.timedelta and returns a dataset containing the data from datetime.now()-timedelta() to datetime.now()." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tsd2 = tsd.time_recent(timedelta(weeks=5, days=0))\n", - "tsd2.to_pandas_dataframe().head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**NOTE:** This will return an empty dataframe there is no data within the last 2 days." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tsd2 = tsd.time_recent(timedelta(days=2))\n", - "tsd2.to_pandas_dataframe().tail(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Drop Columns" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The columns to be dropped should NOT include timstamp columns.
Below operation will lead to exception." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " tsd2 = tsd.drop_columns(columns=['snowDepth', 'version', 'datetime'])\n", - "except Exception as e:\n", - " print('Expected exception : {}'.format(str(e)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Drop will succeed if modify column list to exclude timestamp columns." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tsd2 = tsd.drop_columns(columns=['snowDepth', 'version', 'upload_date'])\n", - "tsd2.take(5).to_pandas_dataframe().sort_values(by='datetime')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Keep Columns" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The columns to be kept should ALWAYS include timstamp columns.
Below operation will lead to exception." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " tsd2 = tsd.keep_columns(columns=['snowDepth'], validate=False)\n", - "except Exception as e:\n", - " print('Expected exception : {}'.format(str(e)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Keep will succeed if modify column list to include timestamp columns." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tsd2 = tsd.keep_columns(columns=['snowDepth', 'datetime', 'coarse_time'], validate=False)\n", - "tsd2.to_pandas_dataframe().tail()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Resetting Timestamp Columns" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Rules for reseting are:\n", - "- You cannot assign 'None' to fine_grain_timestamp while assign a valid column name to coarse_grain_timestamp because coarse_grain_timestamp is optional while fine_grain_timestamp is mandatory for Tabular time series data.\n", - "- If you assign 'None' to fine_grain_timestamp, then both fine_grain_timestamp and coarse_grain_timestamp will all be cleared.\n", - "- If you assign only 'None' to coarse_grain_timestamp, then only coarse_grain_timestamp will be cleared." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Illegal clearing, exception is expected.\n", - "try:\n", - " tsd2 = tsd.with_timestamp_columns(fine_grain_timestamp=None, coarse_grain_timestamp='coarse_time')\n", - "except Exception as e:\n", - " print('Cleaning not allowed because {}'.format(str(e)))\n", - "\n", - "# clear both\n", - "tsd2 = tsd.with_timestamp_columns(fine_grain_timestamp=None, coarse_grain_timestamp=None)\n", - "print('after clean both with None/None, timestamp columns are: {}'.format(tsd2.timestamp_columns))\n", - "\n", - "# clear coarse_grain_timestamp only and assign 'datetime' as fine timestamp column\n", - "tsd2 = tsd2.with_timestamp_columns(fine_grain_timestamp='datetime', coarse_grain_timestamp=None)\n", - "print('after clean coarse timestamp column, timestamp columns are: {}'.format(tsd2.timestamp_columns))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/work-with-data/datasets/datasets-tutorial/datasets-tutorial.png)" - ] - } - ], - "metadata": { - "authors": [ - { - "name": "copeters" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tabular Time Series Related API Demo with NOAA Weather Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.
\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, you will learn how to use the Tabular Time Series related API to filter the data by time windows for sample data uploaded to Azure blob storage. \n", + "\n", + "The detailed APIs to be demoed in this script are:\n", + "- Create Tabular Dataset instance\n", + "- Assign fine timestamp column and coarse timestamp column for Tabular Dataset to activate Time Series related APIs\n", + "- Clear fine timestamp column and coarse timestamp column\n", + "- Filter in data before a specific time\n", + "- Filter in data after a specific time\n", + "- Filter in data in a specific time range\n", + "- Filter in data for recent time range\n", + "\n", + "Besides above APIs, you'll also see:\n", + "- Create and load a Workspace\n", + "- Load National Oceanic & Atmospheric (NOAA) weather data into Azure blob storage\n", + "- Create and register NOAA weather data as a Tabular dataset\n", + "- Re-load Tabular Dataset from your Workspace" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import Dependencies\n", + "\n", + "If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, run the cells below to install the Azure Machine Learning Python SDK and create an Azure ML Workspace that's required for this demo." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare Environment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print out your version of the Azure ML Python SDK. Version 1.0.60 or above is required for TabularDataset with timeseries attribute. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import azureml.core\n", + "azureml.data.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import Packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import packages\n", + "import os\n", + "\n", + "import pandas as pd\n", + "\n", + "from calendar import monthrange\n", + "from datetime import datetime, timedelta\n", + "\n", + "from azureml.core import Dataset, Datastore, Workspace, Run\n", + "from azureml.opendatasets import NoaaIsdWeather" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up Configuraton and Create Azure ML Workspace\n", + "\n", + "If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) first if you haven't already to establish your connection to the Azure ML Workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "dstore = ws.get_default_datastore()\n", + "\n", + "dset_name = 'weather-data-florida'\n", + "\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, dstore.name, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data to Blob Storage\n", + "\n", + "This demo uses public NOAA weather data. You can replace this data with your own. The first cell below creates a Pandas Dataframe object with the first 6 months of 2019 NOAA weather data. 
The last cell saves the data to a CSV file and uploads the CSV file to Azure blob storage to the location specified in the datapath variable. Currently, the Dataset class only reads uploaded files from blob storage. \n", + "\n", + "**NOTE:** to reduce the size of data, we will only keep specific rows with a given stationName." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "target_years = [2019]\n", + "\n", + "for year in target_years:\n", + " for month in range(1, 12+1):\n", + " path = 'data/{}/{:02d}/'.format(year, month)\n", + " \n", + " try: \n", + " start = datetime(year, month, 1)\n", + " end = datetime(year, month, monthrange(year, month)[1]) + timedelta(days=1)\n", + " isd = NoaaIsdWeather(start, end).to_pandas_dataframe()\n", + " isd = isd[isd['stationName'].str.contains('FLORIDA', regex=True, na=False)]\n", + " \n", + " os.makedirs(path, exist_ok=True)\n", + " isd.to_parquet(path + 'data.parquet')\n", + " except Exception as e:\n", + " print('Month {} in year {} likely has no data.\\n'.format(month, year))\n", + " print('Exception: {}'.format(e))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Upload data to blob storage so it can be used as a Dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dstore.upload('data', dset_name, overwrite=True, show_progress=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create & Register Tabular Dataset with time-series trait from Blob\n", + "\n", + "The API on Tabular datasets with time-series trait is specially designed to handle Tabular time-series data and time related operations more efficiently. By registering your time-series dataset, you are publishing your dataset to your workspace so that it is accessible to anyone with the same subscription id. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create Tabular Dataset instance from blob storage datapath.\n", + "\n", + "**TIP:** you can set virtual columns in the partition_format. I.e. if you partition the weather data by state and city, the path can be '/{STATE}/{CITY}/{coarse_time:yyy/MM}/data.parquet'. STATE and CITY would then appear as virtual columns in the dataset, allowing for efficient filtering by these grains. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "datastore_path = [(dstore, dset_name + '/*/*/data.parquet')]\n", + "dataset = Dataset.Tabular.from_parquet_files(path=datastore_path, partition_format = dset_name + '/{coarse_time:yyyy/MM}/data.parquet')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Assign fine timestamp column for Tabular Dataset to activate Time Series related APIs. The column to be assigned should be a Date type, otherwise the assigning will fail." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for this demo, leave out coarse_time so fine_grain_timestamp is used\n", + "tsd = dataset.with_timestamp_columns(fine_grain_timestamp='datetime') # , coarse_grain_timestamp='coarse_time')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Register the dataset for easy access from anywhere in Azure ML and to keep track of versions, lineage. 
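Relating to the partition_format TIP above: the sketch below is illustrative only and assumes a hypothetical layout in which the weather files sit under STATE/CITY folders (this notebook's data is not organized that way); it only shows how such virtual columns would be declared with the same from_parquet_files API used in this notebook.

```python
from azureml.core import Dataset

# hypothetical layout: weather-data-florida/<STATE>/<CITY>/<yyyy>/<MM>/data.parquet
partitioned_path = [(dstore, dset_name + '/*/*/*/*/data.parquet')]
partitioned = Dataset.Tabular.from_parquet_files(
    path=partitioned_path,
    partition_format=dset_name + '/{STATE}/{CITY}/{coarse_time:yyyy/MM}/data.parquet')

# STATE, CITY and coarse_time are parsed out of the file paths as extra columns
partitioned.take(5).to_pandas_dataframe()
```

Registration of the actual weather dataset continues below.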
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# register dataset to Workspace\n", + "registered_ds = tsd.register(ws, dset_name, create_new_version=True, description='Data for Tabular Dataset with time-series trait demo.', tags={ 'type': 'TabularDataset' })" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reload the Dataset from Workspace" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get dataset by dataset name\n", + "tsd = Dataset.get_by_name(ws, name=dset_name)\n", + "tsd.to_pandas_dataframe().head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filter Data by Time Windows\n", + "\n", + "Once your data has been loaded into the notebook, you can query by time using the time_before(), time_after(), time_between(), and time_recent() functions. You can also choose to drop or keep certain columns. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Before Time Input" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# select data that occurs before a specified date\n", + "tsd2 = tsd.time_before(datetime(2019, 6, 12))\n", + "tsd2.to_pandas_dataframe().tail(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## After Time Input" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# select data that occurs after a specified date\n", + "tsd2 = tsd.time_after(datetime(2019, 5, 30))\n", + "tsd2.to_pandas_dataframe().head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Before & After Time Inputs\n", + "\n", + "You can chain time functions together." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**NOTE:** You must set the coarse_grain_timestamp to None to filter on the fine_grain_timestamp. The below cell will fail unless the second line is uncommented " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# select data that occurs within a given time range\n", + "#tsd = tsd.with_timestamp_columns(fine_grain_timestamp='datetime', coarse_grain_timestamp=None)\n", + "tsd2 = tsd.time_after(datetime(2019, 1, 2)).time_before(datetime(2019, 1, 10))\n", + "tsd2.to_pandas_dataframe().head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Time Range Input" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# another way to select data that occurs within a given time range\n", + "tsd2 = tsd.time_between(start_time=datetime(2019, 1, 31, 23, 59, 59), end_time=datetime(2019, 2, 7))\n", + "tsd2.to_pandas_dataframe().head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Time Recent Input" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function takes in a datetime.timedelta and returns a dataset containing the data from datetime.now()-timedelta() to datetime.now()." 
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "tsd2 = tsd.time_recent(timedelta(weeks=5, days=0))\n",
+        "tsd2.to_pandas_dataframe().head(5)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "**NOTE:** This will return an empty dataframe if there is no data within the last 2 days."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "tsd2 = tsd.time_recent(timedelta(days=2))\n",
+        "tsd2.to_pandas_dataframe().tail(5)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Drop Columns"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "The columns to be dropped should NOT include timestamp columns. Below operation will lead to exception."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "try:\n",
+        "    tsd2 = tsd.drop_columns(columns=['snowDepth', 'version', 'datetime'])\n",
+        "except Exception as e:\n",
+        "    print('Expected exception : {}'.format(str(e)))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Drop will succeed if you modify the column list to exclude the timestamp columns."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "tsd2 = tsd.drop_columns(columns=['snowDepth', 'version', 'upload_date'])\n",
+        "tsd2.take(5).to_pandas_dataframe().sort_values(by='datetime')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Keep Columns"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "The columns to be kept should ALWAYS include the timestamp columns. 
Below operation will lead to exception." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " tsd2 = tsd.keep_columns(columns=['snowDepth'], validate=False)\n", + "except Exception as e:\n", + " print('Expected exception : {}'.format(str(e)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Keep will succeed if modify column list to include timestamp columns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tsd2 = tsd.keep_columns(columns=['snowDepth', 'datetime', 'coarse_time'], validate=False)\n", + "tsd2.to_pandas_dataframe().tail()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Resetting Timestamp Columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Rules for reseting are:\n", + "- You cannot assign 'None' to fine_grain_timestamp while assign a valid column name to coarse_grain_timestamp because coarse_grain_timestamp is optional while fine_grain_timestamp is mandatory for Tabular time series data.\n", + "- If you assign 'None' to fine_grain_timestamp, then both fine_grain_timestamp and coarse_grain_timestamp will all be cleared.\n", + "- If you assign only 'None' to coarse_grain_timestamp, then only coarse_grain_timestamp will be cleared." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Illegal clearing, exception is expected.\n", + "try:\n", + " tsd2 = tsd.with_timestamp_columns(fine_grain_timestamp=None, coarse_grain_timestamp='coarse_time')\n", + "except Exception as e:\n", + " print('Cleaning not allowed because {}'.format(str(e)))\n", + "\n", + "# clear both\n", + "tsd2 = tsd.with_timestamp_columns(fine_grain_timestamp=None, coarse_grain_timestamp=None)\n", + "print('after clean both with None/None, timestamp columns are: {}'.format(tsd2.timestamp_columns))\n", + "\n", + "# clear coarse_grain_timestamp only and assign 'datetime' as fine timestamp column\n", + "tsd2 = tsd2.with_timestamp_columns(fine_grain_timestamp='datetime', coarse_grain_timestamp=None)\n", + "print('after clean coarse timestamp column, timestamp columns are: {}'.format(tsd2.timestamp_columns))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/work-with-data/datasets/datasets-tutorial/datasets-tutorial.png)" + ] + } ], - "kernelspec": { - "display_name": "Python 3.6", - "language": "python", - "name": "python36" + "metadata": { + "authors": [ + { + "name": "copeters" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License." }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - }, - "notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License." 
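To round off the column operations in this notebook, here is a small sketch (using only the drop_columns and timestamp_columns members demonstrated above) of building a drop list that automatically respects the rule that timestamp columns must not be dropped:

```python
# build a drop list that can never clash with the assigned timestamp columns
candidates = ['snowDepth', 'version', 'datetime']
safe_to_drop = [c for c in candidates if c not in tsd.timestamp_columns]
tsd2 = tsd.drop_columns(columns=safe_to_drop)
```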
- }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/train-with-datasets.ipynb b/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/train-with-datasets.ipynb new file mode 100644 index 00000000..42b183e7 --- /dev/null +++ b/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/train-with-datasets.ipynb @@ -0,0 +1,620 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/train-with-datasets.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Train with Azure Machine Learning Datasets\n", + "Datasets are categorized into TabularDataset and FileDataset based on how users consume them in training. \n", + "* A TabularDataset represents data in a tabular format by parsing the provided file or list of files. TabularDataset can be created from csv, tsv, parquet files, SQL query results etc. For the complete list, please visit our [documentation](https://aka.ms/tabulardataset-api-reference). It provides you with the ability to materialize the data into a pandas DataFrame.\n", + "* A FileDataset references single or multiple files in your datastores or public urls. This provides you with the ability to download or mount the files to your compute. The files can be of any format, which enables a wider range of machine learning scenarios including deep learning.\n", + "\n", + "In this tutorial, you will learn how to train with Azure Machine Learning Datasets:\n", + "\n", + "☑ Use Datasets directly in your training script\n", + "\n", + "☑ Use Datasets to mount files to a remote compute" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) first if you haven't already established your connection to the AzureML Workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print('SDK version:', azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments." 
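As a side note, a minimal sketch of reading those run records back, assuming the `exp` object created in the next cell:

```python
# iterate over the run records held by the experiment and read back logged metrics
for past_run in exp.get_runs():
    print(past_run.id, past_run.get_status(), past_run.get_metrics())
```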
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "experiment_name = 'train-with-datasets'\n", + "\n", + "from azureml.core import Experiment\n", + "exp = Experiment(workspace=ws, name=experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create or Attach existing compute resource\n", + "By using Azure Machine Learning Compute, a managed service, data scientists can train machine learning models on clusters of Azure virtual machines. Examples include VMs with GPU support. In this tutorial, you create Azure Machine Learning Compute as your training environment. The code below creates the compute clusters for you if they don't already exist in your workspace.\n", + "\n", + "**Creation of compute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace the code will skip the creation process." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import AmlCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "import os\n", + "\n", + "# choose a name for your cluster\n", + "compute_name = os.environ.get('AML_COMPUTE_CLUSTER_NAME', 'cpu-cluster')\n", + "compute_min_nodes = os.environ.get('AML_COMPUTE_CLUSTER_MIN_NODES', 0)\n", + "compute_max_nodes = os.environ.get('AML_COMPUTE_CLUSTER_MAX_NODES', 4)\n", + "\n", + "# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6\n", + "vm_size = os.environ.get('AML_COMPUTE_CLUSTER_SKU', 'STANDARD_D2_V2')\n", + "\n", + "\n", + "if compute_name in ws.compute_targets:\n", + " compute_target = ws.compute_targets[compute_name]\n", + " if compute_target and type(compute_target) is AmlCompute:\n", + " print('found compute target. just use it. ' + compute_name)\n", + "else:\n", + " print('creating a new compute target...')\n", + " provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,\n", + " min_nodes=compute_min_nodes, \n", + " max_nodes=compute_max_nodes)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)\n", + " \n", + " # can poll for a minimum number of nodes and for a specific timeout. \n", + " # if no min node count is provided it will use the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + " \n", + " # For a more detailed view of current AmlCompute status, use get_status()\n", + " print(compute_target.get_status().serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You now have the necessary packages and compute resources to train a model in the cloud.\n", + "## Use Datasets directly in training\n", + "\n", + "### Create a TabularDataset\n", + "By creating a dataset, you create a reference to the data source location. If you applied any subsetting transformations to the dataset, they will be stored in the dataset as well. The data remains in its existing location, so no extra storage cost is incurred. \n", + "\n", + "Every workspace comes with a default [datastore](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-access-data) (and you can register more) which is backed by the Azure blob storage account associated with the workspace. 
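This tutorial sticks to the default datastore, but for completeness here is a hedged sketch of registering an additional blob datastore; the storage account, container and key below are placeholders, not resources used by this notebook.

```python
from azureml.core import Datastore

# placeholder values -- substitute a blob container you own
extra_dstore = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name='my_extra_blob',       # name used to refer to it inside Azure ML
    container_name='my-container',        # existing blob container
    account_name='mystorageaccount',
    account_key='<storage-account-key>')
```

The steps below continue with the default datastore.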
We can use it to transfer data from local to the cloud, and create Dataset from it.We will now upload the [Titanic data](./train-dataset/Titanic.csv) to the default datastore (blob) within your workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "datastore = ws.get_default_datastore()\n", + "datastore.upload_files(files = ['./train-dataset/Titanic.csv'],\n", + " target_path = 'train-dataset/tabular/',\n", + " overwrite = True,\n", + " show_progress = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then we will create an unregistered TabularDataset pointing to the path in the datastore. You can also create a Dataset from multiple paths. [learn more](https://aka.ms/azureml/howto/createdatasets) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Dataset\n", + "dataset = Dataset.Tabular.from_delimited_files(path = [(datastore, 'train-dataset/tabular/Titanic.csv')])\n", + "\n", + "# preview the first 3 rows of the dataset\n", + "dataset.take(3).to_pandas_dataframe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a training script\n", + "\n", + "To submit the job to the cluster, first create a training script. Run the following code to create the training script called `train_titanic.py` in the script_folder. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "script_folder = os.path.join(os.getcwd(), 'train-dataset')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $script_folder/train_titanic.py\n", + "\n", + "import os\n", + "\n", + "from azureml.core import Dataset, Run\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.externals import joblib\n", + "\n", + "run = Run.get_context()\n", + "# get input dataset by name\n", + "dataset = run.input_datasets['titanic']\n", + "\n", + "df = dataset.to_pandas_dataframe()\n", + "\n", + "x_col = ['Pclass', 'Sex', 'SibSp', 'Parch']\n", + "y_col = ['Survived']\n", + "x_df = df.loc[:, x_col]\n", + "y_df = df.loc[:, y_col]\n", + "\n", + "x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=223)\n", + "\n", + "data = {'train': {'X': x_train, 'y': y_train},\n", + "\n", + " 'test': {'X': x_test, 'y': y_test}}\n", + "\n", + "clf = DecisionTreeClassifier().fit(data['train']['X'], data['train']['y'])\n", + "model_file_name = 'decision_tree.pkl'\n", + "\n", + "print('Accuracy of Decision Tree classifier on training set: {:.2f}'.format(clf.score(x_train, y_train)))\n", + "print('Accuracy of Decision Tree classifier on test set: {:.2f}'.format(clf.score(x_test, y_test)))\n", + "\n", + "os.makedirs('./outputs', exist_ok=True)\n", + "with open(model_file_name, 'wb') as file:\n", + " joblib.dump(value=clf, filename='outputs/' + model_file_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configure and use Datasets as the input to Estimator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can ask the system to build a conda environment based on your dependency specification. Once the environment is built, and if you don't change your dependencies, it will be reused in subsequent runs." 
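As an optional extra (a sketch that assumes the `conda_env` object defined in the next cell), an environment definition can also be registered to the workspace so the same built image is shared across experiments:

```python
# register the environment definition so other runs and notebooks can fetch it by name
conda_env.register(workspace=ws)

# later, retrieve the same definition (and reuse its cached image) by name
from azureml.core import Environment
restored_env = Environment.get(workspace=ws, name='conda-env')
```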
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Environment\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "conda_env = Environment('conda-env')\n", + "conda_env.python.conda_dependencies = CondaDependencies.create(pip_packages=['azureml-sdk','azureml-dataprep[pandas,fuse]>=1.1.','scikit-learn'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "An estimator object is used to submit the run. Azure Machine Learning has pre-configured estimators for common machine learning frameworks, as well as generic Estimator. Create a generic estimator for by specifying\n", + "\n", + "* The name of the estimator object, `est`\n", + "* The directory that contains your scripts. All the files in this directory are uploaded into the cluster nodes for execution. \n", + "* The training script name, train_titanic.py\n", + "* The input Dataset for training\n", + "* The compute target. In this case you will use the AmlCompute you created\n", + "* The environment definition for the experiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.estimator import Estimator\n", + "\n", + "est = Estimator(source_directory=script_folder, \n", + " entry_script='train_titanic.py', \n", + " # pass dataset object as an input with name 'titanic'\n", + " inputs=[dataset.as_named_input('titanic')],\n", + " compute_target=compute_target,\n", + " environment_definition= conda_env) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit job to run\n", + "Submit the estimator to the Azure ML experiment to kick off the execution." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = exp.submit(est)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "\n", + "# monitor the run\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use Datasets to mount files to a remote compute\n", + "\n", + "You can use the Dataset object to mount or download files referred by it. When you mount a file system, you attach that file system to a directory (mount point) and make it available to the system. Because mounting load files at the time of processing, it is usually faster than download.
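For context, a minimal sketch of the two consumption modes; `file_ds` is a stand-in for the FileDataset created later in this notebook, not an object defined at this point:

```python
# the same named input can be consumed either way by a remote run
mounted_input = file_ds.as_named_input('diabetes').as_mount()        # files are streamed on first access
downloaded_input = file_ds.as_named_input('diabetes').as_download()  # files are copied to the node up front
```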
\n", + "Note: mounting is only available for Linux-based compute (DSVM/VM, AMLCompute, HDInsights)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Upload data files into datastore\n", + "We will first load diabetes data from `scikit-learn` to the train-dataset folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_diabetes\n", + "import numpy as np\n", + "\n", + "training_data = load_diabetes()\n", + "np.save(file='train-dataset/features.npy', arr=training_data['data'])\n", + "np.save(file='train-dataset/labels.npy', arr=training_data['target'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's upload the 2 files into the default datastore under a path named `diabetes`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "datastore.upload_files(['train-dataset/features.npy', 'train-dataset/labels.npy'], target_path='diabetes', overwrite=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a FileDataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.dataset import Dataset\n", + "\n", + "dataset = Dataset.File.from_files(path = [(datastore, 'diabetes/')])\n", + "\n", + "# see a list of files referenced by dataset\n", + "dataset.to_path()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a training script\n", + "\n", + "To submit the job to the cluster, first create a training script. Run the following code to create the training script called `train_diabetes.py` in the script_folder. 
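Before writing the training script in the next cell, an optional sanity check, sketched with the FileDataset just created: the referenced files can also be pulled down locally and inspected.

```python
# optional sanity check: download the referenced .npy files to a local folder
local_paths = dataset.download(target_path='./diabetes-local', overwrite=True)
print(local_paths)
```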
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $script_folder/train_diabetes.py\n", + "\n", + "import os\n", + "import glob\n", + "\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import train_test_split\n", + "from azureml.core.run import Run\n", + "from sklearn.externals import joblib\n", + "\n", + "import numpy as np\n", + "\n", + "os.makedirs('./outputs', exist_ok=True)\n", + "\n", + "run = Run.get_context()\n", + "base_path = run.input_datasets['diabetes']\n", + "\n", + "X = np.load(glob.glob(os.path.join(base_path, '**/features.npy'), recursive=True)[0])\n", + "y = np.load(glob.glob(os.path.join(base_path, '**/labels.npy'), recursive=True)[0])\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=0)\n", + "data = {'train': {'X': X_train, 'y': y_train},\n", + " 'test': {'X': X_test, 'y': y_test}}\n", + "\n", + "# list of numbers from 0.0 to 1.0 with a 0.05 interval\n", + "alphas = np.arange(0.0, 1.0, 0.05)\n", + "\n", + "for alpha in alphas:\n", + " # use Ridge algorithm to create a regression model\n", + " reg = Ridge(alpha=alpha)\n", + " reg.fit(data['train']['X'], data['train']['y'])\n", + "\n", + " preds = reg.predict(data['test']['X'])\n", + " mse = mean_squared_error(preds, data['test']['y'])\n", + " run.log('alpha', alpha)\n", + " run.log('mse', mse)\n", + "\n", + " model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n", + " with open(model_file_name, 'wb') as file:\n", + " joblib.dump(value=reg, filename='outputs/' + model_file_name)\n", + "\n", + " print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configure & Run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import ScriptRunConfig\n", + "\n", + "src = ScriptRunConfig(source_directory=script_folder, \n", + " script='train_diabetes.py', \n", + " # to mount the dataset on the remote compute and pass the mounted path as an argument to the training script\n", + " arguments =[dataset.as_named_input('diabetes').as_mount('tmp/dataset')])\n", + "\n", + "src.run_config.framework = 'python'\n", + "src.run_config.environment = conda_env\n", + "src.run_config.target = compute_target.name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = exp.submit(config=src)\n", + "\n", + "# monitor the run\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Display run results\n", + "You now have a model trained on a remote cluster. Retrieve all the metrics logged during the run, including the accuracy of the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(run.get_metrics())\n", + "metrics = run.get_metrics()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register Datasets\n", + "Use the register() method to register datasets to your workspace so they can be shared with others, reused across various experiments, and referred to by name in your training script." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = dataset.register(workspace = ws,\n", + " name = 'diabetes dataset',\n", + " description='training dataset',\n", + " create_new_version=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Register models with Datasets\n", + "The last step in the training script wrote the model files in a directory named `outputs` in the VM of the cluster where the job is executed. `outputs` is a special directory in that all content in this directory is automatically uploaded to your workspace. This content appears in the run record in the experiment under your workspace. Hence, the model file is now also available in your workspace.\n", + "\n", + "You can register models with Datasets for reproducibility and auditing purpose." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# find the index where MSE is the smallest\n", + "indices = list(range(0, len(metrics['mse'])))\n", + "min_mse_index = min(indices, key=lambda x: metrics['mse'][x])\n", + "\n", + "print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n", + " metrics['mse'][min_mse_index], \n", + " metrics['alpha'][min_mse_index]\n", + "))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# find the best model\n", + "best_alpha = metrics['alpha'][min_mse_index]\n", + "model_file_name = 'ridge_{0:.2f}.pkl'.format(best_alpha)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# register the best model with the input dataset\n", + "model = run.register_model(model_name='sklearn_diabetes', model_path=os.path.join('outputs', model_file_name),\n", + " datasets =[('training data',dataset)])" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "sihhu" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/index.md b/index.md index a554ac88..d8c2d28a 100644 --- a/index.md +++ b/index.md @@ -16,6 +16,8 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an |Title| Task | Dataset | Training Compute | Deployment Target | ML Framework | Tags | |:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|:------------:| +| :star:[Use pipelines for batch scoring](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/tutorial-pipeline-batch-scoring-classification.ipynb) | Batch scoring | None | AmlCompute | Published pipeline | Azure ML Pipelines | None | + ## Training @@ -35,158 +37,504 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an ## Other Notebooks |Title| Task | Dataset | Training Compute | Deployment Target | ML Framework | Tags | |:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|:------------:| -| [Logging APIs](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb) | Logging APIs and analyzing 
results | None | None | None | None | None | -| [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azuremlconfiguration.ipynb) | | | | | | | -| [azure-ml-with-nvidia-rapids](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/contrib/RAPIDS/azure-ml-with-nvidia-rapids.ipynb) | | | | | | | -| [auto-ml-classification](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb) | | | | | | | -| [auto-ml-classification-bank-marketing](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb) | | | | | | | -| [auto-ml-classification-credit-card-fraud](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb) | | | | | | | -| [auto-ml-classification-with-deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb) | | | | | | | -| [auto-ml-classification-with-onnx](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-onnx/auto-ml-classification-with-onnx.ipynb) | | | | | | | -| [auto-ml-classification-with-whitelisting](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb) | | | | | | | -| [auto-ml-dataset](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.ipynb) | | | | | | | -| [auto-ml-dataset-remote-execution](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.ipynb) | | | | | | | -| [auto-ml-exploring-previous-runs](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/exploring-previous-runs/auto-ml-exploring-previous-runs.ipynb) | | | | | | | -| [auto-ml-forecasting-bike-share](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb) | | | | | | | -| [auto-ml-forecasting-energy-demand](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb) | | | | | | | -| [auto-ml-forecasting-orange-juice-sales](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb) | | | | | | | -| [auto-ml-missing-data-blacklist-early-termination](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb) | | | | | | | -| [auto-ml-model-explanation](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/model-explanation/auto-ml-model-explanation.ipynb) | | | | | | | -| 
[auto-ml-regression](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/regression/auto-ml-regression.ipynb) | | | | | | | -| [auto-ml-regression-concrete-strength](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb) | | | | | | | -| [auto-ml-regression-hardware-performance](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb) | | | | | | | -| [auto-ml-remote-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/remote-amlcompute/auto-ml-remote-amlcompute.ipynb) | | | | | | | -| [auto-ml-remote-amlcompute-with-onnx](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/remote-amlcompute-with-onnx/auto-ml-remote-amlcompute-with-onnx.ipynb) | | | | | | | -| [auto-ml-sample-weight](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/sample-weight/auto-ml-sample-weight.ipynb) | | | | | | | -| [auto-ml-sparse-data-train-test-split](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/sparse-data-train-test-split/auto-ml-sparse-data-train-test-split.ipynb) | | | | | | | -| [auto-ml-sql-energy-demand](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/auto-ml-sql-energy-demand.ipynb) | | | | | | | -| [auto-ml-sql-setup](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb) | | | | | | | -| [auto-ml-subsampling-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/subsampling/auto-ml-subsampling-local.ipynb) | | | | | | | -| [build-model-run-history-03](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/amlsdk/build-model-run-history-03.ipynb) | | | | | | | -| [deploy-to-aci-04](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/amlsdk/deploy-to-aci-04.ipynb) | | | | | | | -| [deploy-to-aks-existingimage-05](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/amlsdk/deploy-to-aks-existingimage-05.ipynb) | | | | | | | -| [ingest-data-02](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/amlsdk/ingest-data-02.ipynb) | | | | | | | -| [installation-and-configuration-01](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/amlsdk/installation-and-configuration-01.ipynb) | | | | | | | -| [automl-databricks-local-01](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-01.ipynb) | | | | | | | -| [automl-databricks-local-with-deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb) | | | | | | | -| 
[aml-pipelines-use-databricks-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/databricks-as-remote-compute-target/aml-pipelines-use-databricks-as-compute-target.ipynb) | | | | | | | -| [automl_hdi_local_classification](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-hdi/automl_hdi_local_classification.ipynb) | | | | | | | -| [model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deploy-to-cloud/model-register-and-deploy.ipynb) | | | | | | | -| [register-model-deploy-local-advanced](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deploy-to-local/register-model-deploy-local-advanced.ipynb) | | | | | | | -| [register-model-deploy-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deploy-to-local/register-model-deploy-local.ipynb) | | | | | | | -| [accelerated-models-object-detection](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/accelerated-models/accelerated-models-object-detection.ipynb) | | | | | | | -| [accelerated-models-quickstart](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/accelerated-models/accelerated-models-quickstart.ipynb) | | | | | | | -| [accelerated-models-training](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/accelerated-models/accelerated-models-training.ipynb) | | | | | | | -| [model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/deploy-to-cloud/model-register-and-deploy.ipynb) | | | | | | | -| [register-model-deploy-local-advanced](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local-advanced.ipynb) | | | | | | | -| [register-model-deploy-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local.ipynb) | | | | | | | -| [enable-app-insights-in-production-service](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb) | | | | | | | -| [enable-data-collection-for-models-in-aks](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/enable-data-collection-for-models-in-aks/enable-data-collection-for-models-in-aks.ipynb) | | | | | | | -| [onnx-convert-aml-deploy-tinyyolo](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb) | | | | | | | -| [onnx-inference-facial-expression-recognition-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb) | | | | | | | -| [onnx-inference-mnist-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb) | | | | | | | -| [onnx-modelzoo-aml-deploy-resnet50](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb) | | | | | | | -| 
[onnx-train-pytorch-aml-deploy-mnist](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb) | | | | | | | -| [production-deploy-to-aks](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb) | | | | | | | -| [production-deploy-to-aks-gpu](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/production-deploy-to-aks-gpu/production-deploy-to-aks-gpu.ipynb) | | | | | | | -| [register-model-create-image-deploy-service](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/register-model-create-image-deploy-service/register-model-create-image-deploy-service.ipynb) | | | | | | | -| [explain-model-on-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb) | | | | | | | -| [save-retrieve-explanations-run-history](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.ipynb) | | | | | | | -| [train-explain-model-locally-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb) | | | | | | | -| [train-explain-model-on-amlcompute-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb) | | | | | | | -| [advanced-feature-transformations-explain-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/tabular-data/advanced-feature-transformations-explain-local.ipynb) | | | | | | | -| [explain-binary-classification-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/tabular-data/explain-binary-classification-local.ipynb) | | | | | | | -| [explain-multiclass-classification-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/tabular-data/explain-multiclass-classification-local.ipynb) | | | | | | | -| [explain-regression-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/tabular-data/explain-regression-local.ipynb) | | | | | | | -| [simple-feature-transformations-explain-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/tabular-data/simple-feature-transformations-explain-local.ipynb) | | | | | | | -| [aml-pipelines-data-transfer](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-data-transfer.ipynb) | | | | | | | -| [aml-pipelines-getting-started](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-getting-started.ipynb) | | | | | | -| [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb) | | | | | | | -| 
[aml-pipelines-how-to-use-estimatorstep](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-estimatorstep.ipynb) | | | | | | | -| [aml-pipelines-how-to-use-pipeline-drafts](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-pipeline-drafts.ipynb) | | | | | | | -| [aml-pipelines-parameter-tuning-with-hyperdrive](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb) | | | | | | | -| [aml-pipelines-publish-and-run-using-rest-endpoint](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-publish-and-run-using-rest-endpoint.ipynb) | | | | | | | -| [aml-pipelines-setup-schedule-for-a-published-pipeline](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb) | | | | | | | -| [aml-pipelines-setup-versioned-pipeline-endpoints](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-versioned-pipeline-endpoints.ipynb) | | | | | | | -| [aml-pipelines-use-adla-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-adla-as-compute-target.ipynb) | | | | | | | -| [aml-pipelines-use-databricks-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb) | | | | | | | -| [aml-pipelines-with-automated-machine-learning-step](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb) | | | | | | | -| [aml-pipelines-with-data-dependency-steps](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb) | | | | | | | -| [nyc-taxi-data-regression-model-building](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb) | | | | | | | -| [pipeline-batch-scoring](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.ipynb) | | | | | | | -| [pipeline-style-transfer](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.ipynb) | | | | | | | -| [authentication-in-azureml](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/manage-azureml-service/authentication-in-azureml/authentication-in-azureml.ipynb) | | | | | | | -| [azure-ml-datadrift](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/monitor-models/data-drift/azure-ml-datadrift.ipynb) | | | | | | | -| 
[manage-runs](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/track-and-monitor-experiments/manage-runs/manage-runs.ipynb) | | | | | | | -| [tensorboard](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/track-and-monitor-experiments/tensorboard/tensorboard.ipynb) | | | | | | | -| [deploy-model](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml//track-and-monitor-experiments/using-mlflow/deploy-model/deploy-model.ipynb) | | | | | | | -| [train-and-deploy-pytorch](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-deploy-pytorch/train-and-deploy-pytorch.ipynb) | | | | | | | -| [train-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-local/train-local.ipynb) | | | | | | | -| [train-remote](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-remote/train-remote.ipynb) | | | | | | | -| [logging-api](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/logging-api/logging-api.ipynb) | | | | | | | -| [manage-runs](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/manage-runs/manage-runs.ipynb) | | | | | | | -| [train-hyperparameter-tune-deploy-with-sklearn](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.ipynb) | | | | | | | -| [train-in-spark](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-in-spark/train-in-spark.ipynb) | | | | | | | -| [train-on-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-on-amlcompute/train-on-amlcompute.ipynb) | | | | | | | -| [train-on-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-on-local/train-on-local.ipynb) | | | | | | | -| [train-on-remote-vm](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-on-remote-vm/train-on-remote-vm.ipynb) | | | | | | | -| [train-within-notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb) | | | | | | | -| [using-environments](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/using-environments/using-environments.ipynb) | | | | | | | -| [distributed-chainer](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/distributed-chainer/distributed-chainer.ipynb) | | | | | | | -| [distributed-cntk-with-custom-docker](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/distributed-cntk-with-custom-docker/distributed-cntk-with-custom-docker.ipynb) | | | | | | | -| [distributed-pytorch-with-horovod](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.ipynb) | | | | | | | -| 
[distributed-tensorflow-with-horovod](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb) | | | | | | | -| [distributed-tensorflow-with-parameter-server](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.ipynb) | | | | | | | -| [export-run-history-to-tensorboard](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/export-run-history-to-tensorboard/export-run-history-to-tensorboard.ipynb) | | | | | | | -| [how-to-use-estimator](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/how-to-use-estimator/how-to-use-estimator.ipynb) | | | | | | | -| [notebook_example](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/how-to-use-estimator/notebook_example.ipynb) | | | | | | | -| [tensorboard](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/tensorboard/tensorboard.ipynb) | | | | | | | -| [train-hyperparameter-tune-deploy-with-chainer](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.ipynb) | | | | | | | -| [train-hyperparameter-tune-deploy-with-keras](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb) | | | | | | | -| [train-hyperparameter-tune-deploy-with-pytorch](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb) | | | | | | | -| [train-hyperparameter-tune-deploy-with-tensorflow](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) | | | | | | | -| [train-tensorflow-resume-training](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/train-tensorflow-resume-training/train-tensorflow-resume-training.ipynb) | | | | | | | -| [new-york-taxi](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi.ipynb) | | | | | | | -| [new-york-taxi_scale-out](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi_scale-out.ipynb) | | | | | | | -| [add-column-using-expression](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/add-column-using-expression.ipynb) | | | | | | | -| [append-columns-and-rows](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/append-columns-and-rows.ipynb) | | | | | | | -| 
[assertions](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/assertions.ipynb) | | | | | | | -| [auto-read-file](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/auto-read-file.ipynb) | | | | | | | -| [cache](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/cache.ipynb) | | | | | | | -| [column-manipulations](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/column-manipulations.ipynb) | | | | | | | -| [column-type-transforms](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/column-type-transforms.ipynb) | | | | | | | -| [custom-python-transforms](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/custom-python-transforms.ipynb) | | | | | | | -| [data-ingestion](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/data-ingestion.ipynb) | | | | | | | -| [data-profile](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/data-profile.ipynb) | | | | | | | -| [datastore](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/datastore.ipynb) | | | | | | | -| [derive-column-by-example](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/derive-column-by-example.ipynb) | | | | | | | -| [external-references](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/external-references.ipynb) | | | | | | | -| [filtering](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/filtering.ipynb) | | | | | | | -| [fuzzy-group](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/fuzzy-group.ipynb) | | | | | | | -| [impute-missing-values](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/impute-missing-values.ipynb) | | | | | | | -| [join](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/join.ipynb) | | | | | | | -| [label-encoder](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/label-encoder.ipynb) | | | | | | | -| [min-max-scaler](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/min-max-scaler.ipynb) | | | | | | | -| [one-hot-encoder](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/one-hot-encoder.ipynb) | | | | | | | -| [open-save-dataflows](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/open-save-dataflows.ipynb) | | | | | | | -| [quantile-transformation](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/quantile-transformation.ipynb) | | | | | | | -| 
[random-split](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/random-split.ipynb) | | | | | | | -| [replace-datasource-replace-reference](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/replace-datasource-replace-reference.ipynb) | | | | | | | -| [replace-fill-error](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/replace-fill-error.ipynb) | | | | | | | -| [secrets](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/secrets.ipynb) | | | | | | | -| [semantic-types](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/semantic-types.ipynb) | | | | | | | -| [split-column-by-example](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/split-column-by-example.ipynb) | | | | | | | -| [subsetting-sampling](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/subsetting-sampling.ipynb) | | | | | | | -| [summarize](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/summarize.ipynb) | | | | | | | -| [working-with-file-streams](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/working-with-file-streams.ipynb) | | | | | | | -| [writing-data](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/writing-data.ipynb) | | | | | | | -| [getting-started](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/tutorials/getting-started/getting-started.ipynb) | | | | | | | -| [datasets-diff](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/datasets/datasets-diff/datasets-diff.ipynb) | | | | | | | -| [file-dataset-img-classification](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/file-dataset-img-classification.ipynb) | | | | | | | -| [tabular-dataset-tutorial](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/tabular-dataset-tutorial.ipynb) | | | | | | | -| [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master/setup-environment/configuration.ipynb) | | | | | | | -| [img-classification-part1-training](https://github.com/Azure/MachineLearningNotebooks/blob/master/tutorials/img-classification-part1-training.ipynb) | | | | | | | -| [img-classification-part2-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/tutorials/img-classification-part2-deploy.ipynb) | | | | | | | -| [regression-automated-ml](https://github.com/Azure/MachineLearningNotebooks/blob/master/tutorials/regression-automated-ml.ipynb) | | | | | | | -| [tutorial-1st-experiment-sdk-train](https://github.com/Azure/MachineLearningNotebooks/blob/master/tutorials/tutorial-1st-experiment-sdk-train.ipynb) | | | | | | | + +| [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) | | | | | | | + + +| 
[azure-ml-with-nvidia-rapids](https://github.com/Azure/MachineLearningNotebooks/blob/master//contrib/RAPIDS/azure-ml-with-nvidia-rapids.ipynb) | | | | | | | + + +| [auto-ml-classification](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb) | | | | | | | + + +| [auto-ml-classification-bank-marketing](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb) | | | | | | | + + +| [auto-ml-classification-credit-card-fraud](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb) | | | | | | | + + +| [auto-ml-classification-with-deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb) | | | | | | | + + +| [auto-ml-classification-with-onnx](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/classification-with-onnx/auto-ml-classification-with-onnx.ipynb) | | | | | | | + + +| [auto-ml-classification-with-whitelisting](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb) | | | | | | | + + +| [auto-ml-dataset](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.ipynb) | | | | | | | + + +| [auto-ml-dataset-remote-execution](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.ipynb) | | | | | | | + + +| [auto-ml-exploring-previous-runs](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/exploring-previous-runs/auto-ml-exploring-previous-runs.ipynb) | | | | | | | + + +| [auto-ml-forecasting-bike-share](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb) | | | | | | | + + +| [auto-ml-forecasting-energy-demand](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb) | | | | | | | + + +| [auto-ml-forecasting-orange-juice-sales](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb) | | | | | | | + + +| [auto-ml-missing-data-blacklist-early-termination](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb) | | | | | | | + + +| [auto-ml-model-explanation](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/model-explanation/auto-ml-model-explanation.ipynb) | | | | | | | + + +| 
[auto-ml-regression](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/regression/auto-ml-regression.ipynb) | | | | | | | + + +| [auto-ml-regression-concrete-strength](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb) | | | | | | | + + +| [auto-ml-regression-hardware-performance](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb) | | | | | | | + + +| [auto-ml-remote-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/remote-amlcompute/auto-ml-remote-amlcompute.ipynb) | | | | | | | + + +| [auto-ml-remote-amlcompute-with-onnx](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/remote-amlcompute-with-onnx/auto-ml-remote-amlcompute-with-onnx.ipynb) | | | | | | | + + +| [auto-ml-sample-weight](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/sample-weight/auto-ml-sample-weight.ipynb) | | | | | | | + + +| [auto-ml-sparse-data-train-test-split](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/sparse-data-train-test-split/auto-ml-sparse-data-train-test-split.ipynb) | | | | | | | + + +| [auto-ml-sql-energy-demand](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/auto-ml-sql-energy-demand.ipynb) | | | | | | | + + +| [auto-ml-sql-setup](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb) | | | | | | | + + +| [auto-ml-subsampling-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/subsampling/auto-ml-subsampling-local.ipynb) | | | | | | | + + +| [build-model-run-history-03](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/amlsdk/build-model-run-history-03.ipynb) | | | | | | | + + +| [deploy-to-aci-04](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/amlsdk/deploy-to-aci-04.ipynb) | | | | | | | + + +| [deploy-to-aks-existingimage-05](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/amlsdk/deploy-to-aks-existingimage-05.ipynb) | | | | | | | + + +| [ingest-data-02](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/amlsdk/ingest-data-02.ipynb) | | | | | | | + + +| [installation-and-configuration-01](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/amlsdk/installation-and-configuration-01.ipynb) | | | | | | | + + +| [automl-databricks-local-01](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/automl/automl-databricks-local-01.ipynb) | | | | | | | + + +| [automl-databricks-local-with-deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb) | | | | | | | + + +| 
[aml-pipelines-use-databricks-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/databricks-as-remote-compute-target/aml-pipelines-use-databricks-as-compute-target.ipynb) | | | | | | | + + +| [automl_hdi_local_classification](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-hdi/automl_hdi_local_classification.ipynb) | | | | | | | + + +| [model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deploy-to-cloud/model-register-and-deploy.ipynb) | | | | | | | + + +| [register-model-deploy-local-advanced](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deploy-to-local/register-model-deploy-local-advanced.ipynb) | | | | | | | + + +| [register-model-deploy-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deploy-to-local/register-model-deploy-local.ipynb) | | | | | | | + + +| [accelerated-models-object-detection](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/accelerated-models/accelerated-models-object-detection.ipynb) | | | | | | | + + +| [accelerated-models-quickstart](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/accelerated-models/accelerated-models-quickstart.ipynb) | | | | | | | + + +| [accelerated-models-training](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/accelerated-models/accelerated-models-training.ipynb) | | | | | | | + + +| [model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-to-cloud/model-register-and-deploy.ipynb) | | | | | | | + + +| [register-model-deploy-local-advanced](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local-advanced.ipynb) | | | | | | | + + +| [register-model-deploy-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local.ipynb) | | | | | | | + + +| [enable-app-insights-in-production-service](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb) | | | | | | | + + +| [enable-data-collection-for-models-in-aks](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/enable-data-collection-for-models-in-aks/enable-data-collection-for-models-in-aks.ipynb) | | | | | | | + + +| [onnx-convert-aml-deploy-tinyyolo](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb) | | | | | | | + + +| [onnx-inference-facial-expression-recognition-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb) | | | | | | | + + +| [onnx-inference-mnist-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb) | | | | | | | + + +| [onnx-modelzoo-aml-deploy-resnet50](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb) | | | | | | | + + +| 
[onnx-train-pytorch-aml-deploy-mnist](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb) | | | | | | | + + +| [production-deploy-to-aks](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb) | | | | | | | + + +| [production-deploy-to-aks-gpu](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/production-deploy-to-aks-gpu/production-deploy-to-aks-gpu.ipynb) | | | | | | | + + +| [register-model-create-image-deploy-service](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/register-model-create-image-deploy-service/register-model-create-image-deploy-service.ipynb) | | | | | | | + + +| [explain-model-on-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb) | | | | | | | + + +| [save-retrieve-explanations-run-history](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.ipynb) | | | | | | | + + +| [train-explain-model-locally-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb) | | | | | | | + + +| [train-explain-model-on-amlcompute-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb) | | | | | | | + + +| [advanced-feature-transformations-explain-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/tabular-data/advanced-feature-transformations-explain-local.ipynb) | | | | | | | + + +| [explain-binary-classification-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/tabular-data/explain-binary-classification-local.ipynb) | | | | | | | + + +| [explain-multiclass-classification-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/tabular-data/explain-multiclass-classification-local.ipynb) | | | | | | | + + +| [explain-regression-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/tabular-data/explain-regression-local.ipynb) | | | | | | | + + +| [simple-feature-transformations-explain-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/tabular-data/simple-feature-transformations-explain-local.ipynb) | | | | | | | + + +| [aml-pipelines-data-transfer](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-data-transfer.ipynb) | | | | | | | + + +| [aml-pipelines-getting-started](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-getting-started.ipynb) | | | | | | | + + +| 
[aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb) | | | | | | | + + +| [aml-pipelines-how-to-use-estimatorstep](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-estimatorstep.ipynb) | | | | | | | + + +| [aml-pipelines-how-to-use-pipeline-drafts](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-pipeline-drafts.ipynb) | | | | | | | + + +| [aml-pipelines-parameter-tuning-with-hyperdrive](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb) | | | | | | | + + +| [aml-pipelines-publish-and-run-using-rest-endpoint](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-publish-and-run-using-rest-endpoint.ipynb) | | | | | | | + + +| [aml-pipelines-setup-schedule-for-a-published-pipeline](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb) | | | | | | | + + +| [aml-pipelines-setup-versioned-pipeline-endpoints](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-versioned-pipeline-endpoints.ipynb) | | | | | | | + + +| [aml-pipelines-showcasing-datapath-and-pipelineparameter](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-showcasing-datapath-and-pipelineparameter.ipynb) | | | | | | | + + +| [aml-pipelines-use-adla-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-adla-as-compute-target.ipynb) | | | | | | | + + +| [aml-pipelines-use-databricks-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb) | | | | | | | + + +| [aml-pipelines-with-automated-machine-learning-step](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb) | | | | | | | + + +| [aml-pipelines-with-data-dependency-steps](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb) | | | | | | | + + +| [nyc-taxi-data-regression-model-building](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb) | | | | | | | + + +| [pipeline-batch-scoring](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.ipynb) | | | | | | | + + +| 
[pipeline-style-transfer](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.ipynb) | | | | | | | + + +| [authentication-in-azureml](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/manage-azureml-service/authentication-in-azureml/authentication-in-azureml.ipynb) | | | | | | | + + +| [train-hyperparameter-tune-deploy-with-chainer](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.ipynb) | | | | | | | + + +| [distributed-chainer](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/chainer/training/distributed-chainer/distributed-chainer.ipynb) | | | | | | | + + +| [train-hyperparameter-tune-deploy-with-pytorch](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb) | | | | | | | + + +| [distributed-pytorch-with-horovod](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.ipynb) | | | | | | | + + +| [distributed-pytorch-with-nccl-gloo](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-nccl-gloo/distributed-pytorch-with-nccl-gloo.ipynb) | | | | | | | + + +| [train-hyperparameter-tune-deploy-with-sklearn](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/scikit-learn/training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.ipynb) | | | | | | | + + +| [train-hyperparameter-tune-deploy-with-tensorflow](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) | | | | | | | + + +| [distributed-tensorflow-with-horovod](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb) | | | | | | | + + +| [distributed-tensorflow-with-parameter-server](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.ipynb) | | | | | | | + + +| [train-tensorflow-resume-training](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/train-tensorflow-resume-training.ipynb) | | | | | | | + + +| [azure-ml-datadrift](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/monitor-models/data-drift/azure-ml-datadrift.ipynb) | | | | | | | + + +| [Logging APIs](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb) | Logging APIs and analyzing results | | None | None | None | None | + + +| 
[manage-runs](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/manage-runs/manage-runs.ipynb) | | | | | | | + + +| [tensorboard](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/tensorboard/tensorboard.ipynb) | | | | | | | + + +| [deploy-model](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/using-mlflow/deploy-model/deploy-model.ipynb) | | | | | | | + + +| [train-and-deploy-pytorch](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-deploy-pytorch/train-and-deploy-pytorch.ipynb) | | | | | | | + + +| [train-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-local/train-local.ipynb) | | | | | | | + + +| [train-remote](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-remote/train-remote.ipynb) | | | | | | | + + +| [logging-api](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/logging-api/logging-api.ipynb) | | | | | | | + + +| [manage-runs](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/manage-runs/manage-runs.ipynb) | | | | | | | + + +| [train-hyperparameter-tune-deploy-with-sklearn](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.ipynb) | | | | | | | + + +| [train-in-spark](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/train-in-spark/train-in-spark.ipynb) | | | | | | | + + +| [train-on-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/train-on-amlcompute/train-on-amlcompute.ipynb) | | | | | | | + + +| [train-on-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/train-on-local/train-on-local.ipynb) | | | | | | | + + +| [train-on-remote-vm](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/train-on-remote-vm/train-on-remote-vm.ipynb) | | | | | | | + + +| [train-within-notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb) | | | | | | | + + +| [using-environments](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/using-environments/using-environments.ipynb) | | | | | | | + + +| [distributed-chainer](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/distributed-chainer/distributed-chainer.ipynb) | | | | | | | + + +| [distributed-cntk-with-custom-docker](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/distributed-cntk-with-custom-docker/distributed-cntk-with-custom-docker.ipynb) | | | | | | | + + +| [distributed-pytorch-with-horovod](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.ipynb) | | | | | | | + + +| 
[distributed-tensorflow-with-horovod](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb) | | | | | | | + + +| [distributed-tensorflow-with-parameter-server](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.ipynb) | | | | | | | + + +| [export-run-history-to-tensorboard](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/export-run-history-to-tensorboard/export-run-history-to-tensorboard.ipynb) | | | | | | | + + +| [how-to-use-estimator](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/how-to-use-estimator/how-to-use-estimator.ipynb) | | | | | | | + + +| [notebook_example](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/how-to-use-estimator/notebook_example.ipynb) | | | | | | | + + +| [tensorboard](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/tensorboard/tensorboard.ipynb) | | | | | | | + + +| [train-hyperparameter-tune-deploy-with-chainer](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.ipynb) | | | | | | | + + +| [train-hyperparameter-tune-deploy-with-keras](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb) | | | | | | | + + +| [train-hyperparameter-tune-deploy-with-pytorch](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb) | | | | | | | + + +| [train-hyperparameter-tune-deploy-with-tensorflow](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) | | | | | | | + + +| [train-tensorflow-resume-training](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/train-tensorflow-resume-training/train-tensorflow-resume-training.ipynb) | | | | | | | + + +| [new-york-taxi](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi.ipynb) | | | | | | | + + +| [new-york-taxi_scale-out](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi_scale-out.ipynb) | | | | | | | + + +| [add-column-using-expression](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/add-column-using-expression.ipynb) | | | | | | | + + +| [append-columns-and-rows](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/append-columns-and-rows.ipynb) | | | | | | | + + +| 
[assertions](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/assertions.ipynb) | | | | | | | + + +| [auto-read-file](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/auto-read-file.ipynb) | | | | | | | + + +| [cache](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/cache.ipynb) | | | | | | | + + +| [column-manipulations](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/column-manipulations.ipynb) | | | | | | | + + +| [column-type-transforms](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/column-type-transforms.ipynb) | | | | | | | + + +| [custom-python-transforms](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/custom-python-transforms.ipynb) | | | | | | | + + +| [data-ingestion](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/data-ingestion.ipynb) | | | | | | | + + +| [data-profile](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/data-profile.ipynb) | | | | | | | + + +| [datastore](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/datastore.ipynb) | | | | | | | + + +| [derive-column-by-example](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/derive-column-by-example.ipynb) | | | | | | | + + +| [external-references](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/external-references.ipynb) | | | | | | | + + +| [filtering](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/filtering.ipynb) | | | | | | | + + +| [fuzzy-group](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/fuzzy-group.ipynb) | | | | | | | + + +| [impute-missing-values](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/impute-missing-values.ipynb) | | | | | | | + + +| [join](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/join.ipynb) | | | | | | | + + +| [label-encoder](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/label-encoder.ipynb) | | | | | | | + + +| [min-max-scaler](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/min-max-scaler.ipynb) | | | | | | | + + +| [one-hot-encoder](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/one-hot-encoder.ipynb) | | | | | | | + + +| [open-save-dataflows](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/open-save-dataflows.ipynb) | | | | | | | + + +| 
[quantile-transformation](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/quantile-transformation.ipynb) | | | | | | | + + +| [random-split](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/random-split.ipynb) | | | | | | | + + +| [replace-datasource-replace-reference](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/replace-datasource-replace-reference.ipynb) | | | | | | | + + +| [replace-fill-error](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/replace-fill-error.ipynb) | | | | | | | + + +| [secrets](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/secrets.ipynb) | | | | | | | + + +| [semantic-types](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/semantic-types.ipynb) | | | | | | | + + +| [split-column-by-example](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/split-column-by-example.ipynb) | | | | | | | + + +| [subsetting-sampling](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/subsetting-sampling.ipynb) | | | | | | | + + +| [summarize](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/summarize.ipynb) | | | | | | | + + +| [working-with-file-streams](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/working-with-file-streams.ipynb) | | | | | | | + + +| [writing-data](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/how-to-guides/writing-data.ipynb) | | | | | | | + + +| [getting-started](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/dataprep/tutorials/getting-started/getting-started.ipynb) | | | | | | | + + +| [datasets-diff](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets/datasets-diff/datasets-diff.ipynb) | | | | | | | + + +| [file-dataset-img-classification](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets/datasets-tutorial/file-dataset-img-classification.ipynb) | | | | | | | + + +| [tabular-dataset-tutorial](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets/datasets-tutorial/tabular-dataset-tutorial.ipynb) | | | | | | | + + +| [tabular-timeseries-dataset-filtering](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets/datasets-tutorial/tabular-timeseries-dataset-filtering.ipynb) | | | | | | | + + +| [train-with-datasets](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets/datasets-tutorial/train-with-datasets.ipynb) | | | | | | | + + +| [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master//setup-environment/configuration.ipynb) | | | | | | | + + +| 
[img-classification-part1-training](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/img-classification-part1-training.ipynb) | | | | | | | + + +| [img-classification-part2-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/img-classification-part2-deploy.ipynb) | | | | | | | + + +| [regression-automated-ml](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/regression-automated-ml.ipynb) | | | | | | | + + +| [tutorial-1st-experiment-sdk-train](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/tutorial-1st-experiment-sdk-train.ipynb) | | | | | | | diff --git a/setup-environment/configuration.ipynb b/setup-environment/configuration.ipynb index f7c376bb..1425c490 100644 --- a/setup-environment/configuration.ipynb +++ b/setup-environment/configuration.ipynb @@ -102,7 +102,7 @@ "source": [ "import azureml.core\n", "\n", - "print(\"This notebook was created using version 1.0.60 of the Azure ML SDK\")\n", + "print(\"This notebook was created using version 1.0.62 of the Azure ML SDK\")\n", "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")" ] }, diff --git a/tutorials/img-classification-part1-training.ipynb b/tutorials/img-classification-part1-training.ipynb index c783128b..87c60300 100644 --- a/tutorials/img-classification-part1-training.ipynb +++ b/tutorials/img-classification-part1-training.ipynb @@ -184,11 +184,10 @@ "\n", "## Explore data\n", "\n", - "Before you train a model, you need to understand the data that you are using to train it. You also need to copy the data into the cloud so it can be accessed by your cloud training environment. In this section you learn how to:\n", + "Before you train a model, you need to understand the data that you are using to train it. In this section you learn how to:\n", "\n", "* Download the MNIST dataset\n", "* Display some sample images\n", - "* Upload data to the cloud\n", "\n", "### Download the MNIST dataset\n", "\n", @@ -254,13 +253,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now you have an idea of what these images look like and the expected prediction outcome.\n", - "\n", - "### Upload data to the cloud\n", - "\n", - "Now make the data accessible remotely by uploading that data from your local machine into Azure so it can be accessed for remote training. The datastore is a convenient construct associated with your workspace for you to upload/download data, and interact with it from your remote compute targets. It is backed by Azure blob storage account.\n", - "\n", - "The MNIST files are uploaded into a directory named `mnist` at the root of the datastore. See [access data from your datastores](https://docs.microsoft.com/bs-latn-ba/azure/machine-learning/service/how-to-access-data) for more information." + "## Create a FileDataset\n", + "A FileDataset references single or multiple files in your datastores or public urls. The files can be of any format. FileDataset provides you with the ability to download or mount the files to your compute. By creating a dataset, you create a reference to the data source location. If you applied any subsetting transformations to the dataset, they will be stored in the dataset as well. The data remains in its existing location, so no extra storage cost is incurred. 
[Learn More](https://aka.ms/azureml/howto/createdatasets)" ] }, { @@ -273,10 +267,44 @@ }, "outputs": [], "source": [ - "ds = ws.get_default_datastore()\n", - "print(ds.datastore_type, ds.account_name, ds.container_name)\n", + "from azureml.core.dataset import Dataset\n", "\n", - "ds.upload(src_dir=data_folder, target_path='mnist', overwrite=True, show_progress=True)" + "web_paths = [\n", + " 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',\n", + " 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',\n", + " 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',\n", + " 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'\n", + " ]\n", + "dataset = Dataset.File.from_files(path = web_paths)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the `register()` method to register datasets to your workspace so they can be shared with others, reused across various experiments, and referred to by name in your training script." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = dataset.register(workspace = ws,\n", + " name = 'mnist dataset',\n", + " description='training and test dataset',\n", + " create_new_version=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# list the files referenced by dataset\n", + "dataset.to_path()" ] }, { @@ -327,6 +355,7 @@ "import argparse\n", "import os\n", "import numpy as np\n", + "import glob\n", "\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.externals import joblib\n", @@ -334,7 +363,7 @@ "from azureml.core import Run\n", "from utils import load_data\n", "\n", - "# let user feed in 2 parameters, the location of the data files (from datastore), and the regularization rate of the logistic regression model\n", + "# let user feed in 2 parameters, the dataset to mount or download, and the regularization rate of the logistic regression model\n", "parser = argparse.ArgumentParser()\n", "parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point')\n", "parser.add_argument('--regularization', type=float, dest='reg', default=0.01, help='regularization rate')\n", @@ -345,10 +374,11 @@ "\n", "# load train and test set into numpy arrays\n", "# note we scale the pixel intensity values to 0-1 (by dividing it with 255.0) so the model can converge faster.\n", - "X_train = load_data(os.path.join(data_folder, 'train-images.gz'), False) / 255.0\n", - "X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0\n", - "y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True).reshape(-1)\n", - "y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True).reshape(-1)\n", + "X_train = load_data(glob.glob(os.path.join(data_folder, '**/train-images-idx3-ubyte.gz'), recursive=True)[0], False) / 255.0\n", + "X_test = load_data(glob.glob(os.path.join(data_folder, '**/t10k-images-idx3-ubyte.gz'), recursive=True)[0], False) / 255.0\n", + "y_train = load_data(glob.glob(os.path.join(data_folder, '**/train-labels-idx1-ubyte.gz'), recursive=True)[0], True).reshape(-1)\n", + "y_test = load_data(glob.glob(os.path.join(data_folder, '**/t10k-labels-idx1-ubyte.gz'), recursive=True)[0], True).reshape(-1)\n", + "\n", "print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep = '\\n')\n", "\n", "# get hold of the current run\n", @@ -379,7 +409,7 @@ "source": [ "Notice how 
the script gets data and saves models:\n", "\n", - "+ The training script reads an argument to find the directory containing the data. When you submit the job later, you point to the datastore for this argument:\n", + "+ The training script reads an argument to find the directory containing the data. When you submit the job later, you point to the dataset for this argument:\n", "`parser.add_argument('--data-folder', type=str, dest='data_folder', help='data directory mounting point')`" ] }, @@ -424,7 +454,23 @@ "* The training script name, train.py\n", "* Parameters required from the training script \n", "\n", - "In this tutorial, this target is AmlCompute. All files in the script folder are uploaded into the cluster nodes for execution. The data_folder is set to use the datastore (`ds.path('mnist').as_mount()`)." + "In this tutorial, the target is AmlCompute. All files in the script folder are uploaded into the cluster nodes for execution. The data_folder is set to use the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.environment import Environment\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "# to install required packages\n", + "env = Environment('my_env')\n", + "cd = CondaDependencies.create(pip_packages=['azureml-sdk','scikit-learn','azureml-dataprep[pandas,fuse]>=1.1.14'])\n", + "\n", + "env.python.conda_dependencies = cd" ] }, { @@ -440,30 +486,16 @@ "from azureml.train.sklearn import SKLearn\n", "\n", "script_params = {\n", - " '--data-folder': ds.path('mnist').as_mount(),\n", + " # to mount files referenced by mnist dataset\n", + " '--data-folder': dataset.as_named_input('mnist').as_mount(),\n", " '--regularization': 0.5\n", "}\n", "\n", "est = SKLearn(source_directory=script_folder,\n", - " script_params=script_params,\n", - " compute_target=compute_target,\n", - " entry_script='train.py')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This is what the mounting point looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(ds.path('mnist').as_mount())" + " script_params=script_params,\n", + " compute_target=compute_target,\n", + " environment_definition=env,\n", + " entry_script='train.py')" ] }, { @@ -684,7 +716,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.9" }, "msauthor": "roastala" }, diff --git a/tutorials/tutorial-pipeline-batch-scoring-classification.ipynb b/tutorials/tutorial-pipeline-batch-scoring-classification.ipynb index 0a1a6af6..72762315 100644 --- a/tutorials/tutorial-pipeline-batch-scoring-classification.ipynb +++ b/tutorials/tutorial-pipeline-batch-scoring-classification.ipynb @@ -1,726 +1,738 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright (c) Microsoft Corporation. All rights reserved. \n", - "Licensed under the MIT License." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Use Azure Machine Learning Pipelines for batch prediction\n", - "\n", - "In this tutorial, you use Azure Machine Learning service pipelines to run a batch scoring image classification job. The example job uses the pre-trained [Inception-V3](https://arxiv.org/abs/1512.00567) CNN (convolutional neural network) Tensorflow model to classify unlabeled images. Machine learning pipelines optimize your workflow with speed, portability, and reuse so you can focus on your expertise, machine learning, rather than on infrastructure and automation. After building and publishing a pipeline, you can configure a REST endpoint to enable triggering the pipeline from any HTTP library on any platform.\n", - "\n", - "\n", - "In this tutorial, you learn the following tasks:\n", - "\n", - "> * Configure workspace and download sample data\n", - "> * Create data objects to fetch and output data\n", - "> * Download, prepare, and register the model to your workspace\n", - "> * Provision compute targets and create a scoring script\n", - "> * Build, run, and publish a pipeline\n", - "> * Enable a REST endpoint for the pipeline\n", - "\n", - "If you don’t have an Azure subscription, create a free account before you begin. Try the [free or paid version of Azure Machine Learning service](https://aka.ms/AMLFree) today." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prerequisites\n", - "\n", - "* Complete the [setup tutorial](https://docs.microsoft.com/azure/machine-learning/service/tutorial-1st-experiment-sdk-setup) if you don't already have an Azure Machine Learning service workspace or notebook virtual machine.\n", - "* After you complete the setup tutorial, open the **tutorials/tutorial-pipeline-batch-scoring-classification.ipynb** notebook using the same notebook server.\n", - "\n", - "This tutorial is also available on [GitHub](https://github.com/Azure/MachineLearningNotebooks/tree/master/tutorials) if you wish to run it in your own [local environment](how-to-configure-environment.md#local). Run `pip install azureml-sdk[notebooks] azureml-pipeline-core azureml-pipeline-steps pandas requests` to get the required packages." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Configure workspace and create datastore" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a workspace object from the existing workspace. A [Workspace](https://docs.microsoft.com/python/api/azureml-core/azureml.core.workspace.workspace?view=azure-ml-py) is a class that accepts your Azure subscription and resource information. It also creates a cloud resource to monitor and track your model runs. `Workspace.from_config()` reads the file **config.json** and loads the authentication details into an object named `ws`. `ws` is used throughout the rest of the code in this tutorial." 
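If a **config.json** file is not available (for example, when running outside the notebook VM), the same workspace handle can be created explicitly and then cached for later runs. A minimal sketch; the workspace, subscription, and resource group names below are placeholders:

```python
from azureml.core import Workspace

# Placeholder identifiers -- substitute your own values.
ws = Workspace.get(name="<workspace-name>",
                   subscription_id="<subscription-id>",
                   resource_group="<resource-group>")

# Cache the details locally so Workspace.from_config() works next time.
ws.write_config()
```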
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core import Workspace\n", - "ws = Workspace.from_config()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a datastore for sample images\n", - "\n", - "Get the ImageNet evaluation public data sample from the public blob container `sampledata` on the account `pipelinedata`. Calling `register_azure_blob_container()` makes the data available to the workspace under the name `images_datastore`. Then specify the workspace default datastore as the output datastore, which you use for scoring output in the pipeline." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core.datastore import Datastore\n", - "\n", - "batchscore_blob = Datastore.register_azure_blob_container(ws, \n", - " datastore_name=\"images_datastore\", \n", - " container_name=\"sampledata\", \n", - " account_name=\"pipelinedata\", \n", - " overwrite=True)\n", - "\n", - "def_data_store = ws.get_default_datastore()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create data objects\n", - "\n", - "When building pipelines, `DataReference` objects are used for reading data from workspace datastores, and `PipelineData` objects are used for transferring intermediate data between pipeline steps.\n", - "\n", - "This batch scoring example only uses one pipeline step, but in use-cases with multiple steps, the typical flow will include:\n", - "\n", - "1. Using `DataReference` objects as **inputs** to fetch raw data, performing some transformations, then **outputting** a `PipelineData` object.\n", - "1. Use the previous step's `PipelineData` **output object** as an *input object*, repeated for subsequent steps.\n", - "\n", - "For this scenario you create `DataReference` objects corresponding to the datastore directories for both the input images and the classification labels (y-test values). You also create a `PipelineData` object for the batch scoring output data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.data.data_reference import DataReference\n", - "from azureml.pipeline.core import PipelineData\n", - "\n", - "input_images = DataReference(datastore=batchscore_blob, \n", - " data_reference_name=\"input_images\",\n", - " path_on_datastore=\"batchscoring/images\",\n", - " mode=\"download\"\n", - " )\n", - "\n", - "label_dir = DataReference(datastore=batchscore_blob, \n", - " data_reference_name=\"input_labels\",\n", - " path_on_datastore=\"batchscoring/labels\",\n", - " mode=\"download\" \n", - " )\n", - "\n", - "output_dir = PipelineData(name=\"scores\", \n", - " datastore=def_data_store, \n", - " output_path_on_compute=\"batchscoring/results\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download and register the model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Download the pre-trained Tensorflow model to use it for batch scoring in the pipeline. First create a local directory where you store the model, then download and extract it." 
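As a quick sanity check once the download cell below has run, you can confirm that the checkpoint the next section registers was actually extracted (this assumes the archive contains `inception_v3.ckpt`, which is the path the registration step uses):

```python
import os

# List the extracted files and verify the checkpoint exists before registering it.
print(os.listdir("models"))
assert os.path.exists(os.path.join("models", "inception_v3.ckpt"))
```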
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import tarfile\n", - "import urllib.request\n", - "\n", - "if not os.path.isdir(\"models\"):\n", - " os.mkdir(\"models\")\n", - " \n", - "response = urllib.request.urlretrieve(\"http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz\", \"model.tar.gz\")\n", - "tar = tarfile.open(\"model.tar.gz\", \"r:gz\")\n", - "tar.extractall(\"models\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now you register the model to your workspace, which allows you to easily retrieve it in the pipeline process. In the `register()` static function, the `model_name` parameter is the key you use to locate your model throughout the SDK." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core.model import Model\n", - " \n", - "model = Model.register(model_path=\"models/inception_v3.ckpt\",\n", - " model_name=\"inception\",\n", - " tags={\"pretrained\": \"inception\"},\n", - " description=\"Imagenet trained tensorflow inception\",\n", - " workspace=ws)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create and attach remote compute target\n", - "\n", - "Azure Machine Learning service pipelines cannot be run locally, and only run on cloud resources. Remote compute targets are reusable virtual compute environments where you run experiments and work-flows. Run the following code to create a GPU-enabled [`AmlCompute`](https://docs.microsoft.com/python/api/azureml-core/azureml.core.compute.amlcompute.amlcompute?view=azure-ml-py) target, and attach it to your workspace. See the [conceptual article](https://docs.microsoft.com/azure/machine-learning/service/concept-compute-target) for more information on compute targets." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core.compute import AmlCompute, ComputeTarget\n", - "from azureml.exceptions import ComputeTargetException\n", - "compute_name = \"gpu-cluster\"\n", - "\n", - "# checks to see if compute target already exists in workspace, else create it\n", - "try:\n", - " compute_target = ComputeTarget(workspace=ws, name=compute_name)\n", - "except ComputeTargetException:\n", - " config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\",\n", - " vm_priority=\"lowpriority\", \n", - " min_nodes=0, \n", - " max_nodes=1)\n", - "\n", - " compute_target = ComputeTarget.create(workspace=ws, name=compute_name, provisioning_configuration=config)\n", - " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Write a scoring script" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To do the scoring, you create a batch scoring script `batch_scoring.py`, and write it to the current directory. 
The script takes input images, applies the classification model, and outputs the predictions to a results file.\n", - "\n", - "The script `batch_scoring.py` takes the following parameters, which get passed from the `PythonScriptStep` that you create later:\n", - "\n", - "- `--model_name`: the name of the model being used\n", - "- `--label_dir` : the directory holding the `labels.txt` file \n", - "- `--dataset_path`: the directory containing the input images\n", - "- `--output_dir` : the script will run the model on the data and output a `results-label.txt` to this directory\n", - "- `--batch_size` : the batch size used in running the model\n", - "\n", - "The pipelines infrastructure uses the `ArgumentParser` class to pass parameters into pipeline steps. For example, in the code below the first argument `--model_name` is given the property identifier `model_name`. In the `main()` function, this property is accessed using `Model.get_model_path(args.model_name)`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile batch_scoring.py\n", - "\n", - "import os\n", - "import argparse\n", - "import datetime\n", - "import time\n", - "import tensorflow as tf\n", - "from math import ceil\n", - "import numpy as np\n", - "import shutil\n", - "from tensorflow.contrib.slim.python.slim.nets import inception_v3\n", - "from azureml.core.model import Model\n", - "\n", - "slim = tf.contrib.slim\n", - "\n", - "parser = argparse.ArgumentParser(description=\"Start a tensorflow model serving\")\n", - "parser.add_argument('--model_name', dest=\"model_name\", required=True)\n", - "parser.add_argument('--label_dir', dest=\"label_dir\", required=True)\n", - "parser.add_argument('--dataset_path', dest=\"dataset_path\", required=True)\n", - "parser.add_argument('--output_dir', dest=\"output_dir\", required=True)\n", - "parser.add_argument('--batch_size', dest=\"batch_size\", type=int, required=True)\n", - "\n", - "args = parser.parse_args()\n", - "\n", - "image_size = 299\n", - "num_channel = 3\n", - "\n", - "# create output directory if it does not exist\n", - "os.makedirs(args.output_dir, exist_ok=True)\n", - "\n", - "\n", - "def get_class_label_dict(label_file):\n", - " label = []\n", - " proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()\n", - " for l in proto_as_ascii_lines:\n", - " label.append(l.rstrip())\n", - " return label\n", - "\n", - "\n", - "class DataIterator:\n", - " def __init__(self, data_dir):\n", - " self.file_paths = []\n", - " image_list = os.listdir(data_dir)\n", - " self.file_paths = [data_dir + '/' + file_name.rstrip() for file_name in image_list]\n", - "\n", - " self.labels = [1 for file_name in self.file_paths]\n", - "\n", - " @property\n", - " def size(self):\n", - " return len(self.labels)\n", - "\n", - " def input_pipeline(self, batch_size):\n", - " images_tensor = tf.convert_to_tensor(self.file_paths, dtype=tf.string)\n", - " labels_tensor = tf.convert_to_tensor(self.labels, dtype=tf.int64)\n", - " input_queue = tf.train.slice_input_producer([images_tensor, labels_tensor], shuffle=False)\n", - " labels = input_queue[1]\n", - " images_content = tf.read_file(input_queue[0])\n", - "\n", - " image_reader = tf.image.decode_jpeg(images_content, channels=num_channel, name=\"jpeg_reader\")\n", - " float_caster = tf.cast(image_reader, tf.float32)\n", - " new_size = tf.constant([image_size, image_size], dtype=tf.int32)\n", - " images = tf.image.resize_images(float_caster, new_size)\n", - " images = 
tf.divide(tf.subtract(images, [0]), [255])\n", - "\n", - " image_batch, label_batch = tf.train.batch([images, labels], batch_size=batch_size, capacity=5 * batch_size)\n", - " return image_batch\n", - "\n", - "\n", - "def main(_):\n", - " label_file_name = os.path.join(args.label_dir, \"labels.txt\")\n", - " label_dict = get_class_label_dict(label_file_name)\n", - " classes_num = len(label_dict)\n", - " test_feeder = DataIterator(data_dir=args.dataset_path)\n", - " total_size = len(test_feeder.labels)\n", - " count = 0\n", - " \n", - " # get model from model registry\n", - " model_path = Model.get_model_path(args.model_name)\n", - " \n", - " with tf.Session() as sess:\n", - " test_images = test_feeder.input_pipeline(batch_size=args.batch_size)\n", - " with slim.arg_scope(inception_v3.inception_v3_arg_scope()):\n", - " input_images = tf.placeholder(tf.float32, [args.batch_size, image_size, image_size, num_channel])\n", - " logits, _ = inception_v3.inception_v3(input_images,\n", - " num_classes=classes_num,\n", - " is_training=False)\n", - " probabilities = tf.argmax(logits, 1)\n", - "\n", - " sess.run(tf.global_variables_initializer())\n", - " sess.run(tf.local_variables_initializer())\n", - " coord = tf.train.Coordinator()\n", - " threads = tf.train.start_queue_runners(sess=sess, coord=coord)\n", - " saver = tf.train.Saver()\n", - " saver.restore(sess, model_path)\n", - " out_filename = os.path.join(args.output_dir, \"result-labels.txt\")\n", - " with open(out_filename, \"w\") as result_file:\n", - " i = 0\n", - " while count < total_size and not coord.should_stop():\n", - " test_images_batch = sess.run(test_images)\n", - " file_names_batch = test_feeder.file_paths[i * args.batch_size:\n", - " min(test_feeder.size, (i + 1) * args.batch_size)]\n", - " results = sess.run(probabilities, feed_dict={input_images: test_images_batch})\n", - " new_add = min(args.batch_size, total_size - count)\n", - " count += new_add\n", - " i += 1\n", - " for j in range(new_add):\n", - " result_file.write(os.path.basename(file_names_batch[j]) + \": \" + label_dict[results[j]] + \"\\n\")\n", - " result_file.flush()\n", - " coord.request_stop()\n", - " coord.join(threads)\n", - "\n", - " shutil.copy(out_filename, \"./outputs/\")\n", - "\n", - "if __name__ == \"__main__\":\n", - " tf.app.run()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The pipeline in this tutorial only has one step and writes the output to a file, but for multi-step pipelines, you also use `ArgumentParser` to define a directory to write output data for input to subsequent steps. See the [notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb) for an example of passing data between multiple pipeline steps using the `ArgumentParser` design pattern." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Build and run the pipeline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Before running the pipeline, you create an object that defines the python environment and dependencies needed by your script `batch_scoring.py`. The main dependency required is Tensorflow, but you also install `azureml-defaults` for background processes from the SDK. Create a `RunConfiguration` object using the dependencies, and also specify Docker and Docker-GPU support." 
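The same dependencies could also be captured with the `Environment` class, mirroring the approach used in the image-classification tutorial above; a sketch, with `batchscore-env` as a hypothetical environment name:

```python
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_GPU_IMAGE

# Hypothetical Environment-based equivalent of the RunConfiguration built below.
env = Environment("batchscore-env")
env.python.conda_dependencies = CondaDependencies.create(
    pip_packages=["tensorflow-gpu==1.13.1", "azureml-defaults"])
env.docker.enabled = True
env.docker.base_image = DEFAULT_GPU_IMAGE
```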
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core.runconfig import DEFAULT_GPU_IMAGE\n", - "from azureml.core.runconfig import CondaDependencies, RunConfiguration\n", - "\n", - "cd = CondaDependencies.create(pip_packages=[\"tensorflow-gpu==1.13.1\", \"azureml-defaults\"])\n", - "\n", - "amlcompute_run_config = RunConfiguration(conda_dependencies=cd)\n", - "amlcompute_run_config.environment.docker.enabled = True\n", - "amlcompute_run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE\n", - "amlcompute_run_config.environment.spark.precache_packages = False" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Parameterize the pipeline\n", - "\n", - "Define a custom parameter for the pipeline to control the batch size. After the pipeline has been published and exposed via a REST endpoint, any configured parameters are also exposed and can be specified in the JSON payload when rerunning the pipeline with an HTTP request.\n", - "\n", - "Create a `PipelineParameter` object to enable this behavior, and define a name and default value." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.pipeline.core.graph import PipelineParameter\n", - "batch_size_param = PipelineParameter(name=\"param_batch_size\", default_value=20)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create the pipeline step\n", - "\n", - "A pipeline step is an object that encapsulates everything you need for running a pipeline including:\n", - "\n", - "* environment and dependency settings\n", - "* the compute resource to run the pipeline on\n", - "* input and output data, and any custom parameters\n", - "* reference to a script or SDK-logic to run during the step\n", - "\n", - "There are multiple classes that inherit from the parent class [`PipelineStep`](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.builder.pipelinestep?view=azure-ml-py) to assist with building a step using certain frameworks and stacks. In this example, you use the [`PythonScriptStep`](https://docs.microsoft.com/python/api/azureml-pipeline-steps/azureml.pipeline.steps.python_script_step.pythonscriptstep?view=azure-ml-py) class to define your step logic using a custom python script. Note that if an argument to your script is either an input to the step or output of the step, it must be defined **both** in the `arguments` array, **as well as** in either the `input` or `output` parameter, respectively. \n", - "\n", - "An object reference in the `outputs` array becomes available as an **input** for a subsequent pipeline step, for scenarios where there is more than one step." 
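To make the outputs-become-inputs pattern concrete, a hypothetical second step (not part of this tutorial) could consume the `output_dir` produced by the batch scoring step defined below; `postprocess.py` is an illustrative script name only:

```python
from azureml.pipeline.steps import PythonScriptStep

# Hypothetical follow-on step: the PipelineData written by the scoring step is
# listed in `inputs`, so it is made available for this step to read.
postprocess_step = PythonScriptStep(
    name="postprocess_scores",
    script_name="postprocess.py",          # illustrative script, not provided here
    arguments=["--scores_dir", output_dir],
    inputs=[output_dir],
    compute_target=compute_target,
    runconfig=amlcompute_run_config
)
```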
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.pipeline.steps import PythonScriptStep\n", - "\n", - "batch_score_step = PythonScriptStep(\n", - " name=\"batch_scoring\",\n", - " script_name=\"batch_scoring.py\",\n", - " arguments=[\"--dataset_path\", input_images, \n", - " \"--model_name\", \"inception\",\n", - " \"--label_dir\", label_dir, \n", - " \"--output_dir\", output_dir, \n", - " \"--batch_size\", batch_size_param],\n", - " compute_target=compute_target,\n", - " inputs=[input_images, label_dir],\n", - " outputs=[output_dir],\n", - " runconfig=amlcompute_run_config\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For a list of all classes for different step types, see the [steps package](https://docs.microsoft.com/python/api/azureml-pipeline-steps/azureml.pipeline.steps?view=azure-ml-py)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run the pipeline\n", - "\n", - "Now you run the pipeline. First create a `Pipeline` object with your workspace reference and the pipeline step you created. The `steps` parameter is an array of steps, and in this case there is only one step for batch scoring. To build pipelines with multiple steps, you place the steps in order in this array.\n", - "\n", - "Next use the `Experiment.submit()` function to submit the pipeline for execution. You also specify the custom parameter `param_batch_size`. The `wait_for_completion` function will output logs during the pipeline build process, which allows you to see current progress.\n", - "\n", - "Note: The first pipeline run takes roughly **15 minutes**, as all dependencies must be downloaded, a Docker image is created, and the Python environment is provisioned/created. Running it again takes significantly less time as those resources are reused. However, total run time depends on the workload of your scripts and processes running in each pipeline step." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core import Experiment\n", - "from azureml.pipeline.core import Pipeline\n", - "\n", - "pipeline = Pipeline(workspace=ws, steps=[batch_score_step])\n", - "pipeline_run = Experiment(ws, 'batch_scoring').submit(pipeline, pipeline_parameters={\"param_batch_size\": 20})\n", - "pipeline_run.wait_for_completion(show_output=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Download and review output" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Run the following code to download the output file created from the `batch_scoring.py` script, then explore the scoring results." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "step_run = list(pipeline_run.get_children())[0]\n", - "step_run.download_file(\"./outputs/result-labels.txt\")\n", - "\n", - "df = pd.read_csv(\"result-labels.txt\", delimiter=\":\", header=None)\n", - "df.columns = [\"Filename\", \"Prediction\"]\n", - "df.head(10)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Publish and run from REST endpoint" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Run the following code to publish the pipeline to your workspace. 
In your workspace in the portal, you can see metadata for the pipeline including run history and durations. You can also run the pipeline manually from the portal.\n", - "\n", - "Additionally, publishing the pipeline enables a REST endpoint to rerun the pipeline from any HTTP library on any platform." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "published_pipeline = pipeline_run.publish_pipeline(\n", - " name=\"Inception_v3_scoring\", description=\"Batch scoring using Inception v3 model\", version=\"1.0\")\n", - "\n", - "published_pipeline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To run the pipeline from the REST endpoint, you first need an OAuth2 Bearer-type authentication header. This example uses interactive authentication for illustration purposes, but for most production scenarios requiring automated or headless authentication, use service principle authentication as [described in this notebook](https://aka.ms/pl-restep-auth).\n", - "\n", - "Service principle authentication involves creating an **App Registration** in **Azure Active Directory**, generating a client secret, and then granting your service principal **role access** to your machine learning workspace. You then use the [`ServicePrincipalAuthentication`](https://docs.microsoft.com/python/api/azureml-core/azureml.core.authentication.serviceprincipalauthentication?view=azure-ml-py) class to manage your auth flow. \n", - "\n", - "Both `InteractiveLoginAuthentication` and `ServicePrincipalAuthentication` inherit from `AbstractAuthentication`, and in both cases you use the `get_authentication_header()` function in the same way to fetch the header." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core.authentication import InteractiveLoginAuthentication\n", - "\n", - "interactive_auth = InteractiveLoginAuthentication()\n", - "auth_header = interactive_auth.get_authentication_header()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Get the REST url from the `endpoint` property of the published pipeline object. You can also find the REST url in your workspace in the portal. Build an HTTP POST request to the endpoint, specifying your authentication header. Additionally, add a JSON payload object with the experiment name and the batch size parameter. As a reminder, the `param_batch_size` is passed through to your `batch_scoring.py` script because you defined it as a `PipelineParameter` object in the step configuration.\n", - "\n", - "Make the request to trigger the run. Access the `Id` key from the response dict to get the value of the run id." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "\n", - "rest_endpoint = published_pipeline.endpoint\n", - "response = requests.post(rest_endpoint, \n", - " headers=auth_header, \n", - " json={\"ExperimentName\": \"batch_scoring\",\n", - " \"ParameterAssignments\": {\"param_batch_size\": 50}})\n", - "run_id = response.json()[\"Id\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use the run id to monitor the status of the new run. This will take another 10-15 min to run and will look similar to the previous pipeline run, so if you don't need to see another pipeline run, you can skip watching the full output." 
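If you prefer not to render the `RunDetails` widget shown in the next cell, the run can also be polled or awaited directly; a small sketch using the run id returned by the REST call:

```python
from azureml.pipeline.core.run import PipelineRun

# Re-create a handle to the run triggered through the REST endpoint.
published_pipeline_run = PipelineRun(ws.experiments["batch_scoring"], run_id)
print(published_pipeline_run.get_status())

# Block until the run finishes without streaming the full log output.
published_pipeline_run.wait_for_completion(show_output=False)
```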
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.pipeline.core.run import PipelineRun\n", - "from azureml.widgets import RunDetails\n", - "\n", - "published_pipeline_run = PipelineRun(ws.experiments[\"batch_scoring\"], run_id)\n", - "RunDetails(published_pipeline_run).show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Clean up resources\n", - "\n", - "Do not complete this section if you plan on running other Azure Machine Learning service tutorials.\n", - "\n", - "### Stop the notebook VM\n", - "\n", - "If you used a cloud notebook server, stop the VM when you are not using it to reduce cost.\n", - "\n", - "1. In your workspace, select **Notebook VMs**.\n", - "1. From the list, select the VM.\n", - "1. Select **Stop**.\n", - "1. When you're ready to use the server again, select **Start**.\n", - "\n", - "### Delete everything\n", - "\n", - "If you don't plan to use the resources you created, delete them, so you don't incur any charges.\n", - "\n", - "1. In the Azure portal, select **Resource groups** on the far left.\n", - "1. From the list, select the resource group you created.\n", - "1. Select **Delete resource group**.\n", - "1. Enter the resource group name. Then select **Delete**.\n", - "\n", - "You can also keep the resource group but delete a single workspace. Display the workspace properties and select **Delete**." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Next steps\n", - "\n", - "In this machine learning pipelines tutorial, you did the following tasks:\n", - "\n", - "> * Built a pipeline with environment dependencies to run on a remote GPU compute resource\n", - "> * Created a scoring script to run batch predictions with a pre-trained Tensorflow model\n", - "> * Published a pipeline and enabled it to be run from a REST endpoint\n", - "\n", - "See the [how-to](https://docs.microsoft.com/azure/machine-learning/service/how-to-create-your-first-pipeline?view=azure-devops) for additional detail on building pipelines with the machine learning SDK." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "authors": [ - { - "name": "sanpil" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved. \n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Use Azure Machine Learning Pipelines for batch prediction\n", + "\n", + "In this tutorial, you use Azure Machine Learning service pipelines to run a batch scoring image classification job. The example job uses the pre-trained [Inception-V3](https://arxiv.org/abs/1512.00567) CNN (convolutional neural network) Tensorflow model to classify unlabeled images. Machine learning pipelines optimize your workflow with speed, portability, and reuse so you can focus on your expertise, machine learning, rather than on infrastructure and automation. 
After building and publishing a pipeline, you can configure a REST endpoint to enable triggering the pipeline from any HTTP library on any platform.\n", + "\n", + "\n", + "In this tutorial, you learn the following tasks:\n", + "\n", + "> * Configure workspace and download sample data\n", + "> * Create data objects to fetch and output data\n", + "> * Download, prepare, and register the model to your workspace\n", + "> * Provision compute targets and create a scoring script\n", + "> * Build, run, and publish a pipeline\n", + "> * Enable a REST endpoint for the pipeline\n", + "\n", + "If you don\u00e2\u20ac\u2122t have an Azure subscription, create a free account before you begin. Try the [free or paid version of Azure Machine Learning service](https://aka.ms/AMLFree) today." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "* Complete the [setup tutorial](https://docs.microsoft.com/azure/machine-learning/service/tutorial-1st-experiment-sdk-setup) if you don't already have an Azure Machine Learning service workspace or notebook virtual machine.\n", + "* After you complete the setup tutorial, open the **tutorials/tutorial-pipeline-batch-scoring-classification.ipynb** notebook using the same notebook server.\n", + "\n", + "This tutorial is also available on [GitHub](https://github.com/Azure/MachineLearningNotebooks/tree/master/tutorials) if you wish to run it in your own [local environment](how-to-configure-environment.md#local). Run `pip install azureml-sdk[notebooks] azureml-pipeline-core azureml-pipeline-steps pandas requests` to get the required packages." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure workspace and create datastore" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a workspace object from the existing workspace. A [Workspace](https://docs.microsoft.com/python/api/azureml-core/azureml.core.workspace.workspace?view=azure-ml-py) is a class that accepts your Azure subscription and resource information. It also creates a cloud resource to monitor and track your model runs. `Workspace.from_config()` reads the file **config.json** and loads the authentication details into an object named `ws`. `ws` is used throughout the rest of the code in this tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "ws = Workspace.from_config()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a datastore for sample images\n", + "\n", + "Get the ImageNet evaluation public data sample from the public blob container `sampledata` on the account `pipelinedata`. Calling `register_azure_blob_container()` makes the data available to the workspace under the name `images_datastore`. Then specify the workspace default datastore as the output datastore, which you use for scoring output in the pipeline." 
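Once the registration cell below has run, the datastore can be retrieved again by the name it was registered under; a minimal sketch:

```python
from azureml.core.datastore import Datastore

# Fetch the registered datastore by name and confirm what it points to.
images_ds = Datastore.get(ws, datastore_name="images_datastore")
print(images_ds.datastore_type, images_ds.account_name, images_ds.container_name)
```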
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.datastore import Datastore\n", + "\n", + "batchscore_blob = Datastore.register_azure_blob_container(ws, \n", + " datastore_name=\"images_datastore\", \n", + " container_name=\"sampledata\", \n", + " account_name=\"pipelinedata\", \n", + " overwrite=True)\n", + "\n", + "def_data_store = ws.get_default_datastore()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create data objects\n", + "\n", + "When building pipelines, `DataReference` objects are used for reading data from workspace datastores, and `PipelineData` objects are used for transferring intermediate data between pipeline steps.\n", + "\n", + "This batch scoring example only uses one pipeline step, but in use-cases with multiple steps, the typical flow will include:\n", + "\n", + "1. Using `DataReference` objects as **inputs** to fetch raw data, performing some transformations, then **outputting** a `PipelineData` object.\n", + "1. Use the previous step's `PipelineData` **output object** as an *input object*, repeated for subsequent steps.\n", + "\n", + "For this scenario you create `DataReference` objects corresponding to the datastore directories for both the input images and the classification labels (y-test values). You also create a `PipelineData` object for the batch scoring output data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.data.data_reference import DataReference\n", + "from azureml.pipeline.core import PipelineData\n", + "\n", + "input_images = DataReference(datastore=batchscore_blob, \n", + " data_reference_name=\"input_images\",\n", + " path_on_datastore=\"batchscoring/images\",\n", + " mode=\"download\"\n", + " )\n", + "\n", + "label_dir = DataReference(datastore=batchscore_blob, \n", + " data_reference_name=\"input_labels\",\n", + " path_on_datastore=\"batchscoring/labels\",\n", + " mode=\"download\" \n", + " )\n", + "\n", + "output_dir = PipelineData(name=\"scores\", \n", + " datastore=def_data_store, \n", + " output_path_on_compute=\"batchscoring/results\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download and register the model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download the pre-trained Tensorflow model to use it for batch scoring in the pipeline. First create a local directory where you store the model, then download and extract it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import tarfile\n", + "import urllib.request\n", + "\n", + "if not os.path.isdir(\"models\"):\n", + " os.mkdir(\"models\")\n", + " \n", + "response = urllib.request.urlretrieve(\"http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz\", \"model.tar.gz\")\n", + "tar = tarfile.open(\"model.tar.gz\", \"r:gz\")\n", + "tar.extractall(\"models\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now you register the model to your workspace, which allows you to easily retrieve it in the pipeline process. In the `register()` static function, the `model_name` parameter is the key you use to locate your model throughout the SDK." 
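After the registration cell below completes, the model can be looked up anywhere in the SDK by that same key; a short sketch:

```python
from azureml.core.model import Model

# Retrieve the latest registered version of the model by name.
registered_model = Model(ws, name="inception")
print(registered_model.name, registered_model.version)
```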
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.model import Model\n", + " \n", + "model = Model.register(model_path=\"models/inception_v3.ckpt\",\n", + " model_name=\"inception\",\n", + " tags={\"pretrained\": \"inception\"},\n", + " description=\"Imagenet trained tensorflow inception\",\n", + " workspace=ws)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create and attach remote compute target\n", + "\n", + "Azure Machine Learning service pipelines cannot be run locally, and only run on cloud resources. Remote compute targets are reusable virtual compute environments where you run experiments and work-flows. Run the following code to create a GPU-enabled [`AmlCompute`](https://docs.microsoft.com/python/api/azureml-core/azureml.core.compute.amlcompute.amlcompute?view=azure-ml-py) target, and attach it to your workspace. See the [conceptual article](https://docs.microsoft.com/azure/machine-learning/service/concept-compute-target) for more information on compute targets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import AmlCompute, ComputeTarget\n", + "from azureml.exceptions import ComputeTargetException\n", + "compute_name = \"gpu-cluster\"\n", + "\n", + "# checks to see if compute target already exists in workspace, else create it\n", + "try:\n", + " compute_target = ComputeTarget(workspace=ws, name=compute_name)\n", + "except ComputeTargetException:\n", + " config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\",\n", + " vm_priority=\"lowpriority\", \n", + " min_nodes=0, \n", + " max_nodes=1)\n", + "\n", + " compute_target = ComputeTarget.create(workspace=ws, name=compute_name, provisioning_configuration=config)\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Write a scoring script" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To do the scoring, you create a batch scoring script `batch_scoring.py`, and write it to the current directory. The script takes input images, applies the classification model, and outputs the predictions to a results file.\n", + "\n", + "The script `batch_scoring.py` takes the following parameters, which get passed from the `PythonScriptStep` that you create later:\n", + "\n", + "- `--model_name`: the name of the model being used\n", + "- `--label_dir` : the directory holding the `labels.txt` file \n", + "- `--dataset_path`: the directory containing the input images\n", + "- `--output_dir` : the script will run the model on the data and output a `results-label.txt` to this directory\n", + "- `--batch_size` : the batch size used in running the model\n", + "\n", + "The pipelines infrastructure uses the `ArgumentParser` class to pass parameters into pipeline steps. For example, in the code below the first argument `--model_name` is given the property identifier `model_name`. In the `main()` function, this property is accessed using `Model.get_model_path(args.model_name)`." 
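As a stand-alone illustration of that argument-passing pattern (separate from the full script below), parsing an explicit argument list shows how a step argument such as `--model_name` becomes a property on `args`:

```python
import argparse

# Values are normally supplied by the pipeline step's `arguments` list;
# they are passed explicitly here only to show the mapping.
parser = argparse.ArgumentParser()
parser.add_argument('--model_name', dest="model_name", required=True)
args = parser.parse_args(['--model_name', 'inception'])
print(args.model_name)  # -> inception
```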
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile batch_scoring.py\n", + "\n", + "import os\n", + "import argparse\n", + "import datetime\n", + "import time\n", + "import tensorflow as tf\n", + "from math import ceil\n", + "import numpy as np\n", + "import shutil\n", + "from tensorflow.contrib.slim.python.slim.nets import inception_v3\n", + "from azureml.core.model import Model\n", + "\n", + "slim = tf.contrib.slim\n", + "\n", + "parser = argparse.ArgumentParser(description=\"Start a tensorflow model serving\")\n", + "parser.add_argument('--model_name', dest=\"model_name\", required=True)\n", + "parser.add_argument('--label_dir', dest=\"label_dir\", required=True)\n", + "parser.add_argument('--dataset_path', dest=\"dataset_path\", required=True)\n", + "parser.add_argument('--output_dir', dest=\"output_dir\", required=True)\n", + "parser.add_argument('--batch_size', dest=\"batch_size\", type=int, required=True)\n", + "\n", + "args = parser.parse_args()\n", + "\n", + "image_size = 299\n", + "num_channel = 3\n", + "\n", + "# create output directory if it does not exist\n", + "os.makedirs(args.output_dir, exist_ok=True)\n", + "\n", + "\n", + "def get_class_label_dict(label_file):\n", + " label = []\n", + " proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()\n", + " for l in proto_as_ascii_lines:\n", + " label.append(l.rstrip())\n", + " return label\n", + "\n", + "\n", + "class DataIterator:\n", + " def __init__(self, data_dir):\n", + " self.file_paths = []\n", + " image_list = os.listdir(data_dir)\n", + " self.file_paths = [data_dir + '/' + file_name.rstrip() for file_name in image_list]\n", + "\n", + " self.labels = [1 for file_name in self.file_paths]\n", + "\n", + " @property\n", + " def size(self):\n", + " return len(self.labels)\n", + "\n", + " def input_pipeline(self, batch_size):\n", + " images_tensor = tf.convert_to_tensor(self.file_paths, dtype=tf.string)\n", + " labels_tensor = tf.convert_to_tensor(self.labels, dtype=tf.int64)\n", + " input_queue = tf.train.slice_input_producer([images_tensor, labels_tensor], shuffle=False)\n", + " labels = input_queue[1]\n", + " images_content = tf.read_file(input_queue[0])\n", + "\n", + " image_reader = tf.image.decode_jpeg(images_content, channels=num_channel, name=\"jpeg_reader\")\n", + " float_caster = tf.cast(image_reader, tf.float32)\n", + " new_size = tf.constant([image_size, image_size], dtype=tf.int32)\n", + " images = tf.image.resize_images(float_caster, new_size)\n", + " images = tf.divide(tf.subtract(images, [0]), [255])\n", + "\n", + " image_batch, label_batch = tf.train.batch([images, labels], batch_size=batch_size, capacity=5 * batch_size)\n", + " return image_batch\n", + "\n", + "\n", + "def main(_):\n", + " label_file_name = os.path.join(args.label_dir, \"labels.txt\")\n", + " label_dict = get_class_label_dict(label_file_name)\n", + " classes_num = len(label_dict)\n", + " test_feeder = DataIterator(data_dir=args.dataset_path)\n", + " total_size = len(test_feeder.labels)\n", + " count = 0\n", + " \n", + " # get model from model registry\n", + " model_path = Model.get_model_path(args.model_name)\n", + " \n", + " with tf.Session() as sess:\n", + " test_images = test_feeder.input_pipeline(batch_size=args.batch_size)\n", + " with slim.arg_scope(inception_v3.inception_v3_arg_scope()):\n", + " input_images = tf.placeholder(tf.float32, [args.batch_size, image_size, image_size, num_channel])\n", + " logits, _ = inception_v3.inception_v3(input_images,\n", + 
" num_classes=classes_num,\n", + " is_training=False)\n", + " probabilities = tf.argmax(logits, 1)\n", + "\n", + " sess.run(tf.global_variables_initializer())\n", + " sess.run(tf.local_variables_initializer())\n", + " coord = tf.train.Coordinator()\n", + " threads = tf.train.start_queue_runners(sess=sess, coord=coord)\n", + " saver = tf.train.Saver()\n", + " saver.restore(sess, model_path)\n", + " out_filename = os.path.join(args.output_dir, \"result-labels.txt\")\n", + " with open(out_filename, \"w\") as result_file:\n", + " i = 0\n", + " while count < total_size and not coord.should_stop():\n", + " test_images_batch = sess.run(test_images)\n", + " file_names_batch = test_feeder.file_paths[i * args.batch_size:\n", + " min(test_feeder.size, (i + 1) * args.batch_size)]\n", + " results = sess.run(probabilities, feed_dict={input_images: test_images_batch})\n", + " new_add = min(args.batch_size, total_size - count)\n", + " count += new_add\n", + " i += 1\n", + " for j in range(new_add):\n", + " result_file.write(os.path.basename(file_names_batch[j]) + \": \" + label_dict[results[j]] + \"\\n\")\n", + " result_file.flush()\n", + " coord.request_stop()\n", + " coord.join(threads)\n", + "\n", + " shutil.copy(out_filename, \"./outputs/\")\n", + "\n", + "if __name__ == \"__main__\":\n", + " tf.app.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The pipeline in this tutorial only has one step and writes the output to a file, but for multi-step pipelines, you also use `ArgumentParser` to define a directory to write output data for input to subsequent steps. See the [notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb) for an example of passing data between multiple pipeline steps using the `ArgumentParser` design pattern." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Build and run the pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before running the pipeline, you create an object that defines the python environment and dependencies needed by your script `batch_scoring.py`. The main dependency required is Tensorflow, but you also install `azureml-defaults` for background processes from the SDK. Create a `RunConfiguration` object using the dependencies, and also specify Docker and Docker-GPU support." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import DEFAULT_GPU_IMAGE\n", + "from azureml.core.runconfig import CondaDependencies, RunConfiguration\n", + "\n", + "cd = CondaDependencies.create(pip_packages=[\"tensorflow-gpu==1.13.1\", \"azureml-defaults\"])\n", + "\n", + "amlcompute_run_config = RunConfiguration(conda_dependencies=cd)\n", + "amlcompute_run_config.environment.docker.enabled = True\n", + "amlcompute_run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE\n", + "amlcompute_run_config.environment.spark.precache_packages = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parameterize the pipeline\n", + "\n", + "Define a custom parameter for the pipeline to control the batch size. 
After the pipeline has been published and exposed via a REST endpoint, any configured parameters are also exposed and can be specified in the JSON payload when rerunning the pipeline with an HTTP request.\n", + "\n", + "Create a `PipelineParameter` object to enable this behavior, and define a name and default value." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.core.graph import PipelineParameter\n", + "batch_size_param = PipelineParameter(name=\"param_batch_size\", default_value=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create the pipeline step\n", + "\n", + "A pipeline step is an object that encapsulates everything you need for running a pipeline including:\n", + "\n", + "* environment and dependency settings\n", + "* the compute resource to run the pipeline on\n", + "* input and output data, and any custom parameters\n", + "* reference to a script or SDK-logic to run during the step\n", + "\n", + "There are multiple classes that inherit from the parent class [`PipelineStep`](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.builder.pipelinestep?view=azure-ml-py) to assist with building a step using certain frameworks and stacks. In this example, you use the [`PythonScriptStep`](https://docs.microsoft.com/python/api/azureml-pipeline-steps/azureml.pipeline.steps.python_script_step.pythonscriptstep?view=azure-ml-py) class to define your step logic using a custom python script. Note that if an argument to your script is either an input to the step or output of the step, it must be defined **both** in the `arguments` array, **as well as** in either the `input` or `output` parameter, respectively. \n", + "\n", + "An object reference in the `outputs` array becomes available as an **input** for a subsequent pipeline step, for scenarios where there is more than one step." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.steps import PythonScriptStep\n", + "\n", + "batch_score_step = PythonScriptStep(\n", + " name=\"batch_scoring\",\n", + " script_name=\"batch_scoring.py\",\n", + " arguments=[\"--dataset_path\", input_images, \n", + " \"--model_name\", \"inception\",\n", + " \"--label_dir\", label_dir, \n", + " \"--output_dir\", output_dir, \n", + " \"--batch_size\", batch_size_param],\n", + " compute_target=compute_target,\n", + " inputs=[input_images, label_dir],\n", + " outputs=[output_dir],\n", + " runconfig=amlcompute_run_config\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For a list of all classes for different step types, see the [steps package](https://docs.microsoft.com/python/api/azureml-pipeline-steps/azureml.pipeline.steps?view=azure-ml-py)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run the pipeline\n", + "\n", + "Now you run the pipeline. First create a `Pipeline` object with your workspace reference and the pipeline step you created. The `steps` parameter is an array of steps, and in this case there is only one step for batch scoring. To build pipelines with multiple steps, you place the steps in order in this array.\n", + "\n", + "Next use the `Experiment.submit()` function to submit the pipeline for execution. You also specify the custom parameter `param_batch_size`. 
The `wait_for_completion` function will output logs during the pipeline build process, which allows you to see current progress.\n", + "\n", + "Note: The first pipeline run takes roughly **15 minutes**, as all dependencies must be downloaded, a Docker image is created, and the Python environment is provisioned/created. Running it again takes significantly less time as those resources are reused. However, total run time depends on the workload of your scripts and processes running in each pipeline step." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "from azureml.pipeline.core import Pipeline\n", + "\n", + "pipeline = Pipeline(workspace=ws, steps=[batch_score_step])\n", + "pipeline_run = Experiment(ws, 'batch_scoring').submit(pipeline, pipeline_parameters={\"param_batch_size\": 20})\n", + "pipeline_run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download and review output" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run the following code to download the output file created from the `batch_scoring.py` script, then explore the scoring results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "step_run = list(pipeline_run.get_children())[0]\n", + "step_run.download_file(\"./outputs/result-labels.txt\")\n", + "\n", + "df = pd.read_csv(\"result-labels.txt\", delimiter=\":\", header=None)\n", + "df.columns = [\"Filename\", \"Prediction\"]\n", + "df.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Publish and run from REST endpoint" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run the following code to publish the pipeline to your workspace. In your workspace in the portal, you can see metadata for the pipeline including run history and durations. You can also run the pipeline manually from the portal.\n", + "\n", + "Additionally, publishing the pipeline enables a REST endpoint to rerun the pipeline from any HTTP library on any platform." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "published_pipeline = pipeline_run.publish_pipeline(\n", + " name=\"Inception_v3_scoring\", description=\"Batch scoring using Inception v3 model\", version=\"1.0\")\n", + "\n", + "published_pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To run the pipeline from the REST endpoint, you first need an OAuth2 Bearer-type authentication header. This example uses interactive authentication for illustration purposes, but for most production scenarios requiring automated or headless authentication, use service principle authentication as [described in this notebook](https://aka.ms/pl-restep-auth).\n", + "\n", + "Service principle authentication involves creating an **App Registration** in **Azure Active Directory**, generating a client secret, and then granting your service principal **role access** to your machine learning workspace. You then use the [`ServicePrincipalAuthentication`](https://docs.microsoft.com/python/api/azureml-core/azureml.core.authentication.serviceprincipalauthentication?view=azure-ml-py) class to manage your auth flow. 
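A minimal sketch of the service principal flow, assuming the app registration already exists and has been granted access to the workspace; the tenant, client id, and secret values are placeholders and should come from a secure store rather than being hard-coded:

```python
from azureml.core.authentication import ServicePrincipalAuthentication

# Placeholder credentials -- never hard-code real secrets.
sp_auth = ServicePrincipalAuthentication(
    tenant_id="<tenant-id>",
    service_principal_id="<client-id>",
    service_principal_password="<client-secret>")

# Same call as in the interactive flow below.
auth_header = sp_auth.get_authentication_header()
```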
\n", + "\n", + "Both `InteractiveLoginAuthentication` and `ServicePrincipalAuthentication` inherit from `AbstractAuthentication`, and in both cases you use the `get_authentication_header()` function in the same way to fetch the header." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.authentication import InteractiveLoginAuthentication\n", + "\n", + "interactive_auth = InteractiveLoginAuthentication()\n", + "auth_header = interactive_auth.get_authentication_header()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get the REST URL from the `endpoint` property of the published pipeline object. You can also find the REST URL in your workspace in the portal. Build an HTTP POST request to the endpoint, specifying your authentication header. Additionally, add a JSON payload object with the experiment name and the batch size parameter. As a reminder, the `param_batch_size` is passed through to your `batch_scoring.py` script because you defined it as a `PipelineParameter` object in the step configuration.\n", + "\n", + "Make the request to trigger the run. Access the `Id` key of the response dictionary to get the value of the run ID." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "\n", + "rest_endpoint = published_pipeline.endpoint\n", + "response = requests.post(rest_endpoint, \n", + "                         headers=auth_header, \n", + "                         json={\"ExperimentName\": \"batch_scoring\",\n", + "                               \"ParameterAssignments\": {\"param_batch_size\": 50}})\n", + "run_id = response.json()[\"Id\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the run ID to monitor the status of the new run. This will take another 10-15 minutes to run and will look similar to the previous pipeline run, so if you don't need to see another pipeline run, you can skip watching the full output." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.core.run import PipelineRun\n", + "from azureml.widgets import RunDetails\n", + "\n", + "published_pipeline_run = PipelineRun(ws.experiments[\"batch_scoring\"], run_id)\n", + "RunDetails(published_pipeline_run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up resources\n", + "\n", + "Do not complete this section if you plan on running other Azure Machine Learning service tutorials.\n", + "\n", + "### Stop the notebook VM\n", + "\n", + "If you used a cloud notebook server, stop the VM when you are not using it to reduce cost.\n", + "\n", + "1. In your workspace, select **Notebook VMs**.\n", + "1. From the list, select the VM.\n", + "1. Select **Stop**.\n", + "1. When you're ready to use the server again, select **Start**.\n", + "\n", + "### Delete everything\n", + "\n", + "If you don't plan to use the resources you created, delete them so you don't incur any charges.\n", + "\n", + "1. In the Azure portal, select **Resource groups** on the far left.\n", + "1. From the list, select the resource group you created.\n", + "1. Select **Delete resource group**.\n", + "1. Enter the resource group name. Then select **Delete**.\n", + "\n", + "You can also keep the resource group but delete a single workspace. Display the workspace properties and select **Delete**."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Next steps\n", + "\n", + "In this machine learning pipelines tutorial, you did the following tasks:\n", + "\n", + "> * Built a pipeline with environment dependencies to run on a remote GPU compute resource\n", + "> * Created a scoring script to run batch predictions with a pre-trained TensorFlow model\n", + "> * Published a pipeline and enabled it to be run from a REST endpoint\n", + "\n", + "See the [how-to](https://docs.microsoft.com/azure/machine-learning/service/how-to-create-your-first-pipeline?view=azure-devops) for additional detail on building pipelines with the machine learning SDK." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } ], - "friendly_name": "Use pipelines for batch scoring", - "exclude_from_index": false, - "index_order": 1, - "category": "tutorial", - "star_tag": ["featured"], - "task": "Batch scoring", - "datasets": ["None"], - "compute": ["AmlCompute"], - "deployment": ["Published pipeline"], - "framework": ["Azure ML Pipelines"], - "tags": ["None"], - "kernelspec": { - "display_name": "Python 3.6", - "language": "python", - "name": "python36" + "metadata": { + "authors": [ + { + "name": "sanpil" + } + ], + "friendly_name": "Use pipelines for batch scoring", + "exclude_from_index": false, + "index_order": 1, + "category": "tutorial", + "star_tag": [ + "featured" + ], + "task": "Batch scoring", + "datasets": [ + "None" + ], + "compute": [ + "AmlCompute" + ], + "deployment": [ + "Published pipeline" + ], + "framework": [ + "Azure ML Pipelines" + ], + "tags": [ + "None" + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "msauthor": "trbye" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - }, - "msauthor": "trbye" - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file