Mirror of https://github.com/Azure/MachineLearningNotebooks.git, synced 2025-12-20 09:37:04 -05:00

Compare commits: azureml-sd ... release_up (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | 6a26f250f2 |  |

@@ -103,7 +103,7 @@
 "source": [
 "import azureml.core\n",
 "\n",
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -21,8 +21,8 @@ dependencies:
 
 - pip:
   # Required packages for AzureML execution, history, and data preparation.
-  - azureml-widgets~=1.24.0
+  - azureml-widgets~=1.25.0
   - pytorch-transformers==1.0.0
   - spacy==2.1.8
   - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
-  - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.24.0/validated_win32_requirements.txt [--no-deps]
+  - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.25.0/validated_win32_requirements.txt [--no-deps]

@@ -21,8 +21,8 @@ dependencies:
 
 - pip:
   # Required packages for AzureML execution, history, and data preparation.
-  - azureml-widgets~=1.24.0
+  - azureml-widgets~=1.25.0
   - pytorch-transformers==1.0.0
   - spacy==2.1.8
   - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
-  - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.24.0/validated_linux_requirements.txt [--no-deps]
+  - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.25.0/validated_linux_requirements.txt [--no-deps]

@@ -22,8 +22,8 @@ dependencies:
 
 - pip:
   # Required packages for AzureML execution, history, and data preparation.
-  - azureml-widgets~=1.24.0
+  - azureml-widgets~=1.25.0
   - pytorch-transformers==1.0.0
   - spacy==2.1.8
   - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
-  - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.24.0/validated_darwin_requirements.txt [--no-deps]
+  - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.25.0/validated_darwin_requirements.txt [--no-deps]

@@ -32,6 +32,7 @@ if [ $? -ne 0 ]; then
 fi
 
 sed -i '' 's/AZUREML-SDK-VERSION/latest/' $AUTOML_ENV_FILE
+brew install libomp
 
 if source activate $CONDA_ENV_NAME 2> /dev/null
 then

@@ -105,7 +105,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -93,7 +93,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -96,7 +96,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -81,7 +81,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -39,6 +39,7 @@
 "source": [
 "## Introduction\n",
 "In this example we use an experimental feature, Model Proxy, to do a predict on the best generated model without downloading the model locally. The prediction will happen on same compute and environment that was used to train the model. This feature is currently in the experimental state, which means that the API is prone to changing, please make sure to run on the latest version of this notebook if you face any issues.\n",
+"This notebook will also leverage MLFlow for saving models, allowing for more portability of the resulting models. See https://docs.microsoft.com/en-us/azure/machine-learning/how-to-use-mlflow for more details around MLFlow is AzureML.\n",
 "\n",
 "If you are using an Azure Machine Learning Compute Instance, you are all set. Otherwise, go through the [configuration](../../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
 "\n",

@@ -90,7 +91,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -212,10 +213,11 @@
 " \"n_cross_validations\": 3,\n",
 " \"primary_metric\": 'r2_score',\n",
 " \"enable_early_stopping\": True, \n",
-" \"experiment_timeout_hours\": 0.3, #for real scenarios we reccommend a timeout of at least one hour \n",
+" \"experiment_timeout_hours\": 0.3, #for real scenarios we recommend a timeout of at least one hour \n",
 " \"max_concurrent_iterations\": 4,\n",
 " \"max_cores_per_iteration\": -1,\n",
 " \"verbosity\": logging.INFO,\n",
+" \"save_mlflow\": True,\n",
 "}\n",
 "\n",
 "automl_config = AutoMLConfig(task = 'regression',\n",

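For orientation, the `save_mlflow` flag and the Model Proxy feature mentioned above fit together roughly as follows. A minimal sketch, not part of the commit, assuming a completed AutoML run object `remote_run` and a prepared `X_test`; ModelProxy is experimental and its surface may shift between SDK versions:

# Hedged sketch: predict with the best generated model on the training
# compute, without downloading the model locally.
from azureml.train.automl.model_proxy import ModelProxy

best_run = remote_run.get_best_child()   # `remote_run` assumed from the notebook
model_proxy = ModelProxy(best_run)
y_pred = model_proxy.predict(X_test)     # executes remotely; `X_test` assumed defined
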
@@ -113,7 +113,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -365,7 +365,9 @@
 "source": [
 "from azureml.automl.core.forecasting_parameters import ForecastingParameters\n",
 "forecasting_parameters = ForecastingParameters(\n",
-" time_column_name=time_column_name, forecast_horizon=forecast_horizon\n",
+" time_column_name=time_column_name,\n",
+" forecast_horizon=forecast_horizon,\n",
+" freq='MS' # Set the forecast frequency to be monthly (start of the month)\n",
 ")\n",
 "\n",
 "automl_config = AutoMLConfig(task='forecasting', \n",

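A note on the `freq` values added in this and the following forecasting hunks: they are pandas offset aliases ('MS' month start, 'D' daily, 'H' hourly, 'W-THU' weekly anchored on Thursday). A minimal sketch of the new call shape, assuming the notebook's `time_column_name` and `forecast_horizon` variables:

from azureml.automl.core.forecasting_parameters import ForecastingParameters

forecasting_parameters = ForecastingParameters(
    time_column_name=time_column_name,  # assumed defined earlier in the notebook
    forecast_horizon=forecast_horizon,  # assumed defined earlier in the notebook
    freq='MS'                           # pandas offset alias; month-start frequency
)
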
@@ -87,7 +87,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -318,7 +318,8 @@
 " time_column_name=time_column_name,\n",
 " forecast_horizon=forecast_horizon,\n",
 " country_or_region_for_holidays='US', # set country_or_region will trigger holiday featurizer\n",
-" target_lags='auto' # use heuristic based lag setting \n",
+" target_lags='auto', # use heuristic based lag setting\n",
+" freq='D' # Set the forecast frequency to be daily\n",
 ")\n",
 "\n",
 "automl_config = AutoMLConfig(task='forecasting', \n",

@@ -97,7 +97,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -342,7 +342,9 @@
 "source": [
 "from azureml.automl.core.forecasting_parameters import ForecastingParameters\n",
 "forecasting_parameters = ForecastingParameters(\n",
-" time_column_name=time_column_name, forecast_horizon=forecast_horizon\n",
+" time_column_name=time_column_name,\n",
+" forecast_horizon=forecast_horizon,\n",
+" freq='H' # Set the forecast frequency to be hourly\n",
 ")\n",
 "\n",
 "automl_config = AutoMLConfig(task='forecasting', \n",

@@ -94,7 +94,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -319,7 +319,8 @@
 " time_column_name=TIME_COLUMN_NAME,\n",
 " forecast_horizon=forecast_horizon,\n",
 " time_series_id_column_names=[ TIME_SERIES_ID_COLUMN_NAME ],\n",
-" target_lags=lags\n",
+" target_lags=lags,\n",
+" freq='H' # Set the forecast frequency to be hourly\n",
 ")"
 ]
 },

@@ -82,7 +82,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -423,7 +423,8 @@
 "forecasting_parameters = ForecastingParameters(\n",
 " time_column_name=time_column_name,\n",
 " forecast_horizon=n_test_periods,\n",
-" time_series_id_column_names=time_series_id_column_names\n",
+" time_series_id_column_names=time_series_id_column_names,\n",
+" freq='W-THU' # Set the forecast frequency to be weekly (start on each Thursday)\n",
 ")\n",
 "\n",
 "automl_config = AutoMLConfig(task='forecasting',\n",

@@ -96,7 +96,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -96,7 +96,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -27,7 +27,7 @@ automl_run = Run(experiment=experiment, run_id='<<run_id>>')
 
 # Check if this AutoML model is explainable
 if not automl_check_model_if_explainable(automl_run):
-    raise Exception("Model explanations is currently not supported for " + automl_run.get_properties().get(
+    raise Exception("Model explanations are currently not supported for " + automl_run.get_properties().get(
         'run_algorithm'))
 
 # Download the best model from the artifact store

@@ -38,16 +38,16 @@ fitted_model = joblib.load('model.pkl')
 
 # Get the train dataset from the workspace
 train_dataset = Dataset.get_by_name(workspace=ws, name='<<train_dataset_name>>')
-# Drop the lablled column to get the training set.
+# Drop the labeled column to get the training set.
 X_train = train_dataset.drop_columns(columns=['<<target_column_name>>'])
 y_train = train_dataset.keep_columns(columns=['<<target_column_name>>'], validate=True)
 
-# Get the train dataset from the workspace
+# Get the test dataset from the workspace
 test_dataset = Dataset.get_by_name(workspace=ws, name='<<test_dataset_name>>')
-# Drop the lablled column to get the testing set.
+# Drop the labeled column to get the testing set.
 X_test = test_dataset.drop_columns(columns=['<<target_column_name>>'])
 
-# Setup the class for explaining the AtuoML models
+# Setup the class for explaining the AutoML models
 automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, '<<task>>',
                                                              X=X_train, X_test=X_test,
                                                              y=y_train)

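In these samples, the setup object produced above typically feeds a surrogate explainer. A hedged sketch of that next step; the names MimicWrapper and LGBMExplainableModel come from the documented AutoML interpretability pattern, not from this diff, and `ws`/`automl_run` are assumed from the script:

from azureml.interpret.mimic_wrapper import MimicWrapper
from interpret_community.mimic.models.lightgbm_model import LGBMExplainableModel

# Fit a surrogate explainer on the engineered features from the setup object.
explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator,
                         LGBMExplainableModel,
                         init_dataset=automl_explainer_setup_obj.X_transform,
                         run=automl_run,
                         features=automl_explainer_setup_obj.engineered_feature_names)
engineered_explanations = explainer.explain(
    ['local', 'global'], eval_dataset=automl_explainer_setup_obj.X_test_transform)
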
@@ -92,7 +92,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },

@@ -81,12 +81,12 @@
 "outputs": [],
 "source": [
 "from azureml.core.compute import AmlCompute, ComputeTarget\n",
-"from azureml.core.datastore import Datastore\n",
-"from azureml.data.data_reference import DataReference\n",
-"from azureml.pipeline.core import Pipeline, PipelineData\n",
+"from azureml.core import Datastore, Dataset\n",
+"from azureml.pipeline.core import Pipeline\n",
 "from azureml.pipeline.steps import PythonScriptStep\n",
 "from azureml.core.runconfig import CondaDependencies, RunConfiguration\n",
-"from azureml.core.compute_target import ComputeTargetException"
+"from azureml.core.compute_target import ComputeTargetException\n",
+"from azureml.data import OutputFileDatasetConfig"
 ]
 },
 {

@@ -297,9 +297,7 @@
 "outputs": [],
 "source": [
 "video_name=os.getenv(\"STYLE_TRANSFER_VIDEO_NAME\", \"orangutan.mp4\") \n",
-"orangutan_video = DataReference(datastore=video_ds,\n",
-" data_reference_name=\"video\",\n",
-" path_on_datastore=video_name, mode=\"download\")"
+"orangutan_video = Dataset.File.from_files((video_ds,video_name))"
 ]
 },
 {

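The DataReference-to-FileDataset move above generalizes; a minimal sketch under the notebook's assumptions (`video_ds` is a registered Datastore, `video_name` a path within it):

from azureml.core import Dataset

# Build a FileDataset from a (datastore, relative_path) tuple, then mount it
# into the consuming step at runtime instead of passing a DataReference.
orangutan_video = Dataset.File.from_files((video_ds, video_name))
video_input = orangutan_video.as_mount()
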
@@ -325,13 +323,11 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"ffmpeg_audio = PipelineData(name=\"ffmpeg_audio\", datastore=default_datastore)\n",
-"processed_images = PipelineData(name=\"processed_images\", datastore=default_datastore)\n",
-"output_video = PipelineData(name=\"output_video\", datastore=default_datastore)\n",
+"ffmpeg_audio = OutputFileDatasetConfig(name=\"ffmpeg_audio\")\n",
+"processed_images = OutputFileDatasetConfig(name=\"processed_images\")\n",
+"output_video = OutputFileDatasetConfig(name=\"output_video\")\n",
 "\n",
-"ffmpeg_images_ds_name = \"ffmpeg_images_data\"\n",
-"ffmpeg_images = PipelineData(name=\"ffmpeg_images\", datastore=default_datastore)\n",
-"ffmpeg_images_file_dataset = ffmpeg_images.as_dataset()"
+"ffmpeg_images = OutputFileDatasetConfig(name=\"ffmpeg_images\")"
 ]
 },
 {

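The replacement API in one place, for readers following the migration; a sketch assuming default behavior (OutputFileDatasetConfig writes to the workspace default datastore unless a destination is set):

from azureml.data import OutputFileDatasetConfig

# Each intermediate output becomes an OutputFileDatasetConfig; downstream
# steps consume it via .as_input(), so the explicit inputs=/outputs= lists
# on PythonScriptStep below can be dropped.
ffmpeg_audio = OutputFileDatasetConfig(name="ffmpeg_audio")
audio_input = ffmpeg_audio.as_input()
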
@@ -367,13 +363,10 @@
 "split_video_step = PythonScriptStep(\n",
 " name=\"split video\",\n",
 " script_name=\"process_video.py\",\n",
-" arguments=[\"--input_video\", orangutan_video,\n",
+" arguments=[\"--input_video\", orangutan_video.as_mount(),\n",
 " \"--output_audio\", ffmpeg_audio,\n",
-" \"--output_images\", ffmpeg_images_file_dataset,\n",
-" ],\n",
+" \"--output_images\", ffmpeg_images],\n",
 " compute_target=cpu_cluster,\n",
-" inputs=[orangutan_video],\n",
-" outputs=[ffmpeg_images_file_dataset, ffmpeg_audio],\n",
 " runconfig=amlcompute_run_config,\n",
 " source_directory=scripts_folder\n",
 ")\n",

@@ -381,12 +374,10 @@
 "stitch_video_step = PythonScriptStep(\n",
 " name=\"stitch\",\n",
 " script_name=\"stitch_video.py\",\n",
-" arguments=[\"--images_dir\", processed_images, \n",
-" \"--input_audio\", ffmpeg_audio, \n",
+" arguments=[\"--images_dir\", processed_images.as_input(), \n",
+" \"--input_audio\", ffmpeg_audio.as_input(), \n",
 " \"--output_dir\", output_video],\n",
 " compute_target=cpu_cluster,\n",
-" inputs=[processed_images, ffmpeg_audio],\n",
-" outputs=[output_video],\n",
 " runconfig=amlcompute_run_config,\n",
 " source_directory=scripts_folder\n",
 ")"

@@ -415,7 +406,6 @@
 "parallel_cd.add_conda_package(\"torchvision\")\n",
 "parallel_cd.add_conda_package(\"pillow<7\") # needed for torchvision==0.4.0\n",
 "parallel_cd.add_pip_package(\"azureml-core\")\n",
-"parallel_cd.add_pip_package(\"azureml-dataset-runtime[fuse]\")\n",
 "\n",
 "styleenvironment = Environment(name=\"styleenvironment\")\n",
 "styleenvironment.python.conda_dependencies=parallel_cd\n",

@@ -457,7 +447,7 @@
 "\n",
 "distributed_style_transfer_step = ParallelRunStep(\n",
 " name=parallel_step_name,\n",
-" inputs=[ffmpeg_images_file_dataset], # Input file share/blob container/file dataset\n",
+" inputs=[ffmpeg_images], # Input file share/blob container/file dataset\n",
 " output=processed_images, # Output file share/blob container\n",
 " arguments=[\"--style\", style_param],\n",
 " parallel_run_config=parallel_run_config,\n",

@@ -552,8 +542,8 @@
 "source": [
 "def download_video(run, target_dir=None):\n",
 " stitch_run = run.find_step_run(stitch_video_step.name)[0]\n",
-" port_data = stitch_run.get_output_data(output_video.name)\n",
-" port_data.download(target_dir, show_progress=True)"
+" port_data = stitch_run.get_details()['outputDatasets'][0]['dataset']\n",
+" port_data.download(target_dir)"
 ]
 },
 {

@@ -8,7 +8,7 @@ from azureml.core import Run
 def on_train_result(info):
     '''Callback on train result to record metrics returned by trainer.
     '''
-    run = Run.get_context()
+    run = Run.get_context().parent
     run.log(
         name='episode_reward_mean',
         value=info["result"]["episode_reward_mean"])

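The `.parent` change (repeated in two more callback files below) makes the callback log against the parent run rather than each worker's own child run, so episode rewards from all workers land on one run record. A trivial sketch, assuming execution inside a child run:

from azureml.core import Run

parent = Run.get_context().parent                     # the child run's parent
parent.log(name='episode_reward_mean', value=123.4)   # illustrative value
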
@@ -423,9 +423,6 @@
 "source": [
 "from azureml.contrib.train.rl import WorkerConfiguration\n",
 "\n",
-"# Pip packages we will use for both head and worker\n",
-"pip_packages=[\"ray[rllib]==0.8.3\"] # Latest version of Ray has fixes for isses related to object transfers\n",
-"\n",
 "# Specify the Ray worker configuration\n",
 "worker_conf = WorkerConfiguration(\n",
 " \n",

@@ -439,7 +436,6 @@
 " use_gpu=False, \n",
 " \n",
 " # PIP packages to use\n",
-" pip_packages=pip_packages\n",
 ")"
 ]
 },

@@ -508,14 +504,11 @@
 " # The Azure Machine Learning compute target set up for Ray head nodes\n",
 " compute_target=head_compute_target,\n",
 " \n",
-" # Pip packages\n",
-" pip_packages=pip_packages,\n",
-" \n",
 " # GPU usage\n",
 " use_gpu=True,\n",
 " \n",
 " # Reinforcement learning framework. Currently must be Ray.\n",
-" rl_framework=Ray(),\n",
+" rl_framework=Ray('0.8.3'),\n",
 " \n",
 " # Ray worker configuration defined above.\n",
 " worker_configuration=worker_conf,\n",

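With the explicit pip pin removed, the Ray version now travels with the framework object itself; a sketch of the relevant fragment, assuming the estimator setup from this notebook:

from azureml.contrib.train.rl import Ray

# Pin the framework version here instead of via pip_packages.
rl_framework = Ray('0.8.3')
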
@@ -651,14 +644,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# Get all child runs\n",
-"child_runs = list(run.get_children(_rehydrate_runs=False))\n",
-"\n",
 "# Get the reward metrics from worker run\n",
-"if child_runs[0].id.endswith(\"_worker\"):\n",
-" episode_reward_mean = child_runs[0].get_metrics(name='episode_reward_mean')\n",
-"else:\n",
-" episode_reward_mean = child_runs[1].get_metrics(name='episode_reward_mean')"
+"episode_reward_mean = run.get_metrics(name='episode_reward_mean')"
 ]
 },
 {

@@ -8,7 +8,7 @@ from azureml.core import Run
 def on_train_result(info):
     '''Callback on train result to record metrics returned by trainer.
     '''
-    run = Run.get_context()
+    run = Run.get_context().parent
     run.log(
         name='episode_reward_mean',
         value=info["result"]["episode_reward_mean"])

@@ -8,7 +8,7 @@ from azureml.core import Run
 def on_train_result(info):
     '''Callback on train result to record metrics returned by trainer.
     '''
-    run = Run.get_context()
+    run = Run.get_context().parent
     run.log(
         name='episode_reward_mean',
         value=info["result"]["episode_reward_mean"])

@@ -4,7 +4,6 @@ dependencies:
 - azureml-sdk
 - azureml-interpret
 - azureml-contrib-fairness
-- interpret-community[visualization]
 - fairlearn==0.4.6
 - matplotlib
 - azureml-dataset-runtime

@@ -100,7 +100,7 @@
 "\n",
 "# Check core SDK version number\n",
 "\n",
-"print(\"This notebook was created using SDK version 1.24.0, you are currently running version\", azureml.core.VERSION)"
+"print(\"This notebook was created using SDK version 1.25.0, you are currently running version\", azureml.core.VERSION)"
 ]
 },
 {

@@ -179,12 +179,14 @@
 "outputs": [],
 "source": [
 "from azureml.core import Environment\n",
+"from azureml.core.runconfig import DockerConfiguration\n",
 "from azureml.core.conda_dependencies import CondaDependencies\n",
 "\n",
 "myenv = Environment(\"myenv\")\n",
+"myenv.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn', 'packaging'])\n",
 "\n",
-"myenv.docker.enabled = True\n",
-"myenv.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn', 'packaging'])"
+"# Enable Docker\n",
+"docker_config = DockerConfiguration(use_docker=True)"
 ]
 },
 {

@@ -245,7 +247,8 @@
 "src = ScriptRunConfig(source_directory=project_folder, \n",
 " script='train.py', \n",
 " compute_target=cpu_cluster, \n",
-" environment=myenv)\n",
+" environment=myenv,\n",
+" docker_runtime_config=docker_config)\n",
 " \n",
 "run = experiment.submit(config=src)\n",
 "run"

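Pulling the two hunks above together, the migrated pattern reads as follows; this mirrors the commit directly, with `project_folder` and `cpu_cluster` assumed from earlier notebook cells:

from azureml.core import Environment, ScriptRunConfig
from azureml.core.runconfig import DockerConfiguration
from azureml.core.conda_dependencies import CondaDependencies

myenv = Environment("myenv")
myenv.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn', 'packaging'])

# Docker is now requested per run via DockerConfiguration, replacing the
# deprecated Environment-level flag myenv.docker.enabled.
docker_config = DockerConfiguration(use_docker=True)
src = ScriptRunConfig(source_directory=project_folder,  # from earlier cells
                      script='train.py',
                      compute_target=cpu_cluster,       # from earlier cells
                      environment=myenv,
                      docker_runtime_config=docker_config)
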
@@ -1,402 +0,0 @@
-{
-"cells": [
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"Copyright (c) Microsoft Corporation. All rights reserved.\n",
-"\n",
-"Licensed under the MIT License."
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-""
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"# Introduction to labeled datasets\n",
-"\n",
-"Labeled datasets are output from Azure Machine Learning [labeling projects](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-create-labeling-projects). It captures the reference to the data (e.g. image files) and its labels. \n",
-"\n",
-"This tutorial introduces the capabilities of labeled datasets and how to use it in training.\n",
-"\n",
-"Learn how-to:\n",
-"\n",
-"> * Set up your development environment\n",
-"> * Explore labeled datasets\n",
-"> * Train a simple deep learning neural network on a remote cluster\n",
-"\n",
-"## Prerequisite:\n",
-"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
-"* Go through Azure Machine Learning [labeling projects](https://docs.microsoft.com/azure/machine-learning/service/how-to-create-labeling-projects) and export the labels as an Azure Machine Learning dataset\n",
-"* Go through the [configuration notebook](../../../configuration.ipynb) to:\n",
-" * install the latest version of azureml-sdk\n",
-" * install the latest version of azureml-contrib-dataset\n",
-" * install [PyTorch](https://pytorch.org/)\n",
-" * create a workspace and its configuration file (`config.json`)"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## Set up your development environment\n",
-"\n",
-"All the setup for your development work can be accomplished in a Python notebook. Setup includes:\n",
-"\n",
-"* Importing Python packages\n",
-"* Connecting to a workspace to enable communication between your local computer and remote resources\n",
-"* Creating an experiment to track all your runs\n",
-"* Creating a remote compute target to use for training\n",
-"\n",
-"### Import packages\n",
-"\n",
-"Import Python packages you need in this session. Also display the Azure Machine Learning SDK version."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"import os\n",
-"import azureml.core\n",
-"import azureml.contrib.dataset\n",
-"from azureml.core import Dataset, Workspace, Experiment\n",
-"from azureml.contrib.dataset import FileHandlingOption\n",
-"\n",
-"# check core SDK version number\n",
-"print(\"Azure ML SDK Version: \", azureml.core.VERSION)\n",
-"print(\"Azure ML Contrib Version\", azureml.contrib.dataset.VERSION)"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"### Connect to workspace\n",
-"\n",
-"Create a workspace object from the existing workspace. `Workspace.from_config()` reads the file **config.json** and loads the details into an object named `workspace`."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"# load workspace\n",
-"workspace = Workspace.from_config()\n",
-"print('Workspace name: ' + workspace.name, \n",
-" 'Azure region: ' + workspace.location, \n",
-" 'Subscription id: ' + workspace.subscription_id, \n",
-" 'Resource group: ' + workspace.resource_group, sep='\\n')"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"### Create experiment and a directory\n",
-"\n",
-"Create an experiment to track the runs in your workspace and a directory to deliver the necessary code from your computer to the remote resource."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"# create an ML experiment\n",
-"exp = Experiment(workspace=workspace, name='labeled-datasets')\n",
-"\n",
-"# create a directory\n",
-"script_folder = './labeled-datasets'\n",
-"os.makedirs(script_folder, exist_ok=True)"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"### Create or Attach existing compute resource\n",
-"By using Azure Machine Learning Compute, a managed service, data scientists can train machine learning models on clusters of Azure virtual machines. Examples include VMs with GPU support. In this tutorial, you will create Azure Machine Learning Compute as your training environment. The code below creates the compute clusters for you if they don't already exist in your workspace.\n",
-"\n",
-"**Creation of compute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace the code will skip the creation process."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"from azureml.core.compute import ComputeTarget, AmlCompute\n",
-"from azureml.core.compute_target import ComputeTargetException\n",
-"\n",
-"# choose a name for your cluster\n",
-"cluster_name = \"openhack\"\n",
-"\n",
-"try:\n",
-" compute_target = ComputeTarget(workspace=workspace, name=cluster_name)\n",
-" print('Found existing compute target')\n",
-"except ComputeTargetException:\n",
-" print('Creating a new compute target...')\n",
-" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
-" max_nodes=4)\n",
-"\n",
-" # create the cluster\n",
-" compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)\n",
-"\n",
-" # can poll for a minimum number of nodes and for a specific timeout. \n",
-" # if no min node count is provided it uses the scale settings for the cluster\n",
-" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
-"\n",
-"# use get_status() to get a detailed status for the current cluster. \n",
-"print(compute_target.get_status().serialize())"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## Explore labeled datasets\n",
-"\n",
-"**Note**: How to create labeled datasets is not covered in this tutorial. To create labeled datasets, you can go through [labeling projects](https://docs.microsoft.com/azure/machine-learning/service/how-to-create-labeling-projects) and export the output labels as Azure Machine Lerning datasets. \n",
-"\n",
-"`animal_labels` used in this tutorial section is the output from a labeling project, with the task type of \"Object Identification\"."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"# get animal_labels dataset from the workspace\n",
-"animal_labels = Dataset.get_by_name(workspace, 'animal_labels')\n",
-"animal_labels"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"You can load labeled datasets into pandas DataFrame. There are 3 file handling option that you can choose to load the data files referenced by the labeled datasets:\n",
-"* Streaming: The default option to load data files.\n",
-"* Download: Download your data files to a local path.\n",
-"* Mount: Mount your data files to a mount point. Mount only works for Linux-based compute, including Azure Machine Learning notebook VM and Azure Machine Learning Compute."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"animal_pd = animal_labels.to_pandas_dataframe(file_handling_option=FileHandlingOption.DOWNLOAD, target_path='./download/', overwrite_download=True)\n",
-"animal_pd"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"import matplotlib.pyplot as plt\n",
-"import matplotlib.image as mpimg\n",
-"\n",
-"# read images from downloaded path\n",
-"img = mpimg.imread(animal_pd.loc[0,'image_url'])\n",
-"imgplot = plt.imshow(img)"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"You can also load labeled datasets into [torchvision datasets](https://pytorch.org/docs/stable/torchvision/datasets.html), so that you can leverage on the open source libraries provided by PyTorch for image transformation and training."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"from torchvision.transforms import functional as F\n",
-"\n",
-"# load animal_labels dataset into torchvision dataset\n",
-"pytorch_dataset = animal_labels.to_torchvision()\n",
-"img = pytorch_dataset[0][0]\n",
-"print(type(img))\n",
-"\n",
-"# use methods from torchvision to transform the img into grayscale\n",
-"pil_image = F.to_pil_image(img)\n",
-"gray_image = F.to_grayscale(pil_image, num_output_channels=3)\n",
-"\n",
-"imgplot = plt.imshow(gray_image)"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## Train an image classification model\n",
-"\n",
-" `crack_labels` dataset used in this tutorial section is the output from a labeling project, with the task type of \"Image Classification Multi-class\". We will use this dataset to train an image classification model that classify whether an image has cracks or not."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"# get crack_labels dataset from the workspace\n",
-"crack_labels = Dataset.get_by_name(workspace, 'crack_labels')\n",
-"crack_labels"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"### Configure training job\n",
-"\n",
-"You can ask the system to build a conda environment based on your dependency specification. Once the environment is built, and if you don't change your dependencies, it will be reused in subsequent runs."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"from azureml.core import Environment\n",
-"from azureml.core.conda_dependencies import CondaDependencies\n",
-"\n",
-"conda_env = Environment('conda-env')\n",
-"conda_env.python.conda_dependencies = CondaDependencies.create(pip_packages=['azureml-sdk',\n",
-" 'azureml-contrib-dataset',\n",
-" 'torch','torchvision',\n",
-" 'azureml-dataset-runtime[pandas]'])"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"A ScriptRunConfig object is used to submit the run. Create a ScriptRunConfig by specifying\n",
-"\n",
-"* The directory that contains your scripts. All the files in this directory are uploaded into the cluster nodes for execution. \n",
-"* The training script name, train.py\n",
-"* The input dataset for training\n",
-"* The compute target. In this case you will use the AmlCompute you created\n",
-"* The environment for the experiment"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"from azureml.core import ScriptRunConfig\n",
-"\n",
-"src = ScriptRunConfig(source_directory=script_folder,\n",
-" script='train.py',\n",
-" arguments=[crack_labels.as_named_input('crack_labels')],\n",
-" compute_target=compute_target,\n",
-" enviroment=conda_env)"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"### Submit job to run\n",
-"\n",
-"Submit the ScriptRunConfig to the Azure ML experiment to kick off the execution."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"run = exp.submit(src)"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"run.wait_for_completion(show_output=True)"
-]
-}
-],
-"metadata": {
-"authors": [
-{
-"name": "sihhu"
-}
-],
-"category": "tutorial",
-"compute": [
-"Remote"
-],
-"deployment": [
-"None"
-],
-"exclude_from_index": false,
-"framework": [
-"Azure ML"
-],
-"friendly_name": "Introduction to labeled datasets",
-"index_order": 1,
-"kernelspec": {
-"display_name": "Python 3.6",
-"language": "python",
-"name": "python36"
-},
-"language_info": {
-"codemirror_mode": {
-"name": "ipython",
-"version": 3
-},
-"file_extension": ".py",
-"mimetype": "text/x-python",
-"name": "python",
-"nbconvert_exporter": "python",
-"pygments_lexer": "ipython3",
-"version": "3.6.9"
-},
-"nteract": {
-"version": "nteract-front-end@1.0.0"
-},
-"star_tag": [
-"featured"
-],
-"tags": [
-"Dataset",
-"label",
-"Estimator"
-],
-"task": "Train"
-},
-"nbformat": 4,
-"nbformat_minor": 2
-}

@@ -1,106 +0,0 @@
-import os
-import torchvision
-import torchvision.transforms as transforms
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.optim as optim
-
-from azureml.core import Dataset, Run
-import azureml.contrib.dataset
-from azureml.contrib.dataset import FileHandlingOption, LabeledDatasetTask
-
-run = Run.get_context()
-
-# get input dataset by name
-labeled_dataset = run.input_datasets['crack_labels']
-pytorch_dataset = labeled_dataset.to_torchvision()
-
-
-indices = torch.randperm(len(pytorch_dataset)).tolist()
-dataset_train = torch.utils.data.Subset(pytorch_dataset, indices[:40])
-dataset_test = torch.utils.data.Subset(pytorch_dataset, indices[-10:])
-
-trainloader = torch.utils.data.DataLoader(dataset_train, batch_size=4,
-                                          shuffle=True, num_workers=0)
-
-testloader = torch.utils.data.DataLoader(dataset_test, batch_size=4,
-                                         shuffle=True, num_workers=0)
-
-
-class Net(nn.Module):
-    def __init__(self):
-        super(Net, self).__init__()
-        self.conv1 = nn.Conv2d(3, 6, 5)
-        self.pool = nn.MaxPool2d(2, 2)
-        self.conv2 = nn.Conv2d(6, 16, 5)
-        self.fc1 = nn.Linear(16 * 71 * 71, 120)
-        self.fc2 = nn.Linear(120, 84)
-        self.fc3 = nn.Linear(84, 10)
-
-    def forward(self, x):
-        x = self.pool(F.relu(self.conv1(x)))
-        x = self.pool(F.relu(self.conv2(x)))
-        x = x.view(x.size(0), 16 * 71 * 71)
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
-        x = self.fc3(x)
-        return x
-
-
-net = Net()
-
-criterion = nn.CrossEntropyLoss()
-optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
-
-
-for epoch in range(2):  # loop over the dataset multiple times
-
-    running_loss = 0.0
-    for i, data in enumerate(trainloader, 0):
-        # get the inputs; data is a list of [inputs, labels]
-        inputs, labels = data
-
-        # zero the parameter gradients
-        optimizer.zero_grad()
-
-        # forward + backward + optimize
-        outputs = net(inputs)
-        loss = criterion(outputs, labels)
-        loss.backward()
-        optimizer.step()
-
-        # print statistics
-        running_loss += loss.item()
-        if i % 5 == 4:  # print every 5 mini-batches
-            print('[%d, %5d] loss: %.3f' %
-                  (epoch + 1, i + 1, running_loss / 5))
-            running_loss = 0.0
-
-print('Finished Training')
-classes = trainloader.dataset.dataset.labels
-PATH = './cifar_net.pth'
-torch.save(net.state_dict(), PATH)
-
-dataiter = iter(testloader)
-images, labels = dataiter.next()
-
-net = Net()
-net.load_state_dict(torch.load(PATH))
-
-outputs = net(images)
-
-_, predicted = torch.max(outputs, 1)
-
-correct = 0
-total = 0
-with torch.no_grad():
-    for data in testloader:
-        images, labels = data
-        outputs = net(images)
-        _, predicted = torch.max(outputs.data, 1)
-        total += labels.size(0)
-        correct += (predicted == labels).sum().item()
-
-print('Accuracy of the network on the 10 test images: %d %%' % (100 * correct / total))
-pass

index.md (6 changed lines)

@@ -19,7 +19,6 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
 | [Forecasting orange juice sales with deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb) | Forecasting | Orange Juice Sales | Remote | Azure Container Instance | Azure ML AutoML | None |
 | [Register a model and deploy locally](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local.ipynb) | Deployment | None | Local | Local | None | None |
 | :star:[Data drift quickdemo](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datadrift-tutorial/datadrift-tutorial.ipynb) | Filtering | NOAA | Remote | None | Azure ML | Dataset, Timeseries, Drift |
-| :star:[Introduction to labeled datasets](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/labeled-datasets/labeled-datasets.ipynb) | Train | | Remote | None | Azure ML | Dataset, label, Estimator |
 | :star:[Datasets with ML Pipeline](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/pipeline-for-image-classification.ipynb) | Train | Fashion MNIST | Remote | None | Azure ML | Dataset, Pipeline, Estimator, ScriptRun |
 | :star:[Filtering data using Tabular Timeseiries Dataset related API](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/timeseries-datasets/tabular-timeseries-dataset-filtering.ipynb) | Filtering | NOAA | Local | None | Azure ML | Dataset, Tabular Timeseries |
 | :star:[Train with Datasets (Tabular and File)](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/train-with-datasets/train-with-datasets.ipynb) | Train | Iris, Diabetes | Remote | None | Azure ML | Dataset, Estimator, ScriptRun |

@@ -110,6 +109,8 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
 | [auto-ml-regression](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/regression/auto-ml-regression.ipynb) | | | | | | |
 | [automl-databricks-local-01](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/automl/automl-databricks-local-01.ipynb) | | | | | | |
 | [automl-databricks-local-with-deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb) | | | | | | |
+| [spark_job_on_synapse_spark_pool](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-synapse/spark_job_on_synapse_spark_pool.ipynb) | | | | | | |
+| [spark_session_on_synapse_spark_pool](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-synapse/spark_session_on_synapse_spark_pool.ipynb) | | | | | | |
 | [multi-model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-multi-model/multi-model-register-and-deploy.ipynb) | | | | | | |
 | [register-model-deploy-local-advanced](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local-advanced.ipynb) | | | | | | |
 | [enable-app-insights-in-production-service](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb) | | | | | | |
@@ -141,4 +142,7 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
 | [img-classification-part3-deploy-encrypted](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/image-classification-mnist-data/img-classification-part3-deploy-encrypted.ipynb) | | | | | | |
 | [tutorial-pipeline-batch-scoring-classification](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/machine-learning-pipelines-advanced/tutorial-pipeline-batch-scoring-classification.ipynb) | | | | | | |
 | [azureml-quickstart](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/quickstart/azureml-quickstart.ipynb) | | | | | | |
+| [AzureMLIn10mins](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/quickstart-ci/AzureMLIn10mins.ipynb) | | | | | | |
+| [ClassificationWithAutomatedML](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/quickstart-ci/ClassificationWithAutomatedML.ipynb) | | | | | | |
+| [GettingStartedWithPythonSDK](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/quickstart-ci/GettingStartedWithPythonSDK.ipynb) | | | | | | |
 | [regression-automated-ml](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/regression-automl-nyc-taxi-data/regression-automated-ml.ipynb) | | | | | | |
@@ -102,7 +102,7 @@
 "source": [
 "import azureml.core\n",
 "\n",
-"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },
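The change above only bumps the version string the notebook prints; for the comparison to come out clean, the installed SDK has to be upgraded to match. A minimal sketch, assuming the environment uses the standard `azureml-sdk` meta-package:

```python
# Verify the installed SDK version; upgrade from a terminal or a '!' cell if it lags.
import azureml.core

print(azureml.core.VERSION)           # e.g. '1.24.0' before the upgrade

# pip install --upgrade azureml-sdk  # brings the environment to the latest release
```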
@@ -23,6 +23,10 @@ The following tutorials are intended to provide an introductory overview of Azur
 | [Deploy an image classification model](https://docs.microsoft.com/azure/machine-learning/tutorial-deploy-models-with-aml) | Deploy a scikit-learn image classification model to Azure Container Instances. | [img-classification-part2-deploy.ipynb](image-classification-mnist-data/img-classification-part2-deploy.ipynb) | Image Classification | Scikit-Learn
 | [Deploy an encrypted inferencing service](https://docs.microsoft.com/azure/machine-learning/tutorial-deploy-models-with-aml) |Deploy an image classification model for encrypted inferencing in Azure Container Instances | [img-classification-part3-deploy-encrypted.ipynb](image-classification-mnist-data/img-classification-part3-deploy-encrypted.ipynb) | Image Classification | Scikit-Learn
 | [Use automated machine learning to predict taxi fares](https://docs.microsoft.com/azure/machine-learning/tutorial-auto-train-models) | Train a regression model to predict taxi fares using Automated Machine Learning. | [regression-part2-automated-ml.ipynb](regression-automl-nyc-taxi-data/regression-automated-ml.ipynb) | Regression | Automated ML
+| Azure ML in 10 minutes, to be run on a Compute Instance |Learn how to run an image classification model, track model metrics, and deploy a model in 10 minutes. | [AzureMLIn10mins.ipynb](quickstart-ci/AzureMLIn10mins.ipynb) | Image Classification | Scikit-Learn |
+| Get started with Azure ML Job Submission, to be run on a Compute Instance |Learn how to use the Azure Machine Learning Python SDK to submit batch jobs. | [GettingStartedWithPythonSDK.ipynb](quickstart-ci/GettingStartedWithPythonSDK.ipynb) | Image Classification | Scikit-Learn |
+| Get started with Automated ML, to be run on a Compute Instance | Learn how to use Automated ML for Fraud classification. | [ClassificationWithAutomatedML.ipynb](quickstart-ci/ClassificationWithAutomatedML.ipynb) | Classification | Automated ML |
+
 
 ## Advanced Samples
 
@@ -337,7 +337,7 @@
 " error_threshold=1,\n",
 " compute_target=compute_target,\n",
 " process_count_per_node=2,\n",
-" node_count=1\n",
+" node_count=2\n",
 ")"
 ]
 },
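For orientation: the keyword arguments touched here belong to the `ParallelRunConfig` backing the `ParallelRunStep` created later in this notebook. A minimal sketch of such a configuration; `batch_env` (an azureml `Environment`) and `batch_score.py` are assumed names not shown in this diff:

```python
# Sketch of the ParallelRunConfig these changed arguments belong to.
# 'batch_env', 'batch_score.py' and 'compute_target' are assumed names.
from azureml.pipeline.steps import ParallelRunConfig

parallel_run_config = ParallelRunConfig(
    source_directory=".",
    entry_script="batch_score.py",  # assumed scoring script
    mini_batch_size="5",            # items handed to each process at a time
    error_threshold=1,              # failed items tolerated before aborting
    output_action="append_row",
    environment=batch_env,          # assumed Environment with scoring deps
    compute_target=compute_target,
    process_count_per_node=2,       # worker processes per node
    node_count=2,                   # the diff raises this from 1 to 2
)
```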
@@ -367,10 +367,11 @@
 "source": [
 "from azureml.pipeline.steps import ParallelRunStep\n",
 "from datetime import datetime\n",
+"import uuid\n",
 "\n",
 "parallel_step_name = \"batchscoring-\" + datetime.now().strftime(\"%Y%m%d%H%M\")\n",
 "\n",
-"label_config = label_ds.as_named_input(\"labels_input\")\n",
+"label_config = label_ds.as_named_input(\"labels_input\").as_mount(\"/tmp/{}\".format(str(uuid.uuid4())))\n",
 "\n",
 "batch_score_step = ParallelRunStep(\n",
 " name=parallel_step_name,\n",
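The new `as_mount("/tmp/<uuid>")` call gives each submission its own mount point for the labels dataset, so concurrent runs landing on the same node cannot collide over a shared path. Inside the entry script the mount is then resolved through the run context; a short sketch using the standard SDK v1 pattern, where the input name matches the `labels_input` above:

```python
# Inside the (assumed) batch scoring entry script: resolve the mounted labels path.
from azureml.core import Run

run = Run.get_context()
labels_path = run.input_datasets["labels_input"]  # the unique /tmp/<uuid> mount
```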
tutorials/quickstart-ci/AzureMLIn10mins.ipynb
@@ -0,0 +1,669 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"# Quickstart: Train and deploy a model in Azure Machine Learning in 10 minutes\n",
"\n",
"In this quickstart, learn how to get started with Azure Machine Learning. You'll train an image classification model using the [MNIST](https://azure.microsoft.com/services/open-datasets/catalog/mnist/) dataset.\n",
"\n",
"You'll learn how to:\n",
"\n",
"> * Download a dataset and look at the data\n",
"> * Train an image classification model and log metrics\n",
"> * Deploy the model"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Connect to your workspace and create an experiment\n",
"\n",
"Import some libraries and create an experiment to track the runs in your workspace. A workspace can have multiple experiments, and all users that have access to the workspace can collaborate on them."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612965916889
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import azureml.core\n",
"from azureml.core import Workspace\n",
"from azureml.core import Experiment\n",
"\n",
"# connect to your workspace\n",
"ws = Workspace.from_config()\n",
"\n",
"# create experiment and start logging to a new run in the experiment\n",
"experiment_name = \"azure-ml-in10-mins-tutorial\"\n",
"exp = Experiment(workspace=ws, name=experiment_name)\n",
"run = exp.start_logging(snapshot_directory=None)"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Import Data\n",
"\n",
"Before you train a model, you need to understand the data you're using to train it. In this section, learn how to:\n",
"\n",
"* Download the MNIST dataset\n",
"* Display some sample images\n",
"\n",
"### Download the MNIST dataset\n",
"\n",
"You'll use Azure Open Datasets to get the raw MNIST data files. [Azure Open Datasets](https://docs.microsoft.com/azure/open-datasets/overview-what-are-open-datasets) are curated public datasets that you can use to add scenario-specific features to machine learning solutions for better models. Each dataset has a corresponding class, `MNIST` in this case, to retrieve the data in different ways."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612965922274
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"import os\n",
"from azureml.core import Dataset\n",
"from azureml.opendatasets import MNIST\n",
"\n",
"data_folder = os.path.join(os.getcwd(), \"data\")\n",
"os.makedirs(data_folder, exist_ok=True)\n",
"\n",
"mnist_file_dataset = MNIST.get_file_dataset()\n",
"mnist_file_dataset.download(data_folder, overwrite=True)\n",
"\n",
"mnist_file_dataset = mnist_file_dataset.register(\n",
" workspace=ws,\n",
" name=\"mnist_opendataset\",\n",
" description=\"training and test dataset\",\n",
" create_new_version=True,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### Take a look at the data\n",
"\n",
"Load the compressed files into `numpy` arrays. Then use `matplotlib` to plot 30 random images from the dataset with their labels above them. \n",
"\n",
"Note this step requires a `load_data` function that's included in an `utils.py` file. This file is placed in the same folder as this notebook. The `load_data` function simply parses the compressed files into numpy arrays."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612965929041
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"from utils import load_data\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import glob\n",
"\n",
"\n",
"# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the model converge faster.\n",
"X_train = (\n",
" load_data(\n",
" glob.glob(\n",
" os.path.join(data_folder, \"**/train-images-idx3-ubyte.gz\"), recursive=True\n",
" )[0],\n",
" False,\n",
" )\n",
" / 255.0\n",
")\n",
"X_test = (\n",
" load_data(\n",
" glob.glob(\n",
" os.path.join(data_folder, \"**/t10k-images-idx3-ubyte.gz\"), recursive=True\n",
" )[0],\n",
" False,\n",
" )\n",
" / 255.0\n",
")\n",
"y_train = load_data(\n",
" glob.glob(\n",
" os.path.join(data_folder, \"**/train-labels-idx1-ubyte.gz\"), recursive=True\n",
" )[0],\n",
" True,\n",
").reshape(-1)\n",
"y_test = load_data(\n",
" glob.glob(\n",
" os.path.join(data_folder, \"**/t10k-labels-idx1-ubyte.gz\"), recursive=True\n",
" )[0],\n",
" True,\n",
").reshape(-1)\n",
"\n",
"\n",
"# now let's show some randomly chosen images from the traininng set.\n",
"count = 0\n",
"sample_size = 30\n",
"plt.figure(figsize=(16, 6))\n",
"for i in np.random.permutation(X_train.shape[0])[:sample_size]:\n",
" count = count + 1\n",
" plt.subplot(1, sample_size, count)\n",
" plt.axhline(\"\")\n",
" plt.axvline(\"\")\n",
" plt.text(x=10, y=-10, s=y_train[i], fontsize=18)\n",
" plt.imshow(X_train[i].reshape(28, 28), cmap=plt.cm.Greys)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Train model and log metrics\n",
"\n",
"You'll train the model using the code below. Your training runs and metrics will be registered in the experiment you created, so that this information is available after you've finished.\n",
"\n",
"You'll be using the [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) classifier from the [SciKit Learn framework](https://scikit-learn.org/) to classify the data.\n",
"\n",
"> **Note: The model training takes around 1 minute to complete.**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612966046970
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# create the model\n",
"import numpy as np\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"reg = 0.5\n",
"clf = LogisticRegression(\n",
" C=1.0 / reg, solver=\"liblinear\", multi_class=\"auto\", random_state=42\n",
")\n",
"clf.fit(X_train, y_train)\n",
"\n",
"# make predictions using the test set and calculate the accuracy\n",
"y_hat = clf.predict(X_test)\n",
"\n",
"# calculate accuracy on the prediction\n",
"acc = np.average(y_hat == y_test)\n",
"print(\"Accuracy is\", acc)\n",
"\n",
"run.log(\"regularization rate\", np.float(reg))\n",
"run.log(\"accuracy\", np.float(acc))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"## Version control your models with the model registry\n",
"\n",
"You can use model registration to store and version your models in your workspace. Registered models are identified by name and version. Each time you register a model with the same name as an existing one, the registry increments the version. Azure Machine Learning supports any model that can be loaded through Python 3.\n",
"\n",
"The code below:\n",
"\n",
"1. Saves the model to disk\n",
"1. Uploads the model file to the run \n",
"1. Registers the uploaded model file\n",
"1. Transitions the run to a completed state"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612881042710
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"import joblib\n",
"from azureml.core.model import Model\n",
"\n",
"path = \"sklearn_mnist_model.pkl\"\n",
"joblib.dump(value=clf, filename=path)\n",
"\n",
"run.upload_file(name=path, path_or_stream=path)\n",
"\n",
"model = run.register_model(\n",
" model_name=\"sklearn_mnist_model\",\n",
" model_path=path,\n",
" description=\"Mnist handwriting recognition\",\n",
")\n",
"\n",
"run.complete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deploy the model\n",
"\n",
"The next cell deploys the model to an Azure Container Instance so that you can score data in real-time (Azure Machine Learning also provides mechanisms to do batch scoring). A real-time endpoint allows application developers to integrate machine learning into their apps."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612881061728
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# create environment for the deploy\n",
"from azureml.core.environment import Environment\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"# to install required packages\n",
"env = Environment(\"quickstart-env\")\n",
"cd = CondaDependencies.create(\n",
" pip_packages=[\"azureml-dataset-runtime[pandas,fuse]\", \"azureml-defaults\"],\n",
" conda_packages=[\"scikit-learn==0.22.1\"],\n",
")\n",
"\n",
"env.python.conda_dependencies = cd\n",
"\n",
"# Register environment to re-use later\n",
"env.register(workspace=ws)\n",
"\n",
"# create config file\n",
"from azureml.core.webservice import AciWebservice\n",
"\n",
"aciconfig = AciWebservice.deploy_configuration(\n",
" cpu_cores=1,\n",
" memory_gb=1,\n",
" tags={\"data\": \"MNIST\", \"method\": \"sklearn\"},\n",
" description=\"Predict MNIST with sklearn\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"> **Note: The deployment takes around 3 minutes to complete.**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"%%time\n",
"import uuid\n",
"from azureml.core.webservice import Webservice\n",
"from azureml.core.model import InferenceConfig\n",
"from azureml.core.environment import Environment\n",
"from azureml.core import Workspace\n",
"from azureml.core.model import Model\n",
"\n",
"ws = Workspace.from_config()\n",
"model = Model(ws, \"sklearn_mnist_model\")\n",
"\n",
"\n",
"myenv = Environment.get(workspace=ws, name=\"quickstart-env\", version=\"1\")\n",
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)\n",
"\n",
"service_name = \"sklearn-mnist-svc-\" + str(uuid.uuid4())[:4]\n",
"service = Model.deploy(\n",
" workspace=ws,\n",
" name=service_name,\n",
" models=[model],\n",
" inference_config=inference_config,\n",
" deployment_config=aciconfig,\n",
")\n",
"\n",
"service.wait_for_deployment(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The [*scoring script*](score.py) file referenced in the code above can be found in the same folder as this notebook, and has two functions:\n",
"\n",
"1. an `init` function that executes once when the service starts - in this function you normally get the model from the registry and set global variables\n",
"1. a `run(data)` function that executes each time a call is made to the service. In this function, you normally format the input data, run a prediction, and output the predicted result."
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Test the model service\n",
"\n",
"You can test the model by sending a raw HTTP request to test the web service. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612881527399
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# scoring web service HTTP endpoint\n",
"print(service.scoring_uri)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612881538381
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# send raw HTTP request to test the web service.\n",
"import requests\n",
"\n",
"# send a random row from the test set to score\n",
"random_index = np.random.randint(0, len(X_test) - 1)\n",
"input_data = '{\"data\": [' + str(list(X_test[random_index])) + \"]}\"\n",
"\n",
"headers = {\"Content-Type\": \"application/json\"}\n",
"\n",
"# for AKS deployment you'd need to the service key in the header as well\n",
"# api_key = service.get_key()\n",
"# headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}\n",
"\n",
"resp = requests.post(service.scoring_uri, input_data, headers=headers)\n",
"\n",
"print(\"POST to url\", service.scoring_uri)\n",
"# print(\"input data:\", input_data)\n",
"print(\"label:\", y_test[random_index])\n",
"print(\"prediction:\", resp.text)"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"\n",
"### View the results of your training\n",
"\n",
"When you're finished with an experiment run, you can always return to view the results of your model training here in the Azure Machine Learning studio:\n",
"\n",
"1. Select **Experiments** (left-hand menu)\n",
"1. Select **azure-ml-in10-mins-tutorial**\n",
"1. Select **Run 1**\n",
"1. Select the **Metrics** Tab\n",
"\n",
"The metrics tab will display the parameter values that were logged to the run."
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### View the model in the model registry\n",
"\n",
"You can see the stored model by navigating to **Models** in the left-hand menu bar. Select the **sklearn_mnist_model** to see the details of the model, including the experiment run ID that created the model."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Clean up resources\n",
"\n",
"If you're not going to continue to use this model, delete the Model service using:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612881556520
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# if you want to keep workspace and only delete endpoint (it will incur cost while running)\n",
"service.delete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to control cost further, stop the compute instance by selecting the \"Stop compute\" button next to the **Compute** dropdown. Then start the compute instance again the next time you need it."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"## Next Steps\n",
"\n",
"In this quickstart, you learned how to run machine learning code in Azure Machine Learning.\n",
"\n",
"Now that you have working code in a development environment, learn how to submit a **_job_** - ideally on a schedule or trigger (for example, arrival of new data).\n",
"\n",
" [**Learn how to get started with Azure ML Job Submission**](GettingStartedWithPythonSDK.ipynb) "
]
}
],
"metadata": {
"authors": [
{
"name": "cewidste"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python36",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"microsoft": {
"host": {
"AzureML": {
"notebookHasBeenCompleted": true
}
}
},
"notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License.",
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
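The deployment cell above points `InferenceConfig` at a `score.py` that is not included in this diff. A minimal sketch of what such an entry script typically looks like for this model; the actual file shipped with the tutorial may differ:

```python
# Hypothetical minimal score.py for the sklearn MNIST model (not the repo's file).
import json
import os

import joblib
import numpy as np


def init():
    # runs once when the service starts: load the registered model from disk
    global model
    model_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), "sklearn_mnist_model.pkl")
    model = joblib.load(model_path)


def run(raw_data):
    # runs on every request: parse the JSON payload, predict, return the result
    data = np.array(json.loads(raw_data)["data"])
    result = model.predict(data)
    return result.tolist()
```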
tutorials/quickstart-ci/AzureMLIn10mins.yml
@@ -0,0 +1,11 @@
name: AzureMLIn10mins
dependencies:
- pip:
  - azureml-sdk
  - sklearn
  - numpy
  - matplotlib
  - joblib
  - uuid
  - requests
  - azureml-opendatasets
tutorials/quickstart-ci/ClassificationWithAutomatedML.ipynb
@@ -0,0 +1,505 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"# Quickstart: Fraud Classification using Automated ML\n",
"\n",
"In this quickstart, you use automated machine learning in Azure Machine Learning service to train a classification model on an associated fraud credit card dataset. This process accepts training data and configuration settings, and automatically iterates through combinations of different feature normalization/standardization methods, models, and hyperparameter settings to arrive at the best model.\n",
"\n",
"You will learn how to:\n",
"\n",
"> * Download a dataset and look at the data\n",
"> * Train a machine learning classification model using autoML \n",
"> * Explore the results\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### Connect to your workspace and create an experiment\n",
"\n",
"You start with importing some libraries and creating an experiment to track the runs in your workspace. A workspace can have multiple experiments, and all the users that have access to the workspace can collaborate on them. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612968646250
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"import logging\n",
"\n",
"from matplotlib import pyplot as plt\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.core.dataset import Dataset\n",
"from azureml.train.automl import AutoMLConfig"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612968706273
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"\n",
"# choose a name for your experiment\n",
"experiment_name = \"fraud-classification-automl-tutorial\"\n",
"\n",
"experiment = Experiment(ws, experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### Load Data\n",
"\n",
"Load the credit card dataset from a csv file containing both training features and labels. The features are inputs to the model, while the training labels represent the expected output of the model. Next, we'll split the data using random_split and extract the training data for the model.\n",
"\n",
"\n",
"Follow this [how-to](https://aka.ms/azureml/howto/createdatasets) if you want to learn more about Datasets and how to use them.\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612968722555
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/creditcard.csv\"\n",
"dataset = Dataset.Tabular.from_delimited_files(data)\n",
"training_data, validation_data = dataset.random_split(percentage=0.8, seed=223)\n",
"label_column_name = \"Class\""
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Train\n",
"\n",
"\n",
"\n",
"When you use automated machine learning in Azure ML, you input training data and configuration settings, and the process automatically iterates through combinations of different feature normalization/standardization methods, models, and hyperparameter settings to arrive at the best model. \n",
"Learn more about how you configure automated ML [here](https://docs.microsoft.com/azure/machine-learning/how-to-configure-auto-train).\n",
"\n",
"\n",
"Instantiate an [AutoMLConfig](https://docs.microsoft.com/python/api/azureml-train-automl-client/azureml.train.automl.automlconfig.automlconfig?view=azure-ml-py) object. This defines the settings and data used to run the experiment.\n",
"\n",
"|Property|Description|\n",
"|-|-|\n",
"|**task**|classification or regression|\n",
"|**primary_metric**|This is the metric that you want to optimize. \n",
"|**enable_early_stopping** | Stop the run if the metric score is not showing improvement.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**training_data**|Input dataset, containing both features and label column.|\n",
"|**label_column_name**|The name of the label column.|\n",
"\n",
"You can find more information about primary metrics [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612968806233
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"automl_settings = {\n",
" \"n_cross_validations\": 3,\n",
" \"primary_metric\": \"average_precision_score_weighted\",\n",
" \"experiment_timeout_hours\": 0.25, # This is a time limit for testing purposes, remove it for real use cases, this will drastically limit ability to find the best model possible\n",
" \"verbosity\": logging.INFO,\n",
" \"enable_stack_ensemble\": False,\n",
"}\n",
"\n",
"automl_config = AutoMLConfig(\n",
" task=\"classification\",\n",
" debug_log=\"automl_errors.log\",\n",
" training_data=training_data,\n",
" label_column_name=label_column_name,\n",
" **automl_settings,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"Call the `submit` method on the experiment object and pass the run configuration. \n",
"\n",
"**Note: Depending on the data and the number of iterations an AutoML run can take a while to complete.**\n",
"\n",
"In this example, we specify `show_output = True` to print currently running iterations to the console. It is also possible to navigate to the experiment through the **Experiment** activity tab in the left menu, and monitor the run status from there."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612970125369
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"local_run = experiment.submit(automl_config, show_output=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612976292559
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"local_run"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### Analyze results\n",
"\n",
"Below we select the best model from our iterations. The `get_output` method on `automl_classifier` returns the best run and the model for the run."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612976298373
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"best_run, best_model = local_run.get_output()\n",
"best_model"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Tests\n",
"\n",
"Now that the model is trained, split the data in the same way the data was split for training (The difference here is the data is being split locally) and then run the test data through the trained model to get the predicted values."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612976320370
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# convert the test data to dataframe\n",
"X_test_df = validation_data.drop_columns(\n",
" columns=[label_column_name]\n",
").to_pandas_dataframe()\n",
"y_test_df = validation_data.keep_columns(\n",
" columns=[label_column_name], validate=True\n",
").to_pandas_dataframe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612976325829
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# call the predict functions on the model\n",
"y_pred = best_model.predict(X_test_df)\n",
"y_pred"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"\n",
"\n",
"### Calculate metrics for the prediction\n",
"\n",
"Now visualize the data to show what our truth (actual) values are compared to the predicted values \n",
"from the trained model that was returned.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612976330108
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"import numpy as np\n",
"import itertools\n",
"\n",
"cf = confusion_matrix(y_test_df.values, y_pred)\n",
"plt.imshow(cf, cmap=plt.cm.Blues, interpolation=\"nearest\")\n",
"plt.colorbar()\n",
"plt.title(\"Confusion Matrix\")\n",
"plt.xlabel(\"Predicted\")\n",
"plt.ylabel(\"Actual\")\n",
"class_labels = [\"False\", \"True\"]\n",
"tick_marks = np.arange(len(class_labels))\n",
"plt.xticks(tick_marks, class_labels)\n",
"plt.yticks([-0.5, 0, 1, 1.5], [\"\", \"False\", \"True\", \"\"])\n",
"# plotting text value inside cells\n",
"thresh = cf.max() / 2.0\n",
"for i, j in itertools.product(range(cf.shape[0]), range(cf.shape[1])):\n",
" plt.text(\n",
" j,\n",
" i,\n",
" format(cf[i, j], \"d\"),\n",
" horizontalalignment=\"center\",\n",
" color=\"white\" if cf[i, j] > thresh else \"black\",\n",
" )\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Control cost and further exploration\n",
"\n",
"If you want to control cost you can stop the compute instance this notebook is running on by clicking the \"Stop compute\" button next to the status dropdown in the menu above.\n",
"\n",
"\n",
"If you want to run more notebook samples, you can click on **Sample Notebooks** next to the **Files** view and explore the notebooks made available for you there."
]
}
],
"metadata": {
"authors": [
{
"name": "cewidste"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
},
"microsoft": {
"host": {
"AzureML": {
"notebookHasBeenCompleted": true
}
}
},
"notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License.",
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
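Beyond `get_output`, the winning run's logged metrics can also be pulled back programmatically; a short sketch using standard SDK v1 run methods:

```python
# Inspect the metrics AutoML logged for the winning iteration.
best_run, best_model = local_run.get_output()

for name, value in best_run.get_metrics().items():
    print(name, value)
```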
tutorials/quickstart-ci/ClassificationWithAutomatedML.yml
@@ -0,0 +1,4 @@
name: ClassificationWithAutomatedML
dependencies:
- pip:
  - azureml-sdk
tutorials/quickstart-ci/GettingStartedWithPythonSDK.ipynb
@@ -0,0 +1,710 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"nteract": {
|
||||||
|
"transient": {
|
||||||
|
"deleting": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"# Quickstart: Learn how to get started with Azure ML Job Submission\n",
|
||||||
|
"\n",
|
||||||
|
"In this quickstart, you train a machine learning model by submitting a Job to a compute target. \n",
|
||||||
|
"When training, it is common to start on your local computer, and then later scale out to a cloud-based cluster. \n",
|
||||||
|
"\n",
|
||||||
|
"All you need to do is define the environment for each compute target within a script run configuration. Then, when you want to run your training experiment on a different compute target, specify the run configuration for that compute.\n",
|
||||||
|
"\n",
|
||||||
|
"This quickstart trains a simple logistic regression using the [MNIST](https://azure.microsoft.com/services/open-datasets/catalog/mnist/) dataset and [scikit-learn](http://scikit-learn.org) with Azure Machine Learning. MNIST is a popular dataset consisting of 70,000 grayscale images. Each image is a handwritten digit of 28x28 pixels, representing a number from 0 to 9. The goal is to create a multi-class classifier to identify the digit a given image represents. \n",
|
||||||
|
"\n",
|
||||||
|
"You will learn how to:\n",
|
||||||
|
"\n",
|
||||||
|
"> * Download a dataset and look at the data\n",
|
||||||
|
"> * Train an image classification model by submitting a batch job to a compute resource\n",
|
||||||
|
"> * Review training results, find and register the best model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"nteract": {
|
||||||
|
"transient": {
|
||||||
|
"deleting": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"### Connect to your workspace and create an experiment\n",
|
||||||
|
"\n",
|
||||||
|
"You start with importing some libraries and creating an experiment to track the runs in your workspace. A workspace can have multiple experiments, and all the users that have access to the workspace can collaborate on them. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"gather": {
|
||||||
|
"logged": 1612965838618
|
||||||
|
},
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false,
|
||||||
|
"source_hidden": false
|
||||||
|
},
|
||||||
|
"nteract": {
|
||||||
|
"transient": {
|
||||||
|
"deleting": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"# connect to your workspace\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"\n",
|
||||||
|
"experiment_name = \"get-started-with-jobsubmission-tutorial\"\n",
|
||||||
|
"exp = Experiment(workspace=ws, name=experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"nteract": {
|
||||||
|
"transient": {
|
||||||
|
"deleting": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## Import Data\n",
|
||||||
|
"\n",
|
||||||
|
"Before you train a model, you need to understand the data that you are using to train it. In this section you will:\n",
|
||||||
|
"\n",
|
||||||
|
"* Download the MNIST dataset\n",
|
||||||
|
"* Display some sample images\n",
|
||||||
|
"\n",
|
||||||
|
"### Download the MNIST dataset\n",
|
||||||
|
"\n",
|
||||||
|
"Use Azure Open Datasets to get the raw MNIST data files. [Azure Open Datasets](https://docs.microsoft.com/azure/open-datasets/overview-what-are-open-datasets) are curated public datasets that you can use to add scenario-specific features to machine learning solutions for more accurate models. Each dataset has a corresponding class, `MNIST` in this case, to retrieve the data in different ways.\n",
|
||||||
|
"\n",
|
||||||
|
"Follow this [how-to](https://aka.ms/azureml/howto/createdatasets) if you want to learn more about Datasets and how to use them.\n"
|
||||||
|
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612965850391
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"import os\n",
"from azureml.core import Dataset\n",
"from azureml.opendatasets import MNIST\n",
"\n",
"data_folder = os.path.join(os.getcwd(), \"data\")\n",
"os.makedirs(data_folder, exist_ok=True)\n",
"\n",
"mnist_file_dataset = MNIST.get_file_dataset()\n",
"mnist_file_dataset.download(data_folder, overwrite=True)\n",
"\n",
"mnist_file_dataset = mnist_file_dataset.register(\n",
"    workspace=ws,\n",
"    name=\"mnist_opendataset\",\n",
"    description=\"training and test dataset\",\n",
"    create_new_version=True,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### Take a look at the data\n",
"You will load the compressed files into `numpy` arrays, then use `matplotlib` to plot 30 random images from the dataset with their labels above them. Note that this step requires a `load_data` function from the `utils.py` file placed in the same folder as this notebook. The `load_data` function simply parses the compressed files into numpy arrays.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612965857960
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# make sure utils.py is in the same directory as this code\n",
"from utils import load_data\n",
"import glob\n",
"\n",
"\n",
"# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the model converge faster.\n",
"X_train = (\n",
"    load_data(\n",
"        glob.glob(\n",
"            os.path.join(data_folder, \"**/train-images-idx3-ubyte.gz\"), recursive=True\n",
"        )[0],\n",
"        False,\n",
"    )\n",
"    / 255.0\n",
")\n",
"X_test = (\n",
"    load_data(\n",
"        glob.glob(\n",
"            os.path.join(data_folder, \"**/t10k-images-idx3-ubyte.gz\"), recursive=True\n",
"        )[0],\n",
"        False,\n",
"    )\n",
"    / 255.0\n",
")\n",
"y_train = load_data(\n",
"    glob.glob(\n",
"        os.path.join(data_folder, \"**/train-labels-idx1-ubyte.gz\"), recursive=True\n",
"    )[0],\n",
"    True,\n",
").reshape(-1)\n",
"y_test = load_data(\n",
"    glob.glob(\n",
"        os.path.join(data_folder, \"**/t10k-labels-idx1-ubyte.gz\"), recursive=True\n",
"    )[0],\n",
"    True,\n",
").reshape(-1)\n",
"\n",
"\n",
"# now let's show some randomly chosen images from the training set.\n",
"count = 0\n",
"sample_size = 30\n",
"plt.figure(figsize=(16, 6))\n",
"for i in np.random.permutation(X_train.shape[0])[:sample_size]:\n",
"    count = count + 1\n",
"    plt.subplot(1, sample_size, count)\n",
"    plt.axhline(\"\")\n",
"    plt.axvline(\"\")\n",
"    plt.text(x=10, y=-10, s=y_train[i], fontsize=18)\n",
"    plt.imshow(X_train[i].reshape(28, 28), cmap=plt.cm.Greys)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Submit your training job\n",
"\n",
"In this quickstart you submit the job to run on local compute, but you can use the same code to submit this training job to other compute targets. With Azure Machine Learning, you can run your script on various compute targets without having to change your training script.\n",
"\n",
"To submit a job you need to:\n",
"* Create a directory\n",
"* Create a training script\n",
"* Create a script run configuration\n",
"* Submit the job\n",
"\n",
"\n",
"### Directory and training script\n",
"\n",
"You need a directory to deliver the necessary code from your computer to the remote resource. A directory with a training script has been created for you and can be found in the same folder as this notebook.\n",
"\n",
"Take a few minutes to examine the training script."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612965865707
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"with open(\"sklearn-mnist-batch/train.py\", \"r\") as f:\n",
"    print(f.read())"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"Notice how the script gets data and saves models:\n",
"\n",
"+ The training script reads an argument to find the directory containing the data. When you submit the job later, you point to the dataset for this argument:\n",
"`parser.add_argument('--data-folder', type=str, dest='data_folder', help='data directory mounting point')`\n",
"\n",
"\n",
"+ The training script saves your model into a directory named `outputs`. <br/>\n",
"`joblib.dump(value=clf, filename='outputs/sklearn_mnist_model.pkl')`<br/>\n",
"Anything written to this directory is automatically uploaded to your workspace. You'll access your model from this directory later in the tutorial.\n",
"\n",
"The file `utils.py` is referenced from the training script to load the dataset correctly. It is also copied into the script folder so that it can be accessed along with the training script."
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### Configure the training job\n",
"\n",
"Create a [ScriptRunConfig](https://docs.microsoft.com/python/api/azureml-core/azureml.core.scriptrunconfig?preserve-view=true&view=azure-ml-py) object to specify the configuration details of your training job, including your training script, the environment to use, and the compute target to run on. Configure the ScriptRunConfig by specifying:\n",
"\n",
"* The directory that contains your scripts. All the files in this directory are uploaded to the compute target for execution.\n",
"* The compute target. In this case you will point to local compute.\n",
"* The training script name, `train.py`\n",
"* An environment that contains the libraries needed to run the script\n",
"* Arguments required from the training script\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"An Environment defines Python packages, environment variables, and Docker settings that are used in machine learning experiments. Here you will be using a curated environment that has already been made available through the workspace.\n",
"\n",
"Read [this article](https://docs.microsoft.com/azure/machine-learning/how-to-use-environments) if you want to learn more about Environments and how to use them.\n",
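"\n",
"As a quick sanity check (a sketch assuming the `Environment` API from `azureml-core`, imported in the next cell), you can list the environments available in the workspace before picking one:\n",
"\n",
"```python\n",
"envs = Environment.list(workspace=ws)\n",
"print(sorted(envs.keys())[:5])  # peek at a few curated environment names\n",
"```"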
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612965877458
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"from azureml.core.environment import Environment\n",
"\n",
"# use a curated environment that has already been built for you\n",
"env = Environment.get(workspace=ws, name=\"AzureML-Scikit-learn-0.20.3\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"A [ScriptRunConfig](https://docs.microsoft.com/python/api/azureml-core/azureml.core.scriptrunconfig?preserve-view=true&view=azure-ml-py) object configures the information necessary for submitting a training run as part of an experiment.\n",
"\n",
"Read more about configuring and submitting training runs [here](https://docs.microsoft.com/azure/machine-learning/how-to-set-up-training-targets)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612965882781
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"from azureml.core import ScriptRunConfig\n",
"\n",
"args = [\"--data-folder\", mnist_file_dataset.as_mount(), \"--regularization\", 0.5]\n",
"\n",
"script_folder = \"sklearn-mnist-batch\"\n",
"src = ScriptRunConfig(\n",
"    source_directory=script_folder,\n",
"    script=\"train.py\",\n",
"    arguments=args,\n",
"    compute_target=\"local\",\n",
"    environment=env,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### Submit the job\n",
"\n",
"Run the experiment by submitting the ScriptRunConfig object. After that, there are several ways to monitor your run: navigate to the experiment \"get-started-with-jobsubmission-tutorial\" under **Experiments** in the left menu (a quick link to the run details page appears in the cell output below), or monitor the run inline in this notebook with the Jupyter widget activated below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612965911435
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"run = exp.submit(config=src)\n",
"run"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### Jupyter widget\n",
"\n",
"Watch the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612966026710
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"from azureml.widgets import RunDetails\n",
"\n",
"RunDetails(run).show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"If you want to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run).\n",
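"\n",
"As a minimal sketch (assuming the `Run` object returned by `exp.submit` above), a run can also be canceled directly from the SDK:\n",
"\n",
"```python\n",
"run.cancel()  # stop the run and mark it as canceled\n",
"```"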
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### Get log results upon completion\n",
"\n",
"Model training happens in the background. You can use `wait_for_completion` to block and wait until the model has completed training before running more code."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612966045110
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# specify show_output to True for a verbose log\n",
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### Display run results\n",
"\n",
"You now have a trained model. Retrieve all the metrics logged during the run, including the accuracy of the model:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612966059052
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"print(run.get_metrics())"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Register model\n",
"\n",
"The last step in the training script wrote the file `outputs/sklearn_mnist_model.pkl` to a directory named `outputs` on the compute where the job executed. `outputs` is a special directory: all of its content is automatically uploaded to your workspace and appears in the run record of the experiment. Hence, the model file is now also available in your workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612966064041
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"print(run.get_file_names())"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"Register the model in the workspace so that you (or your team members with access to the workspace) can later query, examine, and deploy this model.\n",
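"\n",
"As a minimal sketch (assuming the `Model` class from `azureml-core`), a registered model can later be retrieved by name:\n",
"\n",
"```python\n",
"from azureml.core.model import Model\n",
"\n",
"model = Model(ws, name=\"sklearn_mnist\")  # fetches the latest registered version\n",
"print(model.name, model.version)\n",
"```"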
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1612966068862
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# register model\n",
"model = run.register_model(\n",
"    model_name=\"sklearn_mnist\", model_path=\"outputs/sklearn_mnist_model.pkl\"\n",
")\n",
"print(model.name, model.id, model.version, sep=\"\\t\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Control Cost\n",
"\n",
"If you want to control cost, you can stop the compute instance this notebook is running on by clicking the \"Stop compute\" button next to the status dropdown in the menu above.\n",
"\n",
"## Next Steps\n",
"\n",
"In this quickstart, you have seen how to run job-based machine learning code in Azure Machine Learning.\n",
"\n",
"It is also possible to use automated machine learning in Azure Machine Learning service to find the best model in an automated fashion. To see how this works, we recommend that you follow the next quickstart in this series, [**Fraud Classification using Automated ML**](ClassificationWithAutomatedML.ipynb), which focuses on AutoML using the Python SDK."
]
}
],
"metadata": {
"authors": [
{
"name": "cewidste"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
},
"notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License.",
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
tutorials/quickstart-ci/GettingStartedWithPythonSDK.yml (new file)
@@ -0,0 +1,11 @@
name: GettingStartedWithPythonSDK
dependencies:
  - pip:
    - azureml-sdk
    - scikit-learn
    - numpy
    - matplotlib
    - joblib
    - uuid
    - requests
    - azureml-opendatasets
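# usage sketch (an illustrative note, not part of the tracked file):
#   conda env create -f GettingStartedWithPythonSDK.yml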
tutorials/quickstart-ci/score.py (new file)
@@ -0,0 +1,21 @@
import json
import numpy as np
import os
import joblib


def init():
    global model
    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
    # For multiple models, it points to the folder containing all deployed models (./azureml-models)
    model_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), "sklearn_mnist_model.pkl")
    model = joblib.load(model_path)


def run(raw_data):
    data = np.array(json.loads(raw_data)["data"])
    # make prediction
    y_hat = model.predict(data)
    # you can return any data type as long as it is JSON-serializable
    return y_hat.tolist()
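

# Illustrative local smoke test of the init()/run() scoring contract above
# (a sketch, not part of the tracked file). It assumes the AZUREML_MODEL_DIR
# environment variable points at a folder containing sklearn_mnist_model.pkl.
if __name__ == "__main__":
    init()
    sample = json.dumps({"data": np.zeros((1, 784)).tolist()})
    print(run(sample))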
tutorials/quickstart-ci/sklearn-mnist-batch/train.py (new file)
@@ -0,0 +1,82 @@
import argparse
import os
import numpy as np
import glob

from sklearn.linear_model import LogisticRegression
import joblib

from azureml.core import Run
from utils import load_data

# let user feed in 2 parameters, the dataset to mount or download,
# and the regularization rate of the logistic regression model
parser = argparse.ArgumentParser()
parser.add_argument(
    "--data-folder", type=str, dest="data_folder", help="data folder mounting point"
)
parser.add_argument(
    "--regularization", type=float, dest="reg", default=0.01, help="regularization rate"
)
args = parser.parse_args()

data_folder = args.data_folder
print("Data folder:", data_folder)

# load train and test set into numpy arrays
# note we scale the pixel intensity values to 0-1 (by dividing by 255.0) so the model can converge faster.
X_train = (
    load_data(
        glob.glob(
            os.path.join(data_folder, "**/train-images-idx3-ubyte.gz"), recursive=True
        )[0],
        False,
    )
    / 255.0
)
X_test = (
    load_data(
        glob.glob(
            os.path.join(data_folder, "**/t10k-images-idx3-ubyte.gz"), recursive=True
        )[0],
        False,
    )
    / 255.0
)
y_train = load_data(
    glob.glob(
        os.path.join(data_folder, "**/train-labels-idx1-ubyte.gz"), recursive=True
    )[0],
    True,
).reshape(-1)
y_test = load_data(
    glob.glob(
        os.path.join(data_folder, "**/t10k-labels-idx1-ubyte.gz"), recursive=True
    )[0],
    True,
).reshape(-1)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")

# get hold of the current run
run = Run.get_context()

print("Train a logistic regression model with regularization rate of", args.reg)
clf = LogisticRegression(
    C=1.0 / args.reg, solver="liblinear", multi_class="auto", random_state=42
)
clf.fit(X_train, y_train)

print("Predict the test set")
y_hat = clf.predict(X_test)

# calculate accuracy on the prediction
acc = np.average(y_hat == y_test)
print("Accuracy is", acc)

run.log("regularization rate", float(args.reg))
run.log("accuracy", float(acc))

os.makedirs("outputs", exist_ok=True)
# note file saved in the outputs folder is automatically uploaded into experiment record
joblib.dump(value=clf, filename="outputs/sklearn_mnist_model.pkl")
tutorials/quickstart-ci/sklearn-mnist-batch/utils.py (new file)
@@ -0,0 +1,24 @@
import gzip
import numpy as np
import struct


# load compressed MNIST gz files and return numpy arrays
def load_data(filename, label=False):
    with gzip.open(filename) as gz:
        struct.unpack("I", gz.read(4))  # skip the magic number
        n_items = struct.unpack(">I", gz.read(4))
        if not label:
            n_rows = struct.unpack(">I", gz.read(4))[0]
            n_cols = struct.unpack(">I", gz.read(4))[0]
            res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)
            res = res.reshape(n_items[0], n_rows * n_cols)
        else:
            res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)
            res = res.reshape(n_items[0], 1)
    return res


# one-hot encode a 1-D array
def one_hot_encode(array, num_of_classes):
    return np.eye(num_of_classes)[array.reshape(-1)]
tutorials/quickstart-ci/utils.py (new file)
@@ -0,0 +1,24 @@
import gzip
import numpy as np
import struct


# load compressed MNIST gz files and return numpy arrays
def load_data(filename, label=False):
    with gzip.open(filename) as gz:
        struct.unpack("I", gz.read(4))  # skip the magic number
        n_items = struct.unpack(">I", gz.read(4))
        if not label:
            n_rows = struct.unpack(">I", gz.read(4))[0]
            n_cols = struct.unpack(">I", gz.read(4))[0]
            res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)
            res = res.reshape(n_items[0], n_rows * n_cols)
        else:
            res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)
            res = res.reshape(n_items[0], 1)
    return res


# one-hot encode a 1-D array
def one_hot_encode(array, num_of_classes):
    return np.eye(num_of_classes)[array.reshape(-1)]