mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-22 18:42:41 -05:00
update samples from Release-57 as a part of SDK release
This commit is contained in:
@@ -71,6 +71,33 @@
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create trained model\n",
|
||||
"\n",
|
||||
"For this example, we will train a small model on scikit-learn's [diabetes dataset](https://scikit-learn.org/stable/datasets/index.html#diabetes-dataset). "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import joblib\n",
|
||||
"\n",
|
||||
"from sklearn.datasets import load_diabetes\n",
|
||||
"from sklearn.linear_model import Ridge\n",
|
||||
"\n",
|
||||
"dataset_x, dataset_y = load_diabetes(return_X_y=True)\n",
|
||||
"\n",
|
||||
"sk_model = Ridge().fit(dataset_x, dataset_y)\n",
|
||||
"\n",
|
||||
"joblib.dump(sk_model, \"sklearn_regression_model.pkl\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -82,9 +109,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can add tags and descriptions to your models. We are using the `sklearn_regression_model.pkl` file in the current directory as a model with the name `sklearn_regression_model` in the workspace.\n",
|
||||
"Here we are registering the serialized file `sklearn_regression_model.pkl` in the current directory as a model with the name `sklearn_regression_model` in the workspace.\n",
|
||||
"\n",
|
||||
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model, framework, category, target customer etc. Note that tags must be alphanumeric."
|
||||
"You can add tags and descriptions to your models. Using tags, you can track useful information such as the name and version of the machine learning library used to train the model, framework, category, target customer etc. Note that tags must be alphanumeric."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -119,11 +146,62 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import sklearn\n",
|
||||
"\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"environment = Environment(\"LocalDeploy\")\n",
|
||||
"environment.python.conda_dependencies = CondaDependencies(\"myenv.yml\")"
|
||||
"environment.python.conda_dependencies.add_pip_package(\"inference-schema[numpy-support]\")\n",
|
||||
"environment.python.conda_dependencies.add_pip_package(\"joblib\")\n",
|
||||
"environment.python.conda_dependencies.add_pip_package(\"scikit-learn=={}\".format(sklearn.__version__))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Provide the Scoring Script\n",
|
||||
"\n",
|
||||
"This Python script handles the model execution inside the service container. The `init()` method loads the model file, and `run(data)` is called for every input to the service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile score.py\n",
|
||||
"import joblib\n",
|
||||
"import json\n",
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
||||
"from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" global model\n",
|
||||
" # AZUREML_MODEL_DIR is an environment variable created during deployment.\n",
|
||||
" # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)\n",
|
||||
" # For multiple models, it points to the folder containing all deployed models (./azureml-models)\n",
|
||||
" model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'sklearn_regression_model.pkl')\n",
|
||||
" # Deserialize the model file back into a sklearn model.\n",
|
||||
" model = joblib.load(model_path)\n",
|
||||
"\n",
|
||||
"input_sample = np.array([[10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]])\n",
|
||||
"output_sample = np.array([3726.995])\n",
|
||||
"\n",
|
||||
"@input_schema('data', NumpyParameterType(input_sample))\n",
|
||||
"@output_schema(NumpyParameterType(output_sample))\n",
|
||||
"def run(data):\n",
|
||||
" try:\n",
|
||||
" result = model.predict(data)\n",
|
||||
" # You can return any JSON-serializable object.\n",
|
||||
" return result.tolist()\n",
|
||||
" except Exception as e:\n",
|
||||
" error = str(e)\n",
|
||||
" return error"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -145,114 +223,6 @@
|
||||
" environment=environment)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Model Profiling\n",
|
||||
"\n",
|
||||
"Profile your model to understand how much CPU and memory the service, created as a result of its deployment, will need. Profiling returns information such as CPU usage, memory usage, and response latency. It also provides a CPU and memory recommendation based on the resource usage. You can profile your model (or more precisely the service built based on your model) on any CPU and/or memory combination where 0.1 <= CPU <= 3.5 and 0.1GB <= memory <= 15GB. If you do not provide a CPU and/or memory requirement, we will test it on the default configuration of 3.5 CPU and 15GB memory.\n",
|
||||
"\n",
|
||||
"In order to profile your model you will need:\n",
|
||||
"- a registered model\n",
|
||||
"- an entry script\n",
|
||||
"- an inference configuration\n",
|
||||
"- a single column tabular dataset, where each row contains a string representing sample request data sent to the service.\n",
|
||||
"\n",
|
||||
"Please note that profiling is a long-running operation and can take up to 25 minutes, depending on the size of the dataset.\n",
|
||||
"\n",
|
||||
"At this point we only support profiling of services that expect their request data to be a string, for example: string serialized json, text, string serialized image, etc. The content of each row of the dataset (string) will be put into the body of the HTTP request and sent to the service encapsulating the model for scoring.\n",
|
||||
"\n",
|
||||
"Below is an example of how you can construct an input dataset to profile a service which expects its incoming requests to contain serialized JSON. In this case we created a dataset based on one hundred instances of the same request data. In real-world scenarios, however, we suggest that you use larger datasets with various inputs, especially if your model's resource usage/behavior is input-dependent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from azureml.core import Datastore\n",
|
||||
"from azureml.core.dataset import Dataset\n",
|
||||
"from azureml.data import dataset_type_definitions\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# create a string that can be put in the body of the request\n",
|
||||
"serialized_input_json = json.dumps({\n",
|
||||
" 'data': [\n",
|
||||
" [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
|
||||
" [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n",
|
||||
" ]\n",
|
||||
"})\n",
|
||||
"dataset_content = []\n",
|
||||
"for i in range(100):\n",
|
||||
" dataset_content.append(serialized_input_json)\n",
|
||||
"dataset_content = '\\n'.join(dataset_content)\n",
|
||||
"file_name = 'sample_request_data_diabetes.txt'\n",
|
||||
"f = open(file_name, 'w')\n",
|
||||
"f.write(dataset_content)\n",
|
||||
"f.close()\n",
|
||||
"\n",
|
||||
"# upload the txt file created above to the Datastore and create a dataset from it\n",
|
||||
"data_store = Datastore.get_default(ws)\n",
|
||||
"data_store.upload_files(['./' + file_name], target_path='sample_request_data_diabetes')\n",
|
||||
"datastore_path = [(data_store, 'sample_request_data_diabetes' +'/' + file_name)]\n",
|
||||
"sample_request_data_diabetes = Dataset.Tabular.from_delimited_files(\n",
|
||||
" datastore_path,\n",
|
||||
" separator='\\n',\n",
|
||||
" infer_column_types=True,\n",
|
||||
" header=dataset_type_definitions.PromoteHeadersBehavior.NO_HEADERS)\n",
|
||||
"sample_request_data_diabetes = sample_request_data_diabetes.register(workspace=ws,\n",
|
||||
" name='sample_request_data_diabetes',\n",
|
||||
" create_new_version=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now that we have an input dataset we are ready to go ahead with profiling. In this case we are testing the previously introduced sklearn regression model on 1 CPU and 0.5 GB memory. The memory usage and recommendation presented in the result is measured in Gigabytes. The CPU usage and recommendation is measured in CPU cores."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"from azureml.core import Environment\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"from azureml.core.model import Model, InferenceConfig\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"environment = Environment('my-sklearn-environment')\n",
|
||||
"environment.python.conda_dependencies = CondaDependencies.create(pip_packages=[\n",
|
||||
" 'azureml-defaults',\n",
|
||||
" 'inference-schema[numpy-support]',\n",
|
||||
" 'joblib',\n",
|
||||
" 'numpy',\n",
|
||||
" 'scikit-learn==0.19.1',\n",
|
||||
" 'scipy'\n",
|
||||
"])\n",
|
||||
"inference_config = InferenceConfig(entry_script='score.py', environment=environment)\n",
|
||||
"# if cpu and memory_in_gb parameters are not provided\n",
|
||||
"# the model will be profiled on default configuration of\n",
|
||||
"# 3.5CPU and 15GB memory\n",
|
||||
"profile = Model.profile(ws,\n",
|
||||
" 'profile-%s' % datetime.now().strftime('%m%d%Y-%H%M%S'),\n",
|
||||
" [model],\n",
|
||||
" inference_config,\n",
|
||||
" input_dataset=sample_request_data_diabetes,\n",
|
||||
" cpu=1.0,\n",
|
||||
" memory_in_gb=0.5)\n",
|
||||
"\n",
|
||||
"# profiling is a long running operation and may take up to 25 min\n",
|
||||
"profile.wait_for_completion(True)\n",
|
||||
"details = profile.get_details()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -339,15 +309,10 @@
|
||||
"import json\n",
|
||||
"\n",
|
||||
"sample_input = json.dumps({\n",
|
||||
" 'data': [\n",
|
||||
" [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
|
||||
" [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n",
|
||||
" ]\n",
|
||||
" 'data': dataset_x[0:2].tolist()\n",
|
||||
"})\n",
|
||||
"\n",
|
||||
"sample_input = bytes(sample_input, encoding='utf-8')\n",
|
||||
"\n",
|
||||
"local_service.run(input_data=sample_input)"
|
||||
"local_service.run(sample_input)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -366,12 +331,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile score.py\n",
|
||||
"import os\n",
|
||||
"import pickle\n",
|
||||
"import joblib\n",
|
||||
"import json\n",
|
||||
"import numpy as np\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"from sklearn.linear_model import Ridge\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
||||
"from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType\n",
|
||||
@@ -382,10 +345,10 @@
|
||||
" # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)\n",
|
||||
" # For multiple models, it points to the folder containing all deployed models (./azureml-models)\n",
|
||||
" model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'sklearn_regression_model.pkl')\n",
|
||||
" # deserialize the model file back into a sklearn model\n",
|
||||
" # Deserialize the model file back into a sklearn model.\n",
|
||||
" model = joblib.load(model_path)\n",
|
||||
"\n",
|
||||
"input_sample = np.array([[10,9,8,7,6,5,4,3,2,1]])\n",
|
||||
"input_sample = np.array([[10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]])\n",
|
||||
"output_sample = np.array([3726.995])\n",
|
||||
"\n",
|
||||
"@input_schema('data', NumpyParameterType(input_sample))\n",
|
||||
@@ -393,8 +356,8 @@
|
||||
"def run(data):\n",
|
||||
" try:\n",
|
||||
" result = model.predict(data)\n",
|
||||
" # you can return any datatype as long as it is JSON-serializable\n",
|
||||
" return 'hello from updated score.py'\n",
|
||||
" # You can return any JSON-serializable object.\n",
|
||||
" return 'Hello from the updated score.py: ' + str(result.tolist())\n",
|
||||
" except Exception as e:\n",
|
||||
" error = str(e)\n",
|
||||
" return error"
|
||||
@@ -410,7 +373,7 @@
|
||||
"print(\"--------------------------------------------------------------\")\n",
|
||||
"\n",
|
||||
"# After calling reload(), run() will return the updated message.\n",
|
||||
"local_service.run(input_data=sample_input)"
|
||||
"local_service.run(sample_input)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user