Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/production-deploy-to-aks-gpu/production-deploy-to-aks-gpu.png)

# Deploying a web service to Azure Kubernetes Service (AKS)
This notebook shows the steps for deploying a service: registering a model, creating an image, provisioning a cluster (a one-time action), and deploying a service to it.
We then test the service and delete the service, model and cluster.

In [None]:
# Import the Azure ML SDK core package and print its version string.
import azureml.core
print(azureml.core.VERSION)

# Get workspace
Load an existing workspace using the information in the config file.

In [None]:
from azureml.core.workspace import Workspace

# Build the workspace handle from the locally stored config file.
ws = Workspace.from_config()

# Echo the workspace identity, one field per line.
print(
    ws.name,
    ws.resource_group,
    ws.location,
    ws.subscription_id,
    sep='\n',
)

# Download the model

Prior to registering the model, you should have a TensorFlow [Saved Model](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md) in the `resnet50` directory. This cell will download a [pretrained resnet50](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NCHW_jpg.tar.gz) and unpack it to that directory.

In [None]:
import os
import requests
import shutil
import tarfile
import tempfile

from io import BytesIO

# Location of the pretrained ResNet-50 SavedModel archive (.tar.gz).
model_url = "http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NCHW_jpg.tar.gz"

# Path of the SavedModel directory inside the archive, and the local folder
# it is copied to.
archive_prefix = "./resnet_v1_fp32_savedmodel_NCHW_jpg/1538686758/"
target_folder = "resnet50"

# Only download when the target folder is absent, so re-running the cell
# does not fetch the archive again.
if not os.path.exists(target_folder):
    response = requests.get(model_url)
    # Fail loudly on an HTTP error instead of handing an error page to tarfile.
    response.raise_for_status()

    # NOTE(review): extractall() trusts the member paths stored in the archive,
    # and the archive is fetched over plain HTTP. Consider validating members
    # (or using the `filter` argument on Python versions that support it).
    # The archive is opened in a `with` block so it is closed after extraction.
    with tarfile.open(fileobj=BytesIO(response.content)) as archive:
        with tempfile.TemporaryDirectory() as temp_folder:
            archive.extractall(temp_folder)
            # Copy only the SavedModel subdirectory out of the temporary folder.
            shutil.copytree(os.path.join(temp_folder, archive_prefix), target_folder)

# Register the model
Register an existing trained model, add description and tags.

In [None]:
from azureml.core.model import Model

# Upload the local "resnet50" directory and register it in the workspace
# under the name "resnet50", together with its tags and description.
model = Model.register(
    workspace=ws,
    model_path="resnet50",
    model_name="resnet50",
    tags={'area': "Image classification", 'type': "classification"},
    description="Image classification trained on Imagenet Dataset",
)

print(model.name, model.description, model.version)

# Provision the AKS Cluster
This is a one-time setup. You can reuse this cluster for multiple deployments after it has been created. If you delete the cluster or the resource group that contains it, you will have to recreate it.

In [None]:
from azureml.core.compute import ComputeTarget, AksCompute
from azureml.core.compute_target import ComputeTargetException

# Name under which the GPU cluster is looked up (and created, if missing).
gpu_cluster_name = "aks-gpu-cluster"

try:
    # Reuse the cluster when the lookup by name succeeds.
    gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)
    print("Found existing gpu cluster")
except ComputeTargetException:
    print("Creating new gpu-cluster")

    # Single-agent dev/test cluster on a Standard_NV6 VM.
    compute_config = AksCompute.provisioning_configuration(
        vm_size="Standard_NV6",
        agent_count=1,
        cluster_purpose=AksCompute.ClusterPurpose.DEV_TEST,
    )

    # Start provisioning, then wait for completion while showing the output log.
    gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)
    gpu_cluster.wait_for_completion(show_output=True)

# Deploy the model as a web service to AKS

First, create a scoring script.

In [None]:
%%writefile score.py
import tensorflow as tf
import numpy as np
import json
import os
from azureml.contrib.services.aml_request import AMLRequest, rawhttp
from azureml.contrib.services.aml_response import AMLResponse

def init():
    """Load the SavedModel into a TensorFlow session and record its tensor names.

    Sets the module-level globals ``session``, ``input_name`` and
    ``output_name`` that ``score`` reads.

    Raises:
        ValueError: If the ``serving_default`` signature has more than one input.
    """
    global session
    global input_name
    global output_name

    session = tf.Session()

    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
    # For multiple models, it points to the folder containing all deployed models (./azureml-models)
    model_root = os.getenv('AZUREML_MODEL_DIR')
    saved_model_dir = os.path.join(model_root, 'resnet50')
    meta_graph = tf.saved_model.loader.load(session, ['serve'], saved_model_dir)

    serving_signature = meta_graph.signature_def['serving_default']
    if len(serving_signature.inputs) > 1:
        raise ValueError("This score.py only supports one input")

    # One input tensor name (the first), and the names of all output tensors.
    input_name = [tensor.name for tensor in serving_signature.inputs.values()][0]
    output_name = [tensor.name for tensor in serving_signature.outputs.values()]
 

@rawhttp
def run(request):
    """Handle a raw HTTP request to the scoring endpoint.

    POST requests have their body passed to ``score`` and the result is
    returned with status 200. GET requests are answered with 405. Any other
    method is answered with "bad request" and status 500.
    """
    http_method = request.method

    if http_method == 'POST':
        payload = request.get_data(False)
        return AMLResponse(score(payload), 200)

    if http_method == 'GET':
        return AMLResponse("GET is not supported".encode(), 405)

    return AMLResponse("bad request", 500)

def score(data):
    """Run the loaded model on one input and return a JSON string.

    ``data`` is wrapped in a single-element list and fed to the input tensor
    recorded by ``init``. The second fetched output is serialized.
    """
    feed = {input_name: [data]}
    outputs = session.run(output_name, feed)
    # output_name is a list, so there is one result per output tensor;
    # the entry at index 1 is the one returned.
    return json.dumps(outputs[1].tolist())

if __name__ == "__main__":
    # Local run: load the model, then score the raw bytes of test_image.jpg.
    init()
    with open("test_image.jpg", 'rb') as image_file:
        image_bytes = image_file.read()
        print(score(image_bytes))

Now create the deployment configuration objects and deploy the model as a webservice.

In [None]:
# Set the web service configuration (using default here)
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AksWebservice
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.environment import Environment, DEFAULT_GPU_IMAGE

# Environment used for the deployment.
env = Environment('deploytocloudenv')

# Please see [Azure ML Containers repository](https://github.com/Azure/AzureML-Containers#featured-tags)
# for open-sourced GPU base images.
env.docker.base_image = DEFAULT_GPU_IMAGE

# Conda installs tensorflow-gpu and numpy; pip installs the two azureml packages.
env.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['tensorflow-gpu==1.12.0', 'numpy'],
    pip_packages=['azureml-contrib-services', 'azureml-defaults'],
)

# score.py is the entry script, run inside the environment above.
inference_config = InferenceConfig(entry_script="score.py", environment=env)

# AKS deployment settings, all left at their defaults.
aks_config = AksWebservice.deploy_configuration()

# # Enable token auth and disable (key) auth on the webservice
# aks_config = AksWebservice.deploy_configuration(token_auth_enabled=True, auth_enabled=False)

In [None]:
%%time
aks_service_name = 'gpu-rn50'

# Deploy the registered model to the AKS cluster with the inference and
# deployment configurations prepared earlier.
aks_service = Model.deploy(
    workspace=ws,
    name=aks_service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aks_config,
    deployment_target=gpu_cluster,
)

# Wait for the deployment while showing its output, then report the state.
aks_service.wait_for_deployment(show_output=True)
print(aks_service.state)

# Test the web service
We test the web service by passing it the content of a test image.

In [None]:
%%time
import requests

# if (key) auth is enabled, fetch keys and include in the request
key1, key2 = aks_service.get_keys()

headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}

# # if token auth is enabled, fetch token and include in the request
# access_token, fetch_after = aks_service.get_token()
# headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + access_token}

# Read the image through a context manager so the file handle is closed
# as soon as the bytes are in memory.
with open('snowleopardgaze.jpg', 'rb') as image_file:
    test_sample = image_file.read()

# POST the raw image bytes to the scoring endpoint.
resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)

# Show the outcome; without this the test cell produces no visible result.
print(resp.status_code, resp.text)

# Clean up
Delete the service, model and compute target.

In [None]:
%%time
# Delete the web service, the registered model and the AKS compute target.
# NOTE(review): presumably the service has to go first because it uses the
# model and runs on the cluster -- confirm before reordering.
# NOTE(review): gpu_cluster may be a pre-existing cluster that the provisioning
# cell found rather than created; deleting it also removes it for any other
# deployments that reuse it.
aks_service.delete()
model.delete()
gpu_cluster.delete()
