Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer-parallel-run.png)

# Neural style transfer on video
Using modified code from `pytorch`'s neural style [example](https://pytorch.org/tutorials/advanced/neural_style_tutorial.html), we show how to set up a pipeline for doing style transfer on video. The pipeline has the following steps:
1. Split a video into images
2. Run neural style on each image using one of the provided models (from `pytorch` pretrained models for this example).
3. Stitch the images back into a video.

> **Tip**
If your system requires low-latency processing (to process a single document or small set of documents quickly), use [real-time scoring](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-consume-web-service) instead of batch prediction.

## Prerequisites
If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc. 

## Initialize Workspace

Initialize a workspace object from persisted configuration.

In [None]:
# Report which Azure ML core SDK version this notebook is running against
import azureml.core

sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

In [None]:
from azureml.core import Workspace, Experiment

# Load the workspace from the persisted configuration and print a short
# summary of it, one property per line.
ws = Workspace.from_config()
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print(*workspace_summary, sep='\n')

In [None]:
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.datastore import Datastore
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep
from azureml.core.runconfig import CondaDependencies, RunConfiguration
from azureml.core.compute_target import ComputeTargetException

# Download models

In [None]:
import os

# Create the local directory that will hold the downloaded model files.
# makedirs(..., exist_ok=True) does nothing when the directory already exists,
# so no separate existence check is needed and re-running the cell is safe.
model_dir = 'models'
os.makedirs(model_dir, exist_ok=True)

In [None]:
import urllib.request

def download_model(model_name):
    """Download the model file `model_name` into the local `model_dir` folder.

    The models come from
    https://pytorch.org/tutorials/advanced/neural_style_tutorial.html and are
    kept in the blob container addressed below.
    """
    base_url = "https://pipelinedata.blob.core.windows.net/styletransfer/saved_models/"
    destination = os.path.join(model_dir, model_name)
    urllib.request.urlretrieve(base_url + model_name, destination)

# Register all Models

In [None]:
from azureml.core.model import Model

# List the models carrying a 'scenario' tag and remember the first 'mosaic'
# and the first 'candy' entry encountered.
models = Model.list(workspace=ws, tags=['scenario'])
found_models = {}
for m in models:
    print("Name:", m.name,"\tVersion:", m.version, "\tDescription:", m.description, m.tags)
    if m.name in ('mosaic', 'candy'):
        # setdefault keeps the first match and ignores later ones
        found_models.setdefault(m.name, m)

mosaic_model = found_models.get('mosaic')
candy_model = found_models.get('candy')

# Download and register the mosaic model only when none was found above.
if mosaic_model is not None:
    print('Reusing existing mosaic model')
else:
    print('Mosaic model does not exist, registering it')
    download_model('mosaic.pth')
    mosaic_model = Model.register(
        model_path=os.path.join(model_dir, "mosaic.pth"),
        model_name="mosaic",
        tags={'type': "mosaic", 'scenario': "Style transfer using batch inference"},
        description="Style transfer - Mosaic",
        workspace=ws,
    )

# Same for the candy model.
if candy_model is not None:
    print('Reusing existing candy model')
else:
    print('Candy model does not exist, registering it')
    download_model('candy.pth')
    candy_model = Model.register(
        model_path=os.path.join(model_dir, "candy.pth"),
        model_name="candy",
        tags={'type': "candy", 'scenario': "Style transfer using batch inference"},
        description="Style transfer - Candy",
        workspace=ws,
    )

# Create or use existing compute

In [None]:
def _get_or_create_cluster(cluster_name, vm_size, max_nodes):
    """Return the AmlCompute cluster `cluster_name`, creating it if the lookup fails.

    A new cluster is provisioned with the given `vm_size` and `max_nodes`, and
    the call waits (showing output) until provisioning has completed.
    """
    try:
        cluster = AmlCompute(ws, cluster_name)
    except ComputeTargetException:
        print("creating new cluster")
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            max_nodes=max_nodes,
        )
        # create the cluster
        cluster = ComputeTarget.create(ws, cluster_name, provisioning_config)
        cluster.wait_for_completion(show_output=True)
    else:
        print("found existing cluster.")
    return cluster


# AmlCompute cluster used by the split and stitch steps
cpu_cluster_name = "cpu-cluster"
cpu_cluster = _get_or_create_cluster(cpu_cluster_name, "STANDARD_D2_v2", 1)

# AmlCompute cluster used by the style-transfer step
gpu_cluster_name = "gpu-cluster"
gpu_cluster = _get_or_create_cluster(gpu_cluster_name, "STANDARD_NC6", 3)

# Python Scripts
We use an edited version of `neural_style_mpi.py` (original is [here](https://github.com/pytorch/examples/blob/master/fast_neural_style/neural_style/neural_style.py)). Scripts to split and stitch the video are thin wrappers to calls to `ffmpeg`. 

We install `ffmpeg` through conda dependencies.

In [None]:
# Local folder holding the step scripts; it is used as the source directory of the pipeline steps
scripts_folder = "scripts"

In [None]:
process_video_script_file = "process_video.py"

# Print the script that the "split video" step runs
process_video_script_path = os.path.join(scripts_folder, process_video_script_file)
with open(process_video_script_path) as script:
    script_text = script.read()
print(script_text)

In [None]:
stitch_video_script_file = "stitch_video.py"

# Print the script that the "stitch" step runs
stitch_video_script_path = os.path.join(scripts_folder, stitch_video_script_file)
with open(stitch_video_script_path) as script:
    script_text = script.read()
print(script_text)

The sample video **orangutan.mp4** is stored in a publicly shared datastore, which we register below. If you want to take a look at the original video, click [here](https://pipelinedata.blob.core.windows.net/sample-videos/orangutan.mp4).

In [None]:
# Register the blob container holding the input video as the "videos" datastore
account_name = "pipelinedata"
video_ds = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name="videos",
    container_name="sample-videos",
    account_name=account_name,
    overwrite=True,
)

# The default blob store attached to the workspace
default_datastore = ws.get_default_datastore()

# Sample video

In [None]:
# The video file name can be overridden through the STYLE_TRANSFER_VIDEO_NAME
# environment variable; it defaults to the sample video.
video_name = os.environ.get("STYLE_TRANSFER_VIDEO_NAME", "orangutan.mp4")
orangutan_video = DataReference(
    datastore=video_ds,
    data_reference_name="video",
    path_on_datastore=video_name,
    mode="download",
)

In [None]:
# Conda dependencies for the split/stitch steps: ffmpeg is pulled from the conda-forge channel
cd = CondaDependencies()

cd.add_channel("conda-forge")
cd.add_conda_package("ffmpeg==4.0.2")

# Run configuration built on the conda dependencies above; it is passed to both PythonScriptSteps
amlcompute_run_config = RunConfiguration(conda_dependencies=cd)
amlcompute_run_config.environment.docker.base_image = "pytorch/pytorch"
amlcompute_run_config.environment.spark.precache_packages = False

In [None]:
# Intermediate and final pipeline outputs, all placed on the workspace's default datastore
ffmpeg_audio = PipelineData(name="ffmpeg_audio", datastore=default_datastore)
processed_images = PipelineData(name="processed_images", datastore=default_datastore)
output_video = PipelineData(name="output_video", datastore=default_datastore)

# NOTE(review): ffmpeg_images_ds_name is not referenced anywhere else in the visible notebook — confirm it is still needed
ffmpeg_images_ds_name = "ffmpeg_images_data"
ffmpeg_images = PipelineData(name="ffmpeg_images", datastore=default_datastore)
# Dataset form of the split-step image output; it is the input of the parallel style-transfer step
ffmpeg_images_file_dataset = ffmpeg_images.as_dataset()

# Define tweakable parameters to pipeline
These parameters can be changed when the pipeline is published and rerun from a REST call.
As part of ParallelRunStep, the following 2 pipeline parameters will be created, which can be used to override values:
 node_count
 process_count_per_node

In [None]:
from azureml.pipeline.core.graph import PipelineParameter
# create a parameter for style (one of "candy", "mosaic") to transfer the images to
style_param = PipelineParameter(name="style", default_value="mosaic")
# create a parameter for the number of nodes to use in step no. 2 (style transfer)
# NOTE(review): callers overriding this value presumably have to use the exact name "nodecount" — confirm whether parameter names are case-sensitive
nodecount_param = PipelineParameter(name="nodecount", default_value=2)

In [None]:
# Step 1: run process_video.py on the input video, producing an audio output
# and an images output.
split_video_arguments = [
    "--input_video", orangutan_video,
    "--output_audio", ffmpeg_audio,
    "--output_images", ffmpeg_images_file_dataset,
]
split_video_step = PythonScriptStep(
    name="split video",
    source_directory=scripts_folder,
    script_name="process_video.py",
    arguments=split_video_arguments,
    inputs=[orangutan_video],
    outputs=[ffmpeg_images_file_dataset, ffmpeg_audio],
    compute_target=cpu_cluster,
    runconfig=amlcompute_run_config,
)

# Step 3: run stitch_video.py on the processed images and the audio output
# of step 1, producing the output video.
stitch_video_arguments = [
    "--images_dir", processed_images,
    "--input_audio", ffmpeg_audio,
    "--output_dir", output_video,
]
stitch_video_step = PythonScriptStep(
    name="stitch",
    source_directory=scripts_folder,
    script_name="stitch_video.py",
    arguments=stitch_video_arguments,
    inputs=[processed_images, ffmpeg_audio],
    outputs=[output_video],
    compute_target=cpu_cluster,
    runconfig=amlcompute_run_config,
)

# Create environment, parallel step run config and parallel run step

In [None]:
from azureml.core import Environment
from azureml.core.runconfig import DEFAULT_GPU_IMAGE

# Conda and pip dependencies for the style-transfer step
parallel_cd = CondaDependencies()
parallel_cd.add_channel("pytorch")
for conda_package in (
    "pytorch",
    "torchvision",
    "pillow<7",  # needed for torchvision==0.4.0
):
    parallel_cd.add_conda_package(conda_package)
for pip_package in ("azureml-core", "azureml-dataset-runtime[fuse]"):
    parallel_cd.add_pip_package(pip_package)

# Environment combining the default GPU base image with the dependencies above
styleenvironment = Environment(name="styleenvironment")
styleenvironment.docker.base_image = DEFAULT_GPU_IMAGE
styleenvironment.python.conda_dependencies = parallel_cd

In [None]:
from azureml.pipeline.core import PipelineParameter
from azureml.pipeline.steps import ParallelRunConfig

# Configuration of the parallel style-transfer step: what to run, where to
# run it, and how the work is split up.
parallel_run_config = ParallelRunConfig(
    source_directory=scripts_folder,
    entry_script='transform.py',
    environment=styleenvironment,
    compute_target=gpu_cluster,
    node_count=nodecount_param,  # driven by the "nodecount" pipeline parameter
    process_count_per_node=2,
    mini_batch_size="1",
    error_threshold=1,
    output_action='summary_only',
)

In [None]:
from azureml.pipeline.steps import ParallelRunStep
from datetime import datetime

# The step name carries a timestamp with minute resolution
parallel_step_name = f"styletransfer-{datetime.now():%Y%m%d%H%M}"

# Step 2: apply the selected style to the images coming out of the split step
distributed_style_transfer_step = ParallelRunStep(
    name=parallel_step_name,
    parallel_run_config=parallel_run_config,
    inputs=[ffmpeg_images_file_dataset],  # input file share/blob container/file dataset
    output=processed_images,  # output file share/blob container
    arguments=["--style", style_param],
    allow_reuse=False,  # optional - default value True
)

# Run the pipeline

In [None]:
# Only the final (stitch) step is listed explicitly.
# NOTE(review): the split and style-transfer steps are presumably pulled in through the PipelineData the stitch step consumes — confirm
pipeline = Pipeline(workspace=ws, steps=[stitch_video_step])

# Validate the pipeline before submitting it
pipeline.validate()

In [None]:
# Submit the pipeline under its own experiment; no PipelineParameter
# overrides are passed in this submission.
mosaic_experiment = Experiment(ws, 'styletransfer_parallel_mosaic')
pipeline_run = mosaic_experiment.submit(pipeline)

# Monitor pipeline run

The pipeline run status can be checked in the Azure Machine Learning portal (https://ml.azure.com). The link to the pipeline run can be retrieved by inspecting the `pipeline_run` object.



In [None]:
# Displays information about the pipeline run, including the link to its details page in the portal.
pipeline_run

### Optional: View detailed logs (streaming) 

In [None]:
# Wait for the run to complete and show its output log in the console
pipeline_run.wait_for_completion(show_output=True)

# Download output video

Download the stitched video (the `output_video` pipeline output) to a local folder.

In [None]:
def download_video(run, target_dir=None):
    """Download the `output_video` output of the stitch step of `run`.

    The first step run matching the stitch step's name is used, and its
    output is downloaded into `target_dir` with progress shown.
    """
    matching_step_runs = run.find_step_run(stitch_video_step.name)
    stitch_output = matching_step_runs[0].get_output_data(output_video.name)
    stitch_output.download(target_dir, show_progress=True)

In [None]:
# Make sure the run has finished before fetching its output, then download the video into a local folder
pipeline_run.wait_for_completion()
download_video(pipeline_run, "output_video_mosaic")

# Publish pipeline

In [None]:
pipeline_name = "style-transfer-batch-inference"
print(pipeline_name)

# Publish the pipeline; its name is reused as its description
published_pipeline = pipeline.publish(name=pipeline_name, description=pipeline_name)
published_id = published_pipeline.id
print("Newly published pipeline id: {}".format(published_id))

# Get published pipeline
This is another way to get the published pipeline.

In [None]:
from azureml.pipeline.core import PublishedPipeline

# You could retrieve all pipelines that are published, or just get the
# published pipeline object that you have the ID for. Here we list them all.
all_pub_pipelines = PublishedPipeline.list(ws)

# Print every published pipeline and keep the last one in the list whose
# name equals pipeline_name.
print("Published pipelines found in the workspace:")
for pub_pipeline in all_pub_pipelines:
    print("Name:", pub_pipeline.name,"\tDescription:", pub_pipeline.description, "\tId:", pub_pipeline.id, "\tStatus:", pub_pipeline.status)
    if pub_pipeline.name != pipeline_name:
        continue
    published_pipeline = pub_pipeline

print("Published pipeline id: {}".format(published_pipeline.id))

# Run pipeline through REST calls for other styles

# Get AAD token

In [None]:
from azureml.core.authentication import InteractiveLoginAuthentication
import requests

# Obtain the authentication header; it is passed as the `headers` of the REST request that triggers the published pipeline
auth = InteractiveLoginAuthentication()
aad_token = auth.get_authentication_header()

# Get endpoint URL

In [None]:
# REST endpoint of the published pipeline; requests posted to it trigger a run
rest_endpoint = published_pipeline.endpoint
print("Pipeline REST endpoint: {}".format(rest_endpoint))

# Send request and monitor

In [None]:
from azureml.pipeline.core.run import PipelineRun

experiment_name = 'styletransfer_parallel_candy'

# Trigger the published pipeline with the "candy" style on 3 nodes.
# Keys under "ParameterAssignments" use the names the PipelineParameters were
# declared with ("style" and "nodecount").
response = requests.post(
    rest_endpoint,
    headers=aad_token,
    json={
        "ExperimentName": experiment_name,
        "ParameterAssignments": {"style": "candy", "nodecount": 3},
    },
)

# Surface HTTP errors (e.g. authentication or validation failures) here,
# rather than failing later on a missing "Id" key in the response body.
response.raise_for_status()
run_id = response.json()["Id"]

published_pipeline_run_candy = PipelineRun(ws.experiments[experiment_name], run_id)

# Show detail information of run
published_pipeline_run_candy

# Download output from re-run

In [None]:
# Block until the candy-style run triggered through the REST endpoint has completed
published_pipeline_run_candy.wait_for_completion()

In [None]:
# Download the video produced by the candy-style run into its own local folder
download_video(published_pipeline_run_candy, target_dir="output_video_candy")