# MachineLearningNotebooks/how-to-use-azureml/deployment/spark/score.py

import traceback
from pyspark.ml.linalg import VectorUDT
from azureml.core.model import Model
from pyspark.ml.classification import LogisticRegressionModel
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType
from pyspark.sql import SQLContext
from pyspark import SparkContext

# Obtain the Spark context via getOrCreate() and derive the SQL context and
# session from it.
sc = SparkContext.getOrCreate()
sqlContext = SQLContext(sc)
spark = sqlContext.sparkSession

# Schema applied to incoming scoring payloads: a feature vector column plus a
# double-valued label column.
input_schema = StructType([
    StructField("features", VectorUDT()),
    StructField("label", DoubleType()),
])

# DataFrameReader.schema() configures the reader and hands the same reader
# back, so the call is chained directly onto spark.read.
reader = spark.read.schema(input_schema)


def init():
    """Load the registered model into the module-level ``model`` global.

    Resolves the model registered as "iris.model" to a path through
    ``Model.get_model_path`` and loads it as a ``LogisticRegressionModel``.
    ``run`` reads the resulting ``model`` global.
    """
    global model
    # "iris.model" is the name the model was registered under in the workspace;
    # get_model_path is expected to resolve it to a location on the local disk.
    registered_name = 'iris.model'
    # Load the saved artifact back into a LogisticRegression model.
    model = LogisticRegressionModel.load(Model.get_model_path(registered_name))


def run(data):
    """Score one request payload and return the first row's prediction.

    Args:
        data: The request payload; it is wrapped in a one-element RDD and
            parsed by the module-level JSON ``reader`` (presumably a JSON
            string -- confirm against the caller).

    Returns:
        The ``'prediction'`` value of the first row produced by
        ``model.transform``. If anything raises, the traceback is printed and
        the exception's message is returned as a string instead.
    """
    try:
        frame = reader.json(sc.parallelize([data]))
        scored = model.transform(frame)
        rows = scored.collect()
        # you can return any datatype as long as it is JSON-serializable
        return rows[0]['prediction']
    except Exception as exc:
        # Report the failure to the caller as text rather than raising.
        traceback.print_exc()
        return str(exc)