{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Copyright (c) Microsoft Corporation. All rights reserved. \n",
    "\n",
    "Licensed under the MIT License."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/azure-arcadia/Synapse_Session_Scala_Support.png)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Interactive Spark Session on Synapse Spark Pool"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -U \"azureml-synapse\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For JupyterLab, please additionally run:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!jupyter lab build --minimize=False"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## PLEASE restart the kernel and then refresh the web page before starting the Spark session."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 0. Magic Usage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-06-05T03:22:14.965395Z",
     "iopub.status.busy": "2020-06-05T03:22:14.965395Z",
     "iopub.status.idle": "2020-06-05T03:22:14.970398Z",
     "shell.execute_reply": "2020-06-05T03:22:14.969397Z",
     "shell.execute_reply.started": "2020-06-05T03:22:14.965395Z"
    },
    "gather": {
     "logged": 1615594584642
    }
   },
   "outputs": [],
   "source": [
    "# show help\n",
    "%synapse ?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1. Start Synapse Session"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "gather": {
     "logged": 1615577715289
    }
   },
   "outputs": [],
   "source": [
    "# linktestpool is a sample pool name; replace it with your own attached Synapse Spark pool\n",
    "%synapse start -c linktestpool --start-timeout 1000"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "source": [
    "# 2. Use Scala"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "source": [
    "## (1) Read Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false,
     "source_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "%%synapse scala\n",
    "\n",
    "// Read the Titanic CSV, treating its first row as the header\n",
    "val df = spark.read.option(\"header\", \"true\").csv(\"wasbs://demo@dprepdata.blob.core.windows.net/Titanic.csv\")\n",
    "df.show(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## (2) Use Scala SQL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false,
     "source_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "%%synapse scala\n",
    "\n",
    "// Register the DataFrame as a temp view so it can be queried with SQL\n",
    "df.createOrReplaceTempView(\"titanic\")\n",
    "val sqlDF = spark.sql(\"SELECT Name, Fare from titanic\")\n",
    "sqlDF.show(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3. Stop Session"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false,
     "source_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "%synapse stop"
   ]
  }
 ],
 "metadata": {
  "authors": [
   {
    "name": "feli1"
   }
  ],
  "kernelspec": {
   "display_name": "Python 3.8 - AzureML",
   "language": "python",
   "name": "python38-azureml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "nteract": {
   "version": "0.28.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}