From 4c6a28e4ed4dfbd9a9163eedbe7cebcdc2253f96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Shan=C3=A9=20Winner?= <43390034+swinner95@users.noreply.github.com> Date: Wed, 21 Aug 2019 10:15:25 -0700 Subject: [PATCH] Delete label-encoder.ipynb --- .../how-to-guides/label-encoder.ipynb | 168 ------------------ 1 file changed, 168 deletions(-) delete mode 100644 work-with-data/dataprep/how-to-guides/label-encoder.ipynb diff --git a/work-with-data/dataprep/how-to-guides/label-encoder.ipynb b/work-with-data/dataprep/how-to-guides/label-encoder.ipynb deleted file mode 100644 index bc7b78c1..00000000 --- a/work-with-data/dataprep/how-to-guides/label-encoder.ipynb +++ /dev/null @@ -1,168 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/work-with-data/dataprep/how-to-guides/label-encoder.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Label Encoder\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Data Prep has the ability to encode labels with values between 0 and (number of classes - 1) using `label_encode`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import azureml.dataprep as dprep\n", - "from datetime import datetime\n", - "dflow = dprep.read_csv(path='../data/crime-spring.csv')\n", - "dflow.head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To use `label_encode` from a Dataflow, simply specify the source column and the new column name. `label_encode` will figure out all the distinct values or classes in the source column, and it will return a new Dataflow with a new column containing the labels." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow = dflow.label_encode(source_column='Primary Type', new_column_name='Primary Type Label')\n", - "dflow.head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To have more control over the encoded labels, create a builder with `dataflow.builders.label_encode`.\n", - "The builder allows you to preview and modify the encoded labels before generating a new Dataflow with the results. \n", - "To get started, create a builder object with `dataflow.builders.label_encode` specifying the source column and the new column name. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "builder = dflow.builders.label_encode(source_column='Location Description', new_column_name='Location Description Label')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To generate the encoded labels, call the `learn` method on the builder object:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "builder.learn()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To check the result, access the generated labels through the property `encoded_labels`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "builder.encoded_labels" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To modify the generated results, just assign a new value to `encoded_labels`. The following example adds a missing label not found in the sample data. `builder.encoded_labels` is saved into a variable `encoded_labels`, modified, and assigned back to `builder.encoded_labels`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "encoded_labels = builder.encoded_labels\n", - "encoded_labels['TOWNHOUSE'] = 6\n", - "\n", - "builder.encoded_labels = encoded_labels\n", - "builder.encoded_labels" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once the desired results are achieved, call `builder.to_dataflow` to get the new Dataflow with the encoded labels." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataflow = builder.to_dataflow()\n", - "dataflow.head(5)" - ] - } - ], - "metadata": { - "authors": [ - { - "name": "sihhu" - } - ], - "kernelspec": { - "display_name": "Python 3.6", - "language": "python", - "name": "python36" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.4" - }, - "notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License." - }, - "nbformat": 4, - "nbformat_minor": 2 -} \ No newline at end of file