diff --git a/work-with-data/dataprep/how-to-guides/replace-fill-error.ipynb b/work-with-data/dataprep/how-to-guides/replace-fill-error.ipynb deleted file mode 100644 index 04dad995..00000000 --- a/work-with-data/dataprep/how-to-guides/replace-fill-error.ipynb +++ /dev/null @@ -1,239 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/work-with-data/dataprep/how-to-guides/replace-fill-error.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Replace, Fill, Error\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can use the methods in this notebook to change values in your dataset.\n", - "\n", - "* replace - use this method to replace a value with another value. You can also use this to replace null with a value, or a value with null\n", - "* error - use this method to replace a value with an error.\n", - "* fill_nulls - this method lets you fill all nulls in a column with a certain value.\n", - "* fill_errors - this method lets you fill all errors in a column with a certain value." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import azureml.dataprep as dprep" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow = dprep.read_csv('../data/crime-spring.csv')\n", - "dflow.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow = dflow.to_datetime('Date', ['%m/%d/%Y %H:%M'])\n", - "dflow = dflow.to_number(['IUCR', 'District', 'FBI Code'])\n", - "dflow.head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Replace " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### String\n", - "Use `replace` to swap a string value with another string value." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow = dflow.replace('Primary Type', 'THEFT', 'STOLEN')\n", - "head = dflow.head(5)\n", - "head" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use `replace` to remove a certain string value from the column, replacing it with null. Note that pandas displays null values as `None`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow = dflow.replace('Primary Type', 'DECEPTIVE PRACTICE', None)\n", - "head = dflow.head(5)\n", - "head" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Numeric\n", - "Use `replace` to swap a numeric value with another numeric value." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow = dflow.replace('District', 5, 1)\n", - "head = dflow.head(5)\n", - "head" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Date\n", - "Use `replace` to swap in a new Date for an existing Date in the data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from datetime import datetime, timezone\n", - "dflow = dflow.replace('Date', \n", - " datetime(2016, 4, 15, 9, 0, tzinfo=timezone.utc), \n", - " datetime(2018, 7, 4, 0, 0, tzinfo=timezone.utc))\n", - "head = dflow.head(5)\n", - "head" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Error \n", - "\n", - "The `error` method lets you create Error values. Pass this method the value that you want to find, along with the error code to use in any Error values created." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow = dflow.error('IUCR', 890, 'Invalid value')\n", - "head = dflow.head(5)\n", - "head" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Fill Nulls \n", - "\n", - "Use the `fill_nulls` method to replace all null values in columns with another value. This is similar to the pandas `fillna()` method." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow = dflow.fill_nulls('Primary Type', 'N/A')\n", - "head = dflow.head(5)\n", - "head" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Fill Errors \n", - "\n", - "Use the `fill_errors` method to replace all error values in columns with another value." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow = dflow.fill_errors('IUCR', -1)\n", - "head = dflow.head(5)\n", - "head" - ] - } - ], - "metadata": { - "authors": [ - { - "name": "sihhu" - } - ], - "kernelspec": { - "display_name": "Python 3.6", - "language": "python", - "name": "python36" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.4" - }, - "notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License." - }, - "nbformat": 4, - "nbformat_minor": 2 -} \ No newline at end of file