mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-23 02:52:39 -05:00
Delete replace-fill-error.ipynb
This commit is contained in:
@@ -1,239 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Replace, Fill, Error\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can use the methods in this notebook to change values in your dataset.\n",
|
||||
"\n",
|
||||
"* <a href='#replace'>replace</a> - use this method to replace a value with another value. You can also use this to replace null with a value, or a value with null\n",
|
||||
"* <a href='#error'>error</a> - use this method to replace a value with an error.\n",
|
||||
"* <a href='#fill_nulls'>fill_nulls</a> - this method lets you fill all nulls in a column with a certain value.\n",
|
||||
"* <a href='#fill_errors'>fill_errors</a> - this method lets you fill all errors in a column with a certain value."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.dataprep as dprep"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dflow = dprep.read_csv('../data/crime-spring.csv')\n",
|
||||
"dflow.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dflow = dflow.to_datetime('Date', ['%m/%d/%Y %H:%M'])\n",
|
||||
"dflow = dflow.to_number(['IUCR', 'District', 'FBI Code'])\n",
|
||||
"dflow.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Replace <a id='replace'></a>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### String\n",
|
||||
"Use `replace` to swap a string value with another string value."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dflow = dflow.replace('Primary Type', 'THEFT', 'STOLEN')\n",
|
||||
"head = dflow.head(5)\n",
|
||||
"head"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use `replace` to remove a certain string value from the column, replacing it with null. Note that Pandas shows null values as None."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dflow = dflow.replace('Primary Type', 'DECEPTIVE PRACTICE', None)\n",
|
||||
"head = dflow.head(5)\n",
|
||||
"head"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Numeric\n",
|
||||
"Use `replace` to swap a numeric value with another numeric value."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dflow = dflow.replace('District', 5, 1)\n",
|
||||
"head = dflow.head(5)\n",
|
||||
"head"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Date\n",
|
||||
"Use `replace` to swap in a new Date for an existing Date in the data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime, timezone\n",
|
||||
"dflow = dflow.replace('Date', \n",
|
||||
" datetime(2016, 4, 15, 9, 0, tzinfo=timezone.utc), \n",
|
||||
" datetime(2018, 7, 4, 0, 0, tzinfo=timezone.utc))\n",
|
||||
"head = dflow.head(5)\n",
|
||||
"head"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Error <a id='error'></a>\n",
|
||||
"\n",
|
||||
"The `error` method lets you create Error values. You can pass to this function the value that you want to find, along with the Error code to use in any Errors created."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dflow = dflow.error('IUCR', 890, 'Invalid value')\n",
|
||||
"head = dflow.head(5)\n",
|
||||
"head"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fill Nulls <a id='fill_nulls'></a>\n",
|
||||
"\n",
|
||||
"Use the `fill_nulls` method to replace all null values in columns with another value. This is similar to Panda's fillna() method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dflow = dflow.fill_nulls('Primary Type', 'N/A')\n",
|
||||
"head = dflow.head(5)\n",
|
||||
"head"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fill Errors <a id='fill_errors'></a>\n",
|
||||
"\n",
|
||||
"Use the `fill_errors` method to replace all error values in columns with another value."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dflow = dflow.fill_errors('IUCR', -1)\n",
|
||||
"head = dflow.head(5)\n",
|
||||
"head"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sihhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.4"
|
||||
},
|
||||
"notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License."
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user