mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-21 10:05:09 -05:00
38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
import azureml.dataprep as dprep
|
|
import azureml.core
|
|
import pandas as pd
|
|
import logging
|
|
import os
|
|
import datetime
|
|
import shutil
|
|
|
|
from azureml.core import Workspace, Datastore, Dataset, Experiment, Run
|
|
from sklearn.model_selection import train_test_split
|
|
from azureml.core.compute import ComputeTarget, AmlCompute
|
|
from azureml.core.compute_target import ComputeTargetException
|
|
from sklearn.tree import DecisionTreeClassifier
|
|
|
|
# --- Resolve the Azure ML context this script is executing under -----------
run = Run.get_context()
workspace = run.experiment.workspace

# --- Fetch the dataset by name and materialize it as a pandas DataFrame ----
dataset_name = 'clean_Titanic_tutorial'
# NOTE(review): Dataset.get appears to be deprecated in the azureml-core docs
# in favor of Dataset.get_by_name -- confirm before migrating.
dataset = Dataset.get(workspace=workspace, name=dataset_name)
df = dataset.to_pandas_dataframe()

# --- Select feature and target columns -------------------------------------
# NOTE(review): presumably 'Sex' is already numerically encoded in the
# cleaned dataset, since it is fed straight to the tree -- verify upstream.
x_col = ['Pclass', 'Sex', 'SibSp', 'Parch']
y_col = ['Survived']
x_df = df.loc[:, x_col]
y_df = df.loc[:, y_col]

# --- Split: 20% of the rows are held out; the seed is fixed ----------------
x_train, x_test, y_train, y_test = train_test_split(
    x_df,
    y_df,
    test_size=0.2,
    random_state=223,
)

# Group the four partitions by split name.
data = {
    "train": {"X": x_train, "y": y_train},
    "test": {"X": x_test, "y": y_test},
}

# --- Fit a decision tree with default hyperparameters on the train split ---
clf = DecisionTreeClassifier()
clf.fit(data["train"]["X"], data["train"]["y"])

# --- Report accuracy on both splits, rounded to two decimals ---------------
train_accuracy = clf.score(x_train, y_train)
test_accuracy = clf.score(x_test, y_test)
print('Accuracy of Decision Tree classifier on training set: {:.2f}'.format(train_accuracy))
print('Accuracy of Decision Tree classifier on test set: {:.2f}'.format(test_accuracy))
|