Files
MachineLearningNotebooks/work-with-data/datasets/datasets-tutorial/train-dataset/train.py

38 lines
1.2 KiB
Python

import azureml.dataprep as dprep
import azureml.core
import pandas as pd
import logging
import os
import datetime
import shutil
from azureml.core import Workspace, Datastore, Dataset, Experiment, Run
from sklearn.model_selection import train_test_split
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from sklearn.tree import DecisionTreeClassifier
# Training script: fetch a named dataset from the workspace of the current
# run, fit a decision tree on a few selected columns, and print the
# accuracy on the training and test partitions.

# Resolve the workspace through the run context this script executes under.
run = Run.get_context()
workspace = run.experiment.workspace

# Look the dataset up by name and materialise it as a pandas DataFrame.
dataset_name = 'clean_Titanic_tutorial'
dataset = Dataset.get(workspace=workspace, name=dataset_name)
df = dataset.to_pandas_dataframe()

# Feature columns and label column. The label is selected with a
# one-element list, so y_df is a single-column DataFrame, not a Series.
x_col = ['Pclass', 'Sex', 'SibSp', 'Parch']
y_col = ['Survived']
x_df = df[x_col]
y_df = df[y_col]

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_df,
    y_df,
    test_size=0.2,
    random_state=223,
)

# Group both partitions under one mapping keyed by partition name.
data = {
    "train": {"X": x_train, "y": y_train},
    "test": {"X": x_test, "y": y_test},
}

# Fit a decision tree with default hyper-parameters on the training part.
clf = DecisionTreeClassifier()
clf.fit(data["train"]["X"], data["train"]["y"])

# Report mean accuracy on each partition, training set first.
train_accuracy = clf.score(x_train, y_train)
print('Accuracy of Decision Tree classifier on training set: {:.2f}'.format(train_accuracy))
test_accuracy = clf.score(x_test, y_test)
print('Accuracy of Decision Tree classifier on test set: {:.2f}'.format(test_accuracy))