Python* API Reference for Intel® Data Analytics Acceleration Library 2019 Update 5

dt_reg_traverse_model.py

Deprecation Notice: With the introduction of daal4py, a package that supersedes PyDAAL, Intel is deprecating PyDAAL and will discontinue support starting with Intel® DAAL 2021 and Intel® Distribution for Python 2021. Until then, Intel will continue to provide compatible PyDAAL pip and conda packages for newer releases of Intel DAAL and make them available in open source. However, Intel will not add the new features of Intel DAAL to PyDAAL. Intel recommends that developers switch to and use daal4py.

Note: To find daal4py examples, refer to the daal4py documentation or browse the GitHub repository.

1 # file: dt_reg_traverse_model.py
2 #===============================================================================
3 # Copyright 2014-2019 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 #
17 # ! Content:
18 # ! Python example of decision tree regression model traversal.
19 # !
20 # ! The program trains the decision tree regression model on a training
21 # ! data set and prints the trained model by traversing it depth-first.
22 # !*****************************************************************************
23 
24 #
25 ## <a name = "DAAL-EXAMPLE-PY-DT_REG_TRAVERSE_MODEL"></a>
26 ## \example dt_reg_traverse_model.py
27 #
28 
29 from __future__ import print_function
30 
31 from daal.algorithms import regression
32 from daal.algorithms import decision_tree
33 import daal.algorithms.decision_tree.regression
34 import daal.algorithms.decision_tree.regression.training
35 
36 from daal.data_management import FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
37 
38 # Input data set parameters: paths of the CSV files read by trainModel()
39 trainDatasetFileName = "../data/batch/decision_tree_train.csv"
40 pruneDatasetFileName = "../data/batch/decision_tree_prune.csv"
41 
42 nFeatures = 5 # Number of feature columns in the training and pruning data sets
43 
44 
def trainModel():
    """Train a decision tree regression model and return the training result.

    Reads the CSV files named by ``trainDatasetFileName`` and
    ``pruneDatasetFileName``. Each file is loaded through a merged table made
    of an ``nFeatures``-column table and a one-column table, which are then
    passed to the algorithm as the data and the dependent variables.

    Returns:
        Whatever ``decision_tree.regression.training.Batch().compute()``
        returns (the caller reads the trained model out of it).
    """
    # Short alias for the training namespace used throughout this function
    dtTraining = decision_tree.regression.training

    # --- Training set -------------------------------------------------------
    # Data source that reads the training .csv file
    trainSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Separate tables for the features and the dependent variable, combined
    # into one merged table that the data source loads into
    trainFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    trainResponses = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    trainTable = MergedNumericTable(trainFeatures, trainResponses)
    trainSource.loadDataBlock(trainTable)

    # --- Pruning set --------------------------------------------------------
    # Data source that reads the pruning .csv file
    pruneSource = FileDataSource(
        pruneDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Same layout as the training set: features + dependent variable
    pruneFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    pruneResponses = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    pruneTable = MergedNumericTable(pruneFeatures, pruneResponses)
    pruneSource.loadDataBlock(pruneTable)

    # --- Training -----------------------------------------------------------
    # Algorithm object that trains the decision tree model
    trainer = dtTraining.Batch()

    # Hand over the training data, its dependent variables, and the pruning
    # data with its dependent variables
    trainer.input.set(dtTraining.data, trainFeatures)
    trainer.input.set(dtTraining.dependentVariables, trainResponses)
    trainer.input.set(dtTraining.dataForPruning, pruneFeatures)
    trainer.input.set(dtTraining.dependentVariablesForPruning, pruneResponses)

    # Train the model and return the results
    return trainer.compute()
84 
85 
# Visitor derived from regression.TreeNodeVisitor; prints each tree node of the
# model when the model traversal method calls it back
class PrintNodeVisitor(regression.TreeNodeVisitor):
    """Tree node visitor that prints one line per node, indented by its level."""

    def __init__(self):
        super(PrintNodeVisitor, self).__init__()

    def onLeafNode(self, level, response):
        """Print a leaf node's level and response value.

        The line is preceded by one space per level. Always returns True
        (presumably signalling that traversal should continue -- confirm
        against the TreeNodeVisitor documentation).
        """
        print(" " * level, end='')
        print("Level {}, leaf node. Response value = {:.4g}".format(level, response))
        return True

    def onSplitNode(self, level, featureIndex, featureValue):
        """Print a split node's level, feature index and feature value.

        The line is preceded by one space per level. Always returns True
        (presumably signalling that traversal should continue -- confirm
        against the TreeNodeVisitor documentation).
        """
        print(" " * level, end='')
        print("Level {}, split node. Feature index = {}, feature value = {:.4g}".format(level, featureIndex, featureValue))
        return True
106 
107 
def printModel(m):
    """Print model ``m`` by running its depth-first traversal with a PrintNodeVisitor."""
    m.traverseDF(PrintNodeVisitor())
111 
if __name__ == "__main__":
    # Train the model, pull it out of the training result, and print its tree
    result = trainModel()
    trainedModel = result.get(decision_tree.regression.training.model)
    printModel(trainedModel)

For more complete information about compiler optimizations, see our Optimization Notice.