Python* API Reference for Intel® Data Analytics Acceleration Library 2019 Update 5

dt_reg_traverse_model.py

1 # file: dt_reg_traverse_model.py
2 #===============================================================================
3 # Copyright 2014-2019 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 #
17 # ! Content:
18 # ! Python example of decision tree regression model traversal.
19 # !
20 # ! The program trains the decision tree regression model on a training
21 # ! data set and prints the trained model by traversing it depth-first.
22 # !*****************************************************************************
23 
24 #
25 
26 
27 #
28 
29 from __future__ import print_function
30 
31 from daal.algorithms import regression
32 from daal.algorithms import decision_tree
33 import daal.algorithms.decision_tree.regression
34 import daal.algorithms.decision_tree.regression.training
35 
36 from daal.data_management import FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
37 
# Input data set parameters: CSV files used to grow the tree and to prune it
trainDatasetFileName = "../data/batch/decision_tree_train.csv"
pruneDatasetFileName = "../data/batch/decision_tree_prune.csv"

nFeatures = 5  # Number of feature columns in the training and pruning data sets
43 
44 
def _loadDataset(datasetFileName):
    """Load one CSV data set and return it as ``(data, groundTruth)`` tables.

    ``data`` is created with ``nFeatures`` columns and ``groundTruth`` with a
    single column; both are filled through one merged table so that a single
    pass over the file populates the two of them.
    """
    # Initialize FileDataSource to retrieve the input data from a .csv file
    dataSource = FileDataSource(
        datasetFileName, DataSourceIface.notAllocateNumericTable, DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for the data and the dependent variable.
    # Both start with 0 rows and without allocated storage.
    data = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    groundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)

    # NOTE(review): presumably the merged table maps the first nFeatures CSV
    # columns to `data` and the last column to `groundTruth` - confirm against
    # the layout of the input files.
    mergedData = MergedNumericTable(data, groundTruth)

    # Retrieve the data from the input file
    dataSource.loadDataBlock(mergedData)

    return data, groundTruth


def trainModel():
    """Train a decision tree regression model and return the training result.

    Reads the training set from ``trainDatasetFileName`` and the pruning set
    from ``pruneDatasetFileName``, passes both (data plus dependent variables)
    to a ``decision_tree.regression.training.Batch`` algorithm and returns
    whatever its ``compute()`` call returns.
    """
    trainData, trainGroundTruth = _loadDataset(trainDatasetFileName)
    pruneData, pruneGroundTruth = _loadDataset(pruneDatasetFileName)

    # Create an algorithm object to train the Decision tree model
    algorithm = decision_tree.regression.training.Batch()

    # Pass the training data set, dependent variables, and pruning dataset with dependent variables to the algorithm
    algorithm.input.set(decision_tree.regression.training.data, trainData)
    algorithm.input.set(decision_tree.regression.training.dependentVariables, trainGroundTruth)
    algorithm.input.set(decision_tree.regression.training.dataForPruning, pruneData)
    algorithm.input.set(decision_tree.regression.training.dependentVariablesForPruning, pruneGroundTruth)

    # Train the Decision tree model and return the results
    return algorithm.compute()
84 
85 
86 # Visitor class implementing the TreeNodeVisitor interface; prints the tree nodes of the model when it is called back by the model traversal method
class PrintNodeVisitor(regression.TreeNodeVisitor):
    """Tree node visitor that prints one line per visited node.

    Each line is indented by one space per tree level, so the printed output
    mirrors the shape of the tree.
    """

    def __init__(self):
        super(PrintNodeVisitor, self).__init__()

    def onLeafNode(self, level, response):
        """Print a leaf node found at depth ``level`` with its response value.

        Always returns True.
        NOTE(review): True presumably tells the traversal to continue - confirm
        against the TreeNodeVisitor documentation.
        """
        indent = " " * level
        print(indent + "Level {}, leaf node. Response value = {:.4g}".format(level, response))
        return True

    def onSplitNode(self, level, featureIndex, featureValue):
        """Print a split node found at depth ``level`` with its feature index and value.

        Always returns True (see the review note on ``onLeafNode``).
        """
        indent = " " * level
        print(indent + "Level {}, split node. Feature index = {}, feature value = {:.4g}".format(level, featureIndex, featureValue))
        return True
106 
107 
def printModel(m):
    """Print model ``m`` by passing a ``PrintNodeVisitor`` to its ``traverseDF`` method."""
    visitor = PrintNodeVisitor()
    m.traverseDF(visitor)
111 
if __name__ == "__main__":
    # Train the model, then fetch it from the training result and print its nodes
    trainingResult = trainModel()
    printModel(trainingResult.get(decision_tree.regression.training.model))

For more complete information about compiler optimizations, see our Optimization Notice.