Python* API Reference for Intel® Data Analytics Acceleration Library 2019 Update 4

df_cls_traverse_model.py

Deprecation Notice: With the introduction of daal4py, a package that supersedes PyDAAL, Intel is deprecating PyDAAL and will discontinue support starting with Intel® DAAL 2021 and Intel® Distribution for Python 2021. Until then, Intel will continue to provide compatible PyDAAL pip and conda packages for newer releases of Intel DAAL and make them available in open source. However, Intel will not add the new features of Intel DAAL to PyDAAL. Intel recommends that developers switch to daal4py.

Note: To find daal4py examples, refer to the daal4py documentation or browse the GitHub repository.

1 # file: df_cls_traverse_model.py
2 #===============================================================================
3 # Copyright 2014-2019 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 #
17 # ! Content:
18 # ! Python example of decision forest classification model traversal.
19 # !
20 # ! The program trains the decision forest classification model on a training
21 # ! data set and prints the trained model using depth-first traversal.
22 # !*****************************************************************************
23 
24 #
25 ## <a name = "DAAL-EXAMPLE-PY-DF_CLS_TRAVERSE_MODEL"></a>
26 ## \example df_cls_traverse_model.py
27 #
28 from __future__ import print_function
29 
30 from daal.algorithms import classifier
31 from daal.algorithms import decision_forest
32 import daal.algorithms.decision_forest.classification
33 import daal.algorithms.decision_forest.classification.training
34 
35 from daal.data_management import (
36  FileDataSource, HomogenNumericTable, MergedNumericTable, NumericTableIface, DataSourceIface, features
37 )
38 
39 # Input data set parameters
40 trainDatasetFileName = "../data/batch/df_classification_train.csv" # Relative path to the training .csv file
41 categoricalFeaturesIndices = [2] # Indices of the feature columns that loadData() marks as categorical
42 nFeatures = 3 # Number of features in training and testing data sets
43 
44 # Decision forest parameters
45 nTrees = 2 # Assigned to algorithm.parameter.nTrees in trainModel()
46 minObservationsInLeafNode = 8 # Assigned to algorithm.parameter.minObservationsInLeafNode in trainModel()
47 maxTreeDepth = 15 # Assigned to algorithm.parameter.maxTreeDepth in trainModel()
48 
49 nClasses = 5 # Number of classes
50 
51 
def trainModel():
    """Train the decision forest classification model on the training data set.

    Loads the training table and the dependent variable with
    loadData(trainDatasetFileName), configures a batch training algorithm
    from the module-level settings (nClasses, nTrees, nFeatures,
    minObservationsInLeafNode, maxTreeDepth) and runs it.

    Returns:
        The object returned by the algorithm's compute() call.
    """
    # Numeric tables holding the features and the dependent variable
    featureTable, labelTable = loadData(trainDatasetFileName)

    # Batch trainer for a decision forest classifier with nClasses classes
    trainer = decision_forest.classification.training.Batch(nClasses)

    # Forest settings
    trainer.parameter.nTrees = nTrees
    trainer.parameter.featuresPerNode = nFeatures
    trainer.parameter.minObservationsInLeafNode = minObservationsInLeafNode
    trainer.parameter.maxTreeDepth = maxTreeDepth

    # Hand the training data set and the dependent values to the trainer
    trainer.input.set(classifier.training.data, featureTable)
    trainer.input.set(classifier.training.labels, labelTable)

    # Build the model and hand back the training result
    trainingResult = trainer.compute()
    return trainingResult
71 
72 
def loadData(fileName):
    """Load a .csv data set into a feature table and a dependent-variable table.

    Args:
        fileName: Path of the .csv file to read.

    Returns:
        A tuple (data, dependentVar): a HomogenNumericTable created with
        nFeatures columns and a HomogenNumericTable created with one column.
        Both are filled through a MergedNumericTable that wraps them.
        Every feature whose index is listed in categoricalFeaturesIndices
        has its dictionary entry marked as features.DAAL_CATEGORICAL.
    """
    # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
    dataSource = FileDataSource(
        fileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for the features and the dependent variable;
    # they are created unallocated and filled through the merged table below
    data = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    dependentVar = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(data, dependentVar)

    # Retrieve the data from the input file
    dataSource.loadDataBlock(mergedData)

    # Mark the configured feature columns as categorical
    dictionary = data.getDictionary()
    for featureIndex in categoricalFeaturesIndices:
        dictionary[featureIndex].featureType = features.DAAL_CATEGORICAL

    return data, dependentVar
93 
94 
class PrintNodeVisitor(classifier.TreeNodeVisitor):
    """Visitor that prints tree nodes as the model traversal calls it back.

    Implements the node visitor interface: every node is printed on its own
    line, prefixed with one space per tree level. Both callbacks always
    return True.
    NOTE(review): True presumably tells the traversal to continue - confirm
    against the TreeNodeVisitor documentation.
    """

    def __init__(self):
        super(PrintNodeVisitor, self).__init__()

    def onLeafNode(self, level, response):
        """Print a leaf node with its level and response value."""
        description = "Level {}, leaf node. Response value = {}".format(level, response)
        print(" " * level + description)
        return True

    def onSplitNode(self, level, featureIndex, featureValue):
        """Print a split node with its level, feature index and feature value."""
        description = "Level {}, split node. Feature index = {}, feature value = {:.6g}".format(
            level, featureIndex, featureValue
        )
        print(" " * level + description)
        return True
114 
115 
def printModel(m):
    """Print the number of trees in model m, then each tree depth-first.

    Args:
        m: Model object providing getNumberOfTrees() and traverseDF().
    """
    treeCount = m.getNumberOfTrees()
    print("Number of trees: {}".format(treeCount))

    # One visitor instance is reused for every tree
    nodePrinter = PrintNodeVisitor()
    for treeIndex in range(treeCount):
        print("Tree #{}".format(treeIndex))
        m.traverseDF(treeIndex, nodePrinter)
122 
123 
if __name__ == "__main__":
    # Train the forest, pull the model out of the training result and print it
    model = trainModel().get(classifier.training.model)
    printModel(model)

For more complete information about compiler optimizations, see our Optimization Notice.