Python* API Reference for Intel® Data Analytics Acceleration Library 2019 Update 5

df_reg_traverse_model.py

Deprecation Notice: With the introduction of daal4py, a package that supersedes PyDAAL, Intel is deprecating PyDAAL and will discontinue support starting with Intel® DAAL 2021 and Intel® Distribution for Python 2021. Until then, Intel will continue to provide compatible PyDAAL pip and conda packages for newer releases of Intel DAAL and make them available as open source. However, Intel will not add new features of Intel DAAL to PyDAAL. Intel recommends that developers switch to daal4py.

Note: To find daal4py examples, refer to the daal4py documentation or browse the GitHub repository.

1 # file: df_reg_traverse_model.py
2 #===============================================================================
3 # Copyright 2014-2019 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 #
17 # ! Content:
18 # ! Python example of decision forest regression model traversal.
19 # !
20 # ! The program trains the decision forest regression model on a training
21 # ! data set and prints the trained model by traversing it depth-first.
22 # !*****************************************************************************
23 
24 #
25 ## <a name = "DAAL-EXAMPLE-PY-DF_REG_TRAVERSE_MODEL"></a>
26 ## \example df_reg_traverse_model.py
27 #
28 from __future__ import print_function
29 
30 from daal import algorithms
31 from daal.algorithms import decision_forest
32 import daal.algorithms.decision_forest.regression
33 import daal.algorithms.decision_forest.regression.training
34 
35 from daal.data_management import (
36  FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable, features
37 )
38 
# Training data set: CSV location and column layout
trainDatasetFileName = "../data/batch/df_regression_train.csv"
categoricalFeaturesIndices = [3]  # Feature columns that loadData() marks as categorical
nFeatures = 13  # Number of features in training and testing data sets

# Decision forest configuration: value assigned to the algorithm's nTrees parameter
nTrees = 2
46 
47 
def trainModel():
    """Train a decision forest regression model on the training data set.

    The data is read with loadData(trainDatasetFileName) and the forest size
    is taken from the module-level nTrees setting.

    Returns:
        The object returned by the batch training algorithm's compute() call.
    """
    # Feature table and dependent-variable table read from the training file
    featureTable, responseTable = loadData(trainDatasetFileName)

    # Short alias for the training namespace used several times below
    training = decision_forest.regression.training

    # Batch training algorithm constructed with its default arguments
    trainer = training.Batch()

    # Register the two tables as the algorithm inputs
    trainer.input.set(training.data, featureTable)
    trainer.input.set(training.dependentVariable, responseTable)

    trainer.parameter.nTrees = nTrees

    # Run the training and hand the result straight back to the caller
    return trainer.compute()
64 
65 
def loadData(fileName):
    """Read a .csv file into a feature table and a dependent-variable table.

    Args:
        fileName: Path of the .csv file to read.

    Returns:
        A (data, dependentVar) tuple: a table with nFeatures columns and a
        table with one column. The feature columns listed in
        categoricalFeaturesIndices are flagged as categorical.
    """
    # File-backed data source; table allocation is left to the tables created below
    csvSource = FileDataSource(
        fileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Unallocated tables: nFeatures feature columns plus one dependent-variable column
    featureTable = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    responseTable = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)

    # The file is read through a merged table built over both tables
    # (presumably this fills featureTable and responseTable -- confirm against the DAAL docs)
    combinedTable = MergedNumericTable(featureTable, responseTable)
    csvSource.loadDataBlock(combinedTable)

    # Flag the configured feature columns as categorical in the feature dictionary
    featureDictionary = featureTable.getDictionary()
    for columnIndex in categoricalFeaturesIndices:
        featureDictionary[columnIndex].featureType = features.DAAL_CATEGORICAL

    return featureTable, responseTable
86 
87 
# Visitor passed to the model's traverseDF() method; prints every tree node it is called back with
class PrintNodeVisitor(algorithms.regression.TreeNodeVisitor):
    """Tree node visitor that prints one line per visited node.

    Each line is prefixed with one space per tree level, so deeper nodes
    appear further to the right.
    """

    def __init__(self):
        super(PrintNodeVisitor, self).__init__()

    def onLeafNode(self, level, response):
        """Print a leaf node: its level and its response value.

        Always returns True (presumably telling the traversal to continue --
        confirm against the TreeNodeVisitor documentation).
        """
        # One space of indentation per level, emitted without a newline
        print(" " * level, end='')
        print("Level {}, leaf node. Response value = {:.4g}".format(level, response))
        return True

    def onSplitNode(self, level, featureIndex, featureValue):
        """Print a split node: its level, feature index and feature value.

        Always returns True (presumably telling the traversal to continue --
        confirm against the TreeNodeVisitor documentation).
        """
        # One space of indentation per level, emitted without a newline
        print(" " * level, end='')
        print("Level {}, split node. Feature index = {}, feature value = {:.4g}".format(level, featureIndex, featureValue))
        return True
108 
109 
def printModel(m):
    """Print the number of trees in model m, then each tree in turn.

    Every tree is walked with m.traverseDF() using a PrintNodeVisitor, which
    prints one line per visited node.
    """
    nodePrinter = PrintNodeVisitor()
    treeCount = m.getNumberOfTrees()
    print("Number of trees: {}".format(treeCount))
    for treeIndex in range(treeCount):
        print("Tree #{}".format(treeIndex))
        m.traverseDF(treeIndex, nodePrinter)
116 
if __name__ == "__main__":
    # Train the forest, extract the model from the training result, then print it
    trainingResult = trainModel()
    trainedModel = trainingResult.get(decision_forest.regression.training.model)
    printModel(trainedModel)

For more complete information about compiler optimizations, see our Optimization Notice.