Python* API Reference for Intel® Data Analytics Acceleration Library 2018 Update 3

df_reg_traverse_model.py

1 # file: df_reg_traverse_model.py
2 #===============================================================================
3 # Copyright 2014-2018 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 #
17 # ! Content:
18 # ! Python example of decision forest regression model traversal.
19 # !
20 # ! The program trains the decision forest regression model on a training
21 # ! data set and prints the trained model using depth-first traversal.
22 # !*****************************************************************************
23 
24 #
25 
26 
27 #
28 from __future__ import print_function
29 
30 from daal import algorithms
31 from daal.algorithms import decision_forest
32 import daal.algorithms.decision_forest.regression
33 import daal.algorithms.decision_forest.regression.training
34 
35 from daal.data_management import (
36  FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable, data_feature_utils
37 )
38 
39 # Input data set parameters
40 trainDatasetFileName = "../data/batch/df_regression_train.csv"  # CSV file that trainModel() passes to loadData()
41 categoricalFeaturesIndices = [3]  # Feature indices whose dictionary entries loadData() marks as DAAL_CATEGORICAL
42 nFeatures = 13 # Number of features in training and testing data sets
43 
44 # Decision forest parameters
45 nTrees = 2  # Assigned to algorithm.parameter.nTrees in trainModel()
46 
47 
def trainModel():
    """Train a decision forest regression model on the training data set.

    Loads the feature and dependent-variable tables from
    ``trainDatasetFileName``, feeds them to a batch training algorithm
    configured with ``nTrees`` trees, and runs it.

    Returns:
        The object returned by the algorithm's ``compute()`` call; the
        trained model is retrieved from it by the caller.
    """
    # Numeric tables holding the features and the dependent variable
    features, responses = loadData(trainDatasetFileName)

    # Short alias for the training namespace used several times below
    training = decision_forest.regression.training

    # Training algorithm object created with the default method
    trainer = training.Batch()

    # Hand the training data set and the dependent values to the algorithm
    trainer.input.set(training.data, features)
    trainer.input.set(training.dependentVariable, responses)

    trainer.parameter.nTrees = nTrees

    # Build the decision forest regression model and hand back the result
    return trainer.compute()
64 
65 
def loadData(fileName):
    """Load features and the dependent variable from a .csv file.

    The file is read into a merged table made of ``nFeatures`` feature
    columns followed by one dependent-variable column. Every feature listed
    in ``categoricalFeaturesIndices`` is then flagged as categorical in the
    feature table's dictionary.

    Args:
        fileName: Path of the .csv file to read.

    Returns:
        A ``(data, dependentVar)`` pair of numeric tables.
    """
    # FileDataSource<CSVFeatureManager> that retrieves the input data from the .csv file
    csvSource = FileDataSource(
        fileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Numeric tables for the features and for the dependent variable,
    # combined so that a single load fills both of them
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    responses = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    combined = MergedNumericTable(features, responses)

    # Retrieve the data from the input file
    csvSource.loadDataBlock(combined)

    # Flag the configured features as categorical
    featureDictionary = features.getDictionary()
    for featureIndex in categoricalFeaturesIndices:
        featureDictionary[featureIndex].featureType = data_feature_utils.DAAL_CATEGORICAL

    return features, responses
86 
87 
# Visitor that prints each tree node of the model when the model traversal method calls back into it
class PrintNodeVisitor(algorithms.regression.TreeNodeVisitor):
    """Tree node visitor that writes one line per visited node to stdout.

    Each line is indented by one space per tree level. Both callbacks
    return True (presumably telling the traversal to continue - confirm
    against the DAAL TreeNodeVisitor documentation).
    """

    def __init__(self):
        super(PrintNodeVisitor, self).__init__()

    def onLeafNode(self, level, response):
        """Print the level and response value of a leaf node; return True."""
        # Indentation first, then the node description on the same line
        print(" " * level, end='')
        description = "Level {}, leaf node. Response value = {:.4g}".format(level, response)
        print(description)
        return True

    def onSplitNode(self, level, featureIndex, featureValue):
        """Print the level, feature index and feature value of a split node; return True."""
        # Indentation first, then the node description on the same line
        print(" " * level, end='')
        description = "Level {}, split node. Feature index = {}, feature value = {:.4g}".format(
            level, featureIndex, featureValue
        )
        print(description)
        return True
108 
109 
def printModel(m):
    """Print every tree of model ``m`` by traversing it with a PrintNodeVisitor.

    Writes the number of trees, then for each tree a ``Tree #<index>``
    header followed by the nodes printed during ``m.traverseDF``.

    Args:
        m: Model object providing ``numberOfTrees()`` and
            ``traverseDF(treeIndex, visitor)``.
    """
    nodePrinter = PrintNodeVisitor()
    print("Number of trees: {}".format(m.numberOfTrees()))
    for treeIndex in range(m.numberOfTrees()):
        treeHeader = "Tree #{}".format(treeIndex)
        print(treeHeader)
        # The visitor prints the nodes of this tree as the traversal calls it back
        m.traverseDF(treeIndex, nodePrinter)
116 
if __name__ == "__main__":
    # Train the forest, pull the model out of the training result, then print all of its trees
    trainedModel = trainModel().get(decision_forest.regression.training.model)
    printModel(trainedModel)

For more complete information about compiler optimizations, see our Optimization Notice.