Python* API Reference for Intel® Data Analytics Acceleration Library 2018 Update 1

df_reg_traverse_model.py

1 # file: df_reg_traverse_model.py
2 #===============================================================================
3 # Copyright 2014-2017 Intel Corporation All Rights Reserved.
4 #
5 # The source code, information and material ("Material") contained herein is
6 # owned by Intel Corporation or its suppliers or licensors, and title to such
7 # Material remains with Intel Corporation or its suppliers or licensors. The
8 # Material contains proprietary information of Intel or its suppliers and
9 # licensors. The Material is protected by worldwide copyright laws and treaty
10 # provisions. No part of the Material may be used, copied, reproduced,
11 # modified, published, uploaded, posted, transmitted, distributed or disclosed
12 # in any way without Intel's prior express written permission. No license under
13 # any patent, copyright or other intellectual property rights in the Material
14 # is granted to or conferred upon you, either expressly, by implication,
15 # inducement, estoppel or otherwise. Any license under such intellectual
16 # property rights must be express and approved by Intel in writing.
17 #
18 # Unless otherwise agreed by Intel in writing, you may not remove or alter this
19 # notice or any other notice embedded in Materials by Intel or Intel's
20 # suppliers or licensors in any way.
21 #===============================================================================
22 
23 #
24 # ! Content:
25 # ! Python example of decision forest regression model traversal.
26 # !
27 # ! The program trains the decision forest regression model on a training
28 # ! data set and prints the trained model using depth-first traversal.
29 # !*****************************************************************************
30 
31 #
32 ## <a name = "DAAL-EXAMPLE-PY-DF_REG_TRAVERSE_MODEL"></a>
33 ## \example df_reg_traverse_model.py
34 #
35 from __future__ import print_function
36 
37 from daal import algorithms
38 from daal.algorithms import decision_forest
39 import daal.algorithms.decision_forest.regression
40 import daal.algorithms.decision_forest.regression.training
41 
42 from daal.data_management import (
43  FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable, data_feature_utils
44 )
45 
# Input data set parameters
trainDatasetFileName = "../data/batch/df_regression_train.csv"  # Training data file (.csv)
nFeatures = 13  # Number of features in training and testing data sets
categoricalFeaturesIndices = [3]  # Feature indices that loadData() marks as categorical

# Decision forest parameters
nTrees = 2  # Assigned to the training algorithm's nTrees parameter
53 
54 
def trainModel():
    """Train a decision forest regression model on the training data set.

    Loads the data named by ``trainDatasetFileName``, feeds it to a batch
    training algorithm configured with ``nTrees`` trees, and runs it.

    Returns:
        The value returned by the algorithm's ``compute()`` call; the caller
        extracts the trained model from it.
    """
    # Numeric tables with the training features and the dependent variable
    features, responses = loadData(trainDatasetFileName)

    # Short alias for the training namespace used several times below
    training = decision_forest.regression.training

    # Algorithm object that trains the model with the default method
    trainer = training.Batch()

    # Pass the training data set and the dependent values to the algorithm
    trainer.input.set(training.data, features)
    trainer.input.set(training.dependentVariable, responses)

    trainer.parameter.nTrees = nTrees

    # Build the decision forest regression model and return the result
    return trainer.compute()
71 
72 
def loadData(fileName):
    """Load a .csv data set into feature and dependent-variable tables.

    Args:
        fileName: Path of the .csv file to read.

    Returns:
        A ``(data, dependentVar)`` pair of numeric tables. ``data`` is created
        with ``nFeatures`` columns and ``dependentVar`` with one column. Every
        feature listed in ``categoricalFeaturesIndices`` has its type set to
        categorical in the dictionary of ``data``.
    """
    # FileDataSource<CSVFeatureManager> that retrieves the input data from a .csv file
    csvSource = FileDataSource(
        fileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Numeric tables for the training data and the dependent variable,
    # combined into one merged table that the data source loads into
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    responses = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    merged = MergedNumericTable(features, responses)

    # Retrieve the data from the input file
    csvSource.loadDataBlock(merged)

    # Flag the configured feature columns as categorical
    featureDictionary = features.getDictionary()
    for featureIndex in categoricalFeaturesIndices:
        featureDictionary[featureIndex].featureType = data_feature_utils.DAAL_CATEGORICAL

    return features, responses
93 
94 
# Visitor class that prints the tree nodes of the model; its methods are
# called back by the model traversal method.
class PrintNodeVisitor(algorithms.regression.TreeNodeVisitor):
    """Print one line per visited tree node, indented by the node's level."""

    def __init__(self):
        super(PrintNodeVisitor, self).__init__()

    def onLeafNode(self, level, response):
        """Print a leaf node's level and response value.

        Args:
            level: Node level; also the number of indent characters printed.
            response: Response value of the leaf, printed with 4 significant digits.

        Returns:
            Always True (presumably tells the traversal to continue; confirm
            against the TreeNodeVisitor documentation).
        """
        # One indent character per level, emitted without a newline
        print(" " * level, end='')
        print("Level {}, leaf node. Response value = {:.4g}".format(level, response))
        return True

    def onSplitNode(self, level, featureIndex, featureValue):
        """Print a split node's level, feature index and feature value.

        Args:
            level: Node level; also the number of indent characters printed.
            featureIndex: Index of the feature the node splits on.
            featureValue: Feature value of the split, printed with 4 significant digits.

        Returns:
            Always True (presumably tells the traversal to continue; confirm
            against the TreeNodeVisitor documentation).
        """
        # One indent character per level, emitted without a newline
        print(" " * level, end='')
        print("Level {}, split node. Feature index = {}, feature value = {:.4g}".format(level, featureIndex, featureValue))
        return True
115 
116 
def printModel(m):
    """Print the number of trees in a model and the nodes of every tree.

    Args:
        m: Model object providing ``numberOfTrees()`` and
            ``traverseDF(treeIndex, visitor)``.
    """
    # A single visitor instance is reused for all trees
    nodePrinter = PrintNodeVisitor()
    print("Number of trees: {}".format(m.numberOfTrees()))
    for treeIndex in range(m.numberOfTrees()):
        print("Tree #{}".format(treeIndex))
        # The traversal calls back into nodePrinter for the nodes of this tree
        m.traverseDF(treeIndex, nodePrinter)
123 
if __name__ == "__main__":
    # Train the forest, then print each of its trees node by node
    trainingResult = trainModel()
    trainedModel = trainingResult.get(decision_forest.regression.training.model)
    printModel(trainedModel)

For more complete information about compiler optimizations, see our Optimization Notice.