Python* API Reference for Intel® Data Analytics Acceleration Library 2018 Update 1

df_cls_traverse_model.py

1 # file: df_cls_traverse_model.py
2 #===============================================================================
3 # Copyright 2014-2017 Intel Corporation All Rights Reserved.
4 #
5 # The source code, information and material ("Material") contained herein is
6 # owned by Intel Corporation or its suppliers or licensors, and title to such
7 # Material remains with Intel Corporation or its suppliers or licensors. The
8 # Material contains proprietary information of Intel or its suppliers and
9 # licensors. The Material is protected by worldwide copyright laws and treaty
10 # provisions. No part of the Material may be used, copied, reproduced,
11 # modified, published, uploaded, posted, transmitted, distributed or disclosed
12 # in any way without Intel's prior express written permission. No license under
13 # any patent, copyright or other intellectual property rights in the Material
14 # is granted to or conferred upon you, either expressly, by implication,
15 # inducement, estoppel or otherwise. Any license under such intellectual
16 # property rights must be express and approved by Intel in writing.
17 #
18 # Unless otherwise agreed by Intel in writing, you may not remove or alter this
19 # notice or any other notice embedded in Materials by Intel or Intel's
20 # suppliers or licensors in any way.
21 #===============================================================================
22 
23 #
24 # ! Content:
25 # ! Python example of decision forest classification model traversal.
26 # !
27 # ! The program trains the decision forest classification model on a training
28 # ! data set and prints the trained model using depth-first traversal.
29 # !*****************************************************************************
30 
31 #
32 
33 
34 #
35 from __future__ import print_function
36 
37 from daal.algorithms import classifier
38 from daal.algorithms import decision_forest
39 import daal.algorithms.decision_forest.classification
40 import daal.algorithms.decision_forest.classification.training
41 
42 from daal.data_management import (
43  FileDataSource, HomogenNumericTable, MergedNumericTable, NumericTableIface, DataSourceIface, data_feature_utils
44 )
45 
# Training data: CSV location and layout
trainDatasetFileName = "../data/batch/df_classification_train.csv"
nFeatures = 3  # Number of feature columns in the data set
categoricalFeaturesIndices = [2]  # Feature columns to be marked as categorical

# Number of classes passed to the training algorithm
nClasses = 5

# Decision forest parameters
nTrees = 2
maxTreeDepth = 15
minObservationsInLeafNode = 8
57 
58 
def trainModel():
    """Train the decision forest classification model on the example data.

    The training tables come from ``loadData(trainDatasetFileName)``; the
    number of classes and the forest parameters are read from the
    module-level constants.

    Returns:
        Whatever ``compute()`` of the batch training algorithm returns
        (the caller extracts the model from it).
    """
    features, labels = loadData(trainDatasetFileName)

    trainer = decision_forest.classification.training.Batch(nClasses)

    # Forest configuration
    trainer.parameter.nTrees = nTrees
    trainer.parameter.featuresPerNode = nFeatures
    trainer.parameter.minObservationsInLeafNode = minObservationsInLeafNode
    trainer.parameter.maxTreeDepth = maxTreeDepth

    # Training features and their dependent values
    trainer.input.set(classifier.training.data, features)
    trainer.input.set(classifier.training.labels, labels)

    # Run the training and hand the result back to the caller
    return trainer.compute()
78 
79 
def loadData(fileName):
    """Load a .csv file into a feature table and a dependent-variable table.

    Args:
        fileName: Path of the .csv file passed to ``FileDataSource``.

    Returns:
        A ``(data, dependentVar)`` pair: a numeric table with ``nFeatures``
        columns and a numeric table with one column.
    """
    source = FileDataSource(
        fileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Both tables are created with the notAllocate flag; the data source
    # loads into the merged table built on top of them.
    featureTable = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    labelTable = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    combined = MergedNumericTable(featureTable, labelTable)

    source.loadDataBlock(combined)

    # Mark the configured feature columns as categorical
    featureDictionary = featureTable.getDictionary()
    for column in categoricalFeaturesIndices:
        featureDictionary[column].featureType = data_feature_utils.DAAL_CATEGORICAL

    return featureTable, labelTable
100 
101 
102 # Visitor class implementing the TreeNodeVisitor interface; prints the tree nodes of the model when called back by the model traversal method
class PrintNodeVisitor(classifier.TreeNodeVisitor):
    """Tree node visitor that prints one line per visited node.

    Each line is preceded by one space per tree level, so deeper nodes are
    indented further.
    """

    def __init__(self):
        super(PrintNodeVisitor, self).__init__()

    def onLeafNode(self, level, response):
        """Print the level and response value of a leaf node.

        Always returns True (presumably tells the traversal to continue;
        confirm against the DAAL documentation).
        """
        print(" " * level, end='')
        print("Level {}, leaf node. Response value = {}".format(level, response))
        return True

    def onSplitNode(self, level, featureIndex, featureValue):
        """Print the level, feature index and feature value of a split node.

        Always returns True (presumably tells the traversal to continue;
        confirm against the DAAL documentation).
        """
        print(" " * level, end='')
        print("Level {}, split node. Feature index = {}, feature value = {:.6g}".format(level, featureIndex, featureValue))
        return True
121 
122 
def printModel(m):
    """Print the number of trees in model *m*, then every tree node by node.

    Args:
        m: Model object providing ``numberOfTrees()`` and
            ``traverseDF(treeIndex, visitor)``.
    """
    nodePrinter = PrintNodeVisitor()

    summary = "Number of trees: {}".format(m.numberOfTrees())
    print(summary)

    for treeIndex in range(m.numberOfTrees()):
        header = "Tree #{}".format(treeIndex)
        print(header)
        # The visitor prints each node as the model calls it back
        m.traverseDF(treeIndex, nodePrinter)
129 
130 
if __name__ == "__main__":
    # Train the forest, pull the model out of the training result, print it.
    trainingResult = trainModel()
    trainedModel = trainingResult.get(classifier.training.model)
    printModel(trainedModel)

For more complete information about compiler optimizations, see our Optimization Notice.