Python* API Reference for Intel® Data Analytics Acceleration Library 2019

dt_cls_dense_batch.py

1 # file: dt_cls_dense_batch.py
2 #===============================================================================
3 # Copyright 2014-2018 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 ## <a name="DAAL-EXAMPLE-PY-DT_CLS_DENSE_BATCH"></a>
17 ## \example dt_cls_dense_batch.py
18 
19 import os
20 import sys
21 
22 from daal.algorithms.decision_tree.classification import prediction, training
23 from daal.algorithms import classifier
24 from daal.data_management import (
25  FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
26 )
# Put the parent of this script's directory at the front of sys.path (once)
# before importing `utils` -- presumably that is where the shared helper
# module lives; confirm against the examples layout.
utils_folder = os.path.realpath(
    os.path.abspath(
        os.path.dirname(os.path.dirname(__file__))
    )
)
if not (utils_folder in sys.path):
    sys.path.insert(0, utils_folder)
from utils import printNumericTables
31 
# Root folder of the example data sets, relative to the working directory
DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters: training, pruning and test CSV files, all of
# them located in the 'batch' sub-folder of DAAL_PREFIX
trainDatasetFileName, pruneDatasetFileName, testDatasetFileName = (
    os.path.join(DAAL_PREFIX, 'batch', csvName)
    for csvName in (
        'decision_tree_train.csv',
        'decision_tree_prune.csv',
        'decision_tree_test.csv',
    )
)

# Number of feature columns passed to the data tables, and number of classes
# passed to the training algorithm
nFeatures = 5
nClasses = 5

# State shared between the steps of the example:
#   model            - set by trainModel(), read by testModel()
#   predictionResult - set by testModel(), read by printResults()
#   testGroundTruth  - set by testModel(), read by printResults()
model = predictionResult = testGroundTruth = None
46 
47 
def trainModel():
    """Train the decision tree classification model.

    Loads the training and pruning data sets from ``trainDatasetFileName``
    and ``pruneDatasetFileName``, runs ``training.Batch`` on them and stores
    the model taken from the training result in the module-level ``model``.
    """
    global model

    # --- Training data set -------------------------------------------------
    # File data source over the training .csv file
    trainSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # One table for the features, one single-column table for the labels;
    # the file is loaded into the merge of the two
    trainFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    trainLabels = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    trainMerged = MergedNumericTable(trainFeatures, trainLabels)
    trainSource.loadDataBlock(trainMerged)

    # --- Pruning data set --------------------------------------------------
    # Same procedure for the pruning .csv file
    pruneSource = FileDataSource(
        pruneDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    pruneFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    pruneLabels = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    pruneMerged = MergedNumericTable(pruneFeatures, pruneLabels)
    pruneSource.loadDataBlock(pruneMerged)

    # --- Training ----------------------------------------------------------
    # Algorithm object that trains the decision tree classification model
    trainer = training.Batch(nClasses)

    # Training data and labels are registered under the generic classifier
    # input ids, via input.set()
    for inputId, table in (
        (classifier.training.data, trainFeatures),
        (classifier.training.labels, trainLabels),
    ):
        trainer.input.set(inputId, table)

    # Pruning data and labels use the decision-tree-specific input ids,
    # via input.setTable()
    for inputId, table in (
        (training.dataForPruning, pruneFeatures),
        (training.labelsForPruning, pruneLabels),
    ):
        trainer.input.setTable(inputId, table)

    # Run the training and keep the resulting model for testModel()
    model = trainer.compute().get(classifier.training.model)
93 
def testModel():
    """Run the trained model on the test data set.

    Loads the test data from ``testDatasetFileName``, runs
    ``prediction.Batch`` with the module-level ``model`` and stores the
    outcome in the module-level ``predictionResult``.  The labels read from
    the file are kept in the module-level ``testGroundTruth``.
    """
    global testGroundTruth, predictionResult

    # File data source over the test .csv file
    testSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # One table for the features, one single-column table for the labels;
    # the file is loaded into the merge of the two
    testFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    testMerged = MergedNumericTable(testFeatures, testGroundTruth)
    testSource.loadDataBlock(testMerged)

    # Prediction algorithm created with its default arguments
    predictor = prediction.Batch()

    # Hand over the test features and the trained model
    predictor.input.setTable(classifier.prediction.data, testFeatures)
    predictor.input.setModel(classifier.prediction.model, model)

    # Compute the predictions; the result object is read by printResults()
    predictionResult = predictor.compute()
123 
124 
def printResults():
    """Print the ground-truth labels next to the predicted ones.

    Reads the module-level ``testGroundTruth`` and ``predictionResult`` set
    by ``testModel()`` and forwards them to ``printNumericTables``.
    """
    predictedLabels = predictionResult.get(classifier.prediction.prediction)

    printNumericTables(
        testGroundTruth,
        predictedLabels,
        "Ground truth",
        "Classification results",
        "Decision tree classification results (first 20 observations):",
        20,
        flt64=False,
    )
134 
if __name__ == "__main__":
    # Run the example end to end: train, then predict, then report
    for step in (trainModel, testModel, printResults):
        step()

For more complete information about compiler optimizations, see our Optimization Notice.