Python* API Reference for Intel® Data Analytics Acceleration Library 2019 Update 4

dt_reg_dense_batch.py

1 # file: dt_reg_dense_batch.py
2 #===============================================================================
3 # Copyright 2014-2019 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 
17 
18 
19 import os
20 import sys
21 
22 from daal.algorithms.decision_tree.regression import prediction, training
23 from daal.data_management import (
24  FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
25 )
26 
# Make the directory one level above this script's directory importable; the
# `utils` module imported right below is expected to live there.
#
# __file__ is made absolute *before* walking up the tree.  When a script's
# __file__ is a bare relative name (e.g. "dt_reg_dense_batch.py"),
# dirname(dirname(__file__)) collapses to "" and would resolve to the current
# working directory rather than the parent of the script's directory.
utils_folder = os.path.realpath(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
30 from utils import printNumericTables
31 
# Root of the example data sets.  The path is relative, so it is resolved
# against the current working directory when the files are opened.
DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters: CSV files used for training, pruning and testing
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'decision_tree_train.csv')
pruneDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'decision_tree_prune.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'decision_tree_test.csv')

# Number of feature columns read from each data set; the loaders read one
# additional column holding the dependent variable.
nFeatures = 5

# State shared between trainModel(), testModel() and printResults()
model = None             # decision tree regression model, set by trainModel()
predictionResult = None  # prediction algorithm result, set by testModel()
testGroundTruth = None   # dependent values of the test set, set by testModel()
45 
46 
def _loadDataset(datasetFileName):
    """Load a CSV data set and split it into features and dependent values.

    Args:
        datasetFileName: Path of the .csv file to read.

    Returns:
        A ``(data, groundTruth)`` pair of numeric tables: ``data`` has
        ``nFeatures`` columns and ``groundTruth`` has one column.
    """
    # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for the features and the dependent values
    data = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    groundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)

    # Both tables are wrapped in one merged table so that a single load call
    # fills the feature columns and the dependent-value column together.
    mergedData = MergedNumericTable(data, groundTruth)

    # Retrieve the data from the input file
    dataSource.loadDataBlock(mergedData)

    return data, groundTruth


def trainModel():
    """Train the decision tree regression model.

    Loads the training and pruning data sets, runs the batch training
    algorithm on them and stores the resulting model in the module-level
    ``model`` variable.
    """
    global model

    trainData, trainGroundTruth = _loadDataset(trainDatasetFileName)
    pruneData, pruneGroundTruth = _loadDataset(pruneDatasetFileName)

    # Create an algorithm object to train the decision tree regression model
    algorithm = training.Batch()

    # Pass the training data set and dependent values to the algorithm
    algorithm.input.set(training.data, trainData)
    algorithm.input.set(training.dependentVariables, trainGroundTruth)

    # Pass the pruning data set and its dependent values to the algorithm
    algorithm.input.set(training.dataForPruning, pruneData)
    algorithm.input.set(training.dependentVariablesForPruning, pruneGroundTruth)

    # Train the decision tree regression model and retrieve the results of the training algorithm
    trainingResult = algorithm.compute()
    model = trainingResult.get(training.model)
92 
def testModel():
    """Run the trained model on the test data set.

    Loads the test data set, runs the batch prediction algorithm with the
    module-level ``model`` and stores the outcome in the module-level
    ``predictionResult``.  The dependent values read from the test file are
    stored in the module-level ``testGroundTruth``.

    Raises:
        RuntimeError: If no model has been trained yet.
    """
    global testGroundTruth, predictionResult

    # Fail early with a clear message instead of handing None to the algorithm
    if model is None:
        raise RuntimeError("trainModel() must be called before testModel()")

    # Initialize FileDataSource<CSVFeatureManager> to retrieve the test data from a .csv file
    testDataSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for testing data and labels
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    # Retrieve the data from input file
    testDataSource.loadDataBlock(mergedData)

    # Create algorithm objects for decision tree regression prediction with the default method
    algorithm = prediction.Batch()

    # Pass the testing data set and trained model to the algorithm
    algorithm.input.setTable(prediction.data, testData)
    algorithm.input.setModel(prediction.model, model)

    # Compute prediction results and retrieve algorithm results
    predictionResult = algorithm.compute()
121 
122 
def printResults():
    """Print the test ground truth next to the predicted values.

    Reads the module-level ``testGroundTruth`` and ``predictionResult`` and
    hands them to ``printNumericTables``.

    Raises:
        RuntimeError: If the prediction results are not available yet.
    """
    # Fail early with a clear message instead of an AttributeError on None
    if predictionResult is None or testGroundTruth is None:
        raise RuntimeError("testModel() must be called before printResults()")

    predictedValues = predictionResult.get(prediction.prediction)

    # 20 is passed positionally; per the message it is presumably the number
    # of rows to print - confirm against utils.printNumericTables.
    printNumericTables(
        testGroundTruth, predictedValues,
        "Ground truth", "Regression results",
        "Decision tree regression results (first 20 observations):",
        20, flt64=False
    )
129 
if __name__ == "__main__":
    # Run the example end to end.  The order matters: testModel() uses the
    # model stored by trainModel(), and printResults() uses the results
    # stored by testModel().
    trainModel()
    testModel()
    printResults()

For more complete information about compiler optimizations, see our Optimization Notice.