Python* API Reference for Intel® Data Analytics Acceleration Library 2019 Update 5

df_reg_dense_batch.py

Deprecation Notice: With the introduction of daal4py, a package that supersedes PyDAAL, Intel is deprecating PyDAAL and will discontinue support starting with Intel® DAAL 2021 and Intel® Distribution for Python 2021. Until then, Intel will continue to provide compatible PyDAAL pip and conda packages for newer releases of Intel DAAL and make them available in open source. However, Intel will not add new Intel DAAL features to PyDAAL. Intel recommends that developers switch to daal4py.

Note: To find daal4py examples, refer to the daal4py documentation or browse the GitHub repository.

1 # file: df_reg_dense_batch.py
2 #===============================================================================
3 # Copyright 2014-2019 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 ## <a name="DAAL-EXAMPLE-PY-DF_REG_DENSE_BATCH"></a>
17 ## \example df_reg_dense_batch.py
18 
19 import os
20 import sys
21 
22 from daal.algorithms import decision_forest
23 from daal.algorithms.decision_forest.regression import prediction, training
24 from daal.data_management import (
25  FileDataSource, DataSourceIface, NumericTableIface,
26  HomogenNumericTable, MergedNumericTable, features
27 )
28 
# Put the directory one level above this file's directory on sys.path so that
# the `utils` module imported below can be found.
_parentOfScriptDir = os.path.dirname(os.path.dirname(__file__))
utils_folder = os.path.realpath(os.path.abspath(_parentOfScriptDir))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
33 
# Relative path to the folder holding the example data sets
DAAL_PREFIX = os.path.join("..", "data")
_batchDataDir = os.path.join(DAAL_PREFIX, "batch")

# Input data set parameters
trainDatasetFileName = os.path.join(_batchDataDir, "df_regression_train.csv")
testDatasetFileName = os.path.join(_batchDataDir, "df_regression_test.csv")

# Number of feature columns; the dependent variable is kept in a separate one-column table
nFeatures = 13

# Decision forest parameters
nTrees = 100

# Module-level state shared between trainModel(), testModel() and printResults();
# all three stay None until the corresponding step has run
model = predictionResult = testGroundTruth = None
49 
50 
def trainModel():
    """Train the decision forest regression model and print training diagnostics.

    Reads the CSV file named by ``trainDatasetFileName`` into a feature table
    with ``nFeatures`` columns and a one-column table of dependent values,
    marks the dictionary entry at index 3 as categorical, and trains a forest
    of ``nTrees`` trees. The trained model is stored in the module-level
    ``model``. Variable importance, the out-of-bag (OOB) error and the first
    10 rows of the per-observation OOB error are printed.
    """
    global model

    # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
    trainDataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for training data and labels
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(trainData, trainGroundTruth)

    # Retrieve the data from the input file; loading through the merged table
    # fills both the feature table and the dependent-variable table
    trainDataSource.loadDataBlock(mergedData)

    # Get the dictionary and update it with additional information about data
    # (named featureDict so the builtin `dict` is not shadowed)
    featureDict = trainData.getDictionary()

    # Add a feature type to the dictionary
    featureDict[3].featureType = features.DAAL_CATEGORICAL

    # Create an algorithm object to train the decision forest regression model
    algorithm = training.Batch()
    algorithm.parameter.nTrees = nTrees
    algorithm.parameter.varImportance = decision_forest.training.MDA_Raw
    # Request both the aggregate OOB error and the per-observation OOB error
    algorithm.parameter.resultsToCompute = (
        decision_forest.training.computeOutOfBagError
        | decision_forest.training.computeOutOfBagErrorPerObservation
    )

    # Pass the training data set and dependent values to the algorithm
    algorithm.input.set(training.data, trainData)
    algorithm.input.set(training.dependentVariable, trainGroundTruth)

    # Train the decision forest regression model and retrieve the results of the training algorithm
    trainingResult = algorithm.compute()
    model = trainingResult.get(training.model)
    printNumericTable(trainingResult.getTable(training.variableImportance), "Variable importance results: ")
    printNumericTable(trainingResult.getTable(training.outOfBagError), "OOB error: ")
    # Print the per-observation table that was requested above, rather than
    # printing the aggregate OOB error a second time
    printNumericTable(
        trainingResult.getTable(training.outOfBagErrorPerObservation),
        "OOB error per observation (first 10 rows): ", 10
    )
91 
def testModel():
    """Run the trained model on the test data set.

    Reads the CSV file named by ``testDatasetFileName`` into a feature table
    with ``nFeatures`` columns and a one-column ground-truth table, marks the
    dictionary entry at index 3 as categorical, and runs batch prediction with
    the module-level ``model``. The ground-truth table and the prediction
    result are stored in the module-level ``testGroundTruth`` and
    ``predictionResult``.
    """
    global testGroundTruth, predictionResult

    # CSV-backed data source for the test set
    csvSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Unallocated tables for the features and the expected responses
    testFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    featuresAndTruth = MergedNumericTable(testFeatures, testGroundTruth)

    # Load the file through the merged table so both tables are filled
    csvSource.loadDataBlock(featuresAndTruth)

    # Flag the dictionary entry at index 3 as a categorical feature
    testDictionary = testFeatures.getDictionary()
    testDictionary[3].featureType = features.DAAL_CATEGORICAL

    # Prediction algorithm with the default method
    predictor = prediction.Batch()

    # Hand the test features and the trained model to the algorithm
    predictor.input.setTable(prediction.data, testFeatures)
    predictor.input.set(prediction.model, model)

    # Run prediction and keep the result for printResults()
    predictionResult = predictor.compute()
125 
126 
def printResults():
    """Print the first 10 rows of the predictions and of the test ground truth.

    Reads the module-level ``predictionResult`` and ``testGroundTruth``.
    """
    rowsToShow = 10

    predictedValues = predictionResult.get(prediction.prediction)
    printNumericTable(predictedValues, "Decision forest prediction results (first 10 rows):", rowsToShow)
    printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", rowsToShow)
137 
if __name__ == "__main__":
    # Run the example end to end: train, predict, then print the results
    for runStage in (trainModel, testModel, printResults):
        runStage()

For more complete information about compiler optimizations, see our Optimization Notice.