Python* API Reference for Intel® Data Analytics Acceleration Library 2019 Update 4

mn_naive_bayes_dense_online.py

Deprecation Notice: With the introduction of daal4py, a package that supersedes PyDAAL, Intel is deprecating PyDAAL and will discontinue support starting with Intel® DAAL 2021 and Intel® Distribution for Python 2021. Until then, Intel will continue to provide compatible PyDAAL pip and conda packages for newer releases of Intel DAAL and make them available in open source. However, Intel will not add the new features of Intel DAAL to PyDAAL. Intel recommends that developers switch to daal4py.

Note: To find daal4py examples, refer to the daal4py documentation or browse the GitHub repository.

1 # file: mn_naive_bayes_dense_online.py
2 #===============================================================================
3 # Copyright 2014-2019 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 ## <a name="DAAL-EXAMPLE-PY-MULTINOMIAL_NAIVE_BAYES_DENSE_ONLINE"></a>
17 ## \example mn_naive_bayes_dense_online.py
18 
19 import os
20 import sys
21 
22 from daal.algorithms.multinomial_naive_bayes import prediction, training
23 from daal.algorithms import classifier
24 from daal.data_management import (
25  FileDataSource, DataSourceIface, HomogenNumericTable, MergedNumericTable, NumericTableIface
26 )
27 
# Put the parent of this script's directory on sys.path (if it is not there
# already) so that `utils` can be imported from it.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTables
32 
# Relative path to the directory holding the example data sets
DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_dense.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_test_dense.csv')

nFeatures = 20               # column count given to the feature tables
nTrainVectorsInBlock = 2000  # rows requested per loadDataBlock() call during training
nClasses = 20                # class count passed to the training and prediction algorithms

# Results shared between trainModel(), testModel() and printResults();
# they stay None until the corresponding function has run.
trainingResult = None
predictionResult = None
testGroundTruth = None
46 
47 
def trainModel():
    """Train the Naive Bayes model online, one block of rows at a time.

    Loads the training CSV in blocks of ``nTrainVectorsInBlock`` rows, passes
    each block to a ``training.Online`` algorithm, and stores the finalized
    result in the module-level ``trainingResult``.
    """
    global trainingResult

    # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
    trainDataSource = FileDataSource(
        trainDatasetFileName, DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for training data and labels
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    # The merged table is the load target: each loaded block fills both the
    # feature table and the label table above.
    mergedData = MergedNumericTable(trainData, trainGroundTruth)

    # Create an algorithm object to train the Naive Bayes model
    algorithm = training.Online(nClasses)

    # NOTE(review): the loop stops as soon as a load returns anything other
    # than a full block, so a shorter trailing block is loaded but never
    # passed to compute(). Confirm the training file's row count is a
    # multiple of nTrainVectorsInBlock, or relax the condition.
    while trainDataSource.loadDataBlock(nTrainVectorsInBlock, mergedData) == nTrainVectorsInBlock:
        # Pass a training data set and dependent values to the algorithm
        algorithm.input.set(classifier.training.data, trainData)
        algorithm.input.set(classifier.training.labels, trainGroundTruth)

        # Build the Naive Bayes model
        algorithm.compute()

    # Finalize the Naive Bayes model
    trainingResult = algorithm.finalizeCompute()  # Retrieve the algorithm results
75 
76 
def testModel():
    """Run batch prediction on the test data set with the trained model.

    Reads the model from the module-level ``trainingResult``, so
    ``trainModel()`` must have run first. Stores the prediction result in
    ``predictionResult`` and the loaded labels in ``testGroundTruth``.
    """
    global predictionResult, testGroundTruth

    # Initialize FileDataSource<CSVFeatureManager> to retrieve the test data from a .csv file
    testDataSource = FileDataSource(
        testDatasetFileName, DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for testing data and labels
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    # Retrieve the data from input file
    testDataSource.loadDataBlock(mergedData)

    # Create an algorithm object to predict Naive Bayes values
    algorithm = prediction.Batch(nClasses)

    # Pass a testing data set and the trained model to the algorithm
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, trainingResult.get(classifier.training.model))

    # Predict Naive Bayes values (Result class from classifier.prediction)
    predictionResult = algorithm.compute()  # Retrieve the algorithm results
103 
104 
def printResults():
    """Print ground-truth labels next to the predicted labels.

    Reads the module-level ``testGroundTruth`` and ``predictionResult``, so
    ``testModel()`` must have run first.
    """
    printNumericTables(
        testGroundTruth, predictionResult.get(classifier.prediction.prediction),
        "Ground truth", "Classification results",
        "NaiveBayes classification results (first 20 observations):", 20, flt64=False
    )
112 
if __name__ == "__main__":
    # Order matters: testModel() reads the result stored by trainModel(),
    # and printResults() reads what testModel() stored.
    trainModel()
    testModel()
    printResults()

For more complete information about compiler optimizations, see our Optimization Notice.