Python* API Reference for Intel® Data Analytics Acceleration Library 2019

mn_naive_bayes_csr_online.py

# file: mn_naive_bayes_csr_online.py
#===============================================================================
# Copyright 2014-2018 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#===============================================================================

## <a name="DAAL-EXAMPLE-PY-MULTINOMIAL_NAIVE_BAYES_CSR_ONLINE"></a>
## \example mn_naive_bayes_csr_online.py
18 
import os
import sys

from daal.algorithms.multinomial_naive_bayes import prediction, training
from daal.algorithms import classifier
from daal.data_management import FileDataSource, DataSourceIface

# The `utils` helper module is looked up in the parent of the directory that
# contains this script, so that directory is put at the front of sys.path
# (only once) before the import below.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTables, createSparseTable
30 
# Root folder of the example data sets, relative to the working directory
DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters
# One entry per training block; trainModel() indexes both lists by block
# number, so each list needs at least nBlocks entries.
trainDatasetFileNames = [
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_csr.csv')
]

trainGroundTruthFileNames = [
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_labels.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_labels.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_labels.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_labels.csv')
]

testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_test_csr.csv')
testGroundTruthFileName = os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_test_labels.csv')

nTrainVectorsInBlock = 8000  # rows requested from each training labels file
nTestObservations = 2000     # rows requested from the test labels file
nClasses = 20
nBlocks = 4                  # number of online training steps

# Shared state filled in by trainModel() and testModel()
trainingResult = None
predictionResult = None
trainData = [0] * nBlocks    # placeholder slots, one per training block
testData = None
60 
61 
def trainModel():
    """Train the Naive Bayes model in online mode, one data block at a time.

    For each of the nBlocks blocks, a sparse table is created from the
    corresponding entry of trainDatasetFileNames and stored in the global
    trainData list, the matching labels are loaded from
    trainGroundTruthFileNames, and both are passed to the online algorithm
    before calling compute(). After the last block, finalizeCompute() is
    called and its return value is stored in the global trainingResult.
    """
    global trainData, trainingResult

    # Create an algorithm object to train the Naive Bayes model
    algorithm = training.Online(nClasses, method=training.fastCSR)

    for i in range(nBlocks):
        # Read trainDatasetFileNames and create a numeric table to store the input data
        trainData[i] = createSparseTable(trainDatasetFileNames[i])
        trainLabelsSource = FileDataSource(
            trainGroundTruthFileNames[i], DataSourceIface.doAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext
        )

        trainLabelsSource.loadDataBlock(nTrainVectorsInBlock)

        # Pass a training data set and dependent values to the algorithm
        algorithm.input.set(classifier.training.data, trainData[i])
        algorithm.input.set(classifier.training.labels, trainLabelsSource.getNumericTable())

        # Build the Naive Bayes model
        algorithm.compute()

    # Finalize the Naive Bayes model and retrieve the algorithm results
    trainingResult = algorithm.finalizeCompute()
87 
88 
def testModel():
    """Run batch prediction on the test data set with the trained model.

    Creates a sparse table from testDatasetFileName (stored in the global
    testData), passes it together with the model taken from the global
    trainingResult to a batch prediction algorithm, and stores the value
    returned by compute() in the global predictionResult.

    trainModel() must have been called first; otherwise trainingResult is
    still None and the model lookup fails.
    """
    global predictionResult, testData

    # Read testDatasetFileName and create a numeric table to store the input data
    testData = createSparseTable(testDatasetFileName)

    # Create an algorithm object to predict Naive Bayes values
    algorithm = prediction.Batch(nClasses, method=prediction.fastCSR)

    # Pass a testing data set and the trained model to the algorithm
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, trainingResult.get(classifier.training.model))

    # Predict Naive Bayes values and retrieve the algorithm results
    # (Result class from classifier.prediction)
    predictionResult = algorithm.compute()
104 
105 
def printResults():
    """Print the ground-truth labels next to the predicted labels.

    Loads nTestObservations rows from testGroundTruthFileName and passes
    them, together with the prediction table from the global
    predictionResult, to the printNumericTables helper from utils.

    testModel() must have been called first; otherwise predictionResult is
    still None.
    """
    testGroundTruth = FileDataSource(
        testGroundTruthFileName, DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    testGroundTruth.loadDataBlock(nTestObservations)

    # NOTE(review): 20 presumably limits the output to the first 20 rows, as
    # the message says; the meaning of 15 is defined by utils.printNumericTables
    # and should be confirmed there.
    printNumericTables(
        testGroundTruth.getNumericTable(),
        predictionResult.get(classifier.prediction.prediction),
        "Ground truth", "Classification results",
        "NaiveBayes classification results (first 20 observations):", 20, 15, flt64=False
    )
120 
if __name__ == "__main__":
    # Run the example end to end: train online, predict on the test set,
    # then print the comparison with the ground truth.
    trainModel()
    testModel()
    printResults()

For more complete information about compiler optimizations, see our Optimization Notice.