Deprecation Notice: With the introduction of daal4py, a package that supersedes PyDAAL, Intel is deprecating PyDAAL and will discontinue support starting with Intel® DAAL 2021 and Intel® Distribution for Python 2021. Until then, Intel will continue to provide compatible PyDAAL pip and conda packages for newer releases of Intel DAAL and make it available in open source. However, Intel will not add the new features of Intel DAAL to PyDAAL. Intel recommends that developers switch to and use daal4py.
Note: To find daal4py examples, refer to the daal4py documentation or browse the GitHub repository.
# pyDAAL names used by this example: the multinomial Naive Bayes training and
# prediction namespaces, the generic classifier identifiers, and the
# file-backed data source types.
from daal.algorithms.multinomial_naive_bayes import prediction, training
from daal.algorithms import classifier
from daal.data_management import FileDataSource, DataSourceIface
# Resolve the parent of the directory that contains this script and make it
# importable, so modules stored there can be loaded with a plain ``import``.
# NOTE(review): ``os`` and ``sys`` are not imported in the visible part of this
# listing -- confirm they are imported earlier in the file.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    # Prepend so this folder is searched before the other sys.path entries.
    sys.path.insert(0, utils_folder)
# Helper functions from the local ``utils`` module; presumably resolved through
# the ``utils_folder`` entry placed on sys.path -- verify the module location.
from utils import printNumericTables, createSparseTable
# Relative path prefix shared by every data file referenced below.
DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters.

# Training feature files, one list entry per training block.
# All four entries currently name the same CSV file.
trainDatasetFileNames = [
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_csr.csv'),
]

# Training label files, index-aligned with trainDatasetFileNames.
# All four entries currently name the same CSV file.
trainGroundTruthFileNames = [
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_labels.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_labels.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_labels.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_labels.csv'),
]

# Test feature file and the matching ground-truth label file.
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_test_csr.csv')
testGroundTruthFileName = os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_test_labels.csv')

# Row counts passed to loadDataBlock() when reading the training label files
# (per block) and the test ground-truth file, respectively.
nTrainVectorsInBlock = 8000
nTestObservations = 2000
# Module-level result slot; starts empty and is assigned once prediction has run.
predictionResult = None
# One placeholder slot per training block; each slot is later overwritten with
# the table built for that block.
# NOTE(review): ``nBlocks`` is not defined in the visible part of this listing
# -- presumably declared on a line dropped from the extract; confirm.
trainData = [0] * nBlocks
63 global trainData, trainingResult
66 algorithm = training.Online(nClasses, method=training.fastCSR)
68 for i
in range(nBlocks):
70 trainData[i] = createSparseTable(trainDatasetFileNames[i])
71 trainLabelsSource = FileDataSource(
72 trainGroundTruthFileNames[i], DataSourceIface.doAllocateNumericTable,
73 DataSourceIface.doDictionaryFromContext
76 trainLabelsSource.loadDataBlock(nTrainVectorsInBlock)
79 algorithm.input.set(classifier.training.data, trainData[i])
80 algorithm.input.set(classifier.training.labels, trainLabelsSource.getNumericTable())
86 trainingResult = algorithm.finalizeCompute()
90 global predictionResult, testData
93 testData = createSparseTable(testDatasetFileName)
96 algorithm = prediction.Batch(nClasses, method=prediction.fastCSR)
99 algorithm.input.setTable(classifier.prediction.data, testData)
100 algorithm.input.setModel(classifier.prediction.model, trainingResult.get(classifier.training.model))
103 predictionResult = algorithm.compute()
108 testGroundTruth = FileDataSource(
109 testGroundTruthFileName, DataSourceIface.doAllocateNumericTable,
110 DataSourceIface.doDictionaryFromContext
112 testGroundTruth.loadDataBlock(nTestObservations)
115 testGroundTruth.getNumericTable(),
116 predictionResult.get(classifier.prediction.prediction),
117 "Ground truth",
"Classification results",
118 "NaiveBayes classification results (first 20 observations):", 20, 15, flt64=
False
121 if __name__ ==
"__main__":