22 from daal
import step1Local, step2Master
23 from daal.algorithms.multinomial_naive_bayes
import prediction, training
24 from daal.algorithms
import classifier
25 from daal.data_management
import (
26 FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
# Put the parent of this script's directory on the import path so that the
# shared `utils` module can be imported from there.
utils_folder = os.path.realpath(
    os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
)
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTables

# Data directory, expressed as a relative path.
DAAL_PREFIX = os.path.join('..', 'data')

# Training inputs: the same batch file is listed four times, one entry per
# position indexed by the training loop.
trainDatasetFileNames = [
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_dense.csv'),
] * 4

testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_test_dense.csv')

# Module-level holders; they are rebound later through a `global` declaration.
predictionResult = testGroundTruth = None
def trainModel():
    """Train the naive Bayes model from ``nBlocks`` blocks of training data.

    For each block index ``i`` the file ``trainDatasetFileNames[i]`` is loaded,
    a ``step1Local`` training algorithm is run on it, and the result is added
    to one ``step2Master`` algorithm. The master algorithm is then computed
    and finalized, and the outcome is stored in the module-level
    ``trainingResult``.
    """
    # NOTE(review): the `def` line and the `global` declaration are absent from
    # the excerpt this body was rebuilt from. The function name is an assumption
    # to confirm against the call site. `trainingResult` has to be module-level
    # because the prediction code reads it outside this function.
    global trainingResult

    masterAlgorithm = training.Distributed(step2Master, nClasses)

    for i in range(nBlocks):
        # Data source for the i-th training file.
        trainDataSource = FileDataSource(
            trainDatasetFileNames[i],
            DataSourceIface.notAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext,
        )

        # Feature table (nFeatures columns) and label table (1 column), both
        # created with the doNotAllocate flag and combined into one merged
        # table that the data source loads into.
        trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
        trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
        mergedData = MergedNumericTable(trainData, trainGroundTruth)
        trainDataSource.loadDataBlock(mergedData)

        # Step 1: run the local algorithm on this block.
        localAlgorithm = training.Distributed(step1Local, nClasses)
        localAlgorithm.input.set(classifier.training.data, trainData)
        localAlgorithm.input.set(classifier.training.labels, trainGroundTruth)
        partialModel = localAlgorithm.compute()

        # Hand the block's result to the master algorithm.
        masterAlgorithm.input.add(training.partialModels, partialModel)

    # Step 2: combine everything that was added and finalize.
    masterAlgorithm.compute()
    trainingResult = masterAlgorithm.finalizeCompute()
def testModel():
    """Run batch prediction on the test data set with the trained model.

    Loads ``testDatasetFileName`` into a feature table and a one-column label
    table, feeds the feature table and the model taken from ``trainingResult``
    to a ``prediction.Batch`` algorithm, and stores the outcome in the
    module-level ``predictionResult``. The label table is kept in the
    module-level ``testGroundTruth``.
    """
    # NOTE(review): the `def` line is absent from the excerpt this body was
    # rebuilt from; the function name is an assumption to confirm against the
    # call site.
    global predictionResult, testGroundTruth

    # Data source for the test file.
    testDataSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature table and label table, combined into one merged table that the
    # data source loads into.
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)
    testDataSource.loadDataBlock(mergedData)

    # Configure the prediction algorithm with the test features and the model.
    algorithm = prediction.Batch(nClasses)
    trainedModel = trainingResult.get(classifier.training.model)
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, trainedModel)

    predictionResult = algorithm.compute()
def printResults():
    """Print the test labels alongside the predicted labels."""
    # NOTE(review): the `def` line and the head of this call are absent from
    # the excerpt this was rebuilt from. The function name is an assumption, and
    # `printNumericTables` is assumed because it is the helper this module
    # imports from `utils` — confirm both before merging.
    printNumericTables(
        testGroundTruth,
        predictionResult.get(classifier.prediction.prediction),
        "Ground truth",
        "Classification results",
        "NaiveBayes classification results (first 20 observations):",
        20,
        flt64=False,
    )
125 if __name__ ==
"__main__":