48 from daal
import step1Local, step2Master
49 from daal.algorithms.multinomial_naive_bayes
import prediction, training
50 from daal.algorithms
import classifier
51 from daal.data_management
import (
52 FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
# Put the parent of this script's directory at the front of the module search
# path (only if it is not already listed) so the `utils` import below resolves.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
58 from utils
import printNumericTables
# Relative location of the example data directory.
DAAL_PREFIX = os.path.join('..', 'data')

# Training input, one entry per data block; the training loop reads entry `i`
# for block `i`. All four entries currently name the same CSV file.
trainDatasetFileNames = [
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_dense.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_dense.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_dense.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_train_dense.csv'),
]

# Input used for the prediction step.
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'naivebayes_test_dense.csv')
77 predictionResult =
None 78 testGroundTruth =
None 84 masterAlgorithm = training.Distributed(step2Master, nClasses)
86 for i
in range(nBlocks):
88 trainDataSource = FileDataSource(
89 trainDatasetFileNames[i], DataSourceIface.notAllocateNumericTable,
90 DataSourceIface.doDictionaryFromContext
93 trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
94 trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
95 mergedData = MergedNumericTable(trainData, trainGroundTruth)
98 trainDataSource.loadDataBlock(mergedData)
101 localAlgorithm = training.Distributed(step1Local, nClasses)
104 localAlgorithm.input.set(classifier.training.data, trainData)
105 localAlgorithm.input.set(classifier.training.labels, trainGroundTruth)
109 masterAlgorithm.input.add(training.partialModels, localAlgorithm.compute())
112 masterAlgorithm.compute()
113 trainingResult = masterAlgorithm.finalizeCompute()
117 global predictionResult, testGroundTruth
120 testDataSource = FileDataSource(
121 testDatasetFileName, DataSourceIface.notAllocateNumericTable,
122 DataSourceIface.doDictionaryFromContext
126 testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
127 testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
128 mergedData = MergedNumericTable(testData, testGroundTruth)
131 testDataSource.loadDataBlock(mergedData)
134 algorithm = prediction.Batch(nClasses)
137 algorithm.input.setTable(classifier.prediction.data, testData)
138 algorithm.input.setModel(classifier.prediction.model, trainingResult.get(classifier.training.model))
141 predictionResult = algorithm.compute()
146 testGroundTruth, predictionResult.get(classifier.prediction.prediction),
147 "Ground truth",
"Classification results",
148 "NaiveBayes classification results (first 20 observations):", 20, flt64=
False 151 if __name__ ==
"__main__":