# DAAL algorithm and data-management names used by this example.
# NOTE(review): the closing parenthesis of the grouped import was absent from
# the extracted listing and has been restored.
from daal.algorithms import decision_forest
from daal.algorithms.decision_forest.classification import prediction, training
from daal.algorithms import classifier
from daal.data_management import (
    FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
    MergedNumericTable, features
)
# Resolve the parent of the directory that contains this file and make it
# importable, so that the shared `utils` module can be found.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    # Prepend so this folder takes precedence over other entries on sys.path.
    sys.path.insert(0, utils_folder)
# Table-printing helpers from the project-local `utils` module; this import
# relies on `utils_folder` having been added to sys.path beforehand.
from utils import printNumericTable, printNumericTables
# Relative location of the example data directory.
DAAL_PREFIX = os.path.join('..', 'data')

# CSV data sets used for training and for testing the model.
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_train.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_test.csv')
# Value assigned to the training algorithm's `minObservationsInLeafNode`
# parameter.
minObservationsInLeafNode = 8

# Module-level placeholders: filled in by the prediction step and read when
# the results are printed.
predictionResult = None
testGroundTruth = None
# --- Training step -----------------------------------------------------------
# NOTE(review): the extracted listing shows these statements without an
# enclosing `def` line, and `nFeatures`, `nClasses` and `nTrees` are not
# defined in the visible lines; they must be provided elsewhere in the file.

# Data source for the training CSV file.
trainDataSource = FileDataSource(
    # NOTE(review): this argument line is absent from the extracted listing;
    # presumably the training data set file name -- confirm.
    trainDatasetFileName,
    DataSourceIface.notAllocateNumericTable,
    DataSourceIface.doDictionaryFromContext
)

# Separate tables for the features and the labels, combined into one merged
# table so that a single load fills both.
trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
mergedData = MergedNumericTable(trainData, trainGroundTruth)

# Retrieve the data from the input file.
trainDataSource.loadDataBlock(mergedData)

# Mark the first two features as continuous and the third as categorical.
# (Named `trainDictionary` rather than `dict` to avoid shadowing the builtin.)
trainDictionary = trainData.getDictionary()
trainDictionary[0].featureType = features.DAAL_CONTINUOUS
trainDictionary[1].featureType = features.DAAL_CONTINUOUS
trainDictionary[2].featureType = features.DAAL_CATEGORICAL

# Configure the decision forest classification training algorithm.
algorithm = training.Batch(nClasses)
algorithm.parameter.nTrees = nTrees
algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
algorithm.parameter.featuresPerNode = nFeatures
algorithm.parameter.varImportance = decision_forest.training.MDI
algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError

# Pass the training data set and the dependent values to the algorithm.
algorithm.input.set(classifier.training.data, trainData)
algorithm.input.set(classifier.training.labels, trainGroundTruth)

# Build the model and report the additional results requested above.
trainingResult = algorithm.compute()
model = trainingResult.get(classifier.training.model)
printNumericTable(trainingResult.getTable(training.variableImportance),
                  "Variable importance results: ")
printNumericTable(trainingResult.getTable(training.outOfBagError),
                  "OOB error: ")
# --- Prediction step ---------------------------------------------------------
# NOTE(review): the `global` declaration below implies these statements belong
# to a function body whose `def` line is absent from the extracted listing;
# restore that header (and indent this section) before running. `nFeatures`,
# `nClasses` and `model` are not defined in this section and must be provided
# elsewhere in the file.
global testGroundTruth, predictionResult

# Data source for the test CSV file.
testDataSource = FileDataSource(
    # NOTE(review): this argument line is absent from the extracted listing;
    # presumably the test data set file name -- confirm.
    testDatasetFileName,
    DataSourceIface.notAllocateNumericTable,
    DataSourceIface.doDictionaryFromContext
)

# Separate tables for the features and the ground-truth labels, combined into
# one merged table so that a single load fills both.
testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
mergedData = MergedNumericTable(testData, testGroundTruth)

# Retrieve the data from the input file.
testDataSource.loadDataBlock(mergedData)

# Mark the first two features as continuous and the third as categorical.
# (Named `testDictionary` rather than `dict` to avoid shadowing the builtin.)
testDictionary = testData.getDictionary()
testDictionary[0].featureType = features.DAAL_CONTINUOUS
testDictionary[1].featureType = features.DAAL_CONTINUOUS
testDictionary[2].featureType = features.DAAL_CATEGORICAL

# Create the decision forest classification prediction algorithm.
algorithm = prediction.Batch(nClasses)

# Pass the test data set and the trained model to the algorithm.
algorithm.input.setTable(classifier.prediction.data, testData)
algorithm.input.setModel(classifier.prediction.model, model)

# Compute the predictions; the result is kept for later printing.
predictionResult = algorithm.compute()
# Print the predicted labels next to the ground truth, passing 10 as the row
# limit to match the "(first 10 rows)" captions.
printNumericTable(predictionResult.get(classifier.prediction.prediction),
                  "Decision forest prediction results (first 10 rows):", 10)
printNumericTable(testGroundTruth,
                  "Ground truth (first 10 rows):", 10)
140 if __name__ ==
"__main__":