47 from daal.algorithms
import decision_forest
48 from daal.algorithms.decision_forest.classification
import prediction, training
49 from daal.algorithms
import classifier
50 from daal.data_management
import (
51 FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
52 MergedNumericTable, data_feature_utils
# Put the parent of this script's directory at the front of sys.path (once)
# before importing the printing helpers from the `utils` module.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable, printNumericTables
# Root of the example data tree, expressed relative to the current working
# directory.
DAAL_PREFIX = os.path.join('..', 'data')

# CSV inputs for the batch example: training set and test set.
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_train.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_test.csv')

# Forwarded to the training algorithm's parameter of the same name.
minObservationsInLeafNode = 8

# Module-level slots; the prediction code rebinds both names through a
# `global` declaration.
predictionResult = None
testGroundTruth = None
# Training step: load the training data, configure the decision forest
# classification trainer, run it, keep the model and print two result tables.
# NOTE(review): the enclosing function header is not present in this listing,
# and the leading numbers on each line are listing artifacts, not code.
#
# Create the data source with the notAllocateNumericTable and
# doDictionaryFromContext flags.
# NOTE(review): this call looks truncated - the listing jumps from 83 to 85
# and no closing parenthesis follows 86; presumably the file-name argument
# (trainDatasetFileName?) and ")" are missing - confirm against upstream.
83 trainDataSource = FileDataSource(
85 DataSourceIface.notAllocateNumericTable,
86 DataSourceIface.doDictionaryFromContext
# Two unallocated tables: one sized by nFeatures for the feature values and a
# single-width one that is passed to the algorithm as labels further down.
# (presumably the arguments are columns, rows, allocation flag - TODO confirm)
90 trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
91 trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
# The data source is loaded through a merged view of both tables; the two
# underlying tables are what the algorithm consumes below.
92 mergedData = MergedNumericTable(trainData, trainGroundTruth)
95 trainDataSource.loadDataBlock(mergedData)
# Mark features 0 and 1 as continuous and feature 2 as categorical in the
# table's dictionary.
# NOTE(review): the local name `dict` shadows the Python builtin.
98 dict = trainData.getDictionary()
101 dict[0].featureType = data_feature_utils.DAAL_CONTINUOUS
102 dict[1].featureType = data_feature_utils.DAAL_CONTINUOUS
103 dict[2].featureType = data_feature_utils.DAAL_CATEGORICAL
# Configure the batch training algorithm for nClasses classes.
106 algorithm = training.Batch(nClasses)
107 algorithm.parameter.nTrees = nTrees
108 algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
# featuresPerNode is set to the full feature count.
109 algorithm.parameter.featuresPerNode = nFeatures
# Select MDI variable importance and request the out-of-bag error; both are
# printed after compute().
110 algorithm.parameter.varImportance = decision_forest.training.MDI
111 algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError
# Hand the feature table and the label table to the algorithm.
114 algorithm.input.set(classifier.training.data, trainData)
115 algorithm.input.set(classifier.training.labels, trainGroundTruth)
# Run training and pull the trained model out of the result.
118 trainingResult = algorithm.compute()
# NOTE(review): `model` is read by the prediction code later in the file; no
# `global model` declaration is visible in this listing - confirm the scope.
119 model = trainingResult.get(classifier.training.model)
# Print the variable-importance table and the out-of-bag error table.
120 printNumericTable(trainingResult.getTable(training.variableImportance),
"Variable importance results: ")
121 printNumericTable(trainingResult.getTable(training.outOfBagError),
"OOB error: ")
# Prediction step: load the test data, run the decision forest classification
# predictor with the trained model and store the outcome in module globals.
# NOTE(review): the enclosing function header is not present in this listing,
# and the leading numbers on each line are listing artifacts, not code.
#
# Both names are rebound below and must be visible at module level afterwards.
124 global testGroundTruth, predictionResult
# Create the data source with the notAllocateNumericTable and
# doDictionaryFromContext flags.
# NOTE(review): this call looks truncated - the listing jumps from 127 to 129
# and no closing parenthesis follows 130; presumably the file-name argument
# (testDatasetFileName?) and ")" are missing - confirm against upstream.
127 testDataSource = FileDataSource(
129 DataSourceIface.notAllocateNumericTable,
130 DataSourceIface.doDictionaryFromContext
# Two unallocated tables: one sized by nFeatures for the feature values and a
# single-width one kept in the global testGroundTruth.
# (presumably the arguments are columns, rows, allocation flag - TODO confirm)
134 testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
135 testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
# The data source is loaded through a merged view of both tables.
136 mergedData = MergedNumericTable(testData, testGroundTruth)
139 testDataSource.loadDataBlock(mergedData)
# Same feature typing as the training code: features 0 and 1 continuous,
# feature 2 categorical.
# NOTE(review): the local name `dict` shadows the Python builtin.
142 dict = testData.getDictionary()
145 dict[0].featureType = data_feature_utils.DAAL_CONTINUOUS
146 dict[1].featureType = data_feature_utils.DAAL_CONTINUOUS
147 dict[2].featureType = data_feature_utils.DAAL_CATEGORICAL
# Configure the batch prediction algorithm for nClasses classes.
150 algorithm = prediction.Batch(nClasses)
# Inputs: the test feature table and `model`, which the training code assigns.
153 algorithm.input.setTable(classifier.prediction.data, testData)
154 algorithm.input.setModel(classifier.prediction.model, model)
# Run prediction; the result is kept in the global predictionResult.
158 predictionResult = algorithm.compute()
# NOTE(review): these lines are the tail of a call's argument list; the line
# that opens the call (callee and any earlier arguments) and the closing
# parenthesis are missing from this listing. Presumably this is a
# printNumericTables(...) call - confirm against upstream.
# Visible arguments: the prediction table taken from predictionResult, two
# column headers, a message string and the number 20.
# NOTE(review): "classification classification" in the message looks like a
# duplicated word.
165 predictionResult.get(classifier.prediction.prediction),
166 "Ground truth",
"Classification results",
167 "decision forest classification classification results (first 20 observations):", 20
170 if __name__ ==
"__main__":