48 from daal.algorithms
import decision_forest
49 from daal.algorithms.decision_forest.classification
import prediction, training
50 from daal.algorithms
import classifier
51 from daal.data_management
import (
52 FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
53 MergedNumericTable, data_feature_utils
# Resolve the directory two levels up from this file (the parent of the
# directory containing it) and put it at the front of sys.path, so that the
# `utils` module imported below is looked up there first.
utils_folder = os.path.realpath(
    os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
)
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)

# Must come after the sys.path adjustment above.
from utils import printNumericTable, printNumericTables
# Relative location of the example data directory.
DAAL_PREFIX = os.path.join('..', 'data')

# Both CSV inputs live in the same 'batch' sub-directory of DAAL_PREFIX.
_batchDataDir = os.path.join(DAAL_PREFIX, 'batch')
trainDatasetFileName = os.path.join(_batchDataDir, 'df_classification_train.csv')
testDatasetFileName = os.path.join(_batchDataDir, 'df_classification_test.csv')

# Value assigned to the training algorithm's `minObservationsInLeafNode`
# parameter further down in this file.
minObservationsInLeafNode = 8
# Module-level holders; both are assigned by the prediction stage and read by
# the result-printing code.
predictionResult = None
testGroundTruth = None

# ---------------------------------------------------------------------------
# Training stage: load the training CSV, train a decision forest classifier,
# keep the trained model in `model`, and print variable importance and the
# out-of-bag error.
#
# NOTE(review): the visible text of this stage had no `def` line, and the
# FileDataSource call was missing its first argument and closing parenthesis.
# The data-file argument is restored as `trainDatasetFileName` (the training
# path defined above) -- confirm against the upstream example.
# `nFeatures`, `nClasses` and `nTrees` are not defined in the visible text and
# are assumed to be defined elsewhere in the file.
# ---------------------------------------------------------------------------
trainDataSource = FileDataSource(
    trainDatasetFileName,
    DataSourceIface.notAllocateNumericTable,
    DataSourceIface.doDictionaryFromContext,
)

# Two separate tables (nFeatures columns for the data, 1 column for the
# labels) combined into one merged table, which is what the data source
# loads into.
trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
mergedData = MergedNumericTable(trainData, trainGroundTruth)

trainDataSource.loadDataBlock(mergedData)

# Mark features 0 and 1 as continuous and feature 2 as categorical.
# (Renamed from `dict` so the builtin is not shadowed.)
trainDictionary = trainData.getDictionary()
trainDictionary[0].featureType = data_feature_utils.DAAL_CONTINUOUS
trainDictionary[1].featureType = data_feature_utils.DAAL_CONTINUOUS
trainDictionary[2].featureType = data_feature_utils.DAAL_CATEGORICAL

# Configure the training algorithm.
algorithm = training.Batch(nClasses)
algorithm.parameter.nTrees = nTrees
algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
algorithm.parameter.featuresPerNode = nFeatures
# Request the two extra results that are printed below.
algorithm.parameter.varImportance = decision_forest.training.MDI
algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError

algorithm.input.set(classifier.training.data, trainData)
algorithm.input.set(classifier.training.labels, trainGroundTruth)

# Train, then keep the model for the prediction stage.
trainingResult = algorithm.compute()
model = trainingResult.get(classifier.training.model)
printNumericTable(trainingResult.getTable(training.variableImportance), "Variable importance results: ")
printNumericTable(trainingResult.getTable(training.outOfBagError), "OOB error: ")
# ---------------------------------------------------------------------------
# Prediction stage: load the test CSV, run the trained `model` on it, and
# store the outcome in `predictionResult` / `testGroundTruth`.
#
# NOTE(review): the visible text of this stage started with
# `global testGroundTruth, predictionResult` but had no `def` line, which
# suggests it was the body of a function whose header is not present in this
# view. At module scope both names are already global, so the declaration is
# not repeated here; re-add it if this code is wrapped in a function.
# The FileDataSource call was also missing its first argument and closing
# parenthesis; the data-file argument is restored as `testDatasetFileName`
# (the test path defined above) -- confirm against the upstream example.
# `nFeatures`, `nClasses` and `model` are expected to be defined before this
# point.
# ---------------------------------------------------------------------------
testDataSource = FileDataSource(
    testDatasetFileName,
    DataSourceIface.notAllocateNumericTable,
    DataSourceIface.doDictionaryFromContext,
)

# Two separate tables (nFeatures columns for the data, 1 column for the
# labels) combined into one merged table, which is what the data source
# loads into.
testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
mergedData = MergedNumericTable(testData, testGroundTruth)

testDataSource.loadDataBlock(mergedData)

# Same feature typing as used for training: 0 and 1 continuous, 2 categorical.
# (Renamed from `dict` so the builtin is not shadowed.)
testDictionary = testData.getDictionary()
testDictionary[0].featureType = data_feature_utils.DAAL_CONTINUOUS
testDictionary[1].featureType = data_feature_utils.DAAL_CONTINUOUS
testDictionary[2].featureType = data_feature_utils.DAAL_CATEGORICAL

# Run prediction with the test data and the previously trained model.
algorithm = prediction.Batch(nClasses)
algorithm.input.setTable(classifier.prediction.data, testData)
algorithm.input.setModel(classifier.prediction.model, model)

predictionResult = algorithm.compute()
# Print the prediction table and the ground-truth table. The trailing 10 is
# passed to printNumericTable alongside captions that say "(first 10 rows)".
printNumericTable(
    predictionResult.get(classifier.prediction.prediction),
    "Decision forest prediction results (first 10 rows):",
    10,
)
printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)
166 if __name__ ==
"__main__":