47 from daal.algorithms
import decision_forest
48 from daal.algorithms.decision_forest.regression
import prediction, training
49 from daal.data_management
import (
50 FileDataSource, DataSourceIface, NumericTableIface,
51 HomogenNumericTable, MergedNumericTable, data_feature_utils
# Make the directory one level above this script's own directory importable,
# so that modules living there can be imported by the statements that follow.
utils_folder = os.path.realpath(
    os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
)
# Only prepend the folder when it is not already on the search path, so
# repeated imports of this module do not grow sys.path.
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
57 from utils
import printNumericTable
# Data directory, relative to the current working directory.
DAAL_PREFIX = os.path.join('..', 'data')

# CSV files used for the training and prediction stages.
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_regression_train.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_regression_test.csv')
# Module-level placeholders. Both names are rebound later in this file:
# testGroundTruth when the test tables are created, predictionResult by the
# prediction algorithm's compute() call.
predictionResult = None
testGroundTruth = None
# --- Training stage ---------------------------------------------------------
# NOTE(review): in the visible text this FileDataSource call had lost its
# first argument and its closing parenthesis. trainDatasetFileName is the
# only training path defined in this file, so it is restored as the source
# path -- confirm against the upstream example.
trainDataSource = FileDataSource(
    trainDatasetFileName,
    DataSourceIface.notAllocateNumericTable,
    DataSourceIface.doDictionaryFromContext
)

# Unallocated tables for the nFeatures feature columns and the single
# dependent-variable column, combined into one merged table for loading.
trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
mergedData = MergedNumericTable(trainData, trainGroundTruth)

# Load the file contents into the merged table.
trainDataSource.loadDataBlock(mergedData)

# Mark the feature at index 3 as categorical in the training table's data
# dictionary. (Named featureDict rather than `dict` so the builtin is not
# shadowed.)
featureDict = trainData.getDictionary()
featureDict[3].featureType = data_feature_utils.DAAL_CATEGORICAL

# Configure the decision forest regression training algorithm: number of
# trees, MDA_Raw variable importance, and out-of-bag error computation.
algorithm = training.Batch()
algorithm.parameter.nTrees = nTrees
algorithm.parameter.varImportance = decision_forest.training.MDA_Raw
algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError

# Pass the features and the dependent variable to the algorithm.
algorithm.input.set(training.data, trainData)
algorithm.input.set(training.dependentVariable, trainGroundTruth)

# Train, keep the resulting model for the prediction stage, and print the
# two result tables requested above.
trainingResult = algorithm.compute()
model = trainingResult.get(training.model)
printNumericTable(trainingResult.getTable(training.variableImportance), "Variable importance results: ")
printNumericTable(trainingResult.getTable(training.outOfBagError), "OOB error: ")
# --- Prediction stage -------------------------------------------------------
# NOTE(review): the visible text began this section with
# `global testGroundTruth, predictionResult`, which suggests an enclosing
# function whose `def` line is not part of the visible text. At module level
# the declaration is unnecessary, and Python rejects a `global` statement
# that follows a module-level assignment to the same name, so it is omitted
# here. Restore it if these statements are moved back inside a function.
#
# NOTE(review): the FileDataSource call had lost its first argument and its
# closing parenthesis. testDatasetFileName is the only test path defined in
# this file, so it is restored as the source path -- confirm against the
# upstream example.
testDataSource = FileDataSource(
    testDatasetFileName,
    DataSourceIface.notAllocateNumericTable,
    DataSourceIface.doDictionaryFromContext
)

# Unallocated tables for the nFeatures feature columns and the single
# ground-truth column, combined into one merged table for loading.
testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
mergedData = MergedNumericTable(testData, testGroundTruth)

# Load the file contents into the merged table.
testDataSource.loadDataBlock(mergedData)

# Mark the feature at index 3 as categorical, mirroring the training table.
# (Named featureDict rather than `dict` so the builtin is not shadowed.)
featureDict = testData.getDictionary()
featureDict[3].featureType = data_feature_utils.DAAL_CATEGORICAL

# Run decision forest regression prediction on the test features using the
# model produced by the training stage.
algorithm = prediction.Batch()
algorithm.input.setTable(prediction.data, testData)
algorithm.input.set(prediction.model, model)

# Keep the result at module level so it can be printed afterwards.
predictionResult = algorithm.compute()
154 predictionResult.get(prediction.prediction),
155 "Decision forest prediction results (first 10 rows):", 10
159 "Ground truth (first 10 rows):", 10
162 if __name__ ==
"__main__":