48 from daal.algorithms
import decision_forest
49 from daal.algorithms.decision_forest.regression
import prediction, training
50 from daal.data_management
import (
51 FileDataSource, DataSourceIface, NumericTableIface,
52 HomogenNumericTable, MergedNumericTable, data_feature_utils
# Put the directory one level above this file's directory at the front of
# sys.path so that the ``utils`` module imported right below can be found.
#
# abspath() is applied to __file__ *before* the two dirname() calls: when the
# script is started through a bare relative name (e.g. ``python script.py``
# on interpreters that keep __file__ relative), dirname() collapses to '' and
# the previous order resolved to the current directory instead of its parent.
utils_folder = os.path.realpath(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
58 from utils
import printNumericTable
# Root of the example data sets; a relative path, so it is resolved against
# the current working directory at the time the files are opened.
DAAL_PREFIX = os.path.join('..', 'data')

# Training and test data set files inside the 'batch' sub-directory.
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_regression_train.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_regression_test.csv')
73 predictionResult =
None 74 testGroundTruth =
None 81 trainDataSource = FileDataSource(
83 DataSourceIface.notAllocateNumericTable,
84 DataSourceIface.doDictionaryFromContext
# Tables created with the notAllocate flag and zero rows: one with nFeatures
# columns for the features, one single-column table for the dependent
# variable. They are combined into one merged table that is handed to the
# data source (presumably so one load fills both -- confirm against the
# MergedNumericTable documentation).
trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
mergedData = MergedNumericTable(trainData, trainGroundTruth)

# Load the input through the data source into the merged table.
trainDataSource.loadDataBlock(mergedData)

# Mark the feature at index 3 as categorical in the training data dictionary.
# The local is no longer called ``dict`` so the builtin is not shadowed.
featureDictionary = trainData.getDictionary()
featureDictionary[3].featureType = data_feature_utils.DAAL_CATEGORICAL

# Set up the training algorithm: number of trees, variable importance mode,
# and the two out-of-bag results that are printed further down.
algorithm = training.Batch()
algorithm.parameter.nTrees = nTrees
algorithm.parameter.varImportance = decision_forest.training.MDA_Raw
algorithm.parameter.resultsToCompute = (
    decision_forest.training.computeOutOfBagError
    | decision_forest.training.computeOutOfBagErrorPerObservation
)

# Pass the training data and the dependent variable to the algorithm.
algorithm.input.set(training.data, trainData)
algorithm.input.set(training.dependentVariable, trainGroundTruth)

# Run the training and keep the resulting model.
trainingResult = algorithm.compute()
model = trainingResult.get(training.model)

printNumericTable(trainingResult.getTable(training.variableImportance),
                  "Variable importance results: ")
printNumericTable(trainingResult.getTable(training.outOfBagError),
                  "OOB error: ")
# The row-limited print now reads the per-observation table requested through
# resultsToCompute above; it used to print training.outOfBagError a second
# time, so the per-observation result was computed but never shown.
printNumericTable(trainingResult.getTable(training.outOfBagErrorPerObservation),
                  "OOB error (first 10 rows): ", 10)
119 global testGroundTruth, predictionResult
122 testDataSource = FileDataSource(
124 DataSourceIface.notAllocateNumericTable,
125 DataSourceIface.doDictionaryFromContext
# Tables created with the notAllocate flag and zero rows: nFeatures columns
# for the test features and one column for the ground truth, combined into a
# merged table that is handed to the data source.
testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
mergedData = MergedNumericTable(testData, testGroundTruth)

# Load the test input through the data source into the merged table.
testDataSource.loadDataBlock(mergedData)

# Mark the feature at index 3 as categorical, the same index that is flagged
# on the training data. The local is no longer called ``dict`` so the builtin
# is not shadowed.
featureDictionary = testData.getDictionary()
featureDictionary[3].featureType = data_feature_utils.DAAL_CATEGORICAL

# Set up the prediction algorithm with the test data and the trained model.
algorithm = prediction.Batch()
algorithm.input.setTable(prediction.data, testData)
algorithm.input.set(prediction.model, model)

# Run the prediction and keep the result for later printing.
predictionResult = algorithm.compute()
156 predictionResult.get(prediction.prediction),
157 "Decision forest prediction results (first 10 rows):", 10
161 "Ground truth (first 10 rows):", 10
164 if __name__ ==
"__main__":