# NOTE(review): `os` and `sys` are used below but their imports are not
# visible in this excerpt -- confirm they are imported above this point.
from daal.algorithms import gbt
from daal.algorithms.gbt.classification import prediction, training
from daal.algorithms import classifier
from daal.data_management import (
    FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
    MergedNumericTable, data_feature_utils
)

# Put the parent of this file's directory on sys.path (once) so that the
# `utils` module imported right below can be resolved. The import has to stay
# after the path manipulation.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable, printNumericTables
# Relative location of the example data sets.
DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters: CSV files used for training and for testing.
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_train.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_test.csv')

# NOTE(review): `nFeatures`, `nClasses` and `maxIterations` are read further
# down in this file, but their definitions are not visible in this excerpt --
# confirm they are defined before the training/prediction code runs.

# Gradient boosted trees parameter passed to the training algorithm.
minObservationsInLeafNode = 8

# Module-level placeholders; the prediction step declares both names `global`
# and rebinds them, and the result-printing code reads them afterwards.
predictionResult = None
testGroundTruth = None
def trainModel():
    """Train the gradient boosted trees classifier and store it in `model`.

    Reads the training CSV named by ``trainDatasetFileName``, marks the
    feature types, runs ``training.Batch`` and binds the resulting model to
    the module-level name ``model`` so the prediction step can use it.

    NOTE(review): the ``def`` header of this section, the ``global model``
    declaration, and the first argument / closing parenthesis of the
    ``FileDataSource(...)`` call are missing from the listing this was
    recovered from. The function name and those pieces are reconstructed --
    confirm against the original source and its caller.
    """
    global model

    # Data source for the training file.
    trainDataSource = FileDataSource(
        trainDatasetFileName,  # presumed: the only training file name defined in this module
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Numeric tables for the features and for the single label column,
    # combined into one merged table that the data source loads into.
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(trainData, trainGroundTruth)

    # Retrieve the data from the input file.
    trainDataSource.loadDataBlock(mergedData)

    # Declare the type of each feature: the first two are continuous, the
    # third is categorical. (Named `featureDict` to avoid shadowing the
    # builtin `dict`.)
    featureDict = trainData.getDictionary()
    featureDict[0].featureType = data_feature_utils.DAAL_CONTINUOUS
    featureDict[1].featureType = data_feature_utils.DAAL_CONTINUOUS
    featureDict[2].featureType = data_feature_utils.DAAL_CATEGORICAL

    # Create and configure the training algorithm.
    algorithm = training.Batch(nClasses)
    algorithm.parameter().maxIterations = maxIterations
    algorithm.parameter().minObservationsInLeafNode = minObservationsInLeafNode
    algorithm.parameter().featuresPerNode = nFeatures

    # Pass the training features and labels to the algorithm.
    algorithm.input.set(classifier.training.data, trainData)
    algorithm.input.set(classifier.training.labels, trainGroundTruth)

    # Train and keep the resulting model.
    trainingResult = algorithm.compute()
    model = trainingResult.get(classifier.training.model)
def testModel():
    """Run the trained model on the test set and publish the results.

    Reads the test CSV named by ``testDatasetFileName``, marks the feature
    types, runs ``prediction.Batch`` with the module-level ``model`` and
    rebinds the module-level ``testGroundTruth`` and ``predictionResult``.

    NOTE(review): the ``def`` header of this section and the first argument /
    closing parenthesis of the ``FileDataSource(...)`` call are missing from
    the listing this was recovered from. The ``global`` statement shows the
    code lives inside a function; the function name and the file-name
    argument are reconstructed -- confirm against the original source.
    """
    global testGroundTruth, predictionResult

    # Data source for the test file.
    testDataSource = FileDataSource(
        testDatasetFileName,  # presumed: the only test file name defined in this module
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Numeric tables for the features and for the single label column,
    # combined into one merged table that the data source loads into.
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    # Retrieve the data from the input file.
    testDataSource.loadDataBlock(mergedData)

    # Declare the type of each feature, mirroring the training data: the
    # first two are continuous, the third is categorical. (Named
    # `featureDict` to avoid shadowing the builtin `dict`.)
    featureDict = testData.getDictionary()
    featureDict[0].featureType = data_feature_utils.DAAL_CONTINUOUS
    featureDict[1].featureType = data_feature_utils.DAAL_CONTINUOUS
    featureDict[2].featureType = data_feature_utils.DAAL_CATEGORICAL

    # Create the prediction algorithm.
    algorithm = prediction.Batch(nClasses)

    # Pass the test features and the trained model to the algorithm.
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, model)

    # Compute the predictions; the result object is read later when the
    # results are printed.
    predictionResult = algorithm.compute()
def printResults():
    """Print the first 10 predicted labels next to the first 10 true labels.

    Reads the module-level ``predictionResult`` and ``testGroundTruth``, so
    the prediction step must have run first.

    NOTE(review): the ``def`` header of this section is missing from the
    listing this was recovered from; the function name is reconstructed --
    confirm against the original source and its caller.
    """
    printNumericTable(
        predictionResult.get(classifier.prediction.prediction),
        # Caption typo fixed: "Gragient" -> "Gradient".
        "Gradient boosted trees prediction results (first 10 rows):", 10
    )
    printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)
137 if __name__ ==
"__main__":