# Step identifiers passed to training.Distributed below
from daal import step1Local, step2Master
from daal.algorithms.linear_regression import training, prediction
from daal.data_management import (
    DataSourceIface, FileDataSource, HomogenNumericTable, MergedNumericTable, NumericTableIface
)
# Make the directory one level above this script's directory importable, so
# that the shared `utils` module can be found regardless of the current
# working directory.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    # Prepend so this location is searched before the rest of sys.path
    sys.path.insert(0, utils_folder)
# Must come after the sys.path adjustment above
from utils import printNumericTable
# Root directory of the example data sets, relative to the working directory
DAAL_PREFIX = os.path.join('..', 'data')

# One training CSV file per data block; indexed by block number during training
trainDatasetFileNames = [
    os.path.join(DAAL_PREFIX, 'distributed', 'linear_regression_train_1.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'linear_regression_train_2.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'linear_regression_train_3.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'linear_regression_train_4.csv'),
]

# CSV file used for the prediction stage
testDatasetFileName = os.path.join(DAAL_PREFIX, 'distributed', 'linear_regression_test.csv')

# NOTE(review): `nBlocks` and `nFeatures` are used by the training and
# prediction code but their definitions are not visible in this part of the
# file -- confirm they are defined before that code runs.

# Column count of the dependent-variable tables created for training and testing
nDependentVariables = 2

# Placeholder until the prediction stage assigns the computed result
predictionResult = None
# --- Training stage: distributed linear regression using the QR method -------
# NOTE(review): the `__main__` guard at the end of the file suggests these
# statements may originally have been wrapped in a function; no `def` is
# visible here, so they are kept as top-level statements -- confirm.

# Step-2 ("master") algorithm object; it receives one partial model per block
masterAlgorithm = training.Distributed(step2Master, method=training.qrDense)

for i in range(nBlocks):
    # Data source bound to the i-th training CSV file
    trainDataSource = FileDataSource(
        trainDatasetFileNames[i], DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Separate tables for the features and the dependent variables, combined
    # into a single merged table that is handed to the data source
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainDependentVariables = HomogenNumericTable(
        nDependentVariables, 0, NumericTableIface.doNotAllocate
    )
    mergedData = MergedNumericTable(trainData, trainDependentVariables)

    # Load the block's data through the merged table
    trainDataSource.loadDataBlock(mergedData)

    # Step-1 ("local") algorithm object for this block
    localAlgorithm = training.Distributed(step1Local, method=training.qrDense)

    # Feed the block's features and dependent variables to the local step
    localAlgorithm.input.set(training.data, trainData)
    localAlgorithm.input.set(training.dependentVariables, trainDependentVariables)

    # Compute the block's partial model and register it with the master step
    masterAlgorithm.input.add(training.partialModels, localAlgorithm.compute())

# Run the master step once, after every block has contributed its partial model
masterAlgorithm.compute()

# Finalize to obtain the training result, then print the model coefficients
trainingResult = masterAlgorithm.finalizeCompute()
printNumericTable(trainingResult.get(training.model).getBeta(), "Linear Regression coefficients:")
# --- Prediction stage: apply the trained model to the test data set ----------
# NOTE(review): relies on `trainingResult` produced by the training stage and
# on `nFeatures`, whose definition is not visible in this part of the file.

# Data source bound to the test CSV file
testDataSource = FileDataSource(
    testDatasetFileName, DataSourceIface.doAllocateNumericTable,
    DataSourceIface.doDictionaryFromContext
)

# Separate tables for the test features and the ground-truth responses,
# combined into a single merged table that is handed to the data source
testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
testGroundTruth = HomogenNumericTable(nDependentVariables, 0, NumericTableIface.doNotAllocate)
mergedData = MergedNumericTable(testData, testGroundTruth)

# Load the test data through the merged table
testDataSource.loadDataBlock(mergedData)

# Batch-mode prediction algorithm object
algorithm = prediction.Batch()

# Inputs: the test features and the model taken from the training result
algorithm.input.setTable(prediction.data, testData)
algorithm.input.setModel(prediction.model, trainingResult.get(training.model))

# Run the prediction, then print the predicted values next to the ground truth
predictionResult = algorithm.compute()
printNumericTable(predictionResult.get(prediction.prediction),
                  "Linear Regression prediction results: (first 10 rows):", 10)
printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)
149 if __name__ ==
"__main__":