from daal import step1Local, step2Master
from daal.algorithms.ridge_regression import training, prediction
from daal.data_management import (
    DataSource,
    FileDataSource,
    NumericTable,
    HomogenNumericTable,
    MergedNumericTable,
)

# Put the parent of this script's directory on the import path so that the
# `utils` module imported just below can be found.
utils_folder = os.path.dirname(os.path.dirname(__file__))
utils_folder = os.path.realpath(os.path.abspath(utils_folder))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
# Input data set parameters.
# One training CSV per block; trainModel() indexes this list by block number.
trainDatasetFileNames = [
    os.path.join("..", "data", "distributed", "linear_regression_train_1.csv"),
    os.path.join("..", "data", "distributed", "linear_regression_train_2.csv"),
    os.path.join("..", "data", "distributed", "linear_regression_train_3.csv"),
    os.path.join("..", "data", "distributed", "linear_regression_train_4.csv"),
]

# CSV read by testModel() for prediction and ground-truth comparison.
testDatasetFileName = os.path.join(
    "..", "data", "distributed", "linear_regression_test.csv"
)

# Size passed as the first argument of the dependent-variable tables.
nDependentVariables = 2

# NOTE(review): `nBlocks` and `nFeatures` are used by the training/testing code
# but are not defined in the visible part of this file -- confirm they are
# defined elsewhere (and that nBlocks does not exceed the number of files above).
def trainModel():
    """Train a ridge regression model from the per-block training files.

    For each block index in ``range(nBlocks)`` a step-1 (local) training
    algorithm is run on that block's data, and its partial result is added
    to the input of a single step-2 (master) training algorithm.  After all
    blocks are processed the master algorithm is computed and finalized,
    and the model coefficients are printed.

    Reads the module-level names ``nBlocks``, ``nFeatures``,
    ``nDependentVariables`` and ``trainDatasetFileNames``.

    Returns:
        The object returned by ``masterAlgorithm.finalizeCompute()``; the
        trained model is retrieved from it with ``.get(training.model)``.
    """
    # Algorithm object that merges the partial results of all blocks.
    masterAlgorithm = training.Distributed(step=step2Master)

    for i in range(nBlocks):
        # Data source for the i-th training file.  No table is allocated by
        # the source itself; the tables to fill are supplied below.
        trainDataSource = FileDataSource(
            trainDatasetFileNames[i],
            DataSource.notAllocateNumericTable,
            DataSource.doDictionaryFromContext,
        )

        # Separate tables for the features and the dependent variables,
        # combined into one merged table used as the load target.
        trainData = HomogenNumericTable(nFeatures, 0, NumericTable.doNotAllocate)
        trainDependentVariables = HomogenNumericTable(
            nDependentVariables, 0, NumericTable.doNotAllocate
        )
        mergedData = MergedNumericTable(trainData, trainDependentVariables)

        # Load the file through the merged table (presumably this populates
        # both underlying tables -- they are what the algorithm consumes).
        trainDataSource.loadDataBlock(mergedData)

        # Step-1 algorithm for this block.
        localAlgorithm = training.Distributed(step=step1Local)
        localAlgorithm.input.set(training.data, trainData)
        localAlgorithm.input.set(training.dependentVariables, trainDependentVariables)

        # Partial result for this block, handed to the master algorithm.
        presult = localAlgorithm.compute()
        masterAlgorithm.input.add(training.partialModels, presult)

    # Combine the partial results, then finalize to obtain the training result.
    masterAlgorithm.compute()
    trainingResult = masterAlgorithm.finalizeCompute()

    printNumericTable(
        trainingResult.get(training.model).getBeta(),
        "Ridge Regression coefficients:",
    )
    return trainingResult
def testModel(trainingResult):
    """Predict on the test file with a trained model and print the outcome.

    Loads ``testDatasetFileName`` into a feature table and a ground-truth
    table, runs batch prediction on the features, then prints the first
    10 rows of the predictions and of the ground truth.

    Args:
        trainingResult: Object whose ``get(training.model)`` returns the
            model to predict with (the value returned by ``trainModel()``
            is passed in by the script's entry point).
    """
    source = FileDataSource(
        testDatasetFileName,
        DataSource.doAllocateNumericTable,
        DataSource.doDictionaryFromContext,
    )

    # Feature and ground-truth tables, merged so one load call targets both.
    features = HomogenNumericTable(nFeatures, 0, NumericTable.doNotAllocate)
    groundTruth = HomogenNumericTable(
        nDependentVariables, 0, NumericTable.doNotAllocate
    )
    merged = MergedNumericTable(features, groundTruth)
    source.loadDataBlock(merged)

    # Batch prediction: feed it the test features and the trained model.
    predictor = prediction.Batch()
    predictor.input.setTable(prediction.data, features)
    model = trainingResult.get(training.model)
    predictor.input.setModel(prediction.model, model)

    outcome = predictor.compute()

    printNumericTable(
        outcome.get(prediction.prediction),
        "Ridge Regression prediction results: (first 10 rows):",
        10,
    )
    printNumericTable(groundTruth, "Ground truth (first 10 rows):", 10)
# Script entry point: train on the distributed blocks, then evaluate the
# resulting model on the test data set.
if __name__ == "__main__":
    trainingResult = trainModel()
    testModel(trainingResult)