# DAAL ridge regression training/prediction modules and the data-management
# classes used to read CSV input into numeric tables.
from daal.algorithms.ridge_regression import training, prediction
from daal.data_management import (
    DataSource,
    FileDataSource,
    NumericTable,
    HomogenNumericTable,
    MergedNumericTable,
)
# Parent of the directory that contains this file. The path is made absolute
# *before* taking dirname() twice: with a relative __file__ such as
# "script.py", dirname() would otherwise collapse to "" and the result would
# be the current working directory instead of the parent folder.
utils_folder = os.path.realpath(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
if utils_folder not in sys.path:
    # Prepend so this location is searched before the other sys.path entries.
    sys.path.insert(0, utils_folder)
# Imported after the sys.path adjustment above so that `utils` can be found
# in utils_folder.
from utils import printNumericTable
# Input data set parameters: relative paths of the CSV files used for
# training and for testing.
trainDatasetFileName = os.path.join("..", "data", "batch", "linear_regression_train.csv")
testDatasetFileName = os.path.join("..", "data", "batch", "linear_regression_test.csv")

# Number of rows requested from the training data source per loadDataBlock() call.
nTrainVectorsInBlock = 250

# Column count of the dependent-variables table built alongside the feature table.
nDependentVariables = 2

# NOTE(review): `nFeatures` is read by trainModel() and testModel() but is not
# defined in this part of the file — confirm it is assigned elsewhere.
def trainModel():
    """Train a ridge regression model in online mode from the training CSV.

    Rows are read from ``trainDatasetFileName`` in blocks of
    ``nTrainVectorsInBlock``; each full block is fed to a
    ``training.Online`` algorithm, which is finalized once reading stops.
    The fitted coefficients are printed via ``printNumericTable``.

    Returns:
        The object returned by ``algorithm.finalizeCompute()``; the model is
        available from it via ``.get(training.model)``.
    """
    # The data source is told not to allocate its own table: every block is
    # loaded into mergedData instead.
    trainDataSource = FileDataSource(
        trainDatasetFileName,
        DataSource.notAllocateNumericTable,
        DataSource.doDictionaryFromContext,
    )

    # Separate tables for features and dependent variables, combined into a
    # merged table that is the target of each load.
    trainData = HomogenNumericTable(nFeatures, 0, NumericTable.doNotAllocate)
    trainDependentVariables = HomogenNumericTable(nDependentVariables, 0, NumericTable.doNotAllocate)
    mergedData = MergedNumericTable(trainData, trainDependentVariables)

    algorithm = training.Online()

    # The loop ends as soon as a load returns anything other than
    # nTrainVectorsInBlock, so a final block with a different row count is
    # not passed to the algorithm.
    while trainDataSource.loadDataBlock(nTrainVectorsInBlock, mergedData) == nTrainVectorsInBlock:
        algorithm.input.set(training.data, trainData)
        algorithm.input.set(training.dependentVariables, trainDependentVariables)

        # Update the partial result with the block that was just loaded;
        # without this call finalizeCompute() has nothing to finalize.
        algorithm.compute()

    trainingResult = algorithm.finalizeCompute()

    printNumericTable(trainingResult.get(training.model).getBeta(), "Ridge Regression coefficients:")
    return trainingResult
def testModel(trainingResult):
    """Run batch prediction on the test CSV and print results and ground truth.

    Args:
        trainingResult: Training result object; its model is obtained with
            ``trainingResult.get(training.model)``.

    Returns:
        None. The predictions and the ground-truth table are printed with
        ``printNumericTable`` (called with 10 as the trailing argument, which
        the captions describe as the first 10 rows).
    """
    testDataSource = FileDataSource(
        testDatasetFileName,
        DataSource.doAllocateNumericTable,
        DataSource.doDictionaryFromContext,
    )

    # Separate tables for features and ground-truth values, combined into a
    # merged table that receives the loaded data.
    testData = HomogenNumericTable(nFeatures, 0, NumericTable.doNotAllocate)
    testGroundTruth = HomogenNumericTable(nDependentVariables, 0, NumericTable.doNotAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    # Single load with no block size given.
    testDataSource.loadDataBlock(mergedData)

    algorithm = prediction.Batch()

    # Only the feature table is given to the predictor; the ground truth is
    # kept aside for the printout below.
    algorithm.input.setTable(prediction.data, testData)
    algorithm.input.setModel(prediction.model, trainingResult.get(training.model))

    predictionResult = algorithm.compute()

    printNumericTable(
        predictionResult.get(prediction.prediction),
        "Ridge Regression prediction results: (first 10 rows):",
        10,
    )
    printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)
if __name__ == "__main__":
    # Script entry point: train the model, then pass the training result to
    # the test routine.
    trainingResult = trainModel()
    testModel(trainingResult)