Python* API Reference for Intel® Data Analytics Acceleration Library 2019 Update 4

ridge_reg_norm_eq_dense_online.py

Deprecation Notice: With the introduction of daal4py, a package that supersedes PyDAAL, Intel is deprecating PyDAAL and will discontinue support starting with Intel® DAAL 2021 and Intel® Distribution for Python 2021. Until then, Intel will continue to provide compatible PyDAAL pip and conda packages for newer releases of Intel DAAL and make them available in open source. However, Intel will not add the new features of Intel DAAL to PyDAAL. Intel recommends that developers switch to daal4py.

Note: To find daal4py examples, refer to the daal4py documentation or browse the GitHub repository.

1 # file: ridge_reg_norm_eq_dense_online.py
2 #===============================================================================
3 # Copyright 2014-2019 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 #
17 # ! Content:
18 # ! Python example of ridge regression in the online processing mode.
19 # !
20 # ! The program trains the ridge regression model on a training data set
21 # ! with the normal equations method and computes regression for the test data.
22 # !*****************************************************************************
23 
24 #
25 ## <a name="DAAL-EXAMPLE-PY-RIDGE_REGRESSION_NORM_EQ_ONLINE"></a>
26 ## \example ridge_reg_norm_eq_dense_online.py
27 #
28 
29 import os
30 import sys
31 
32 from daal.algorithms.ridge_regression import training, prediction
33 from daal.data_management import DataSource, FileDataSource, NumericTable, HomogenNumericTable, MergedNumericTable
34 
# Put the directory one level above this example on the import path so the
# ``utils`` module imported below can be resolved from there.
utils_folder = os.path.realpath(
    os.path.abspath(
        os.path.dirname(os.path.dirname(__file__))
    )
)
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable

# Locations of the input CSV files, relative to the working directory
trainDatasetFileName = os.path.join("..", "data", "batch", "linear_regression_train.csv")
testDatasetFileName = os.path.join("..", "data", "batch", "linear_regression_test.csv")

# Number of rows requested from the training file per online step
nTrainVectorsInBlock = 250
# Number of features in training and testing data sets
nFeatures = 10
# Number of dependent variables that correspond to each observation
nDependentVariables = 2
47 
48 
def trainModel():
    """Train a ridge regression model in online mode and return the result.

    The training CSV file is read in blocks of ``nTrainVectorsInBlock`` rows.
    Each block is fed to a ``training.Online`` algorithm, the model is then
    finalized, and its coefficients are printed.

    Returns:
        The object returned by ``finalizeCompute()``; the trained model is
        available from it via ``get(training.model)``.
    """
    # Data source over the training .csv file
    source = FileDataSource(
        trainDatasetFileName,
        DataSource.notAllocateNumericTable,
        DataSource.doDictionaryFromContext,
    )

    # Feature and dependent-variable tables, created with zero rows and the
    # doNotAllocate flag; the merged table is what each block is loaded into.
    features = HomogenNumericTable(nFeatures, 0, NumericTable.doNotAllocate)
    responses = HomogenNumericTable(nDependentVariables, 0, NumericTable.doNotAllocate)
    block = MergedNumericTable(features, responses)

    # Online ridge regression training algorithm
    trainer = training.Online()

    # NOTE(review): the loop stops as soon as loadDataBlock() returns anything
    # other than nTrainVectorsInBlock, so a final, shorter block (if the file
    # has one) is never passed to compute() -- presumably the return value is
    # the number of rows read; confirm against the DAAL documentation.
    while True:
        rowsLoaded = source.loadDataBlock(nTrainVectorsInBlock, block)
        if rowsLoaded != nTrainVectorsInBlock:
            break

        # Hand the current block to the algorithm and update the partial model
        trainer.input.set(training.data, features)
        trainer.input.set(training.dependentVariables, responses)
        trainer.compute()

    # Finalize the model from the accumulated partial results
    trainingResult = trainer.finalizeCompute()

    coefficients = trainingResult.get(training.model).getBeta()
    printNumericTable(coefficients, "Ridge Regression coefficients:")
    return trainingResult
77 
78 
def testModel(trainingResult):
    """Run ridge regression prediction on the test file and print the output.

    Args:
        trainingResult: Training result object; the model is read from it
            with ``trainingResult.get(training.model)``.

    Prints the first 10 rows of the predictions and of the ground-truth
    values loaded from the test file. Returns nothing.
    """
    # Data source over the testing .csv file
    source = FileDataSource(
        testDatasetFileName,
        DataSource.doAllocateNumericTable,
        DataSource.doDictionaryFromContext,
    )

    # Tables for the test features and the ground-truth values; both are
    # filled through the merged table when the file is loaded.
    testData = HomogenNumericTable(nFeatures, 0, NumericTable.doNotAllocate)
    testGroundTruth = HomogenNumericTable(nDependentVariables, 0, NumericTable.doNotAllocate)
    merged = MergedNumericTable(testData, testGroundTruth)

    # Load the test file into the merged table
    source.loadDataBlock(merged)

    # Batch prediction algorithm for ridge regression
    predictor = prediction.Batch()

    # Provide the test features and the trained model
    model = trainingResult.get(training.model)
    predictor.input.setTable(prediction.data, testData)
    predictor.input.setModel(prediction.model, model)

    # Compute the predictions and fetch the resulting table
    predictionResult = predictor.compute()
    predicted = predictionResult.get(prediction.prediction)

    printNumericTable(predicted, "Ridge Regression prediction results: (first 10 rows):", 10)
    printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)
106 
107 
if __name__ == "__main__":
    # Script entry point: train the model first, then evaluate it on the
    # test data set using the training result.
    trainingResult = trainModel()
    testModel(trainingResult)

For more complete information about compiler optimizations, see our Optimization Notice.