Python* API Reference for Intel® Data Analytics Acceleration Library 2019 Update 4

lin_reg_metrics_dense_batch.py

Deprecation Notice: With the introduction of daal4py, a package that supersedes PyDAAL, Intel is deprecating PyDAAL and will discontinue support starting with Intel® DAAL 2021 and Intel® Distribution for Python 2021. Until then, Intel will continue to provide compatible PyDAAL pip and conda packages for newer releases of Intel DAAL and make them available in open source. However, Intel will not add the new features of Intel DAAL to PyDAAL. Intel recommends that developers switch to daal4py.

Note: To find daal4py examples, refer to the daal4py documentation or browse the GitHub repository.

1 # file: lin_reg_metrics_dense_batch.py
2 #===============================================================================
3 # Copyright 2014-2019 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 ## <a name="DAAL-EXAMPLE-PY-LIN_REG_METRICS_DENSE_BATCH"></a>
17 ## \example lin_reg_metrics_dense_batch.py
18 
19 import os
20 import sys
21 
22 import daal.algorithms.linear_regression as linear_regression
23 import daal.algorithms.linear_regression.quality_metric_set as quality_metric_set
24 from daal.algorithms.linear_regression import training, prediction
25 from daal.algorithms.linear_regression.quality_metric import single_beta, group_of_betas
26 from daal.data_management import (
27  DataSourceIface, FileDataSource, HomogenNumericTable, MergedNumericTable,
28  NumericTableIface, BlockDescriptor, readWrite
29 )
30 
# Put the parent of this script's directory on the import path (once) so that
# the `utils` helper module can be imported from there.
utils_folder = os.path.realpath(
    os.path.abspath(
        os.path.dirname(os.path.dirname(__file__))
    )
)
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
35 
# Training data set, addressed relative to the current working directory
trainDatasetFileName = os.path.join("..", "data", "batch", "linear_regression_train.csv")

# Column counts used to size the feature table and the dependent-variable table
nFeatures = 10
nDependentVariables = 2

# State shared by the functions below; every slot starts empty and is filled
# in by the __main__ section and by trainModel() / testModelQuality().
trainingResult = qmsResult = trainData = trainDependentVariables = None
46 
def trainModel(algorithm):
    """Train a multiple linear regression model with the given algorithm.

    The module-level ``trainData`` and ``trainDependentVariables`` tables are
    passed to ``algorithm``; the computed result is stored in the module-level
    ``trainingResult`` and the model's beta coefficients are printed.
    """
    global trainingResult

    # Feed the observations and their dependent values to the algorithm
    algorithm.input.set(training.data, trainData)
    algorithm.input.set(training.dependentVariables, trainDependentVariables)

    # Run the training and keep the result for prediction and quality metrics
    trainingResult = algorithm.compute()

    coefficients = trainingResult.get(training.model).getBeta()
    printNumericTable(coefficients, "Linear Regression coefficients:")
57 
def predictResults(trainData):
    """Predict responses for ``trainData`` with the most recently trained model.

    The model is read from the module-level ``trainingResult``. Returns the
    entry stored under ``prediction.prediction`` in the prediction result.
    """
    # Prediction algorithm for multiple linear regression
    predictor = prediction.Batch()

    # Inputs: the observations to predict for, then the trained model
    predictor.input.setTable(prediction.data, trainData)
    trainedModel = trainingResult.get(training.model)
    predictor.input.setModel(prediction.model, trainedModel)

    # Run the prediction and hand back only the predicted responses
    return predictor.compute().get(prediction.prediction)
69 
def predictReducedModelResults(trainData):
    """Predict responses with a reduced model and return the predictions.

    The reduced model is obtained by temporarily setting the coefficients in
    columns 2 and 10 of the trained model's beta table to zero for every
    dependent variable. Predictions are then computed with ``predictResults``
    and the original coefficients are written back.

    The trained model (module-level ``trainingResult``) is modified in place
    while the prediction runs. The restore step is performed in a ``finally``
    clause so the model gets its original coefficients back even when the
    prediction raises.
    """
    model = trainingResult.get(training.model)
    betas = model.getBeta()

    # Columns of the beta table that are excluded from the reduced model
    reducedColumns = (2, 10)

    # Original coefficient values, keyed by (row, column)
    savedBeta = {}

    # The block is requested in readWrite mode: the edits made to the array
    # are meant to reach the model's table when the block is released.
    block = BlockDescriptor()
    betas.getBlockOfRows(0, nDependentVariables, readWrite, block)
    pBeta = block.getArray()
    for i in range(nDependentVariables):
        for j in reducedColumns:
            savedBeta[i, j] = pBeta[i][j]
            pBeta[i][j] = 0
    betas.releaseBlockOfRows(block)

    try:
        predictedResults = predictResults(trainData)
    finally:
        # Always put the original coefficients back, even on failure
        block = BlockDescriptor()
        betas.getBlockOfRows(0, nDependentVariables, readWrite, block)
        pBeta = block.getArray()
        for (i, j), value in savedBeta.items():
            pBeta[i][j] = value
        betas.releaseBlockOfRows(block)

    return predictedResults
102 
def testModelQuality():
    """Compute quality metrics for the most recently trained model.

    Predicts responses with the full model and with the reduced model, prints
    the first 20 rows of the expected and predicted tables, feeds them to the
    linear regression quality metric set (single-beta and group-of-betas
    metrics) and stores the result collection in the module-level
    ``qmsResult``.
    """
    global qmsResult

    # Full-model predictions next to the expected responses
    predictedResults = predictResults(trainData)
    printNumericTable(trainDependentVariables, "Expected responses (first 20 rows):", 20)
    printNumericTable(predictedResults, "Predicted responses (first 20 rows):", 20)

    # Reduced-model predictions
    model = trainingResult.get(linear_regression.training.model)
    predictedReducedModelResults = predictReducedModelResults(trainData)
    printNumericTable(
        predictedReducedModelResults,
        "Responses predicted with reduced model (first 20 rows):",
        20,
    )

    # Quality metric set: the reduced model has two betas fewer than the full one
    nBetas = model.getNumberOfBetas()
    qualityMetricSet = quality_metric_set.Batch(nBetas, nBetas - 2)
    inputs = qualityMetricSet.getInputDataCollection()

    # Input of the single-beta metrics algorithm
    singleBeta = single_beta.Input.downCast(inputs.getInput(quality_metric_set.singleBeta))
    singleBeta.setDataInput(single_beta.expectedResponses, trainDependentVariables)
    singleBeta.setDataInput(single_beta.predictedResponses, predictedResults)
    singleBeta.setModelInput(single_beta.model, model)

    # Input of the group-of-betas metrics algorithm
    groupOfBetas = group_of_betas.Input.downCast(inputs.getInput(quality_metric_set.groupOfBetas))
    groupOfBetas.set(group_of_betas.expectedResponses, trainDependentVariables)
    groupOfBetas.set(group_of_betas.predictedResponses, predictedResults)
    groupOfBetas.set(group_of_betas.predictedReducedModelResponses, predictedReducedModelResults)

    # Compute the metrics and keep the result collection for printResults()
    qualityMetricSet.compute()
    qmsResult = qualityMetricSet.getResultCollection()
133 
def printResults():
    """Print the quality metrics held in the module-level ``qmsResult``.

    Prints the single-beta metrics first (including one variance-covariance
    matrix per entry of the beta-covariances collection), followed by the
    group-of-betas metrics.
    """
    # ---- Metrics for a single beta ----
    print("Quality metrics for a single beta")
    result = single_beta.Result.downCast(qmsResult.getResult(quality_metric_set.singleBeta))

    singleBetaTables = (
        (single_beta.rms, "Root means square errors for each response (dependent variable):"),
        (single_beta.variance, "Variance for each response (dependent variable):"),
        (single_beta.zScore, "Z-score statistics:"),
        (single_beta.confidenceIntervals, "Confidence intervals for each beta coefficient:"),
        (single_beta.inverseOfXtX, "Inverse(Xt * X) matrix:"),
    )
    for resultId, title in singleBetaTables:
        printNumericTable(result.getResult(resultId), title)

    # One variance-covariance matrix per element of the collection
    covariances = result.getResultDataCollection(single_beta.betaCovariances)
    for responseIdx in range(covariances.size()):
        title = "Variance-covariance matrix for betas of {}-th response\n".format(responseIdx)
        printNumericTable(result.get(single_beta.betaCovariances, responseIdx), title)

    # ---- Metrics for a group of betas ----
    print("Quality metrics for a group of betas")
    result = group_of_betas.Result.downCast(qmsResult.getResult(quality_metric_set.groupOfBetas))

    groupOfBetasTables = (
        (group_of_betas.expectedMeans, "Means of expected responses for each dependent variable:"),
        (group_of_betas.expectedVariance, "Variance of expected responses for each dependent variable:"),
        (group_of_betas.regSS, "Regression sum of squares of expected responses:"),
        (group_of_betas.resSS, "Sum of squares of residuals for each dependent variable:"),
        (group_of_betas.tSS, "Total sum of squares for each dependent variable:"),
        (group_of_betas.determinationCoeff, "Determination coefficient for each dependent variable:"),
        (group_of_betas.fStatistics, "F-statistics for each dependent variable:"),
    )
    for resultId, title in groupOfBetasTables:
        printNumericTable(result.get(resultId), title, 0, 0, 20)
161 
if __name__ == "__main__":

    # Data source that reads the input data from a .csv file
    dataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Numeric tables for the features and for the dependent variables, merged
    # into one table that the data source loads into
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainDependentVariables = HomogenNumericTable(nDependentVariables, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(trainData, trainDependentVariables)

    # Read the data from the input file
    dataSource.loadDataBlock(mergedData)

    # Each run: banner to print, keyword arguments for training.Batch
    trainingRuns = (
        ("Train model with normal equation algorithm.", {}),
        ("Train model with QR algorithm.", {"method": training.qrDense}),
    )
    for banner, batchArguments in trainingRuns:
        print(banner)
        algorithm = training.Batch(**batchArguments)
        trainModel(algorithm)
        testModelQuality()
        printResults()

For more complete information about compiler optimizations, see our Optimization Notice.