Python* API Reference for Intel® Data Analytics Acceleration Library 2019 Update 5

lin_reg_metrics_dense_batch.py

1 # file: lin_reg_metrics_dense_batch.py
2 #===============================================================================
3 # Copyright 2014-2019 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 
17 
18 
19 import os
20 import sys
21 
22 import daal.algorithms.linear_regression as linear_regression
23 import daal.algorithms.linear_regression.quality_metric_set as quality_metric_set
24 from daal.algorithms.linear_regression import training, prediction
25 from daal.algorithms.linear_regression.quality_metric import single_beta, group_of_betas
26 from daal.data_management import (
27  DataSourceIface, FileDataSource, HomogenNumericTable, MergedNumericTable,
28  NumericTableIface, BlockDescriptor, readWrite
29 )
30 
# The shared ``utils`` helper module is looked up in the parent of this
# script's directory, so that directory is put at the front of the import path.
utils_folder = os.path.dirname(os.path.dirname(__file__))
utils_folder = os.path.realpath(os.path.abspath(utils_folder))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
35 
# CSV file holding the training set, given as a relative path
trainDatasetFileName = os.path.join('..', 'data', 'batch', 'linear_regression_train.csv')

# Column counts of the feature table and of the dependent-variable table
nFeatures = 10
nDependentVariables = 2

# Shared state: the tables are filled in by the __main__ section, the results
# by trainModel() and testModelQuality()
trainData = None
trainDependentVariables = None
trainingResult = None
qmsResult = None
46 
def trainModel(algorithm):
    """Train a linear regression model with ``algorithm`` and print its betas.

    The module-level ``trainData`` and ``trainDependentVariables`` tables are
    passed to ``algorithm`` as input. The value returned by
    ``algorithm.compute()`` is stored in the module-level ``trainingResult``
    so that the prediction and quality-metric steps can use it.
    """
    global trainingResult

    # Hand the feature table and the dependent values to the algorithm
    algorithm.input.set(training.data, trainData)
    algorithm.input.set(training.dependentVariables, trainDependentVariables)

    # Build the multiple linear regression model and keep the result
    trainingResult = algorithm.compute()

    fittedModel = trainingResult.get(training.model)
    printNumericTable(fittedModel.getBeta(), "Linear Regression coefficients:")
57 
def predictResults(trainData):
    """Predict responses for ``trainData`` with the currently trained model.

    The model is read from the module-level ``trainingResult``. Returns the
    table stored under ``prediction.prediction`` in the prediction result.
    """
    # Batch prediction algorithm for multiple linear regression
    predictor = prediction.Batch()

    # Inputs: the data to predict on and the trained model
    predictor.input.setTable(prediction.data, trainData)
    predictor.input.setModel(prediction.model, trainingResult.get(training.model))

    # Run the prediction and extract the table of predicted values
    return predictor.compute().get(prediction.prediction)
69 
def _exchangeBetaColumns(betas, columns, replacement):
    """Overwrite selected beta columns and return the values they held before.

    For every row ``i`` in ``range(nDependentVariables)`` the entry
    ``betas[i][columns[k]]`` is replaced with ``replacement[i][k]``. The rows
    are accessed through a block opened in ``readWrite`` mode, which is
    released again before returning.

    Returns a list with one entry per row, each a list of the previous values
    in the order given by ``columns``.
    """
    block = BlockDescriptor()
    betas.getBlockOfRows(0, nDependentVariables, readWrite, block)
    pBeta = block.getArray()

    previous = []
    for i in range(nDependentVariables):
        # Read the old values first so they can be handed back to the caller
        previous.append([pBeta[i][j] for j in columns])
        for k, j in enumerate(columns):
            pBeta[i][j] = replacement[i][k]

    betas.releaseBlockOfRows(block)
    return previous


def predictReducedModelResults(trainData):
    """Predict responses for ``trainData`` with two beta columns set to zero.

    The beta table of the model in the module-level ``trainingResult`` is
    edited in place: columns 2 and 10 are zeroed for every dependent variable,
    ``predictResults(trainData)`` is run, and the saved values are written
    back afterwards.

    The write-back happens in a ``finally`` clause, so the trained model is
    left with its original coefficients even when the prediction raises.

    Returns whatever ``predictResults`` returns for the reduced model.
    """
    model = trainingResult.get(training.model)
    betas = model.getBeta()

    # Beta columns that are excluded from the reduced model
    excludedColumns = (2, 10)
    zeros = [[0] * len(excludedColumns) for _ in range(nDependentVariables)]

    savedBeta = _exchangeBetaColumns(betas, excludedColumns, zeros)
    try:
        return predictResults(trainData)
    finally:
        # The beta table belongs to the trained model used by later steps,
        # so it must not stay zeroed if the prediction fails
        _exchangeBetaColumns(betas, excludedColumns, savedBeta)
102 
def testModelQuality():
    """Compute quality metrics for the trained model into ``qmsResult``.

    Predicts responses with the full model and with the reduced model, prints
    the first 20 rows of the expected and both predicted tables, then runs the
    linear regression quality metric set (single-beta and group-of-betas
    metrics). The result collection is stored in the module-level
    ``qmsResult``.
    """
    global qmsResult

    fullPredictions = predictResults(trainData)
    printNumericTable(trainDependentVariables, "Expected responses (first 20 rows):", 20)
    printNumericTable(fullPredictions, "Predicted responses (first 20 rows):", 20)

    model = trainingResult.get(linear_regression.training.model)
    reducedPredictions = predictReducedModelResults(trainData)
    printNumericTable(reducedPredictions, "Responses predicted with reduced model (first 20 rows):", 20)

    # The quality metric set is created from the beta counts of the full
    # model and of the reduced model (two betas fewer)
    nBetas = model.getNumberOfBetas()
    qualityMetricSet = quality_metric_set.Batch(nBetas, nBetas - 2)

    # Input for the single-beta metrics
    singleBetaInput = single_beta.Input.downCast(
        qualityMetricSet.getInputDataCollection().getInput(quality_metric_set.singleBeta)
    )
    singleBetaInput.setDataInput(single_beta.expectedResponses, trainDependentVariables)
    singleBetaInput.setDataInput(single_beta.predictedResponses, fullPredictions)
    singleBetaInput.setModelInput(single_beta.model, model)

    # Input for the group-of-betas metrics
    groupInput = group_of_betas.Input.downCast(
        qualityMetricSet.getInputDataCollection().getInput(quality_metric_set.groupOfBetas)
    )
    groupInput.set(group_of_betas.expectedResponses, trainDependentVariables)
    groupInput.set(group_of_betas.predictedResponses, fullPredictions)
    groupInput.set(group_of_betas.predictedReducedModelResponses, reducedPredictions)

    # Compute the metrics and keep the result collection for printResults()
    qualityMetricSet.compute()
    qmsResult = qualityMetricSet.getResultCollection()
133 
def printResults():
    """Print every quality metric stored in the module-level ``qmsResult``.

    The single-beta metrics come first, including one variance-covariance
    table per entry of the beta covariance collection, followed by the
    group-of-betas metrics.
    """
    # --- Single-beta metrics ---
    print("Quality metrics for a single beta")
    singleBetaResult = single_beta.Result.downCast(qmsResult.getResult(quality_metric_set.singleBeta))

    singleBetaTables = (
        (single_beta.rms, "Root means square errors for each response (dependent variable):"),
        (single_beta.variance, "Variance for each response (dependent variable):"),
        (single_beta.zScore, "Z-score statistics:"),
        (single_beta.confidenceIntervals, "Confidence intervals for each beta coefficient:"),
        (single_beta.inverseOfXtX, "Inverse(Xt * X) matrix:"),
    )
    for resultId, title in singleBetaTables:
        printNumericTable(singleBetaResult.getResult(resultId), title)

    # One covariance table per element of the collection
    covariances = singleBetaResult.getResultDataCollection(single_beta.betaCovariances)
    for idx in range(covariances.size()):
        title = "Variance-covariance matrix for betas of " + str(idx) + "-th response\n"
        covTable = singleBetaResult.get(single_beta.betaCovariances, idx)
        printNumericTable(covTable, title)

    # --- Group-of-betas metrics ---
    print("Quality metrics for a group of betas")
    groupResult = group_of_betas.Result.downCast(qmsResult.getResult(quality_metric_set.groupOfBetas))

    groupTables = (
        (group_of_betas.expectedMeans, "Means of expected responses for each dependent variable:"),
        (group_of_betas.expectedVariance, "Variance of expected responses for each dependent variable:"),
        (group_of_betas.regSS, "Regression sum of squares of expected responses:"),
        (group_of_betas.resSS, "Sum of squares of residuals for each dependent variable:"),
        (group_of_betas.tSS, "Total sum of squares for each dependent variable:"),
        (group_of_betas.determinationCoeff, "Determination coefficient for each dependent variable:"),
        (group_of_betas.fStatistics, "F-statistics for each dependent variable:"),
    )
    for resultId, title in groupTables:
        printNumericTable(groupResult.get(resultId), title, 0, 0, 20)
161 
if __name__ == "__main__":

    # FileDataSource<CSVFeatureManager> reading the input data from a .csv file
    dataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Numeric tables for the features and for the dependent variables; they
    # are combined into one merged table that the data source loads into
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainDependentVariables = HomogenNumericTable(nDependentVariables, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(trainData, trainDependentVariables)

    # Retrieve the data from the input file
    dataSource.loadDataBlock(mergedData)

    # Train once per method, then evaluate and print the quality metrics
    trainingRuns = (
        ("Train model with normal equation algorithm.", {}),
        ("Train model with QR algorithm.", {"method": training.qrDense}),
    )
    for banner, batchOptions in trainingRuns:
        print(banner)
        algorithm = training.Batch(**batchOptions)
        trainModel(algorithm)
        testModelQuality()
        printResults()

For more complete information about compiler optimizations, see our Optimization Notice.