47 import daal.algorithms.linear_regression
as linear_regression
48 import daal.algorithms.linear_regression.quality_metric_set
as quality_metric_set
49 from daal.algorithms.linear_regression
import training, prediction
50 from daal.algorithms.linear_regression.quality_metric
import single_beta, group_of_betas
51 from daal.data_management
import (
52 DataSourceIface, FileDataSource, HomogenNumericTable, MergedNumericTable,
53 NumericTableIface, BlockDescriptor, readWrite
# Put the parent of this script's directory on the import path so that the
# `utils` module imported just below can be found.
utils_folder = os.path.realpath(
    os.path.abspath(
        os.path.dirname(os.path.dirname(__file__))
    )
)
if utils_folder not in sys.path:
    # Prepend so this folder is searched before the other sys.path entries.
    sys.path.insert(0, utils_folder)

# Must stay below the sys.path adjustment above.
from utils import printNumericTable
# Relative location of the CSV file holding the training data set.
trainDatasetFileName = os.path.join(
    '..', 'data', 'batch', 'linear_regression_train.csv'
)

# Number of dependent-variable (response) columns.
nDependentVariables = 2

# Placeholder; the script entry point replaces it with the numeric table of
# dependent variables before any training happens.
trainDependentVariables = None
def trainModel(algorithm):
    """Train `algorithm` on the module-level tables and print its coefficients.

    The global `trainData` and `trainDependentVariables` tables are set as the
    algorithm's `training.data` and `training.dependentVariables` inputs, the
    algorithm is computed, and the result is stored in the global
    `trainingResult`.

    Args:
        algorithm: a training algorithm object exposing `input.set(...)` and
            `compute()` (the script passes `training.Batch` instances).
    """
    global trainingResult, trainData, trainDependentVariables

    # Hand the training set and the expected responses to the algorithm.
    inputs = algorithm.input
    inputs.set(training.data, trainData)
    inputs.set(training.dependentVariables, trainDependentVariables)

    # Build the model; the result is kept globally for the prediction helpers.
    trainingResult = algorithm.compute()

    fittedModel = trainingResult.get(training.model)
    printNumericTable(fittedModel.getBeta(), "Linear Regression coefficients:")
def predictResults(trainData):
    """Predict responses for `trainData` with the currently trained model.

    Args:
        trainData: numeric table passed to the predictor as `prediction.data`.

    Returns:
        The table stored under `prediction.prediction` in the result of
        `prediction.Batch().compute()`.
    """
    predictor = prediction.Batch()

    # Feed the predictor the data table, then the model taken from the global
    # training result.
    predictor.input.setTable(prediction.data, trainData)
    predictor.input.setModel(prediction.model, trainingResult.get(training.model))

    return predictor.compute().get(prediction.prediction)
def predictReducedModelResults(trainData):
    """Predict responses with two beta coefficients temporarily zeroed.

    For every dependent variable the coefficients at beta indices `j1` and
    `j2` are saved and set to 0 inside the trained model's beta table,
    `predictResults` is run against that reduced model, and the saved values
    are then written back so the model is left as it was found.

    Args:
        trainData: numeric table forwarded unchanged to `predictResults`.

    Returns:
        Whatever `predictResults(trainData)` returns for the reduced model.
    """
    model = trainingResult.get(training.model)
    betas = model.getBeta()
    nBetas = model.getNumberOfBetas()

    # NOTE(review): j1 and j2 are not defined in the visible part of this
    # file; they are used as the beta indices to exclude and must satisfy
    # 0 <= j < nBetas -- confirm where they are set.
    #
    # The block read below has nDependentVariables rows of nBetas values each,
    # so the row-major offset of beta `col` in row `row` is nBetas * row + col
    # (the row stride is nBetas, not nDependentVariables).
    excludedOffsets = [
        nBetas * row + col
        for row in range(nDependentVariables)
        for col in (j1, j2)
    ]

    block = BlockDescriptor()
    betas.getBlockOfRows(0, nDependentVariables, readWrite, block)
    # ravel() gives a view when the array is contiguous, whereas flatten()
    # always copies and would silently discard the writes below.
    # NOTE(review): assumes getArray() exposes the block's own buffer --
    # confirm against the pydaal BlockDescriptor documentation.
    pBeta = block.getArray().ravel()
    savedBeta = {offset: pBeta[offset] for offset in excludedOffsets}
    for offset in excludedOffsets:
        pBeta[offset] = 0
    betas.releaseBlockOfRows(block)

    try:
        predictedResults = predictResults(trainData)
    finally:
        # Always put the saved coefficients back, even if prediction raised,
        # so later uses of the model see the full set of betas.
        block = BlockDescriptor()
        betas.getBlockOfRows(0, nDependentVariables, readWrite, block)
        pBeta = block.getArray().ravel()
        for offset, value in savedBeta.items():
            pBeta[offset] = value
        betas.releaseBlockOfRows(block)

    return predictedResults
def testModelQuality():
    """Compute and print quality metrics for the currently trained model.

    Steps, in order:
      1. Predict with the full model and with the reduced model and print the
         expected, predicted and reduced-model responses.
      2. Configure a `quality_metric_set.Batch` with the single-beta and
         group-of-betas inputs and compute it.
      3. Store the result collection in the global `qmsResult` and print every
         single-beta and group-of-betas metric table.

    Reads the globals `trainingResult`, `trainData` and
    `trainDependentVariables`.
    """
    global trainingResult, qmsResult

    fullPrediction = predictResults(trainData)
    printNumericTable(trainDependentVariables, "Expected responses (first 20 rows):", 20)
    printNumericTable(fullPrediction, "Predicted responses (first 20 rows):", 20)

    model = trainingResult.get(linear_regression.training.model)
    reducedPrediction = predictReducedModelResults(trainData)
    printNumericTable(reducedPrediction, "Responses predicted with reduced model (first 20 rows):", 20)

    # The reduced model has two betas fewer than the full one.
    betaCount = model.getNumberOfBetas()
    qualityMetricSet = quality_metric_set.Batch(betaCount, betaCount - 2)

    # Inputs of the single-beta metrics.
    singleBetaInput = single_beta.Input.downCast(
        qualityMetricSet.getInputDataCollection().getInput(quality_metric_set.singleBeta)
    )
    singleBetaInput.setDataInput(single_beta.expectedResponses, trainDependentVariables)
    singleBetaInput.setDataInput(single_beta.predictedResponses, fullPrediction)
    singleBetaInput.setModelInput(single_beta.model, model)

    # Inputs of the group-of-betas metrics.
    groupInput = group_of_betas.Input.downCast(
        qualityMetricSet.getInputDataCollection().getInput(quality_metric_set.groupOfBetas)
    )
    groupInput.set(group_of_betas.expectedResponses, trainDependentVariables)
    groupInput.set(group_of_betas.predictedResponses, fullPrediction)
    groupInput.set(group_of_betas.predictedReducedModelResponses, reducedPrediction)

    qualityMetricSet.compute()
    qmsResult = qualityMetricSet.getResultCollection()

    # ---- single-beta metrics ----
    print("Quality metrics for a single beta")
    singleBetaResult = single_beta.Result.downCast(
        qmsResult.getResult(quality_metric_set.singleBeta)
    )
    singleBetaTables = (
        (single_beta.rms,
         "Root means square errors for each response (dependent variable):"),
        (single_beta.variance,
         "Variance for each response (dependent variable):"),
        (single_beta.zScore,
         "Z-score statistics:"),
        (single_beta.confidenceIntervals,
         "Confidence intervals for each beta coefficient:"),
        (single_beta.inverseOfXtX,
         "Inverse(Xt * X) matrix:"),
    )
    for resultId, caption in singleBetaTables:
        printNumericTable(singleBetaResult.getResult(resultId), caption)

    # One covariance table per entry of the betaCovariances collection.
    covariances = singleBetaResult.getResultDataCollection(single_beta.betaCovariances)
    for responseIdx in range(covariances.size()):
        caption = "Variance-covariance matrix for betas of {}-th response".format(responseIdx)
        printNumericTable(
            singleBetaResult.get(single_beta.betaCovariances, responseIdx),
            caption,
        )

    # ---- group-of-betas metrics ----
    print("Quality metrics for a group of betas")
    groupResult = group_of_betas.Result.downCast(
        qmsResult.getResult(quality_metric_set.groupOfBetas)
    )
    groupTables = (
        (group_of_betas.expectedMeans,
         "Means of expected responses for each dependent variable:"),
        (group_of_betas.expectedVariance,
         "Variance of expected responses for each dependent variable:"),
        (group_of_betas.regSS,
         "Regression sum of squares of expected responses:"),
        (group_of_betas.resSS,
         "Sum of squares of residuals for each dependent variable:"),
        (group_of_betas.tSS,
         "Total sum of squares for each dependent variable:"),
        (group_of_betas.determinationCoeff,
         "Determination coefficient for each dependent variable:"),
        (group_of_betas.fStatistics,
         "F-statistics for each dependent variable:"),
    )
    for resultId, caption in groupTables:
        printNumericTable(groupResult.get(resultId), caption, 0, 0, 20)
if __name__ == "__main__":
    # Script entry point: load the training CSV, then train the model with each
    # of the two training methods in turn and report quality metrics for each.

    # Data source for the training file; the dictionary is taken from the file
    # context and no numeric table is allocated by the source itself.
    dataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Tables for the features and the dependent variables, filled through one
    # merged table by a single load.
    # NOTE(review): nFeatures is not defined in the visible part of this file;
    # confirm it is set at module level before this point.
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainDependentVariables = HomogenNumericTable(
        nDependentVariables, 0, NumericTableIface.doNotAllocate
    )
    mergedData = MergedNumericTable(trainData, trainDependentVariables)
    dataSource.loadDataBlock(mergedData)

    # One pass per training method. Previously the loop index was never used,
    # so every pass trained with both methods and the second result always
    # overwrote the first.
    trainingRuns = (
        ("Train model with normal equation algorithm.",
         lambda: training.Batch()),
        ("Train model with QR algorithm.",
         lambda: training.Batch(method=training.qrDense)),
    )
    for banner, makeAlgorithm in trainingRuns:
        print(banner)
        trainModel(makeAlgorithm())
        # NOTE(review): testModelQuality() was not called anywhere in the
        # visible file; evaluating each trained model here is the presumed
        # intent -- confirm.
        testModelQuality()