48 import daal.algorithms.linear_regression
as linear_regression
49 import daal.algorithms.linear_regression.quality_metric_set
as quality_metric_set
50 from daal.algorithms.linear_regression
import training, prediction
51 from daal.algorithms.linear_regression.quality_metric
import single_beta, group_of_betas
52 from daal.data_management
import (
53 DataSourceIface, FileDataSource, HomogenNumericTable, MergedNumericTable,
54 NumericTableIface, BlockDescriptor, readWrite
# Directory two levels above this file (the parent of the script's folder).
# It is put at the front of the import path so that the `utils` module
# imported right below can be found.
utils_folder = os.path.realpath(
    os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
)
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
60 from utils
import printNumericTable
# Relative path of the CSV file holding the training data set.
trainDatasetFileName = os.path.join('..', 'data', 'batch', 'linear_regression_train.csv')

# Number of dependent variables; used to size the response table and to
# drive the per-response loops over the beta coefficients.
nDependentVariables = 2

# Placeholder for the table of dependent variables; the script entry point
# rebinds it to a numeric table before training.
trainDependentVariables = None
def trainModel(algorithm):
    """Train a linear regression model and print its coefficients.

    The module-level ``trainData`` and ``trainDependentVariables`` tables are
    bound to the algorithm inputs, the algorithm is run, and its result is
    stored in the module-level ``trainingResult`` for the later prediction
    and quality-metric steps.

    Args:
        algorithm: Training algorithm object exposing ``input.set`` and
            ``compute``; the script entry point passes ``training.Batch``
            instances.
    """
    # Only trainingResult is rebound here; the input tables are just read.
    global trainingResult

    # Bind the observations and the responses to the algorithm inputs.
    inputBindings = (
        (training.data, trainData),
        (training.dependentVariables, trainDependentVariables),
    )
    for inputId, table in inputBindings:
        algorithm.input.set(inputId, table)

    # Run the training and keep the result for the rest of the script.
    trainingResult = algorithm.compute()

    fittedModel = trainingResult.get(training.model)
    printNumericTable(fittedModel.getBeta(), "Linear Regression coefficients:")
def predictResults(trainData):
    """Predict responses for ``trainData`` with the currently trained model.

    The model is read from the module-level ``trainingResult``, so training
    must have been run before this function is called.

    Args:
        trainData: Table of observations handed to the prediction algorithm.

    Returns:
        The ``prediction.prediction`` entry of the prediction result.
    """
    predictor = prediction.Batch()

    # Inputs: the observations to score and the trained model.
    predictor.input.setTable(prediction.data, trainData)
    trainedModel = trainingResult.get(training.model)
    predictor.input.setModel(prediction.model, trainedModel)

    # Run the prediction and hand back only the predicted responses.
    return predictor.compute().get(prediction.prediction)
def predictReducedModelResults(trainData):
    """Predict responses with selected beta coefficients temporarily zeroed.

    For every dependent variable ``i`` the entries at flat positions
    ``nDependentVariables * i + j1`` and ``nDependentVariables * i + j2`` of
    the beta block are saved and overwritten with 0, ``predictResults`` is
    run, and the saved values are written back afterwards.

    The previous version re-created ``savedBeta`` as a list of ``None`` right
    before the restore step, which discarded the saved coefficients and wrote
    ``None`` placeholders back instead of the original values. The saved
    values are now kept until they have been restored.

    Args:
        trainData: Table of observations to predict responses for.

    Returns:
        The predicted responses returned by ``predictResults`` while the
        selected coefficients were zeroed.
    """
    model = trainingResult.get(training.model)
    betas = model.getBeta()

    # NOTE(review): j1 and j2 are not assigned inside this function as shown;
    # they are read as free names and must be provided by the surrounding
    # code - confirm where they are defined.
    # NOTE(review): positions use nDependentVariables as the row stride,
    # exactly as before; if the beta table holds one row per response, the
    # stride may need to be the number of betas per row - TODO confirm.
    reducedPositions = [nDependentVariables * i + j
                        for i in range(0, nDependentVariables)
                        for j in (j1, j2)]

    # Save the selected coefficients, then zero them.
    block = BlockDescriptor()
    betas.getBlockOfRows(0, nDependentVariables, readWrite, block)
    # NOTE(review): if getArray() returns a NumPy array, flatten() produces a
    # copy, so the writes below may not reach the table - TODO confirm.
    pBeta = block.getArray().flatten()
    savedBeta = dict((position, pBeta[position]) for position in reducedPositions)
    for position in reducedPositions:
        pBeta[position] = 0
    betas.releaseBlockOfRows(block)

    predictedResults = predictResults(trainData)

    # Write the saved coefficients back so the model is left as it was found.
    block = BlockDescriptor()
    betas.getBlockOfRows(0, nDependentVariables, readWrite, block)
    pBeta = block.getArray().flatten()
    for position in reducedPositions:
        pBeta[position] = savedBeta[position]
    betas.releaseBlockOfRows(block)

    return predictedResults
def testModelQuality():
    """Compute and print quality metrics for the trained model.

    Predicts responses with the full model and with the reduced model, feeds
    both to a ``quality_metric_set.Batch`` together with the expected
    responses, stores the resulting collection in the module-level
    ``qmsResult`` and prints the single-beta and group-of-betas metrics.
    """
    global trainingResult, qmsResult

    # Full-model predictions, shown next to the expected responses.
    fullPredictions = predictResults(trainData)
    printNumericTable(trainDependentVariables, "Expected responses (first 20 rows):", 20)
    printNumericTable(fullPredictions, "Predicted responses (first 20 rows):", 20)

    model = trainingResult.get(linear_regression.training.model)
    reducedPredictions = predictReducedModelResults(trainData)
    printNumericTable(reducedPredictions,
                      "Responses predicted with reduced model (first 20 rows):", 20)

    # The reduced model is described as having two betas fewer than the full one.
    nBetaReducedModel = model.getNumberOfBetas() - 2
    qualityMetricSet = quality_metric_set.Batch(model.getNumberOfBetas(), nBetaReducedModel)

    # Inputs of the single-beta metrics.
    singleBetaInput = single_beta.Input.downCast(
        qualityMetricSet.getInputDataCollection().getInput(quality_metric_set.singleBeta))
    singleBetaInput.setDataInput(single_beta.expectedResponses, trainDependentVariables)
    singleBetaInput.setDataInput(single_beta.predictedResponses, fullPredictions)
    singleBetaInput.setModelInput(single_beta.model, model)

    # Inputs of the group-of-betas metrics.
    groupInput = group_of_betas.Input.downCast(
        qualityMetricSet.getInputDataCollection().getInput(quality_metric_set.groupOfBetas))
    groupInput.set(group_of_betas.expectedResponses, trainDependentVariables)
    groupInput.set(group_of_betas.predictedResponses, fullPredictions)
    groupInput.set(group_of_betas.predictedReducedModelResponses, reducedPredictions)

    # Compute the metrics and keep the result collection globally.
    qualityMetricSet.compute()
    qmsResult = qualityMetricSet.getResultCollection()

    # Report: metrics for a single beta.
    print("Quality metrics for a single beta")
    singleBetaResult = single_beta.Result.downCast(
        qmsResult.getResult(quality_metric_set.singleBeta))
    singleBetaTables = (
        (single_beta.rms,
         "Root means square errors for each response (dependent variable):"),
        (single_beta.variance,
         "Variance for each response (dependent variable):"),
        (single_beta.zScore,
         "Z-score statistics:"),
        (single_beta.confidenceIntervals,
         "Confidence intervals for each beta coefficient:"),
        (single_beta.inverseOfXtX,
         "Inverse(Xt * X) matrix:"),
    )
    for resultId, title in singleBetaTables:
        printNumericTable(singleBetaResult.getResult(resultId), title)

    # One covariance table is printed per entry of the covariance collection.
    covariances = singleBetaResult.getResultDataCollection(single_beta.betaCovariances)
    for responseIndex in range(0, covariances.size()):
        message = "Variance-covariance matrix for betas of " + str(responseIndex) + "-th response\n"
        printNumericTable(singleBetaResult.get(single_beta.betaCovariances, responseIndex),
                          message)

    # Report: metrics for a group of betas.
    print("Quality metrics for a group of betas")
    groupResult = group_of_betas.Result.downCast(
        qmsResult.getResult(quality_metric_set.groupOfBetas))
    groupTables = (
        (group_of_betas.expectedMeans,
         "Means of expected responses for each dependent variable:"),
        (group_of_betas.expectedVariance,
         "Variance of expected responses for each dependent variable:"),
        (group_of_betas.regSS,
         "Regression sum of squares of expected responses:"),
        (group_of_betas.resSS,
         "Sum of squares of residuals for each dependent variable:"),
        (group_of_betas.tSS,
         "Total sum of squares for each dependent variable:"),
        (group_of_betas.determinationCoeff,
         "Determination coefficient for each dependent variable:"),
        (group_of_betas.fStatistics,
         "F-statistics for each dependent variable:"),
    )
    for resultId, title in groupTables:
        printNumericTable(groupResult.get(resultId), title, 0, 0, 20)
189 if __name__ ==
"__main__":
192 dataSource = FileDataSource(trainDatasetFileName,
193 DataSourceIface.notAllocateNumericTable,
194 DataSourceIface.doDictionaryFromContext)
197 trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
198 trainDependentVariables = HomogenNumericTable(nDependentVariables, 0, NumericTableIface.doNotAllocate)
199 mergedData = MergedNumericTable(trainData, trainDependentVariables)
202 dataSource.loadDataBlock(mergedData)
204 for i
in range(0, 2):
206 print (
"Train model with normal equation algorithm.")
207 algorithm = training.Batch()
208 trainModel(algorithm)
210 print (
"Train model with QR algorithm.")
211 algorithm = training.Batch(method=training.qrDense)
212 trainModel(algorithm)