22 import daal.algorithms.linear_regression
as linear_regression
23 import daal.algorithms.linear_regression.quality_metric_set
as quality_metric_set
24 from daal.algorithms.linear_regression
import training, prediction
25 from daal.algorithms.linear_regression.quality_metric
import single_beta, group_of_betas
26 from daal.data_management
import (
27 DataSourceIface, FileDataSource, HomogenNumericTable, MergedNumericTable,
28 NumericTableIface, BlockDescriptor, readWrite
# Put the parent of this script's directory on the module search path
# (once), so that the `utils` import below can presumably be resolved
# from that folder.
utils_folder = os.path.dirname(os.path.dirname(__file__))
utils_folder = os.path.realpath(os.path.abspath(utils_folder))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
# Relative path of the CSV file the training data is loaded from.
trainDatasetFileName = os.path.join('..', 'data', 'batch', 'linear_regression_train.csv')

# Number of dependent variables; used below to size the responses table
# and to select how many rows of the beta table are read.
nDependentVariables = 2

# Placeholder for the table of training responses; the __main__ section
# rebinds it before any of the functions below are called.
trainDependentVariables = None
def trainModel(algorithm):
    """Run a training algorithm on the module-level data and print the betas.

    The module-level ``trainData`` and ``trainDependentVariables`` tables are
    handed to ``algorithm`` as its inputs, the algorithm is computed, the
    outcome is stored in the module-level ``trainingResult``, and the beta
    table of the resulting model is printed.

    Args:
        algorithm: Training algorithm object providing ``input.set`` and
            ``compute``; the script passes ``training.Batch`` instances.
    """
    global trainingResult, trainData, trainDependentVariables

    inputs = algorithm.input
    inputs.set(training.data, trainData)
    inputs.set(training.dependentVariables, trainDependentVariables)

    # Stored globally because the prediction helpers read the model from it.
    trainingResult = algorithm.compute()

    trainedModel = trainingResult.get(training.model)
    printNumericTable(trainedModel.getBeta(), "Linear Regression coefficients:")
def predictResults(trainData):
    """Predict responses for ``trainData`` with the currently trained model.

    Builds a ``prediction.Batch`` algorithm, gives it ``trainData`` and the
    model held by the module-level ``trainingResult``, and runs it.

    Args:
        trainData: Table of observations to predict responses for.

    Returns:
        The ``prediction.prediction`` entry of the computed result.
    """
    predictor = prediction.Batch()

    predictor.input.setTable(prediction.data, trainData)
    predictor.input.setModel(prediction.model, trainingResult.get(training.model))

    return predictor.compute().get(prediction.prediction)
def predictReducedModelResults(trainData):
    """Predict responses around a save/restore of two beta coefficients.

    For each of the first ``nDependentVariables`` rows of the trained
    model's beta table, the entries in columns ``j1`` and ``j2`` are copied
    into a local buffer, ``predictResults(trainData)`` is run, and the saved
    entries are then written back into the beta table.

    NOTE(review): ``j1`` and ``j2`` are not defined in the visible part of
    this file, and no visible statement modifies the beta table between the
    save and the restore. Presumably the two coefficients are meant to be
    overwritten (e.g. zeroed) before predicting -- confirm against the
    complete file.

    Args:
        trainData: Table of observations forwarded to ``predictResults``.

    Returns:
        The value returned by ``predictResults(trainData)``.
    """
    model = trainingResult.get(training.model)
    betas = model.getBeta()
    nBetas = model.getNumberOfBetas()

    def _openBetaRows():
        # Request read-write access to the first nDependentVariables rows.
        descriptor = BlockDescriptor()
        betas.getBlockOfRows(0, nDependentVariables, readWrite, descriptor)
        return descriptor

    # One row of nBetas slots per dependent variable; only j1/j2 get filled.
    savedBeta = [[None] * nBetas for _ in range(nDependentVariables)]

    # Remember the current values of the two selected coefficients.
    block = _openBetaRows()
    pBeta = block.getArray()
    for row in range(nDependentVariables):
        for col in (j1, j2):
            savedBeta[row][col] = pBeta[row][col]
    betas.releaseBlockOfRows(block)

    predictedResults = predictResults(trainData)

    # Write the remembered values back into the beta table.
    block = _openBetaRows()
    pBeta = block.getArray()
    for row in range(nDependentVariables):
        for col in (j1, j2):
            pBeta[row][col] = savedBeta[row][col]
    betas.releaseBlockOfRows(block)

    return predictedResults
def testModelQuality():
    """Compute and print quality metrics for the currently trained model.

    Steps, all driven by module-level state (``trainData``,
    ``trainDependentVariables``, ``trainingResult``):

    1. Predict with the trained model and with ``predictReducedModelResults``
       and print expected, predicted and reduced-model responses.
    2. Build a ``quality_metric_set.Batch`` for the model's number of betas
       and that number minus 2, and fill its single-beta and group-of-betas
       inputs.
    3. Compute the metric set, store the result collection in the
       module-level ``qmsResult`` and print every metric table.
    """
    global trainingResult, qmsResult

    predictedResults = predictResults(trainData)
    printNumericTable(trainDependentVariables, "Expected responses (first 20 rows):", 20)
    printNumericTable(predictedResults, "Predicted responses (first 20 rows):", 20)

    model = trainingResult.get(linear_regression.training.model)
    predictedReducedModelResults = predictReducedModelResults(trainData)
    printNumericTable(predictedReducedModelResults,
                      "Responses predicted with reduced model (first 20 rows):", 20)

    # The reduced model is described to the metric set as having two fewer betas.
    nBetaReducedModel = model.getNumberOfBetas() - 2
    qualityMetricSet = quality_metric_set.Batch(model.getNumberOfBetas(), nBetaReducedModel)

    # Inputs of the single-beta metrics.
    singleBeta = single_beta.Input.downCast(
        qualityMetricSet.getInputDataCollection().getInput(quality_metric_set.singleBeta))
    singleBeta.setDataInput(single_beta.expectedResponses, trainDependentVariables)
    singleBeta.setDataInput(single_beta.predictedResponses, predictedResults)
    singleBeta.setModelInput(single_beta.model, model)

    # Inputs of the group-of-betas metrics.
    groupOfBetas = group_of_betas.Input.downCast(
        qualityMetricSet.getInputDataCollection().getInput(quality_metric_set.groupOfBetas))
    groupOfBetas.set(group_of_betas.expectedResponses, trainDependentVariables)
    groupOfBetas.set(group_of_betas.predictedResponses, predictedResults)
    groupOfBetas.set(group_of_betas.predictedReducedModelResponses, predictedReducedModelResults)

    qualityMetricSet.compute()
    qmsResult = qualityMetricSet.getResultCollection()

    # ---- single-beta metrics -------------------------------------------
    print("Quality metrics for a single beta")
    singleBetaResult = single_beta.Result.downCast(
        qmsResult.getResult(quality_metric_set.singleBeta))
    singleBetaTables = (
        (single_beta.rms,
         "Root means square errors for each response (dependent variable):"),
        (single_beta.variance,
         "Variance for each response (dependent variable):"),
        (single_beta.zScore,
         "Z-score statistics:"),
        (single_beta.confidenceIntervals,
         "Confidence intervals for each beta coefficient:"),
        (single_beta.inverseOfXtX,
         "Inverse(Xt * X) matrix:"),
    )
    for resultId, title in singleBetaTables:
        printNumericTable(singleBetaResult.getResult(resultId), title)

    # One covariance table is printed per element of the collection.
    coll = singleBetaResult.getResultDataCollection(single_beta.betaCovariances)
    for i in range(coll.size()):
        message = "Variance-covariance matrix for betas of " + str(i) + "-th response\n"
        betaCov = singleBetaResult.get(single_beta.betaCovariances, i)
        printNumericTable(betaCov, message)

    # ---- group-of-betas metrics ----------------------------------------
    print("Quality metrics for a group of betas")
    groupResult = group_of_betas.Result.downCast(
        qmsResult.getResult(quality_metric_set.groupOfBetas))
    groupTables = (
        (group_of_betas.expectedMeans,
         "Means of expected responses for each dependent variable:"),
        (group_of_betas.expectedVariance,
         "Variance of expected responses for each dependent variable:"),
        (group_of_betas.regSS,
         "Regression sum of squares of expected responses:"),
        (group_of_betas.resSS,
         "Sum of squares of residuals for each dependent variable:"),
        (group_of_betas.tSS,
         "Total sum of squares for each dependent variable:"),
        (group_of_betas.determinationCoeff,
         "Determination coefficient for each dependent variable:"),
        (group_of_betas.fStatistics,
         "F-statistics for each dependent variable:"),
    )
    for resultId, title in groupTables:
        printNumericTable(groupResult.get(resultId), title, 0, 0, 20)
if __name__ == "__main__":
    # Script entry point: load the training CSV into the module-level tables,
    # then train with the two training methods shown below.

    # Data source bound to the training CSV file.
    dataSource = FileDataSource(trainDatasetFileName,
                                DataSourceIface.notAllocateNumericTable,
                                DataSourceIface.doDictionaryFromContext)

    # Two tables created with the doNotAllocate flag and combined into one
    # merged table, so that a single load fills both of them.
    # NOTE(review): nFeatures is not defined in the visible part of this
    # file -- confirm it is set before this point.
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainDependentVariables = HomogenNumericTable(nDependentVariables, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(trainData, trainDependentVariables)

    # Read the file contents into the merged table.
    dataSource.loadDataBlock(mergedData)

    # NOTE(review): the original indentation was lost and the loop variable
    # `i` is never used in the visible statements. As laid out here, both
    # trainings run on each of the two iterations; presumably one method was
    # meant to be chosen per iteration. No call to testModelQuality() is
    # visible either -- confirm against the complete file.
    for i in range(0, 2):
        print("Train model with normal equation algorithm.")
        algorithm = training.Batch()
        trainModel(algorithm)

        print("Train model with QR algorithm.")
        algorithm = training.Batch(method=training.qrDense)
        trainModel(algorithm)