#include "daal.h"
#include "service.h"
#include "algorithms/linear_regression/linear_regression_quality_metric_set_batch.h"
#include "algorithms/linear_regression/linear_regression_quality_metric_set_types.h"
#include "algorithms/linear_regression/linear_regression_single_beta_batch.h"
#include "algorithms/linear_regression/linear_regression_single_beta_types.h"
#include "algorithms/linear_regression/linear_regression_group_of_betas_types.h"
#include "algorithms/linear_regression/linear_regression_group_of_betas_batch.h"
using namespace std;
using namespace daal;
using namespace daal::services;
using namespace daal::algorithms::linear_regression;
using namespace daal::algorithms::linear_regression::quality_metric;
string trainDatasetFileName = "../data/batch/linear_regression_train.csv";
string testDatasetFileName = "../data/batch/linear_regression_test.csv";
const size_t nFeatures = 10;
const size_t nDependentVariables = 2;
template <class TrainingAlgorithm>
void trainModel(TrainingAlgorithm& algo);
void testModelQuality();
void printResults();
training::ResultPtr trainingResult;
quality_metric_set::ResultCollectionPtr qmsResult;
NumericTablePtr trainData;
NumericTablePtr trainDependentVariables;
int main(int argc, char *argv[])
{
checkArguments(argc, argv, 2, &trainDatasetFileName, &testDatasetFileName);
FileDataSource<CSVFeatureManager> trainDataSource(trainDatasetFileName,
DataSource::notAllocateNumericTable,
DataSource::doDictionaryFromContext);
trainData = NumericTablePtr(new HomogenNumericTable<>(nFeatures, 0, NumericTable::doNotAllocate));
trainDependentVariables = NumericTablePtr(new HomogenNumericTable<>(nDependentVariables, 0, NumericTable::doNotAllocate));
NumericTablePtr mergedData(new MergedNumericTable(trainData, trainDependentVariables));
trainDataSource.loadDataBlock(mergedData.get());
for(size_t i = 0; i < 2; ++i)
{
if(i == 0)
{
std::cout << "Train model with normal equation algorithm." << std::endl;
training::Batch<> algorithm;
trainModel(algorithm);
}
else
{
std::cout << "Train model with QR algorithm." << std::endl;
training::Batch<float, training::qrDense> algorithm;
trainModel(algorithm);
}
testModelQuality();
printResults();
}
return 0;
}
template <class TrainingAlgorithm>
void trainModel(TrainingAlgorithm& algorithm)
{
algorithm.input.set(training::data, trainData);
algorithm.input.set(training::dependentVariables, trainDependentVariables);
algorithm.compute();
trainingResult = algorithm.getResult();
printNumericTable(trainingResult->get(training::model)->getBeta(), "Linear Regression coefficients:");
}
NumericTablePtr predictResults(NumericTablePtr& data)
{
prediction::Batch<> algorithm;
algorithm.input.set(prediction::data, data);
algorithm.input.set(prediction::model, trainingResult->get(training::model));
algorithm.compute();
prediction::ResultPtr predictionResult = algorithm.getResult();
return predictionResult->get(prediction::prediction);
}
NumericTablePtr predictReducedModelResults(NumericTablePtr& data)
{
ModelPtr model = trainingResult->get(training::model);
NumericTablePtr betas = model->getBeta();
const size_t nBetas = model->getNumberOfBetas();
size_t j1 = 2;
size_t j2 = 10;
float* savedBeta = new float[nBetas * nDependentVariables];
{
BlockDescriptor<> block;
betas->getBlockOfRows(0, nDependentVariables, readWrite, block);
float* pBeta = block.getBlockPtr();
for(size_t i = 0; i < nDependentVariables; ++i)
{
savedBeta[nDependentVariables*i + j1] = pBeta[nDependentVariables*i + j1];
savedBeta[nDependentVariables*i + j2] = pBeta[nDependentVariables*i + j2];
pBeta[nDependentVariables*i + j1] = 0;
pBeta[nDependentVariables*i + j2] = 0;
}
betas->releaseBlockOfRows(block);
}
NumericTablePtr predictedResults = predictResults(trainData);
{
BlockDescriptor<> block;
betas->getBlockOfRows(0, nDependentVariables, readWrite, block);
float* pBeta = block.getBlockPtr();
for(size_t i = 0; i < nDependentVariables; ++i)
{
pBeta[nDependentVariables*i + j1] = savedBeta[nDependentVariables*i + j1];
pBeta[nDependentVariables*i + j2] = savedBeta[nDependentVariables*i + j2];
}
betas->releaseBlockOfRows(block);
}
delete[] savedBeta;
return predictedResults;
}
void testModelQuality()
{
NumericTablePtr predictedResults = predictResults(trainData);
printNumericTable(trainDependentVariables, "Expected responses (first 20 rows):", 20);
printNumericTable(predictedResults, "Predicted responses (first 20 rows):", 20);
ModelPtr model = trainingResult->get(training::model);
NumericTablePtr predictedReducedModelResults = predictReducedModelResults(trainData);
printNumericTable(predictedReducedModelResults, "Responses predicted with reduced model (first 20 rows):", 20);
const size_t nBetaReducedModel = model->getNumberOfBetas() - 2;
quality_metric_set::Batch qualityMetricSet(model->getNumberOfBetas(), nBetaReducedModel);
algorithms::InputPtr algInput =
qualityMetricSet.getInputDataCollection()->getInput(quality_metric_set::singleBeta);
single_beta::InputPtr singleBeta = single_beta::Input::cast(algInput);
singleBeta->set(single_beta::expectedResponses, trainDependentVariables);
singleBeta->set(single_beta::predictedResponses, predictedResults);
singleBeta->set(single_beta::model, model);
algInput = qualityMetricSet.getInputDataCollection()->getInput(quality_metric_set::groupOfBetas);
group_of_betas::InputPtr groupOfBetas = group_of_betas::Input::cast(algInput);
groupOfBetas->set(group_of_betas::expectedResponses, trainDependentVariables);
groupOfBetas->set(group_of_betas::predictedResponses, predictedResults);
groupOfBetas->set(group_of_betas::predictedReducedModelResponses, predictedReducedModelResults);
qualityMetricSet.compute();
qmsResult = qualityMetricSet.getResultCollection();
}
void printResults()
{
std::cout << "Quality metrics for a single beta" << std::endl;
{
single_beta::ResultPtr result = single_beta::Result::cast(qmsResult->getResult(quality_metric_set::singleBeta));
if(!result)
return;
printNumericTable(result->get(single_beta::rms), "Root means square errors for each response (dependent variable):");
printNumericTable(result->get(single_beta::variance), "Variance for each response (dependent variable):");
printNumericTable(result->get(single_beta::zScore), "Z-score statistics:");
printNumericTable(result->get(single_beta::confidenceIntervals), "Confidence intervals for each beta coefficient:");
printNumericTable(result->get(single_beta::inverseOfXtX), "Inverse(Xt * X) matrix:");
data_management::DataCollectionPtr coll = result->get(single_beta::betaCovariances);
for(size_t i = 0; i < coll->size(); ++i)
{
std::ostringstream str;
str << "Variance-covariance matrix for betas of " << i << "-th response" << std::endl;
NumericTablePtr betaCov = data_management::NumericTable::cast((*coll)[i]);
printNumericTable(betaCov, str.str().c_str());
}
}
std::cout << "Quality metrics for a group of betas" << std::endl;
{
group_of_betas::ResultPtr result = group_of_betas::Result::cast(qmsResult->getResult(quality_metric_set::groupOfBetas));
if(!result)
return;
printNumericTable(result->get(group_of_betas::expectedMeans), "Means of expected responses for each dependent variable:", 0, 0, 20);
printNumericTable(result->get(group_of_betas::expectedVariance), "Variance of expected responses for each dependent variable:", 0, 0, 20);
printNumericTable(result->get(group_of_betas::regSS), "Regression sum of squares of expected responses:", 0, 0, 20);
printNumericTable(result->get(group_of_betas::resSS), "Sum of squares of residuals for each dependent variable:", 0, 0, 20);
printNumericTable(result->get(group_of_betas::tSS), "Total sum of squares for each dependent variable:", 0, 0, 20);
printNumericTable(result->get(group_of_betas::determinationCoeff), "Determination coefficient for each dependent variable:", 0, 0, 20);
printNumericTable(result->get(group_of_betas::fStatistics), "F-statistics for each dependent variable:", 0, 0, 20);
}
}