C++ API Reference for Intel® Data Analytics Acceleration Library 2019 Update 5

custom_csv_feature_modifiers.cpp

/* file: custom_csv_feature_modifiers.cpp */
/*******************************************************************************
* Copyright 2014-2019 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************/
/*
! Content:
! C++ example of modifiers usage with file data source
!******************************************************************************/
#include <cassert>
#include <algorithm>
#include "daal.h"
#include "service.h"
using namespace daal::data_management;
/* Custom CSV feature modifier that replaces each selected token with its square. */
class MySquaringModifier : public modifiers::csv::FeatureModifier
{
public:
    /* Invoked for every row of the CSV file: parses each token of the row as
     * a float and stores the squared value at the same position of the
     * output buffer. */
    virtual void apply(modifiers::csv::Context &context)
    {
        const size_t tokenCount = context.getNumberOfTokens();
        daal::services::BufferView<DAAL_DATA_TYPE> squares = context.getOutputBuffer();

        /* Unless a modifier calls 'setNumberOfOutputFeatures' during its
         * initialization stage (see 'MyMaxFeatureModifier'), the output buffer
         * has one slot per token (a token is one comma-separated word). */
        assert(tokenCount == squares.size());

        for (size_t column = 0; column != tokenCount; ++column)
        {
            const float value = context.getTokenAs<float>(column);
            squares[column] = value * value;
        }
    }
};
/* Custom CSV feature modifier that collapses all selected tokens of a row into
 * a single output feature y = max { x_1, ..., x_n }. */
class MyMaxFeatureModifier : public modifiers::csv::FeatureModifier
{
public:
    /* This method is called once before CSV parsing */
    virtual void initialize(modifiers::csv::Config &config)
    {
        /* The modifier produces exactly one value per row (the maximum of the
         * input features), so request a single output feature. */
        config.setNumberOfOutputFeatures(1);
    }

    /* This method is called for every row in CSV file */
    virtual void apply(modifiers::csv::Context &context)
    {
        const size_t numberOfTokens = context.getNumberOfTokens();

        /* Token 0 seeds the maximum, so at least one token must be present.
         * Checked the same way 'MySquaringModifier' checks its assumptions. */
        assert(numberOfTokens > 0);

        /* Iterate throughout tokens, parse every token as float and compute max value */
        float maxFeature = context.getTokenAs<float>(0);
        for (size_t i = 1; i < numberOfTokens; i++)
        {
            maxFeature = std::max(maxFeature, context.getTokenAs<float>(i));
        }

        /* Write max value to the output buffer; its size is expected to match
         * the number of output features requested in 'initialize'. */
        daal::services::BufferView<DAAL_DATA_TYPE> outputBuffer = context.getOutputBuffer();
        assert(outputBuffer.size() >= 1);
        outputBuffer[0] = maxFeature;
    }
};
/* Example entry point: reads a CSV file through a FileDataSource whose feature
 * manager is configured with built-in and custom modifiers, then prints the
 * resulting numeric table. Returns 0 when the end of the function is reached. */
int main(int argc, char *argv[])
{
    /* Default path to the CSV to be read. The string is intentionally NOT
     * const: its address is handed to checkArguments(), and that helper
     * (declared in service.h) is expected to overwrite it with a path supplied
     * on the command line. Writing through a pointer to a const object would
     * be undefined behavior. */
    std::string csvFileName = "../data/batch/mixed_text_and_numbers.csv";
    checkArguments(argc, argv, 1, &csvFileName);

    /* Define options for CSV data source (three flags combined with '|') */
    const CsvDataSourceOptions csvOptions = CsvDataSourceOptions::allocateNumericTable |
                                            CsvDataSourceOptions::createDictionaryFromContext |
                                            CsvDataSourceOptions::parseHeader;

    /* Define CSV file data source */
    FileDataSource<CSVFeatureManager> ds(csvFileName, csvOptions);

    /* Configure format of output numeric table by applying modifiers.
     * Output numeric table will have the following format:
     * | Numeric1 | Numeric2 ^ 2 | Numeric5 ^ 2 | max(Numeric0, Numeric5) | */
    ds.getFeatureManager()
        .addModifier( features::list("Numeric1"), modifiers::csv::continuous() )
        .addModifier( features::list("Numeric2", "Numeric5"), modifiers::csv::custom<MySquaringModifier>() )
        .addModifier( features::list("Numeric0", "Numeric5"), modifiers::csv::custom<MyMaxFeatureModifier>() );

    /* Load and parse CSV file */
    ds.loadDataBlock();

    printNumericTable(ds.getNumericTable(), "Loaded numeric table:");
    return 0;
}

For more complete information about compiler optimizations, see our Optimization Notice.