C++ API Reference for Intel® Data Analytics Acceleration Library 2019 Update 4

simple_csv_feature_modifiers.cpp

/* file: simple_csv_feature_modifiers.cpp */
/*******************************************************************************
* Copyright 2014-2019 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************/
/*
! Content:
! C++ example of using feature modifiers with a file data source
!******************************************************************************/
#include "daal.h"
#include "service.h"
using namespace daal::data_management;
/* Path to the CSV file to be read.
 * Deliberately not const: main() passes the address of this string to
 * checkArguments(), which is expected to replace the default path with one
 * supplied on the command line (NOTE(review): confirm against service.h).
 * Writing through that pointer to an object defined const would be undefined
 * behavior. 'static' keeps the internal linkage the const definition had. */
static std::string csvFileName = "../data/batch/mixed_text_and_numbers.csv";
/* Options for the CSV data source. The three flags are combined with
 * operator| into one value that is passed to the FileDataSource constructors
 * in this file */
const CsvDataSourceOptions csvOptions =
    CsvDataSourceOptions::allocateNumericTable        |
    CsvDataSourceOptions::createDictionaryFromContext |
    CsvDataSourceOptions::parseHeader;
/* Read CSV using default data source behavior */
void readDefault()
{
FileDataSource<CSVFeatureManager> ds(csvFileName, csvOptions);
/* By default all numeric columns will be parsed as continuous
* features and other columns as categorical */
ds.loadDataBlock();
printNumericTable(ds.getNumericTable(), "readDefault function result:");
}
/* Read CSV and do basic filtering using columns indices */
void readOnlySpecifiedColumnIndices()
{
FileDataSource<CSVFeatureManager> ds(csvFileName, csvOptions);
/* This means that columns with indices {0, 1, 5} will be included to the output numeric
* table and other columns will be ignored. The first argument of method 'include' specifies
* the set of columns and the second one specifies modifier. in this case we use predefined
* automatic modifier that automatically decides how to parse column in the best way */
ds.getFeatureManager()
.addModifier( features::list(0, 1, 5), modifiers::csv::automatic() );
ds.loadDataBlock();
printNumericTable(ds.getNumericTable(), "readOnlySpecifiedColumnIndices function result:");
}
/* Read CSV and do basic filtering using columns names */
void readOnlySpecifiedColumnNames()
{
FileDataSource<CSVFeatureManager> ds(csvFileName, csvOptions);
/* The same as readOnlySpecifiedColumnIndices but uses column names instead of indices */
ds.getFeatureManager()
.addModifier( features::list("Numeric1", "Categorical0"), modifiers::csv::automatic() );
ds.loadDataBlock();
printNumericTable(ds.getNumericTable(), "readOnlySpecifiedColumnNames function result:");
}
/* Read CSV using multiple modifiers */
void readUsingMultipleModifiers()
{
FileDataSource<CSVFeatureManager> ds(csvFileName, csvOptions);
ds.getFeatureManager()
.addModifier( features::list("Numeric1"), modifiers::csv::continuous() )
.addModifier( features::list("Text1", "Categorical1"), modifiers::csv::categorical() );
ds.loadDataBlock();
printNumericTable(ds.getNumericTable(), "readUsingMultipleModifiers function result:");
}
/* Entry point: forwards the command-line arguments and the address of
 * csvFileName to checkArguments, then runs each reading scenario in turn.
 * Always returns 0 */
int main(int argc, char *argv[])
{
    /* One file-name argument slot is declared */
    checkArguments(argc, argv, 1, &csvFileName);

    /* Scenarios are executed strictly in the order listed here */
    typedef void (*Scenario)();
    const Scenario scenarios[] =
    {
        readDefault,                    /* default data source behavior */
        readOnlySpecifiedColumnIndices, /* basic filtering by column indices */
        readOnlySpecifiedColumnNames,   /* basic filtering by column names */
        readUsingMultipleModifiers      /* several modifiers at once */
    };
    const unsigned int scenarioCount = sizeof(scenarios) / sizeof(scenarios[0]);

    for (unsigned int i = 0; i < scenarioCount; ++i)
    {
        scenarios[i]();
    }

    return 0;
}

For more complete information about compiler optimizations, see our Optimization Notice.