C++ API Reference for Intel® Data Analytics Acceleration Library 2019 Update 5

csv/internal/default_modifiers.h
1 /* file: default_modifiers.h */
2 /*******************************************************************************
3 * Copyright 2014-2019 Intel Corporation.
4 *
5 * This software and the related documents are Intel copyrighted materials, and
6 * your use of them is governed by the express license under which they were
7 * provided to you (License). Unless the License provides otherwise, you may not
8 * use, modify, copy, publish, distribute, disclose or transmit this software or
9 * the related documents without Intel's prior written permission.
10 *
11 * This software and the related documents are provided as is, with no express
12 * or implied warranties, other than those that are expressly stated in the
13 * License.
14 *******************************************************************************/
15 
16 #ifndef __DATA_SOURCE_MODIFIERS_CSV_DEFAULT_MODIFIERS_H__
17 #define __DATA_SOURCE_MODIFIERS_CSV_DEFAULT_MODIFIERS_H__
18 
19 #include "services/daal_shared_ptr.h"
20 #include "services/internal/collection.h"
21 
22 #include "data_management/features/defines.h"
23 #include "data_management/data_source/modifiers/csv/modifier.h"
24 
25 namespace daal
26 {
27 namespace data_management
28 {
29 namespace modifiers
30 {
31 namespace csv
32 {
33 namespace internal
34 {
35 
40 class FeatureModifierPrimitive : public Base
41 {
42 public:
43  virtual void initialize(Config &context, size_t index) { }
44  virtual DAAL_DATA_TYPE apply(Context &context, size_t index) = 0;
45  virtual void finalize(Config &context, size_t index) { }
46 };
47 
52 class DefaultFeatureModifierPrimitive : public FeatureModifierPrimitive
53 {
54 public:
55  virtual DAAL_DATA_TYPE apply(Context &context, size_t index) DAAL_C11_OVERRIDE
56  {
57  return (DAAL_DATA_TYPE)0;
58  }
59 };
60 
65 class ContinuousFeatureModifierPrimitive : public FeatureModifierPrimitive
66 {
67 public:
68  virtual void initialize(Config &config, size_t index) DAAL_C11_OVERRIDE
69  {
70  config.setOutputFeatureType(index, features::DAAL_CONTINUOUS);
71  }
72 
73  virtual DAAL_DATA_TYPE apply(Context &context, size_t index) DAAL_C11_OVERRIDE
74  {
75  return context.getTokenAs<DAAL_DATA_TYPE>(index);
76  }
77 };
78 
83 class CategoricalFeatureModifierPrimitive : public FeatureModifierPrimitive
84 {
85 public:
86  CategoricalFeatureModifierPrimitive() :
87  _catDict(new CategoricalFeatureDictionary()) { }
88 
89  virtual void initialize(Config &config, size_t index) DAAL_C11_OVERRIDE
90  {
91  config.setOutputFeatureType(index, features::DAAL_CATEGORICAL);
92  }
93 
94  virtual DAAL_DATA_TYPE apply(Context &context, size_t index) DAAL_C11_OVERRIDE
95  {
96  const services::StringView token = context.getToken(index);
97  const std::string sToken(token.begin(), token.end());
98  const CategoricalFeatureDictionary::iterator it = _catDict->find(sToken);
99 
100  if (it != _catDict->end())
101  {
102  it->second.second++;
103  return (DAAL_DATA_TYPE)it->second.first;
104  }
105  else
106  {
107  const int itemIndex = (int)(_catDict->size());
108  const std::pair<int, int> indexPair(itemIndex, 1);
109  (*_catDict)[sToken] = indexPair;
110  return (DAAL_DATA_TYPE)itemIndex;
111  }
112  }
113 
114  virtual void finalize(Config &config, size_t index) DAAL_C11_OVERRIDE
115  {
116  const size_t numberOfCategories = _catDict->size();
117  config.setNumberOfCategories(index, numberOfCategories);
118  config.setCategoricalDictionary(index, _catDict);
119  }
120 
121 private:
122  CategoricalFeatureDictionaryPtr _catDict;
123 };
124 
129 class ContinuousFeatureModifier : public FeatureModifier
130 {
131 public:
132  virtual void initialize(Config &config) DAAL_C11_OVERRIDE
133  {
134  FeatureModifier::initialize(config);
135 
136  const size_t numberOfFeatures = config.getNumberOfInputFeatures();
137  for (size_t i = 0; i < numberOfFeatures; i++)
138  {
139  config.setOutputFeatureType(i, features::DAAL_CONTINUOUS);
140  }
141  }
142 
143  virtual void apply(Context &context) DAAL_C11_OVERRIDE
144  {
145  services::BufferView<DAAL_DATA_TYPE> outputBuffer = context.getOutputBuffer();
146  for (size_t i = 0; i < outputBuffer.size(); i++)
147  {
148  outputBuffer[i] = context.getTokenAs<DAAL_DATA_TYPE>(i);
149  }
150  }
151 };
152 
157 class CategoricalFeatureModifier : public FeatureModifier
158 {
159 public:
160  virtual void initialize(Config &config) DAAL_C11_OVERRIDE
161  {
162  FeatureModifier::initialize(config);
163 
164  const size_t numberOfInputFeatures = config.getNumberOfInputFeatures();
165  _primitives = services::Collection<CategoricalFeatureModifierPrimitive>(numberOfInputFeatures);
166  if ( !_primitives.data() )
167  {
168  services::throwIfPossible(services::ErrorMemoryAllocationFailed);
169  }
170 
171  for (size_t i = 0; i < numberOfInputFeatures; i++)
172  {
173  _primitives[i].initialize(config, i);
174  }
175  }
176 
177  virtual void apply(Context &context) DAAL_C11_OVERRIDE
178  {
179  services::BufferView<DAAL_DATA_TYPE> outputBuffer = context.getOutputBuffer();
180  for (size_t i = 0; i < outputBuffer.size(); i++)
181  {
182  outputBuffer[i] = _primitives[i].apply(context, i);
183  }
184  }
185 
186  virtual void finalize(Config &config) DAAL_C11_OVERRIDE
187  {
188  FeatureModifier::finalize(config);
189 
190  const size_t numberOfOutputFeatures = config.getNumberOfInputFeatures();
191  for (size_t i = 0; i < numberOfOutputFeatures; i++)
192  {
193  _primitives[i].finalize(config, i);
194  }
195  }
196 
197 private:
198  services::Collection<CategoricalFeatureModifierPrimitive> _primitives;
199 };
200 
205 class AutomaticFeatureModifier : public FeatureModifier
206 {
207 public:
208  virtual void initialize(Config &config) DAAL_C11_OVERRIDE
209  {
210  FeatureModifier::initialize(config);
211 
212  const size_t numberOfInputFeatures = config.getNumberOfInputFeatures();
213  for (size_t i = 0; i < numberOfInputFeatures; i++)
214  {
215  FeatureModifierPrimitive *primitive =
216  createPrimitive(config.getInputFeatureDetectedType(i));
217 
218  if ( !_primitives.push_back(primitive) )
219  {
220  services::throwIfPossible(services::ErrorMemoryAllocationFailed);
221  }
222 
223  primitive->initialize(config, i);
224  }
225  }
226 
227  virtual void apply(Context &context) DAAL_C11_OVERRIDE
228  {
229  services::BufferView<DAAL_DATA_TYPE> outputBuffer = context.getOutputBuffer();
230  for (size_t i = 0; i < outputBuffer.size(); i++)
231  {
232  outputBuffer[i] = _primitives[i].apply(context, i);
233  }
234  }
235 
236  virtual void finalize(Config &config) DAAL_C11_OVERRIDE
237  {
238  FeatureModifier::finalize(config);
239 
240  const size_t numberOfOutputFeatures = config.getNumberOfInputFeatures();
241  for (size_t i = 0; i < numberOfOutputFeatures; i++)
242  {
243  _primitives[i].finalize(config, i);
244  }
245  }
246 
247 private:
248  FeatureModifierPrimitive *createPrimitive(features::FeatureType featureType)
249  {
250  switch (featureType)
251  {
252  case features::DAAL_CONTINUOUS:
253  return new ContinuousFeatureModifierPrimitive();
254 
255  case features::DAAL_ORDINAL:
256  case features::DAAL_CATEGORICAL:
257  return new CategoricalFeatureModifierPrimitive();
258  }
259  return new DefaultFeatureModifierPrimitive();
260  }
261 
262 private:
263  services::internal::ObjectPtrCollection<FeatureModifierPrimitive> _primitives;
264 };
265 typedef services::SharedPtr<AutomaticFeatureModifier> AutomaticFeatureModifierPtr;
266 
267 } // namespace internal
268 } // namespace csv
269 } // namespace modifiers
270 } // namespace data_management
271 } // namespace daal
272 
273 #endif
daal::data_management::modifiers::csv::internal::AutomaticFeatureModifier
Feature modifier that determines suitable feature type and parses tokens according to determined type...
Definition: csv/internal/default_modifiers.h:205
daal
Definition: algorithm_base_common.h:31
daal::services::ErrorMemoryAllocationFailed
Definition: error_indexes.h:147
daal::data_management::modifiers::csv::internal::FeatureModifierPrimitive
Primitive modifier that is applicable to a single column.
Definition: csv/internal/default_modifiers.h:40
daal::data_management::modifiers::csv::internal::CategoricalFeatureModifier
Feature modifier that parses tokens as categorical features.
Definition: csv/internal/default_modifiers.h:157
daal::data_management::modifiers::csv::internal::DefaultFeatureModifierPrimitive
Default implementation of primitive feature modifier.
Definition: csv/internal/default_modifiers.h:52
daal::data_management::modifiers::csv::internal::ContinuousFeatureModifier
Feature modifier that parses tokens as continuous features.
Definition: csv/internal/default_modifiers.h:129
daal::services::internal::ObjectPtrCollection
Class that implements functionality of collection container and holds pointers to objects of specifie...
Definition: internal/collection.h:37
daal::data_management::modifiers::csv::internal::CategoricalFeatureModifierPrimitive
Primitive feature modifier that parses tokens as categorical features.
Definition: csv/internal/default_modifiers.h:83
daal::Base
Base class for Intel(R) Data Analytics Acceleration Library objects
Definition: base.h:39
daal::data_management::modifiers::csv::internal::ContinuousFeatureModifierPrimitive
Primitive feature modifier that parses tokens as continuous features.
Definition: csv/internal/default_modifiers.h:65

For more complete information about compiler optimizations, see our Optimization Notice.