C++ API Reference for Intel® Data Analytics Acceleration Library 2019

data_source_dictionary.h
1 /* file: data_source_dictionary.h */
2 /*******************************************************************************
3 * Copyright 2014-2018 Intel Corporation.
4 *
5 * This software and the related documents are Intel copyrighted materials, and
6 * your use of them is governed by the express license under which they were
7 * provided to you (License). Unless the License provides otherwise, you may not
8 * use, modify, copy, publish, distribute, disclose or transmit this software or
9 * the related documents without Intel's prior written permission.
10 *
11 * This software and the related documents are provided as is, with no express
12 * or implied warranties, other than those that are expressly stated in the
13 * License.
14 *******************************************************************************/
15 
16 /*
17 //++
18 // Implementation of a data source dictionary.
19 //--
20 */
21 
22 #ifndef __DATA_SOURCE_DICTIONARY_H__
23 #define __DATA_SOURCE_DICTIONARY_H__
24 
25 #include <map>
26 #include <string>
27 
28 #include "services/internal/buffer.h"
29 #include "data_management/features/defines.h"
30 #include "data_management/data/data_dictionary.h"
31 
32 namespace daal
33 {
34 namespace data_management
35 {
36 namespace interface1
37 {
38 
/* Dictionary of a categorical feature: an ordered map from a category name
 * (std::string) to a pair of integers.
 * NOTE(review): the meaning of the two integers is not visible in this file;
 * confirm against the code that fills the dictionary. */
44 class CategoricalFeatureDictionary : public std::map<std::string, std::pair<int, int> > { };
/* Shared pointer to a CategoricalFeatureDictionary. */
45 typedef services::SharedPtr<CategoricalFeatureDictionary> CategoricalFeatureDictionaryPtr;
46 
51 class DataSourceFeature : public SerializationIface
52 {
53 public:
54  NumericTableFeature ntFeature;
55  size_t name_length;
56  char *name;
57 
58  CategoricalFeatureDictionary *cat_dict;
59 
60 public:
64  DataSourceFeature() :
65  name(NULL),
66  name_length(0),
67  cat_dict(NULL) { }
68 
72  DataSourceFeature(const DataSourceFeature &other)
73  {
74  assign(other);
75  }
76 
80  DataSourceFeature &operator= (const DataSourceFeature &other)
81  {
82  return assign(other);
83  }
84 
86  virtual ~DataSourceFeature()
87  {
88  if (_catDictPtr.get() != cat_dict)
89  {
90  delete cat_dict;
91  }
92  }
93 
97  services::String getFeatureName() const
98  {
99  return services::String(name);
100  }
101 
106  CategoricalFeatureDictionary *getCategoricalDictionary()
107  {
108  if (!cat_dict)
109  {
110  cat_dict = new CategoricalFeatureDictionary();
111  _catDictPtr = CategoricalFeatureDictionaryPtr(cat_dict);
112  }
113 
114  return cat_dict;
115  }
116 
117  void setCategoricalDictionary(const CategoricalFeatureDictionaryPtr &dictionary)
118  {
119  if (_catDictPtr.get() != cat_dict)
120  { delete cat_dict; }
121 
122  _catDictPtr = dictionary;
123  cat_dict = dictionary.get();
124  }
125 
130  void setFeatureName(const services::String &featureName)
131  {
132  _name = featureName;
133  synchRawAndStringNames();
134  }
135 
140  template<typename T>
141  void setType()
142  {
143  ntFeature.setType<T>();
144  }
145 
147  services::Status serializeImpl(InputDataArchive *arch) DAAL_C11_OVERRIDE
148  {
149  return serialImpl<InputDataArchive, false>(arch);
150  }
151 
153  services::Status deserializeImpl(const OutputDataArchive *arch) DAAL_C11_OVERRIDE
154  {
155  return serialImpl<const OutputDataArchive, true>(arch);
156  }
157 
159  template<typename Archive, bool onDeserialize>
160  services::Status serialImpl( Archive *arch )
161  {
162  services::Status status;
163 
164  arch->setObj(&ntFeature);
165  arch->set(name_length);
166 
167  if (onDeserialize)
168  {
169  if (name_length > 0)
170  {
171  _name = services::String(name_length);
172  synchRawAndStringNames();
173  }
174  }
175 
176  arch->set(name, name_length);
177 
178  const int categoricalFeatureDictionaryFlag = (cat_dict != 0);
179  arch->set(categoricalFeatureDictionaryFlag);
180 
181  if (categoricalFeatureDictionaryFlag)
182  {
183  if (onDeserialize)
184  {
185  /* Make sure that dictionary is allocated */
186  getCategoricalDictionary();
187  /* Make sure that dictionary is empty */
188  cat_dict->empty();
189  }
190 
191  size_t size = cat_dict->size();
192  arch->set(size);
193 
194  if (onDeserialize)
195  {
196  const size_t initialBuffSize = 10;
197  services::internal::Buffer<char> buff(initialBuffSize, &status);
198  DAAL_CHECK_STATUS_VAR(status);
199 
200  for (size_t i = 0; i < size; i++)
201  {
202  size_t catNameLen = 0;
203  int catV1 = 0;
204  int catV2 = 0;
205 
206  arch->set(catNameLen);
207  if (catNameLen > buff.size())
208  {
209  DAAL_CHECK_STATUS( status, buff.reallocate(catNameLen) );
210  }
211  arch->set(buff.data(), catNameLen);
212  arch->set(catV1);
213  arch->set(catV2);
214 
215  (*cat_dict)[ std::string(buff.data(), catNameLen) ] = std::pair<int,int>(catV1, catV2);
216  }
217  }
218  else
219  {
220  typedef CategoricalFeatureDictionary::iterator it_type;
221 
222  for (it_type it=cat_dict->begin(); it != cat_dict->end(); it++)
223  {
224  const std::string & catName = it->first;
225  size_t catNameLen = catName.size();
226  int catV1 = it->second.first;
227  int catV2 = it->second.second;
228 
229  arch->set(catNameLen);
230  arch->set(catName.c_str(), catNameLen);
231  arch->set(catV1);
232  arch->set(catV2);
233  }
234  }
235  }
236  else
237  {
238  cat_dict = NULL;
239  _catDictPtr = CategoricalFeatureDictionaryPtr();
240  }
241 
242  return status;
243  }
244 
245  virtual int getSerializationTag() const DAAL_C11_OVERRIDE
246  {
247  return SERIALIZATION_DATAFEATURE_NT_ID;
248  }
249 
250  features::IndexNumType getIndexType() const
251  {
252  return ntFeature.indexType;
253  }
254 
255 private:
256  DataSourceFeature &assign(const DataSourceFeature& other)
257  {
258  _name = other._name;
259  _catDictPtr = other._catDictPtr;
260  ntFeature = other.ntFeature;
261  cat_dict = other.cat_dict;
262 
263  if (other.name == other._name.c_str())
264  {
265  synchRawAndStringNames();
266  }
267  else
268  {
269  name = other.name;
270  name_length = other.name_length;
271  }
272 
273  return *this;
274  }
275 
276  void synchRawAndStringNames()
277  {
278  name_length = _name.length();
279  name = const_cast<char *>(_name.c_str());
280  }
281 
282 private:
283  services::String _name;
284  CategoricalFeatureDictionaryPtr _catDictPtr;
285 };
286 
/* Dictionary whose entries are DataSourceFeature objects, instantiated with the
 * SERIALIZATION_DATADICTIONARY_DS_ID serialization tag. */
287 typedef Dictionary<DataSourceFeature, SERIALIZATION_DATADICTIONARY_DS_ID> DataSourceDictionary;
/* Shared pointer to a DataSourceDictionary. */
288 typedef services::SharedPtr<DataSourceDictionary> DataSourceDictionaryPtr;
291 } // namespace interface1
292 
293 using interface1::CategoricalFeatureDictionary;
294 using interface1::CategoricalFeatureDictionaryPtr;
295 using interface1::DataSourceFeature;
296 using interface1::DataSourceDictionary;
297 using interface1::DataSourceDictionaryPtr;
298 
299 } // namespace data_management
300 } // namespace daal
301 
302 #endif
daal
Definition: algorithm_base_common.h:31
daal::data_management::interface1::DataSourceFeature::setFeatureName
void setFeatureName(const services::String &featureName)
Definition: data_source_dictionary.h:130
daal::data_management::interface1::InputDataArchive
Provides methods to create an archive data object (serialized) and access this object.
Definition: data_archive.h:689
daal::data_management::interface1::DataSourceFeature::setType
void setType()
Definition: data_source_dictionary.h:141
daal::data_management::interface1::DataSourceFeature::getFeatureName
services::String getFeatureName() const
Definition: data_source_dictionary.h:97
daal::data_management::interface1::DataSourceFeature
Data structure that describes the Data Source feature.
Definition: data_source_dictionary.h:51
daal::data_management::interface1::DataSourceFeature::operator=
DataSourceFeature & operator=(const DataSourceFeature &other)
Definition: data_source_dictionary.h:80
daal::data_management::interface1::CategoricalFeatureDictionary
Definition: data_source_dictionary.h:44
daal::data_management::interface1::DataSourceFeature::getSerializationTag
virtual int getSerializationTag() const DAAL_C11_OVERRIDE
Definition: data_source_dictionary.h:245
daal::data_management::interface1::DataSourceFeature::getCategoricalDictionary
CategoricalFeatureDictionary * getCategoricalDictionary()
Definition: data_source_dictionary.h:106
daal::data_management::interface1::DataSourceFeature::DataSourceFeature
DataSourceFeature(const DataSourceFeature &other)
Definition: data_source_dictionary.h:72
daal::data_management::interface1::NumericTableFeature::setType
void setType()
Definition: data_dictionary.h:94
daal::data_management::interface1::SerializationIface
Abstract interface class that defines the interface for serialization and deserialization.
Definition: data_serialize.h:50
daal::data_management::interface1::NumericTableFeature
Data structure that describes the Numeric Table feature.
Definition: data_dictionary.h:51
daal::data_management::interface1::DataSourceFeature::DataSourceFeature
DataSourceFeature()
Definition: data_source_dictionary.h:64

For more complete information about compiler optimizations, see our Optimization Notice.