C++ API Reference for Intel® Data Analytics Acceleration Library 2018 Update 1

string_data_source.h
1 /* file: string_data_source.h */
2 /*******************************************************************************
3 * Copyright 2014-2017 Intel Corporation
4 * All Rights Reserved.
5 *
6 * If this software was obtained under the Intel Simplified Software License,
7 * the following terms apply:
8 *
9 * The source code, information and material ("Material") contained herein is
10 * owned by Intel Corporation or its suppliers or licensors, and title to such
11 * Material remains with Intel Corporation or its suppliers or licensors. The
12 * Material contains proprietary information of Intel or its suppliers and
13 * licensors. The Material is protected by worldwide copyright laws and treaty
14 * provisions. No part of the Material may be used, copied, reproduced,
15 * modified, published, uploaded, posted, transmitted, distributed or disclosed
16 * in any way without Intel's prior express written permission. No license under
17 * any patent, copyright or other intellectual property rights in the Material
18 * is granted to or conferred upon you, either expressly, by implication,
19 * inducement, estoppel or otherwise. Any license under such intellectual
20 * property rights must be express and approved by Intel in writing.
21 *
22 * Unless otherwise agreed by Intel in writing, you may not remove or alter this
23 * notice or any other notice embedded in Materials by Intel or Intel's
24 * suppliers or licensors in any way.
25 *
26 *
27 * If this software was obtained under the Apache License, Version 2.0 (the
28 * "License"), the following terms apply:
29 *
30 * You may not use this file except in compliance with the License. You may
31 * obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
32 *
33 *
34 * Unless required by applicable law or agreed to in writing, software
35 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
36 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
37 *
38 * See the License for the specific language governing permissions and
39 * limitations under the License.
40 *******************************************************************************/
41 
42 /*
43 //++
44 // Implementation of the string data source class.
45 //--
46 */
47 
48 #ifndef __STRING_DATA_SOURCE_H__
49 #define __STRING_DATA_SOURCE_H__
50 
51 #include "services/daal_memory.h"
52 #include "data_management/data_source/data_source.h"
53 #include "data_management/data_source/csv_data_source.h"
54 #include "data_management/data/data_dictionary.h"
55 #include "data_management/data/numeric_table.h"
56 #include "data_management/data/homogen_numeric_table.h"
57 
58 namespace daal
59 {
60 namespace data_management
61 {
62 
63 namespace interface1
64 {
74 template< typename _featureManager, typename _summaryStatisticsType = DAAL_SUMMARY_STATISTICS_TYPE >
75 class StringDataSource : public CsvDataSource< _featureManager, _summaryStatisticsType >
76 {
77 public:
78  using CsvDataSource<_featureManager,_summaryStatisticsType>::checkDictionary;
79  using CsvDataSource<_featureManager,_summaryStatisticsType>::checkNumericTable;
80  using CsvDataSource<_featureManager,_summaryStatisticsType>::freeNumericTable;
81  using CsvDataSource<_featureManager,_summaryStatisticsType>::_dict;
82  using CsvDataSource<_featureManager,_summaryStatisticsType>::_initialMaxRows;
83  using CsvDataSource<_featureManager,_summaryStatisticsType>::loadDataBlock;
84 
85  using CsvDataSource<_featureManager,_summaryStatisticsType>::featureManager;
86 
87  using CsvDataSource<_featureManager,_summaryStatisticsType>::createDictionaryFromContext;
88 
92  typedef _featureManager FeatureManager;
93 
94 protected:
95  typedef data_management::HomogenNumericTable<DAAL_DATA_TYPE> DefaultNumericTableType;
96 
97  using CsvDataSource<_featureManager,_summaryStatisticsType>::_rawLineBuffer;
98  using CsvDataSource<_featureManager,_summaryStatisticsType>::_rawLineBufferLen;
99  using CsvDataSource<_featureManager,_summaryStatisticsType>::_rawLineLength;
100  using CsvDataSource<_featureManager,_summaryStatisticsType>::enlargeBuffer;
101 
102 public:
112  StringDataSource( const byte *data,
113  DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable = DataSource::notAllocateNumericTable,
114  DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext = DataSource::notDictionaryFromContext,
115  size_t initialMaxRows = 10):
116  CsvDataSource<_featureManager,_summaryStatisticsType>(doAllocateNumericTable, doCreateDictionaryFromContext, initialMaxRows), _contextDictFlag(false)
117  {
118  setData( data );
119  }
120 
121  ~StringDataSource() {}
122 
127  void setData( const byte *data )
128  {
129  if( !data )
130  {
131  this->_status.add(services::throwIfPossible(services::Status(services::ErrorNullPtr)));
132  return;
133  }
134  _stringBufferPos = 0;
135  _stringBuffer = (char *)data;
136  }
137 
142  const byte *getData()
143  {
144  return (const byte *)(_stringBuffer);
145  }
146 
150  void resetData()
151  {
152  _stringBufferPos = 0;
153  }
154 
155 public:
156  services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
157  {
158  services::Status s = CsvDataSource<_featureManager,_summaryStatisticsType>::createDictionaryFromContext();
159  _stringBufferPos = 0;
160  return s;
161  }
162 
163  DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
164  {
165  return (iseof() ? DataSourceIface::endOfData : DataSourceIface::readyForLoad);
166  }
167 
168 protected:
169  bool iseof() const DAAL_C11_OVERRIDE
170  {
171  return (_stringBuffer[_stringBufferPos] == '\0');
172  }
173 
174  int readLine(char *buffer, int count)
175  {
176  int pos = 0;
177  for(;pos<count-1;pos++)
178  {
179  buffer[pos] = _stringBuffer[_stringBufferPos+pos];
180 
181  if( buffer[pos]=='\0' || buffer[pos]=='\n' )
182  {
183  break;
184  }
185  }
186  if(buffer[pos]=='\n')
187  {
188  pos++;
189  }
190  _stringBufferPos += pos;
191  buffer[pos] = '\0';
192  return pos;
193  }
194 
195  services::Status readLine() DAAL_C11_OVERRIDE
196  {
197  _rawLineLength = 0;
198  while(!iseof())
199  {
200  const int readLen = readLine (_rawLineBuffer + _rawLineLength, (int)(_rawLineBufferLen - _rawLineLength));
201  if (readLen <= 0)
202  {
203  _rawLineLength = 0;
204  return services::Status();
205  }
206  _rawLineLength += readLen;
207  if (_rawLineBuffer[_rawLineLength - 1] == '\n' || _rawLineBuffer[_rawLineLength - 1] == '\r')
208  {
209  while (_rawLineLength > 0 && (_rawLineBuffer[_rawLineLength - 1] == '\n' || _rawLineBuffer[_rawLineLength - 1] == '\r'))
210  {
211  _rawLineLength--;
212  }
213  _rawLineBuffer[_rawLineLength] = '\0';
214  return services::Status();
215  }
216  if(!enlargeBuffer())
217  return services::Status(services::ErrorMemoryAllocationFailed);
218  }
219  return services::Status();
220  }
221 
222 private:
223  char *_stringBuffer;
224  size_t _stringBufferPos;
225 
226  bool _contextDictFlag;
227 };
229 } // namespace interface1
230 using interface1::StringDataSource;
231 
232 }
233 }
234 #endif
daal::data_management::interface1::StringDataSource::resetData
void resetData()
Definition: string_data_source.h:150
daal::data_management::interface1::StringDataSource::FeatureManager
_featureManager FeatureManager
Definition: string_data_source.h:92
daal
Definition: algorithm_base_common.h:57
daal::data_management::interface1::StringDataSource::StringDataSource
StringDataSource(const byte *data, DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable=DataSource::notAllocateNumericTable, DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext=DataSource::notDictionaryFromContext, size_t initialMaxRows=10)
Definition: string_data_source.h:112
daal::data_management::interface1::StringDataSource
Specifies methods to access data stored in byte arrays in the C-string format.
Definition: string_data_source.h:75
daal::data_management::interface1::CsvDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: csv_data_source.h:275
daal::data_management::interface1::DataSourceIface::DictionaryCreationFlag
DictionaryCreationFlag
Specifies whether a Data Dictionary is created from the context of a Data Source. ...
Definition: data_source.h:95
daal::data_management::interface1::StringDataSource::setData
void setData(const byte *data)
Definition: string_data_source.h:127
daal::data_management::interface1::StringDataSource::getStatus
DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
Definition: string_data_source.h:163
daal::services::ErrorMemoryAllocationFailed
Definition: error_indexes.h:170
daal::data_management::interface1::DataSourceIface::notAllocateNumericTable
Definition: data_source.h:107
daal::services::ErrorNullPtr
Definition: error_indexes.h:165
daal::data_management::interface1::DataSourceIface::doAllocateNumericTable
Definition: data_source.h:108
daal::data_management::interface1::StringDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: string_data_source.h:156
daal::data_management::interface1::CsvDataSource
Specifies methods to access data stored in files.
Definition: csv_data_source.h:74
daal::algorithms::association_rules::data
Definition: apriori_types.h:107
daal::data_management::interface1::DataSourceIface::DataSourceStatus
DataSourceStatus
Specifies the status of the Data Source.
Definition: data_source.h:83
daal::data_management::interface1::DataSourceIface::readyForLoad
Definition: data_source.h:85
daal::data_management::interface1::StringDataSource::getData
const byte * getData()
Definition: string_data_source.h:142
daal::data_management::interface1::DataSourceIface::notDictionaryFromContext
Definition: data_source.h:97
daal::data_management::interface1::DataSourceIface::endOfData
Definition: data_source.h:87
daal::data_management::interface1::DataSourceIface::NumericTableAllocationFlag
NumericTableAllocationFlag
Specifies whether a Numeric Table is allocated inside of the Data Source object.
Definition: data_source.h:105

For more complete information about compiler optimizations, see our Optimization Notice.