C++ API Reference for Intel® Data Analytics Acceleration Library 2018 Update 2

file_data_source.h
1 /* file: file_data_source.h */
2 /*******************************************************************************
3 * Copyright 2014-2018 Intel Corporation
4 * All Rights Reserved.
5 *
6 * If this software was obtained under the Intel Simplified Software License,
7 * the following terms apply:
8 *
9 * The source code, information and material ("Material") contained herein is
10 * owned by Intel Corporation or its suppliers or licensors, and title to such
11 * Material remains with Intel Corporation or its suppliers or licensors. The
12 * Material contains proprietary information of Intel or its suppliers and
13 * licensors. The Material is protected by worldwide copyright laws and treaty
14 * provisions. No part of the Material may be used, copied, reproduced,
15 * modified, published, uploaded, posted, transmitted, distributed or disclosed
16 * in any way without Intel's prior express written permission. No license under
17 * any patent, copyright or other intellectual property rights in the Material
18 * is granted to or conferred upon you, either expressly, by implication,
19 * inducement, estoppel or otherwise. Any license under such intellectual
20 * property rights must be express and approved by Intel in writing.
21 *
22 * Unless otherwise agreed by Intel in writing, you may not remove or alter this
23 * notice or any other notice embedded in Materials by Intel or Intel's
24 * suppliers or licensors in any way.
25 *
26 *
27 * If this software was obtained under the Apache License, Version 2.0 (the
28 * "License"), the following terms apply:
29 *
30 * You may not use this file except in compliance with the License. You may
31 * obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
32 *
33 *
34 * Unless required by applicable law or agreed to in writing, software
35 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
36 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
37 *
38 * See the License for the specific language governing permissions and
39 * limitations under the License.
40 *******************************************************************************/
41 
42 /*
43 //++
44 // Implementation of the file data source class.
45 //--
46 */
47 
48 #ifndef __FILE_DATA_SOURCE_H__
49 #define __FILE_DATA_SOURCE_H__
50 
51 #include <cstdio>
52 
53 #include "services/daal_memory.h"
54 #include "data_management/data_source/data_source.h"
55 #include "data_management/data_source/csv_data_source.h"
56 #include "data_management/data/data_dictionary.h"
57 #include "data_management/data/numeric_table.h"
58 #include "data_management/data/homogen_numeric_table.h"
59 
60 namespace daal
61 {
62 namespace data_management
63 {
64 
65 namespace interface1
66 {
76 template< typename _featureManager, typename _summaryStatisticsType = DAAL_SUMMARY_STATISTICS_TYPE >
77 class FileDataSource : public CsvDataSource< _featureManager, _summaryStatisticsType >
78 {
79 public:
80  using CsvDataSource<_featureManager,_summaryStatisticsType>::checkDictionary;
81  using CsvDataSource<_featureManager,_summaryStatisticsType>::checkNumericTable;
82  using CsvDataSource<_featureManager,_summaryStatisticsType>::freeNumericTable;
83  using CsvDataSource<_featureManager,_summaryStatisticsType>::_dict;
84  using CsvDataSource<_featureManager,_summaryStatisticsType>::_initialMaxRows;
85  using CsvDataSource<_featureManager,_summaryStatisticsType>::loadDataBlock;
86 
87  using CsvDataSource<_featureManager,_summaryStatisticsType>::featureManager;
88 
89  using CsvDataSource<_featureManager,_summaryStatisticsType>::createDictionaryFromContext;
90 
94  typedef _featureManager FeatureManager;
95 
96 protected:
97  typedef data_management::HomogenNumericTable<DAAL_DATA_TYPE> DefaultNumericTableType;
98 
99  using CsvDataSource<_featureManager,_summaryStatisticsType>::_rawLineBuffer;
100  using CsvDataSource<_featureManager,_summaryStatisticsType>::_rawLineBufferLen;
101  using CsvDataSource<_featureManager,_summaryStatisticsType>::_rawLineLength;
102  using CsvDataSource<_featureManager,_summaryStatisticsType>::enlargeBuffer;
103 
104 public:
114  FileDataSource( const std::string &fileName,
115  DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable = DataSource::notAllocateNumericTable,
116  DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext = DataSource::notDictionaryFromContext,
117  size_t initialMaxRows = 10):
118  CsvDataSource<_featureManager,_summaryStatisticsType>(doAllocateNumericTable, doCreateDictionaryFromContext, initialMaxRows), _fileBuffer(NULL)
119  {
120  _fileName = fileName;
121 
122  #if (defined(_MSC_VER)&&(_MSC_VER >= 1400))
123  errno_t error;
124  error = fopen_s( &_file, fileName.c_str(), "r" );
125  if(error != 0 || !_file)
126  this->_status.add(services::throwIfPossible(services::Status(services::ErrorOnFileOpen)));
127  #else
128  _file = fopen( (char*)(fileName.c_str()), "r" );
129  if( !_file )
130  this->_status.add(services::throwIfPossible(services::Status(services::ErrorOnFileOpen)));
131  #endif
132 
133  _fileBufferLen = 1048576;
134  _fileBufferPos = _fileBufferLen;
135  _fileBuffer = (char *)daal::services::daal_malloc(_fileBufferLen);
136  }
137 
138  ~FileDataSource()
139  {
140  if (_file)
141  fclose(_file);
142  daal::services::daal_free( _fileBuffer );
143  }
144 
145 public:
146  services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
147  {
148  services::Status s = CsvDataSource<_featureManager,_summaryStatisticsType>::createDictionaryFromContext();
149  fseek(_file, 0, SEEK_SET);
150  _fileBufferPos = _fileBufferLen;
151  return s;
152  }
153 
154  DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
155  {
156  return (iseof() ? DataSourceIface::endOfData : DataSourceIface::readyForLoad);
157  }
158 
159 protected:
160  bool iseof() const DAAL_C11_OVERRIDE
161  {
162  return ((_fileBufferPos == _fileBufferLen || _fileBuffer[_fileBufferPos] == '\0') && feof(_file));
163  }
164 
165  bool readLine(char *buffer, int count, int& pos)
166  {
167  bool bRes = true;
168  pos = 0;
169  while (pos + 1 < count)
170  {
171  if (_fileBufferPos < _fileBufferLen && _fileBuffer[_fileBufferPos] != '\0')
172  {
173  buffer[pos] = _fileBuffer[_fileBufferPos];
174  pos++;
175  _fileBufferPos++;
176  if (buffer[pos - 1] == '\n')
177  break;
178  }
179  else
180  {
181  if (iseof ())
182  break;
183  _fileBufferPos = 0;
184  const int readLen = (int)fread(_fileBuffer, 1, _fileBufferLen, _file);
185  if (readLen < _fileBufferLen)
186  {
187  _fileBuffer[readLen] = '\0';
188  }
189  if (ferror(_file))
190  {
191  bRes = false;
192  break;
193  }
194  }
195  }
196  buffer[pos] = '\0';
197  return bRes;
198  }
199 
200  services::Status readLine() DAAL_C11_OVERRIDE
201  {
202  _rawLineLength = 0;
203  while(!iseof())
204  {
205  int readLen = 0;
206  if(!readLine(_rawLineBuffer + _rawLineLength, _rawLineBufferLen - _rawLineLength, readLen))
207  return services::Status(services::ErrorOnFileRead);
208 
209  if (readLen <= 0)
210  {
211  _rawLineLength = 0;
212  break;
213  }
214  _rawLineLength += readLen;
215  if (_rawLineBuffer[_rawLineLength - 1] == '\n' || _rawLineBuffer[_rawLineLength - 1] == '\r')
216  {
217  while (_rawLineLength > 0 && (_rawLineBuffer[_rawLineLength - 1] == '\n' || _rawLineBuffer[_rawLineLength - 1] == '\r'))
218  {
219  _rawLineLength--;
220  }
221  _rawLineBuffer[_rawLineLength] = '\0';
222  break;
223  }
224  if(!enlargeBuffer())
225  return services::Status(services::ErrorMemoryAllocationFailed);
226  }
227  return services::Status();
228  }
229 
230 protected:
231  std::string _fileName;
232 
233  FILE *_file;
234 
235  char *_fileBuffer;
236  int _fileBufferLen;
237  int _fileBufferPos;
238 };
240 } // namespace interface1
241 using interface1::FileDataSource;
242 
243 }
244 }
245 #endif
daal::data_management::interface1::DataSourceIface::readyForLoad
Definition: data_source.h:85
daal::services::interface1::Status
Class that holds the results of API calls. In case of API routine failure it contains the list of err...
Definition: error_handling.h:491
daal::data_management::interface1::DataSourceIface::endOfData
Definition: data_source.h:87
daal
Definition: algorithm_base_common.h:57
daal::data_management::interface1::DataSourceIface::doAllocateNumericTable
Definition: data_source.h:108
daal::data_management::interface1::DataSourceIface::NumericTableAllocationFlag
NumericTableAllocationFlag
Specifies whether a Numeric Table is allocated inside of the Data Source object.
Definition: data_source.h:105
daal::data_management::interface1::CsvDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: csv_data_source.h:275
daal::services::interface1::Status::add
Status & add(ErrorID id)
daal::data_management::interface1::DataSourceIface::notDictionaryFromContext
Definition: data_source.h:97
daal::services::ErrorMemoryAllocationFailed
Definition: error_indexes.h:172
daal::data_management::interface1::DataSourceIface::DataSourceStatus
DataSourceStatus
Specifies the status of the Data Source.
Definition: data_source.h:83
daal::services::ErrorOnFileOpen
Definition: error_indexes.h:402
daal::data_management::interface1::DataSourceIface::notAllocateNumericTable
Definition: data_source.h:107
daal::data_management::interface1::CsvDataSource
Specifies methods to access data stored in files.
Definition: csv_data_source.h:74
daal::services::daal_malloc
DAAL_EXPORT void * daal_malloc(size_t size, size_t alignment=DAAL_MALLOC_DEFAULT_ALIGNMENT)
daal::data_management::interface1::DataSourceIface::DictionaryCreationFlag
DictionaryCreationFlag
Specifies whether a Data Dictionary is created from the context of a Data Source. ...
Definition: data_source.h:95
daal::data_management::interface1::FileDataSource::FeatureManager
_featureManager FeatureManager
Definition: file_data_source.h:94
daal::services::ErrorOnFileRead
Definition: error_indexes.h:403
daal::services::daal_free
DAAL_EXPORT void daal_free(void *ptr)
daal::data_management::interface1::FileDataSource::FileDataSource
FileDataSource(const std::string &fileName, DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable=DataSource::notAllocateNumericTable, DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext=DataSource::notDictionaryFromContext, size_t initialMaxRows=10)
Definition: file_data_source.h:114
daal::data_management::interface1::HomogenNumericTable
Class that provides methods to access data stored as a contiguous array of homogeneous feature vector...
Definition: homogen_numeric_table.h:76
daal::data_management::interface1::FileDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: file_data_source.h:146
daal::data_management::interface1::FileDataSource::getStatus
DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
Definition: file_data_source.h:154
daal::data_management::interface1::FileDataSource
Specifies methods to access data stored in files.
Definition: file_data_source.h:77

For more complete information about compiler optimizations, see our Optimization Notice.