C++ API Reference for Intel® Data Analytics Acceleration Library 2018 Update 3

file_data_source.h
1 /* file: file_data_source.h */
2 /*******************************************************************************
3 * Copyright 2014-2018 Intel Corporation.
4 *
5 * This software and the related documents are Intel copyrighted materials, and
6 * your use of them is governed by the express license under which they were
7 * provided to you (License). Unless the License provides otherwise, you may not
8 * use, modify, copy, publish, distribute, disclose or transmit this software or
9 * the related documents without Intel's prior written permission.
10 *
11 * This software and the related documents are provided as is, with no express
12 * or implied warranties, other than those that are expressly stated in the
13 * License.
14 *******************************************************************************/
15 
16 /*
17 //++
18 // Implementation of the file data source class.
19 //--
20 */
21 
22 #ifndef __FILE_DATA_SOURCE_H__
23 #define __FILE_DATA_SOURCE_H__
24 
25 #include <cstdio>
26 
27 #include "services/daal_memory.h"
28 #include "data_management/data_source/data_source.h"
29 #include "data_management/data_source/csv_data_source.h"
30 #include "data_management/data/data_dictionary.h"
31 #include "data_management/data/numeric_table.h"
32 #include "data_management/data/homogen_numeric_table.h"
33 
34 namespace daal
35 {
36 namespace data_management
37 {
38 
39 namespace interface1
40 {
50 template< typename _featureManager, typename _summaryStatisticsType = DAAL_SUMMARY_STATISTICS_TYPE >
51 class FileDataSource : public CsvDataSource< _featureManager, _summaryStatisticsType >
52 {
53 public:
54  using CsvDataSource<_featureManager,_summaryStatisticsType>::checkDictionary;
55  using CsvDataSource<_featureManager,_summaryStatisticsType>::checkNumericTable;
56  using CsvDataSource<_featureManager,_summaryStatisticsType>::freeNumericTable;
57  using CsvDataSource<_featureManager,_summaryStatisticsType>::_dict;
58  using CsvDataSource<_featureManager,_summaryStatisticsType>::_initialMaxRows;
59  using CsvDataSource<_featureManager,_summaryStatisticsType>::loadDataBlock;
60 
61  using CsvDataSource<_featureManager,_summaryStatisticsType>::featureManager;
62 
63  using CsvDataSource<_featureManager,_summaryStatisticsType>::createDictionaryFromContext;
64 
68  typedef _featureManager FeatureManager;
69 
70 protected:
71  typedef data_management::HomogenNumericTable<DAAL_DATA_TYPE> DefaultNumericTableType;
72 
73  using CsvDataSource<_featureManager,_summaryStatisticsType>::_rawLineBuffer;
74  using CsvDataSource<_featureManager,_summaryStatisticsType>::_rawLineBufferLen;
75  using CsvDataSource<_featureManager,_summaryStatisticsType>::_rawLineLength;
76  using CsvDataSource<_featureManager,_summaryStatisticsType>::enlargeBuffer;
77 
78 public:
88  FileDataSource( const std::string &fileName,
89  DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable = DataSource::notAllocateNumericTable,
90  DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext = DataSource::notDictionaryFromContext,
91  size_t initialMaxRows = 10):
92  CsvDataSource<_featureManager,_summaryStatisticsType>(doAllocateNumericTable, doCreateDictionaryFromContext, initialMaxRows), _fileBuffer(NULL)
93  {
94  _fileName = fileName;
95 
96  #if (defined(_MSC_VER)&&(_MSC_VER >= 1400))
97  errno_t error;
98  error = fopen_s( &_file, fileName.c_str(), "r" );
99  if(error != 0 || !_file)
100  this->_status.add(services::throwIfPossible(services::Status(services::ErrorOnFileOpen)));
101  #else
102  _file = fopen( (char*)(fileName.c_str()), "r" );
103  if( !_file )
104  this->_status.add(services::throwIfPossible(services::Status(services::ErrorOnFileOpen)));
105  #endif
106 
107  _fileBufferLen = 1048576;
108  _fileBufferPos = _fileBufferLen;
109  _fileBuffer = (char *)daal::services::daal_malloc(_fileBufferLen);
110  }
111 
112  ~FileDataSource()
113  {
114  if (_file)
115  fclose(_file);
116  daal::services::daal_free( _fileBuffer );
117  }
118 
119 public:
120  services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
121  {
122  services::Status s = CsvDataSource<_featureManager,_summaryStatisticsType>::createDictionaryFromContext();
123  fseek(_file, 0, SEEK_SET);
124  _fileBufferPos = _fileBufferLen;
125  return s;
126  }
127 
128  DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
129  {
130  return (iseof() ? DataSourceIface::endOfData : DataSourceIface::readyForLoad);
131  }
132 
133 protected:
134  bool iseof() const DAAL_C11_OVERRIDE
135  {
136  return ((_fileBufferPos == _fileBufferLen || _fileBuffer[_fileBufferPos] == '\0') && feof(_file));
137  }
138 
139  bool readLine(char *buffer, int count, int& pos)
140  {
141  bool bRes = true;
142  pos = 0;
143  while (pos + 1 < count)
144  {
145  if (_fileBufferPos < _fileBufferLen && _fileBuffer[_fileBufferPos] != '\0')
146  {
147  buffer[pos] = _fileBuffer[_fileBufferPos];
148  pos++;
149  _fileBufferPos++;
150  if (buffer[pos - 1] == '\n')
151  break;
152  }
153  else
154  {
155  if (iseof ())
156  break;
157  _fileBufferPos = 0;
158  const int readLen = (int)fread(_fileBuffer, 1, _fileBufferLen, _file);
159  if (readLen < _fileBufferLen)
160  {
161  _fileBuffer[readLen] = '\0';
162  }
163  if (ferror(_file))
164  {
165  bRes = false;
166  break;
167  }
168  }
169  }
170  buffer[pos] = '\0';
171  return bRes;
172  }
173 
174  services::Status readLine() DAAL_C11_OVERRIDE
175  {
176  _rawLineLength = 0;
177  while(!iseof())
178  {
179  int readLen = 0;
180  if(!readLine(_rawLineBuffer + _rawLineLength, _rawLineBufferLen - _rawLineLength, readLen))
181  return services::Status(services::ErrorOnFileRead);
182 
183  if (readLen <= 0)
184  {
185  _rawLineLength = 0;
186  break;
187  }
188  _rawLineLength += readLen;
189  if (_rawLineBuffer[_rawLineLength - 1] == '\n' || _rawLineBuffer[_rawLineLength - 1] == '\r')
190  {
191  while (_rawLineLength > 0 && (_rawLineBuffer[_rawLineLength - 1] == '\n' || _rawLineBuffer[_rawLineLength - 1] == '\r'))
192  {
193  _rawLineLength--;
194  }
195  _rawLineBuffer[_rawLineLength] = '\0';
196  break;
197  }
198  if(!enlargeBuffer())
199  return services::Status(services::ErrorMemoryAllocationFailed);
200  }
201  return services::Status();
202  }
203 
204 protected:
205  std::string _fileName;
206 
207  FILE *_file;
208 
209  char *_fileBuffer;
210  int _fileBufferLen;
211  int _fileBufferPos;
212 };
214 } // namespace interface1
215 using interface1::FileDataSource;
216 
217 }
218 }
219 #endif
daal
Definition: algorithm_base_common.h:31
daal::data_management::interface1::CsvDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: csv_data_source.h:249
daal::data_management::interface1::DataSourceIface::DictionaryCreationFlag
DictionaryCreationFlag
Specifies whether a Data Dictionary is created from the context of a Data Source. ...
Definition: data_source.h:69
daal::services::ErrorMemoryAllocationFailed
Definition: error_indexes.h:146
daal::data_management::interface1::DataSourceIface::notAllocateNumericTable
Definition: data_source.h:81
daal::services::ErrorOnFileOpen
Definition: error_indexes.h:378
daal::data_management::interface1::DataSourceIface::doAllocateNumericTable
Definition: data_source.h:82
daal::data_management::interface1::CsvDataSource
Specifies methods to access data stored in files.
Definition: csv_data_source.h:48
daal::services::daal_malloc
DAAL_EXPORT void * daal_malloc(size_t size, size_t alignment=DAAL_MALLOC_DEFAULT_ALIGNMENT)
daal::data_management::interface1::FileDataSource::FeatureManager
_featureManager FeatureManager
Definition: file_data_source.h:68
daal::services::ErrorOnFileRead
Definition: error_indexes.h:379
daal::data_management::interface1::DataSourceIface::DataSourceStatus
DataSourceStatus
Specifies the status of the Data Source.
Definition: data_source.h:57
daal::services::daal_free
DAAL_EXPORT void daal_free(void *ptr)
daal::data_management::interface1::DataSourceIface::readyForLoad
Definition: data_source.h:59
daal::data_management::interface1::DataSourceIface::notDictionaryFromContext
Definition: data_source.h:71
daal::data_management::interface1::DataSourceIface::endOfData
Definition: data_source.h:61
daal::data_management::interface1::FileDataSource::FileDataSource
FileDataSource(const std::string &fileName, DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable=DataSource::notAllocateNumericTable, DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext=DataSource::notDictionaryFromContext, size_t initialMaxRows=10)
Definition: file_data_source.h:88
daal::data_management::interface1::DataSourceIface::NumericTableAllocationFlag
NumericTableAllocationFlag
Specifies whether a Numeric Table is allocated inside of the Data Source object.
Definition: data_source.h:79
daal::data_management::interface1::FileDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: file_data_source.h:120
daal::data_management::interface1::FileDataSource::getStatus
DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
Definition: file_data_source.h:128
daal::data_management::interface1::FileDataSource
Specifies methods to access data stored in files.
Definition: file_data_source.h:51

For more complete information about compiler optimizations, see our Optimization Notice.