C++ API Reference for Intel® Data Analytics Acceleration Library 2019

file_data_source.h
1 /* file: file_data_source.h */
2 /*******************************************************************************
3 * Copyright 2014-2018 Intel Corporation.
4 *
5 * This software and the related documents are Intel copyrighted materials, and
6 * your use of them is governed by the express license under which they were
7 * provided to you (License). Unless the License provides otherwise, you may not
8 * use, modify, copy, publish, distribute, disclose or transmit this software or
9 * the related documents without Intel's prior written permission.
10 *
11 * This software and the related documents are provided as is, with no express
12 * or implied warranties, other than those that are expressly stated in the
13 * License.
14 *******************************************************************************/
15 
16 /*
17 //++
18 // Implementation of the file data source class.
19 //--
20 */
21 
22 #ifndef __FILE_DATA_SOURCE_H__
23 #define __FILE_DATA_SOURCE_H__
24 
25 #include <cstdio>
26 
27 #include "services/daal_memory.h"
28 #include "data_management/data_source/data_source.h"
29 #include "data_management/data_source/csv_data_source.h"
30 #include "data_management/data/data_dictionary.h"
31 #include "data_management/data/numeric_table.h"
32 #include "data_management/data/homogen_numeric_table.h"
33 
34 namespace daal
35 {
36 namespace data_management
37 {
38 
39 namespace interface1
40 {
51 template< typename FeatureManager, typename SummaryStatisticsType = DAAL_SUMMARY_STATISTICS_TYPE>
52 class FileDataSource : public CsvDataSource<FeatureManager, SummaryStatisticsType>
53 {
54 private:
55  typedef CsvDataSource<FeatureManager, SummaryStatisticsType> super;
56 
57 protected:
58  using super::_rawLineBuffer;
59  using super::_rawLineBufferLen;
60  using super::_rawLineLength;
61  using super::_status;
62 
63 public:
73  FileDataSource(const std::string &fileName,
74  DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable = DataSource::notAllocateNumericTable,
75  DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext = DataSource::notDictionaryFromContext,
76  size_t initialMaxRows = 10) :
77  super(doAllocateNumericTable, doCreateDictionaryFromContext, initialMaxRows)
78  {
79  _status |= initialize(fileName);
80  }
81 
88  FileDataSource(const std::string &fileName,
89  CsvDataSourceOptions options,
90  size_t initialMaxRows = 10) :
91  super(options, initialMaxRows)
92  {
93  _status |= initialize(fileName);
94  }
95 
96  virtual ~FileDataSource()
97  {
98  if (_file)
99  fclose(_file);
100  daal::services::daal_free( _fileBuffer );
101  }
102 
103 public:
104  services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
105  {
106  services::Status s = super::createDictionaryFromContext();
107  fseek(_file, 0, SEEK_SET);
108  _fileBufferPos = _fileBufferLen;
109  return s;
110  }
111 
112  DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
113  {
114  return (iseof() ? DataSourceIface::endOfData : DataSourceIface::readyForLoad);
115  }
116 
117 protected:
118  bool iseof() const DAAL_C11_OVERRIDE
119  {
120  return ((_fileBufferPos == _fileBufferLen || _fileBuffer[_fileBufferPos] == '\0') && feof(_file));
121  }
122 
123  bool readLine(char *buffer, int count, int& pos)
124  {
125  bool bRes = true;
126  pos = 0;
127  while (pos + 1 < count)
128  {
129  if (_fileBufferPos < _fileBufferLen && _fileBuffer[_fileBufferPos] != '\0')
130  {
131  buffer[pos] = _fileBuffer[_fileBufferPos];
132  pos++;
133  _fileBufferPos++;
134  if (buffer[pos - 1] == '\n')
135  break;
136  }
137  else
138  {
139  if (iseof ())
140  break;
141  _fileBufferPos = 0;
142  const int readLen = (int)fread(_fileBuffer, 1, _fileBufferLen, _file);
143  if (readLen < _fileBufferLen)
144  {
145  _fileBuffer[readLen] = '\0';
146  }
147  if (ferror(_file))
148  {
149  bRes = false;
150  break;
151  }
152  }
153  }
154  buffer[pos] = '\0';
155  return bRes;
156  }
157 
158  services::Status readLine() DAAL_C11_OVERRIDE
159  {
160  _rawLineLength = 0;
161  while(!iseof())
162  {
163  int readLen = 0;
164  if(!readLine(_rawLineBuffer + _rawLineLength, _rawLineBufferLen - _rawLineLength, readLen))
165  return services::Status(services::ErrorOnFileRead);
166 
167  if (readLen <= 0)
168  {
169  _rawLineLength = 0;
170  break;
171  }
172  _rawLineLength += readLen;
173  if (_rawLineBuffer[_rawLineLength - 1] == '\n' || _rawLineBuffer[_rawLineLength - 1] == '\r')
174  {
175  while (_rawLineLength > 0 && (_rawLineBuffer[_rawLineLength - 1] == '\n' || _rawLineBuffer[_rawLineLength - 1] == '\r'))
176  {
177  _rawLineLength--;
178  }
179  _rawLineBuffer[_rawLineLength] = '\0';
180  break;
181  }
182  if(!super::enlargeBuffer())
183  return services::Status(services::ErrorMemoryAllocationFailed);
184  }
185  return services::Status();
186  }
187 
188 private:
189  services::Status initialize(const std::string &fileName)
190  {
191  _file = NULL;
192  _fileName = fileName;
193  _fileBufferLen = (int)INITIAL_FILE_BUFFER_LENGTH;
194  _fileBufferPos = _fileBufferLen;
195  _fileBuffer = NULL;
196 
197  #if (defined(_MSC_VER)&&(_MSC_VER >= 1400))
198  errno_t error;
199  error = fopen_s( &_file, fileName.c_str(), "r" );
200  if (error != 0 || !_file)
201  { return services::throwIfPossible(services::ErrorOnFileOpen); }
202  #else
203  _file = fopen( (char*)(fileName.c_str()), "r" );
204  if (!_file)
205  { return services::throwIfPossible(services::ErrorOnFileOpen); }
206  #endif
207 
208  _fileBuffer = (char *)daal::services::daal_malloc(_fileBufferLen);
209  if (!_fileBuffer)
210  {
211  fclose(_file);
212  _file = NULL;
213  return services::throwIfPossible(services::ErrorMemoryAllocationFailed);
214  }
215 
216  return services::Status();
217  }
218 
219 protected:
220  std::string _fileName;
221 
222  FILE *_file;
223 
224  char *_fileBuffer;
225  int _fileBufferLen;
226  int _fileBufferPos;
227 
228 private:
229  static const size_t INITIAL_FILE_BUFFER_LENGTH = 1048576;
230 };
233 } // namespace interface1
234 
235 using interface1::FileDataSource;
236 
237 } // namespace data_management
238 } // namespace daal
239 
240 #endif
daal
Definition: algorithm_base_common.h:31
daal::data_management::interface1::CsvDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: csv_data_source.h:305
daal::data_management::interface1::FileDataSource::FileDataSource
FileDataSource(const std::string &fileName, CsvDataSourceOptions options, size_t initialMaxRows=10)
Definition: file_data_source.h:88
daal::data_management::interface1::DataSourceIface::DictionaryCreationFlag
DictionaryCreationFlag
Specifies whether a Data Dictionary is created from the context of a Data Source. ...
Definition: data_source.h:69
daal::services::ErrorMemoryAllocationFailed
Definition: error_indexes.h:146
daal::data_management::interface1::DataSourceIface::notAllocateNumericTable
Definition: data_source.h:81
daal::services::ErrorOnFileOpen
Definition: error_indexes.h:379
daal::data_management::interface1::DataSourceIface::doAllocateNumericTable
Definition: data_source.h:82
daal::data_management::interface1::DataSourceTemplate
Implements the abstract DataSourceIface interface.
Definition: data_source.h:463
daal::data_management::interface1::CsvDataSource
Specifies methods to access data stored in files.
Definition: csv_data_source.h:97
daal::services::daal_malloc
DAAL_EXPORT void * daal_malloc(size_t size, size_t alignment=DAAL_MALLOC_DEFAULT_ALIGNMENT)
daal::services::ErrorOnFileRead
Definition: error_indexes.h:380
daal::data_management::interface1::DataSourceIface::DataSourceStatus
DataSourceStatus
Specifies the status of the Data Source.
Definition: data_source.h:57
daal::services::daal_free
DAAL_EXPORT void daal_free(void *ptr)
daal::data_management::interface1::DataSourceIface::readyForLoad
Definition: data_source.h:59
daal::data_management::interface1::CsvDataSourceOptions
Options of CSV data source.
Definition: csv_data_source.h:47
daal::data_management::interface1::DataSourceIface::notDictionaryFromContext
Definition: data_source.h:71
daal::data_management::interface1::DataSourceIface::endOfData
Definition: data_source.h:61
daal::data_management::interface1::FileDataSource::FileDataSource
FileDataSource(const std::string &fileName, DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable=DataSource::notAllocateNumericTable, DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext=DataSource::notDictionaryFromContext, size_t initialMaxRows=10)
Definition: file_data_source.h:73
daal::data_management::interface1::DataSourceIface::NumericTableAllocationFlag
NumericTableAllocationFlag
Specifies whether a Numeric Table is allocated inside of the Data Source object.
Definition: data_source.h:79
daal::data_management::interface1::FileDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: file_data_source.h:104
daal::data_management::interface1::FileDataSource::getStatus
DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
Definition: file_data_source.h:112
daal::data_management::interface1::FileDataSource
Specifies methods to access data stored in files.
Definition: file_data_source.h:52

For more complete information about compiler optimizations, see our Optimization Notice.