22 #ifndef __FILE_DATA_SOURCE_H__
23 #define __FILE_DATA_SOURCE_H__
27 #include "services/daal_memory.h"
28 #include "data_management/data_source/data_source.h"
29 #include "data_management/data_source/csv_data_source.h"
30 #include "data_management/data/data_dictionary.h"
31 #include "data_management/data/numeric_table.h"
32 #include "data_management/data/homogen_numeric_table.h"
36 namespace data_management
// FileDataSource: class template for accessing data stored in files, layered
// on top of CsvDataSource.
// Template parameters:
//   FeatureManager        - passed through unchanged to the CsvDataSource base.
//   SummaryStatisticsType - passed through to the base; defaults to
//                           DAAL_SUMMARY_STATISTICS_TYPE.
51 template<
typename FeatureManager,
typename SummaryStatisticsType = DAAL_SUMMARY_STATISTICS_TYPE>
52 class FileDataSource :
public CsvDataSource<FeatureManager, SummaryStatisticsType>
// Shorthand for the base class; used below to reach base constructors and methods.
55 typedef CsvDataSource<FeatureManager, SummaryStatisticsType> super;
// The base depends on the template parameters, so its members are not found by
// unqualified lookup; these using-declarations make the raw line buffer state
// nameable directly inside this class.
58 using super::_rawLineBuffer;
59 using super::_rawLineBufferLen;
60 using super::_rawLineLength;
// Constructs a data source bound to the file named fileName.
//   fileName                      - name of the file, handed to initialize().
//   doAllocateNumericTable        - forwarded to the base constructor;
//                                   defaults to DataSource::notAllocateNumericTable.
//   doCreateDictionaryFromContext - forwarded to the base constructor;
//                                   defaults to DataSource::notDictionaryFromContext.
//   initialMaxRows                - forwarded to the base constructor; defaults to 10.
73 FileDataSource(
const std::string &fileName,
74 DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable = DataSource::notAllocateNumericTable,
75 DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext = DataSource::notDictionaryFromContext,
76 size_t initialMaxRows = 10) :
77 super(doAllocateNumericTable, doCreateDictionaryFromContext, initialMaxRows)
// A constructor cannot return a Status, so the outcome of initialize() is
// OR-ed into _status instead.
// NOTE(review): _status is not declared in the visible part of this listing -
// presumably inherited from the base; confirm.
79 _status |= initialize(fileName);
// Constructs a data source bound to the file named fileName, configured through
// a CsvDataSourceOptions value rather than individual flags.
//   fileName       - name of the file, handed to initialize().
//   options        - forwarded to the base constructor.
//   initialMaxRows - forwarded to the base constructor; defaults to 10.
88 FileDataSource(
const std::string &fileName,
89 CsvDataSourceOptions options,
90 size_t initialMaxRows = 10) :
91 super(options, initialMaxRows)
// Same pattern as the flag-based constructor: the result of initialize() is
// accumulated into _status because a constructor has no return value.
93 _status |= initialize(fileName);
// Destructor: releases the read buffer obtained in initialize().
// NOTE(review): the embedded numbering jumps from 96 to 100, so statements may
// be missing from this listing; no fclose(_file) is visible here - confirm the
// file handle is closed in the real source.
96 virtual ~FileDataSource()
// _fileBuffer was allocated with daal_malloc, so it is released with daal_free.
100 daal::services::daal_free( _fileBuffer );
// Delegates dictionary creation to the base class, then resets the read
// position of this data source back to the beginning of the file.
// NOTE(review): the statement that returns `s` is not visible in this listing.
104 services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
106 services::Status s = super::createDictionaryFromContext();
// Rewind the underlying file; the return value of fseek is not checked here.
107 fseek(_file, 0, SEEK_SET);
// Setting the position equal to the buffer length makes the
// "_fileBufferPos < _fileBufferLen" test in readLine(char*, int, int&) fail,
// i.e. the buffered data is treated as consumed - presumably so the next read
// refills the buffer from the rewound file.
108 _fileBufferPos = _fileBufferLen;
// Reports the state of the data source: DataSourceIface::endOfData when iseof()
// is true, DataSourceIface::readyForLoad otherwise.
112 DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
114 return (iseof() ? DataSourceIface::endOfData : DataSourceIface::readyForLoad);
// Returns true when no more data is available: the in-memory buffer has nothing
// left (position at the end of the buffer, or the current character is '\0')
// AND the C stream reports end-of-file.
118 bool iseof() const DAAL_C11_OVERRIDE
// Short-circuit evaluation matters: _fileBuffer[_fileBufferPos] is only read
// when _fileBufferPos != _fileBufferLen.
120 return ((_fileBufferPos == _fileBufferLen || _fileBuffer[_fileBufferPos] ==
'\0') && feof(_file));
// Copies characters from the internal file buffer into `buffer`, refilling the
// internal buffer from the file with fread.
//   buffer - destination for the characters.
//   count  - bounds the loop: it runs only while pos + 1 < count.
//   pos    - in/out index into `buffer` (taken by reference).
// NOTE(review): the embedded numbering skips lines inside this body (e.g.
// 131 -> 134, 134 -> 142, 145 -> 158), so increments, branch structure and the
// return statement are not visible in this listing; the comments below cover
// only the statements that are shown.
123 bool readLine(
char *buffer,
int count,
int& pos)
// Stop while there is still one slot left in `buffer` beyond pos.
127 while (pos + 1 < count)
// Buffered data is available when the position is inside the buffer and the
// current character is not the '\0' marker.
129 if (_fileBufferPos < _fileBufferLen && _fileBuffer[_fileBufferPos] !=
'\0')
131 buffer[pos] = _fileBuffer[_fileBufferPos];
// Tests the character just before pos for a newline - presumably pos was
// advanced on a line missing from this listing; confirm.
134 if (buffer[pos - 1] ==
'\n')
// Refill: read up to _fileBufferLen bytes from the file into the buffer.
142 const int readLen = (int)fread(_fileBuffer, 1, _fileBufferLen, _file);
// A short read leaves part of the buffer unfilled; a '\0' is written right
// after the last byte read, matching the '\0' checks above and in iseof().
143 if (readLen < _fileBufferLen)
145 _fileBuffer[readLen] =
'\0';
// Reads data into the base class's raw line buffer (_rawLineBuffer) and returns
// a services::Status:
//   ErrorOnFileRead             - the low-level readLine(char*, int, int&) returned false;
//   ErrorMemoryAllocationFailed - super::enlargeBuffer() returned false;
//   a default-constructed Status at the end of the visible body.
// NOTE(review): the embedded numbering skips lines (158 -> 164, 165 -> 172,
// 175 -> 179), so the loop structure, the declaration of readLen and any guard
// on an empty read are not visible in this listing.
158 services::Status readLine() DAAL_C11_OVERRIDE
// Append after the _rawLineLength characters already stored, passing the
// remaining capacity as the limit; readLen is passed as the `pos` reference.
164 if(!readLine(_rawLineBuffer + _rawLineLength, _rawLineBufferLen - _rawLineLength, readLen))
165 return services::Status(services::ErrorOnFileRead);
172 _rawLineLength += readLen;
// A trailing '\n' or '\r' is tested as the end-of-line condition.
// NOTE(review): this indexes _rawLineLength - 1; confirm the real source
// guarantees _rawLineLength > 0 at this point.
173 if (_rawLineBuffer[_rawLineLength - 1] ==
'\n' || _rawLineBuffer[_rawLineLength - 1] ==
'\r')
// Loops over every trailing '\n' / '\r' character (the loop body is not
// visible in this listing), guarding against running past the buffer start.
175 while (_rawLineLength > 0 && (_rawLineBuffer[_rawLineLength - 1] ==
'\n' || _rawLineBuffer[_rawLineLength - 1] ==
'\r'))
// Terminate the stored line with '\0' at index _rawLineLength.
179 _rawLineBuffer[_rawLineLength] =
'\0';
// Ask the base class to enlarge the raw line buffer; failure is reported as an
// allocation error.
182 if(!super::enlargeBuffer())
183 return services::Status(services::ErrorMemoryAllocationFailed);
185 return services::Status();
// Prepares the data source for reading: records the file name, sets up the
// buffer bookkeeping, opens the file in "r" mode and allocates the read buffer.
//   fileName - name of the file to open.
// Returns (via services::throwIfPossible) ErrorOnFileOpen if the file cannot be
// opened and ErrorMemoryAllocationFailed on the allocation-failure path;
// otherwise a default-constructed Status.
// NOTE(review): the #else / #endif lines and some conditions are not visible in
// this listing (the embedded numbering skips lines).
189 services::Status initialize(
const std::string &fileName)
192 _fileName = fileName;
// The size_t constant is narrowed to int because the buffer length and position
// are compared against the int result of fread elsewhere in this class.
193 _fileBufferLen = (int)INITIAL_FILE_BUFFER_LENGTH;
// Position == length means "no buffered data left" (see the
// _fileBufferPos < _fileBufferLen test in readLine(char*, int, int&)).
194 _fileBufferPos = _fileBufferLen;
// MSVC 2005 and later (_MSC_VER >= 1400): open with fopen_s.
197 #if (defined(_MSC_VER)&&(_MSC_VER >= 1400))
199 error = fopen_s( &_file, fileName.c_str(),
"r" );
200 if (error != 0 || !_file)
201 {
return services::throwIfPossible(services::ErrorOnFileOpen); }
// Other toolchains: plain fopen.
203 _file = fopen( (
char*)(fileName.c_str()),
"r" );
205 {
return services::throwIfPossible(services::ErrorOnFileOpen); }
// Allocate the read buffer; it is released with daal_free in the destructor.
208 _fileBuffer = (
char *)daal::services::daal_malloc(_fileBufferLen);
213 return services::throwIfPossible(services::ErrorMemoryAllocationFailed);
216 return services::Status();
// Name of the file, stored by initialize().
220 std::string _fileName;
// Size in bytes requested for the read buffer in initialize():
// 1048576 = 1024 * 1024.
229 static const size_t INITIAL_FILE_BUFFER_LENGTH = 1048576;
235 using interface1::FileDataSource;
daal
Definition: algorithm_base_common.h:31
daal::data_management::interface1::CsvDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: csv_data_source.h:305
daal::data_management::interface1::FileDataSource::FileDataSource
FileDataSource(const std::string &fileName, CsvDataSourceOptions options, size_t initialMaxRows=10)
Definition: file_data_source.h:88
daal::data_management::interface1::DataSourceIface::DictionaryCreationFlag
DictionaryCreationFlag
Specifies whether a Data Dictionary is created from the context of a Data Source. ...
Definition: data_source.h:69
daal::services::ErrorMemoryAllocationFailed
Definition: error_indexes.h:147
daal::data_management::interface1::DataSourceIface::notAllocateNumericTable
Definition: data_source.h:81
daal::services::ErrorOnFileOpen
Definition: error_indexes.h:382
daal::data_management::interface1::DataSourceIface::doAllocateNumericTable
Definition: data_source.h:82
daal::data_management::interface1::DataSourceTemplate
Implements the abstract DataSourceIface interface.
Definition: data_source.h:463
daal::data_management::interface1::CsvDataSource
Specifies methods to access data stored in files.
Definition: csv_data_source.h:97
daal::services::daal_malloc
DAAL_EXPORT void * daal_malloc(size_t size, size_t alignment=DAAL_MALLOC_DEFAULT_ALIGNMENT)
daal::services::ErrorOnFileRead
Definition: error_indexes.h:383
daal::data_management::interface1::DataSourceIface::DataSourceStatus
DataSourceStatus
Specifies the status of the Data Source.
Definition: data_source.h:57
daal::services::daal_free
DAAL_EXPORT void daal_free(void *ptr)
daal::data_management::interface1::DataSourceIface::readyForLoad
Definition: data_source.h:59
daal::data_management::interface1::CsvDataSourceOptions
Options of CSV data source.
Definition: csv_data_source.h:47
daal::data_management::interface1::DataSourceIface::notDictionaryFromContext
Definition: data_source.h:71
daal::data_management::interface1::DataSourceIface::endOfData
Definition: data_source.h:61
daal::data_management::interface1::FileDataSource::FileDataSource
FileDataSource(const std::string &fileName, DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable=DataSource::notAllocateNumericTable, DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext=DataSource::notDictionaryFromContext, size_t initialMaxRows=10)
Definition: file_data_source.h:73
daal::data_management::interface1::DataSourceIface::NumericTableAllocationFlag
NumericTableAllocationFlag
Specifies whether a Numeric Table is allocated inside of the Data Source object.
Definition: data_source.h:79
daal::data_management::interface1::FileDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: file_data_source.h:104
daal::data_management::interface1::FileDataSource::getStatus
DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
Definition: file_data_source.h:112
daal::data_management::interface1::FileDataSource
Specifies methods to access data stored in files.
Definition: file_data_source.h:52