48 #ifndef __FILE_DATA_SOURCE_H__
49 #define __FILE_DATA_SOURCE_H__
53 #include "services/daal_memory.h"
54 #include "data_management/data_source/data_source.h"
55 #include "data_management/data_source/csv_data_source.h"
56 #include "data_management/data/data_dictionary.h"
57 #include "data_management/data/numeric_table.h"
58 #include "data_management/data/homogen_numeric_table.h"
62 namespace data_management
// FileDataSource: a data source that reads its input from a file, built on top of
// CsvDataSource.
// Template parameters:
//   _featureManager        - forwarded to CsvDataSource; also exposed as the FeatureManager typedef.
//   _summaryStatisticsType - forwarded to CsvDataSource; defaults to DAAL_SUMMARY_STATISTICS_TYPE.
76 template<
typename _featureManager,
typename _summaryStatisticsType = DAAL_SUMMARY_STATISTICS_TYPE >
77 class FileDataSource :
public CsvDataSource< _featureManager, _summaryStatisticsType >
// The base class depends on the template parameters, so its members are not found by
// unqualified name lookup. The using-declarations below make them usable by plain name.
80 using CsvDataSource<_featureManager,_summaryStatisticsType>::checkDictionary;
81 using CsvDataSource<_featureManager,_summaryStatisticsType>::checkNumericTable;
82 using CsvDataSource<_featureManager,_summaryStatisticsType>::freeNumericTable;
83 using CsvDataSource<_featureManager,_summaryStatisticsType>::_dict;
84 using CsvDataSource<_featureManager,_summaryStatisticsType>::_initialMaxRows;
85 using CsvDataSource<_featureManager,_summaryStatisticsType>::loadDataBlock;
87 using CsvDataSource<_featureManager,_summaryStatisticsType>::featureManager;
89 using CsvDataSource<_featureManager,_summaryStatisticsType>::createDictionaryFromContext;
// Alias for the feature manager type this data source was instantiated with.
94 typedef _featureManager FeatureManager;
// Homogeneous numeric table over DAAL_DATA_TYPE.
// NOTE(review): presumably the table type allocated by default - confirm against the base class.
97 typedef data_management::HomogenNumericTable<DAAL_DATA_TYPE> DefaultNumericTableType;
// Raw line buffer state and its growth helper, inherited from CsvDataSource and used by
// the readLine() override in this class.
99 using CsvDataSource<_featureManager,_summaryStatisticsType>::_rawLineBuffer;
100 using CsvDataSource<_featureManager,_summaryStatisticsType>::_rawLineBufferLen;
101 using CsvDataSource<_featureManager,_summaryStatisticsType>::_rawLineLength;
102 using CsvDataSource<_featureManager,_summaryStatisticsType>::enlargeBuffer;
// Constructor: opens fileName for reading in text mode ("r") and allocates the file read buffer.
//   fileName                      - path of the file to open; a copy is stored in _fileName.
//   doAllocateNumericTable        - forwarded to the CsvDataSource constructor
//                                   (default: DataSource::notAllocateNumericTable).
//   doCreateDictionaryFromContext - forwarded to the CsvDataSource constructor
//                                   (default: DataSource::notDictionaryFromContext).
//   initialMaxRows                - forwarded to the CsvDataSource constructor (default: 10).
// A failure to open the file is reported by adding ErrorOnFileOpen to this->_status, wrapped in
// services::throwIfPossible.
114 FileDataSource(
const std::string &fileName,
115 DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable = DataSource::notAllocateNumericTable,
116 DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext = DataSource::notDictionaryFromContext,
117 size_t initialMaxRows = 10):
118 CsvDataSource<_featureManager,_summaryStatisticsType>(doAllocateNumericTable, doCreateDictionaryFromContext, initialMaxRows), _fileBuffer(NULL)
120 _fileName = fileName;
// MSVC with _MSC_VER >= 1400 opens the file with fopen_s. The fopen call at listing line 128 is
// presumably the #else branch for other compilers - the #else/#endif lines and the condition
// guarding listing line 130 are not visible in this listing.
122 #if (defined(_MSC_VER)&&(_MSC_VER >= 1400))
124 error = fopen_s( &_file, fileName.c_str(),
"r" );
125 if(error != 0 || !_file)
126 this->_status.add(services::throwIfPossible(services::Status(services::ErrorOnFileOpen)));
128 _file = fopen( (
char*)(fileName.c_str()),
"r" );
130 this->_status.add(services::throwIfPossible(services::Status(services::ErrorOnFileOpen)));
// Read buffer of 1048576 bytes (1 MiB). The position starts equal to the length, which makes the
// buffer look fully consumed, so the first readLine() call takes the refill path.
133 _fileBufferLen = 1048576;
134 _fileBufferPos = _fileBufferLen;
// NOTE(review): no check of the daal_malloc result is visible in this listing - confirm that an
// allocation failure is handled in the lines not shown.
135 _fileBuffer = (
char *)daal::services::daal_malloc(_fileBufferLen);
// Releases the read buffer that the constructor obtained from daal_malloc.
// NOTE(review): the enclosing function header is not visible in this listing - presumably the
// destructor. No fclose(_file) is visible here either; confirm the file handle is closed.
142 daal::services::daal_free( _fileBuffer );
// Builds the data dictionary by delegating to CsvDataSource::createDictionaryFromContext(), then
// rewinds the file so that subsequent reads start again from the beginning.
// NOTE(review): the return statement is not visible in this listing - presumably returns s.
146 services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
148 services::Status s = CsvDataSource<_featureManager,_summaryStatisticsType>::createDictionaryFromContext();
// Seek back to offset 0. NOTE(review): the fseek return value is not checked.
149 fseek(_file, 0, SEEK_SET);
// Mark the buffer as fully consumed so stale buffered bytes are discarded and the next
// readLine() refills it from the rewound file.
150 _fileBufferPos = _fileBufferLen;
// Reports the state of the data source: endOfData once iseof() is true, readyForLoad otherwise.
154 DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
156 return (iseof() ? DataSourceIface::endOfData : DataSourceIface::readyForLoad);
// Returns true when no more data is available: the buffered data is exhausted (the position has
// reached the end of the buffer, or sits on the '\0' that readLine writes after a short fread)
// and the stream's end-of-file indicator is set.
// NOTE(review): if opening the file failed and no exception was thrown, _file may be null here,
// and feof on a null stream is undefined - confirm callers check the status first.
160 bool iseof() const DAAL_C11_OVERRIDE
162 return ((_fileBufferPos == _fileBufferLen || _fileBuffer[_fileBufferPos] ==
'\0') && feof(_file));
// Copies bytes of the next line from the file read buffer into `buffer`, refilling the read
// buffer from the file when it is exhausted.
//   buffer - destination for the line's bytes.
//   count  - capacity of `buffer`; the loop runs only while pos + 1 < count.
//   pos    - in/out index into `buffer`; its initialization and increment are in lines not
//            visible in this listing.
// NOTE(review): the return statements and several body lines are missing from this listing;
// the caller treats a false result as a file read error.
165 bool readLine(
char *buffer,
int count,
int& pos)
// The loop stops with at least one byte of `buffer` left unused.
// NOTE(review): presumably reserved for a terminating '\0' - confirm.
169 while (pos + 1 < count)
// Unconsumed data is available in the read buffer. A '\0' byte is treated as the end of the
// buffered data. NOTE(review): a NUL byte embedded in the file would therefore also end the
// buffered data early - confirm this is acceptable for the supported inputs.
171 if (_fileBufferPos < _fileBufferLen && _fileBuffer[_fileBufferPos] !=
'\0')
173 buffer[pos] = _fileBuffer[_fileBufferPos];
// Checks whether the byte just copied is a newline. This relies on pos having been advanced
// in lines not shown here (presumably pos++ and _fileBufferPos++).
176 if (buffer[pos - 1] ==
'\n')
// Refill path: read up to _fileBufferLen bytes from the file into the read buffer.
184 const int readLen = (int)fread(_fileBuffer, 1, _fileBufferLen, _file);
// A short read leaves stale bytes after the new data, so the end of valid data is marked with
// '\0'. The index is in bounds because readLen < _fileBufferLen.
185 if (readLen < _fileBufferLen)
187 _fileBuffer[readLen] =
'\0';
// Reads the next line of the file into the inherited raw line buffer (_rawLineBuffer) and strips
// its trailing end-of-line characters.
// Returns:
//   ErrorOnFileRead             - when the buffer-level readLine overload returns false.
//   ErrorMemoryAllocationFailed - NOTE(review): presumably when enlargeBuffer() fails; the
//                                 condition is not visible in this listing.
//   an empty (success) Status   - otherwise.
// NOTE(review): the surrounding loop and the buffer-growth lines are missing from this listing.
200 services::Status readLine() DAAL_C11_OVERRIDE
// Append to the data already in the raw line buffer: write at offset _rawLineLength, with the
// remaining capacity as the limit. readLen is the number of bytes added by this call.
206 if(!readLine(_rawLineBuffer + _rawLineLength, _rawLineBufferLen - _rawLineLength, readLen))
207 return services::Status(services::ErrorOnFileRead);
214 _rawLineLength += readLen;
// An end-of-line character at the end of the buffer means a complete line has been read.
// NOTE(review): if _rawLineLength is 0 here, index _rawLineLength - 1 is out of range - confirm
// the missing lines rule that out.
215 if (_rawLineBuffer[_rawLineLength - 1] ==
'\n' || _rawLineBuffer[_rawLineLength - 1] ==
'\r')
// Trim all trailing '\n' and '\r' characters; the loop body (presumably a decrement of
// _rawLineLength) is not visible in this listing.
217 while (_rawLineLength > 0 && (_rawLineBuffer[_rawLineLength - 1] ==
'\n' || _rawLineBuffer[_rawLineLength - 1] ==
'\r'))
// Terminate the line at its current length.
221 _rawLineBuffer[_rawLineLength] =
'\0';
225 return services::Status(services::ErrorMemoryAllocationFailed);
227 return services::Status();
// Path of the file, copied from the constructor's fileName argument.
231 std::string _fileName;
// Makes interface1::FileDataSource available by its unqualified name in the enclosing namespace.
241 using interface1::FileDataSource;
daal
Definition: algorithm_base_common.h:57
daal::data_management::interface1::CsvDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: csv_data_source.h:275
daal::data_management::interface1::DataSourceIface::DictionaryCreationFlag
DictionaryCreationFlag
Specifies whether a Data Dictionary is created from the context of a Data Source. ...
Definition: data_source.h:95
daal::services::ErrorMemoryAllocationFailed
Definition: error_indexes.h:170
daal::data_management::interface1::DataSourceIface::notAllocateNumericTable
Definition: data_source.h:107
daal::services::ErrorOnFileOpen
Definition: error_indexes.h:395
daal::data_management::interface1::DataSourceIface::doAllocateNumericTable
Definition: data_source.h:108
daal::data_management::interface1::CsvDataSource
Specifies methods to access data stored in files.
Definition: csv_data_source.h:74
daal::services::daal_malloc
DAAL_EXPORT void * daal_malloc(size_t size, size_t alignment=DAAL_MALLOC_DEFAULT_ALIGNMENT)
daal::data_management::interface1::FileDataSource::FeatureManager
_featureManager FeatureManager
Definition: file_data_source.h:94
daal::services::ErrorOnFileRead
Definition: error_indexes.h:396
daal::data_management::interface1::DataSourceIface::DataSourceStatus
DataSourceStatus
Specifies the status of the Data Source.
Definition: data_source.h:83
daal::services::daal_free
DAAL_EXPORT void daal_free(void *ptr)
daal::data_management::interface1::DataSourceIface::readyForLoad
Definition: data_source.h:85
daal::data_management::interface1::DataSourceIface::notDictionaryFromContext
Definition: data_source.h:97
daal::data_management::interface1::DataSourceIface::endOfData
Definition: data_source.h:87
daal::data_management::interface1::FileDataSource::FileDataSource
FileDataSource(const std::string &fileName, DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable=DataSource::notAllocateNumericTable, DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext=DataSource::notDictionaryFromContext, size_t initialMaxRows=10)
Definition: file_data_source.h:114
daal::data_management::interface1::DataSourceIface::NumericTableAllocationFlag
NumericTableAllocationFlag
Specifies whether a Numeric Table is allocated inside of the Data Source object.
Definition: data_source.h:105
daal::data_management::interface1::FileDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: file_data_source.h:146
daal::data_management::interface1::FileDataSource::getStatus
DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
Definition: file_data_source.h:154
daal::data_management::interface1::FileDataSource
Specifies methods to access data stored in files.
Definition: file_data_source.h:77