48 #ifndef __CSV_DATA_SOURCE_H__ 49 #define __CSV_DATA_SOURCE_H__ 51 #include "services/daal_memory.h" 52 #include "data_management/data_source/data_source.h" 53 #include "data_management/data/data_dictionary.h" 54 #include "data_management/data/numeric_table.h" 55 #include "data_management/data/homogen_numeric_table.h" 59 namespace data_management
// Data source class template. It is parameterized by the feature manager type
// and by a summary-statistics type that defaults to DAAL_SUMMARY_STATISTICS_TYPE,
// and it derives from DataSourceTemplate specialized on
// HomogenNumericTable<DAAL_DATA_TYPE>.
73 template<
typename _featureManager,
typename _summaryStatisticsType = DAAL_SUMMARY_STATISTICS_TYPE >
74 class CsvDataSource :
public DataSourceTemplate<data_management::HomogenNumericTable<DAAL_DATA_TYPE>, _summaryStatisticsType>
// Make the DataSource helpers and members used by the methods below visible
// in this class's scope.
77 using DataSource::checkDictionary;
78 using DataSource::checkNumericTable;
79 using DataSource::freeNumericTable;
80 using DataSource::_dict;
81 using DataSource::_initialMaxRows;
// Alias for the feature manager type passed as the first template argument.
86 typedef _featureManager FeatureManager;
// Numeric table type handed to the DataSourceTemplate base class.
89 typedef data_management::HomogenNumericTable<DAAL_DATA_TYPE> DefaultNumericTableType;
// Feature manager instance; the methods below use it to parse raw lines
// (parseRowIn, parseRowAsDictionary) and to obtain the numeric table column count.
91 FeatureManager featureManager;
// Constructor.
//   doAllocateNumericTable        - forwarded to the DataSourceTemplate base; defaults to notAllocateNumericTable.
//   doCreateDictionaryFromContext - forwarded to the DataSourceTemplate base; defaults to notDictionaryFromContext.
//   initialMaxRows                - stored in _initialMaxRows; defaults to 10.
// Allocates the raw line buffer and clears the context-dictionary flag.
102 CsvDataSource(DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable = DataSource::notAllocateNumericTable,
103 DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext = DataSource::notDictionaryFromContext,
104 size_t initialMaxRows = 10):
105 DataSourceTemplate<DefaultNumericTableType, _summaryStatisticsType>(doAllocateNumericTable, doCreateDictionaryFromContext),
106 _rawLineBuffer(NULL),
// Start with a 1024-byte line buffer.
// NOTE(review): the daal_malloc result is not checked for NULL in the lines
// visible here - confirm whether allocation failure is handled elsewhere.
109 _rawLineBufferLen = 1024;
110 _rawLineBuffer = (
char *)daal::services::daal_malloc(_rawLineBufferLen);
// The flag is only set to true by createDictionaryFromContext().
112 _contextDictFlag =
false;
113 _initialMaxRows = initialMaxRows;
// Cleanup: frees the raw line buffer, then calls the base-class freeNumericTable().
// NOTE(review): the enclosing declaration is not visible in this listing
// (presumably the destructor) - confirm against the full header.
118 daal::services::daal_free( _rawLineBuffer );
119 DataSourceTemplate<DefaultNumericTableType, _summaryStatisticsType>::freeNumericTable();
// Returns a non-const reference to the featureManager member.
126 FeatureManager &getFeatureManager()
128 return featureManager;
// Returns the numeric table column count as reported by the feature manager.
132 size_t getNumericTableNumberOfColumns() DAAL_C11_OVERRIDE
134 return featureManager.getNumericTableNumberOfColumns();
// Sets the dictionary through DataSource::setDictionary and then passes the
// same dictionary pointer to the feature manager.
// NOTE(review): in the lines visible here the feature manager is updated
// without inspecting `s`; the return statement is not part of this listing -
// confirm the intended behavior when the base call fails.
137 services::Status setDictionary(DataSourceDictionary *dict) DAAL_C11_OVERRIDE
139 services::Status s = DataSource::setDictionary(dict);
140 featureManager.setFeatureDetailsFromDictionary(dict);
// Loads rows into `nt` without a caller-supplied row limit: rows are first read
// into a sequence of temporary tables of doubling size and then copied into `nt`.
// Failures are recorded in this->_status.
// NOTE(review): several lines of this method (status guards, the `nrows` and
// `pos` declarations/updates, the loop exit and the return) are not present in
// this listing - confirm details against the full header.
145 size_t loadDataBlock(NumericTable* nt) DAAL_C11_OVERRIDE
// Validate the dictionary and the destination table before reading anything.
147 services::Status s = checkDictionary();
150 this->_status.add(services::throwIfPossible(s));
153 s = checkInputNumericTable(nt);
156 this->_status.add(services::throwIfPossible(s));
// Initial chunk size: _initialMaxRows when it is positive, otherwise 10.
160 size_t maxRows = (_initialMaxRows > 0 ? _initialMaxRows : 10);
162 const size_t ncols = getNumericTableNumberOfColumns();
// Temporary tables are kept in `tables` until they are merged into `nt`.
163 DataCollection tables;
// Each iteration doubles the chunk size.
164 for( ;; maxRows *= 2)
// Allocate a temporary homogeneous table created with (ncols, maxRows).
166 NumericTablePtr ntCurrent = HomogenNumericTable<DAAL_DATA_TYPE>::create(ncols, maxRows, NumericTableIface::doAllocate, &s);
// Allocation failure is reported as ErrorNumericTableNotAllocated
// (the guarding condition is on a line not shown here).
169 this->_status.add(services::throwIfPossible(services::Status(services::ErrorNumericTableNotAllocated)));
// Keep the chunk and fill it through the (maxRows, nt) overload.
172 tables.push_back(ntCurrent);
173 const size_t rows = loadDataBlock(maxRows, ntCurrent.get());
// Prepare the destination table for `nrows` rows.
179 s = resetNumericTable(nt, nrows);
182 this->_status.add(services::throwIfPossible(s));
186 BlockDescriptor<DAAL_DATA_TYPE> blockCurrent, block;
// Copy every temporary table into `nt`, writing at row `pos`.
188 for (
size_t i = 0; i < tables.size(); i++)
190 NumericTable *ntCurrent = (NumericTable*)(tables[i].
get());
191 size_t rows = ntCurrent->getNumberOfRows();
// Source rows are acquired read-only, destination rows write-only.
196 ntCurrent->getBlockOfRows(0, rows, readOnly, blockCurrent);
197 nt->getBlockOfRows(pos, rows, writeOnly, block);
// Copy rows * ncols elements of DAAL_DATA_TYPE in one call.
199 services::daal_memcpy_s(block.getBlockPtr(), rows * ncols *
sizeof(DAAL_DATA_TYPE), blockCurrent.getBlockPtr(), rows * ncols *
sizeof(DAAL_DATA_TYPE));
201 ntCurrent->releaseBlockOfRows(blockCurrent);
202 nt->releaseBlockOfRows(block);
// Merge the chunk's statistics into `nt`; the last argument is true only
// while pos == 0.
204 DataSourceTemplate<DefaultNumericTableType, _summaryStatisticsType>::combineStatistics( ntCurrent, nt, pos == 0);
// Loads at most maxRows rows into `nt` by delegating to the four-argument
// overload with rowOffset = 0 and fullRows = maxRows, then resizes `nt` to the
// value that overload returned.
// NOTE(review): the return statement is not part of this listing.
210 size_t loadDataBlock(
size_t maxRows, NumericTable* nt) DAAL_C11_OVERRIDE
212 size_t nLines = loadDataBlock(maxRows, 0, maxRows, nt);
213 nt->resize( nLines );
// Reads at most maxRows lines and parses them into rows of `nt`, starting at
// row rowOffset; `nt` is reset to fullRows rows first.
// Returns rowOffset + j, where j is the loop counter after the read loop.
// Failures are recorded in this->_status.
// NOTE(review): the status guards, the declaration of `j`, the line-reading
// call and the action taken on the `if` below are on lines not present in this
// listing - confirm against the full header.
217 size_t loadDataBlock(
size_t maxRows,
size_t rowOffset,
size_t fullRows, NumericTable *nt) DAAL_C11_OVERRIDE
// Validate the dictionary and the destination table.
219 services::Status s = checkDictionary();
222 this->_status.add(services::throwIfPossible(s));
225 s = checkInputNumericTable(nt);
228 this->_status.add(services::throwIfPossible(s));
// The requested window [rowOffset, rowOffset + maxRows) must fit in fullRows.
232 if (rowOffset + maxRows > fullRows)
234 this->_status.add(services::throwIfPossible(services::ErrorIncorrectDataRange));
238 s = resetNumericTable(nt, fullRows);
241 this->_status.add(services::throwIfPossible(s));
// Read until maxRows rows were processed or the input is exhausted.
246 for(; j < maxRows && !iseof() ; j++ )
// A failed status or an empty line (_rawLineLength == 0) is treated specially.
249 if(!s || !_rawLineLength)
// Parse the current raw line into row (rowOffset + j) of `nt`.
251 featureManager.parseRowIn( _rawLineBuffer, _rawLineLength, this->_dict.get(), nt, rowOffset + j );
253 DataSourceTemplate<DefaultNumericTableType, _summaryStatisticsType>::updateStatistics( j, nt, rowOffset );
256 return rowOffset + j;
// Forwards to DataSource::loadDataBlock() and returns its result.
259 size_t loadDataBlock() DAAL_C11_OVERRIDE
261 return DataSource::loadDataBlock();
// Forwards to DataSource::loadDataBlock(maxRows) and returns its result.
264 size_t loadDataBlock(
size_t maxRows) DAAL_C11_OVERRIDE
266 return DataSource::loadDataBlock(maxRows);
// Forwards to DataSource::loadDataBlock(maxRows, rowOffset, fullRows) and
// returns its result.
269 size_t loadDataBlock(
size_t maxRows,
size_t rowOffset,
size_t fullRows) DAAL_C11_OVERRIDE
271 return DataSource::loadDataBlock(maxRows, rowOffset, fullRows);
// Creates a new DataSourceDictionary in _dict and fills it by letting the
// feature manager parse the current raw line as a dictionary description.
// Returns a default-constructed Status on the final visible path.
// NOTE(review): the guards in front of the two early returns, the declaration
// of `s` and the call that reads the line are not present in this listing.
275 services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
// Early exit reporting that a dictionary is already available
// (presumably taken when _dict is already set - confirm).
278 return services::throwIfPossible(services::Status(services::ErrorDictionaryAlreadyAvailable));
// Record that the dictionary was created from the context.
280 _contextDictFlag =
true;
282 _dict = DataSourceDictionary::create(&s);
// Early exit propagating a failed status.
289 return services::throwIfPossible(s);
292 featureManager.parseRowAsDictionary( _rawLineBuffer, _rawLineLength, this->_dict.get() );
293 return services::Status();
296 size_t getNumberOfAvailableRows() DAAL_C11_OVERRIDE
// Pure virtual hooks that a derived class must implement.
// iseof() is polled by the row-reading loop, which stops once it returns true.
302 virtual bool iseof()
const = 0;
// Reports its outcome as a services::Status.
// NOTE(review): presumably fills _rawLineBuffer and _rawLineLength with the
// next line - confirm against the derived classes.
303 virtual services::Status readLine() = 0;
// Prepares `nt` to receive newSize rows: copies the per-feature descriptors
// from the data source dictionary into the table's dictionary, resizes the
// table and marks it as non-normalized.
// Returns a default-constructed Status on the final visible path.
// NOTE(review): the declaration of `s` and the handling of a failed resize are
// on lines not present in this listing.
305 virtual services::Status resetNumericTable(NumericTable *nt,
const size_t newSize)
309 NumericTableDictionaryPtr ntDict = nt->getDictionarySharedPtr();
310 const size_t nFeatures = getNumericTableNumberOfColumns();
311 ntDict->setNumberOfFeatures(nFeatures);
// Copy the ntFeature entry of each data source feature into the table dictionary.
312 for (
size_t i = 0; i < nFeatures; i++)
313 ntDict->setFeature((*_dict)[i].ntFeature, i);
315 s = DataSourceTemplate<DefaultNumericTableType, _summaryStatisticsType>::resizeNumericTableImpl(newSize, nt);
321 nt->setNormalizationFlag(NumericTable::nonNormalized);
322 return services::Status();
// Validates a caller-supplied numeric table.
// Returns ErrorNullInputNumericTable on the first visible return (its guard is
// not part of this listing - presumably a null check on `nt`, confirm),
// ErrorIncorrectTypeOfInputNumericTable when the table layout is csrArray,
// and a default-constructed Status otherwise.
325 virtual services::Status checkInputNumericTable(
const NumericTable*
const nt)
const 329 return services::Status(services::ErrorNullInputNumericTable);
// Tables with the csrArray layout are rejected.
332 const NumericTable::StorageLayout layout = nt->getDataLayout();
333 if (layout == NumericTable::csrArray)
335 return services::Status(services::ErrorIncorrectTypeOfInputNumericTable);
338 return services::Status();
// Grows the raw line buffer: allocates a buffer of twice the current length,
// copies the old contents into it, frees the old buffer and updates both the
// pointer and the stored length.
// NOTE(review): the enclosing function header and the statements executed when
// the allocation fails are not present in this listing.
// NOTE(review): the new length is a signed int computed as
// _rawLineBufferLen * 2, which can overflow for very large buffers - confirm
// whether an upper bound is enforced by the caller.
343 int newRawLineBufferLen = _rawLineBufferLen * 2;
344 char* newRawLineBuffer = (
char *)daal::services::daal_malloc( newRawLineBufferLen );
// Allocation failure check; the old buffer has not been touched at this point.
345 if(newRawLineBuffer == 0)
347 daal::services::daal_memcpy_s(newRawLineBuffer, newRawLineBufferLen, _rawLineBuffer, _rawLineBufferLen);
348 daal::services::daal_free( _rawLineBuffer );
349 _rawLineBuffer = newRawLineBuffer;
350 _rawLineBufferLen = newRawLineBufferLen;
// Buffer holding the raw line passed to the feature manager; allocated with
// daal_malloc in the constructor and released with daal_free.
355 char *_rawLineBuffer;
// Allocated size of _rawLineBuffer; set to 1024 in the constructor.
356 int _rawLineBufferLen;
// Set to false in the constructor and to true by createDictionaryFromContext().
359 bool _contextDictFlag;
363 using interface1::CsvDataSource;
daal::data_management::interface1::CsvDataSource::loadDataBlock
size_t loadDataBlock(size_t maxRows, size_t rowOffset, size_t fullRows, NumericTable *nt) DAAL_C11_OVERRIDE
Definition: csv_data_source.h:217
daal::data_management::interface1::DataSource::checkDictionary
services::Status checkDictionary()
Definition: data_source.h:385
daal::services::ErrorDictionaryAlreadyAvailable
Definition: error_indexes.h:177
daal::services::ErrorNullInputNumericTable
Definition: error_indexes.h:107
daal::services::interface1::Status
Class that holds the results of API calls. In case of API routine failure it contains the list of err...
Definition: error_handling.h:491
daal::data_management::interface1::CsvDataSource::setDictionary
services::Status setDictionary(DataSourceDictionary *dict) DAAL_C11_OVERRIDE
Definition: csv_data_source.h:137
daal
Definition: algorithm_base_common.h:57
daal::data_management::interface1::DataSourceIface::doAllocateNumericTable
Definition: data_source.h:108
daal::services::ErrorNumericTableNotAllocated
Definition: error_indexes.h:181
daal::data_management::interface1::DataSourceIface::NumericTableAllocationFlag
NumericTableAllocationFlag
Specifies whether a Numeric Table is allocated inside of the Data Source object.
Definition: data_source.h:105
daal::data_management::interface1::CsvDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: csv_data_source.h:275
daal::services::interface1::Status::add
Status & add(ErrorID id)
daal::data_management::interface1::DataCollection
Class that provides functionality of Collection container for objects derived from SerializationIface...
Definition: data_collection.h:71
daal::data_management::interface1::NumericTable::getDictionarySharedPtr
virtual NumericTableDictionaryPtr getDictionarySharedPtr() const DAAL_C11_OVERRIDE
Definition: numeric_table.h:658
daal::data_management::interface1::CsvDataSource::getNumberOfAvailableRows
size_t getNumberOfAvailableRows() DAAL_C11_OVERRIDE
Definition: csv_data_source.h:296
daal::data_management::interface1::DataSourceIface::notDictionaryFromContext
Definition: data_source.h:97
daal::data_management::interface1::CsvDataSource::loadDataBlock
size_t loadDataBlock(NumericTable *nt) DAAL_C11_OVERRIDE
Definition: csv_data_source.h:145
daal::data_management::interface1::DataSourceIface::freeNumericTable
virtual void freeNumericTable()=0
daal::data_management::interface1::DataCollection::push_back
DataCollection & push_back(const SerializationIfacePtr &x)
daal::data_management::interface1::BlockDescriptor::getBlockPtr
DataType * getBlockPtr() const
Definition: numeric_table.h:95
daal::data_management::interface1::DataSourceIface::notAllocateNumericTable
Definition: data_source.h:107
daal::data_management::interface1::DataSourceTemplate
Implements the abstract DataSourceIface interface.
Definition: data_source.h:489
daal::services::daal_memcpy_s
DAAL_EXPORT void daal_memcpy_s(void *dest, size_t numberOfElements, const void *src, size_t count)
daal::data_management::interface1::DataSource::checkNumericTable
services::Status checkNumericTable()
Definition: data_source.h:371
daal::data_management::interface1::NumericTable::setNormalizationFlag
NormalizationType setNormalizationFlag(NormalizationType flag)
Definition: numeric_table.h:762
daal::services::interface1::SharedPtr
Shared pointer that retains shared ownership of an object through a pointer. Several SharedPtr object...
Definition: daal_shared_ptr.h:187
daal::data_management::interface1::DataCollection::size
size_t size() const
daal::data_management::interface1::CsvDataSource
Specifies methods to access data stored in files.
Definition: csv_data_source.h:74
daal::data_management::interface1::NumericTableIface::nonNormalized
Definition: numeric_table.h:343
daal::services::daal_malloc
DAAL_EXPORT void * daal_malloc(size_t size, size_t alignment=DAAL_MALLOC_DEFAULT_ALIGNMENT)
daal::data_management::interface1::HomogenNumericTable::create
static services::SharedPtr< HomogenNumericTable< DataType > > create(NumericTableDictionaryPtr ddictForHomogenNumericTable, services::Status *stat=NULL)
Definition: homogen_numeric_table.h:117
daal::data_management::interface1::DataSourceIface::DictionaryCreationFlag
DictionaryCreationFlag
Specifies whether a Data Dictionary is created from the context of a Data Source. ...
Definition: data_source.h:95
daal::services::interface1::SharedPtr::get
T * get() const
Definition: daal_shared_ptr.h:332
daal::data_management::interface1::NumericTable
Class for a data management component responsible for representation of data in the numeric format...
Definition: numeric_table.h:600
daal::data_management::interface1::NumericTableIface::StorageLayout
StorageLayout
Storage layouts that may need to be supported.
Definition: numeric_table.h:352
daal::data_management::interface1::DataSourceTemplate::freeNumericTable
void freeNumericTable() DAAL_C11_OVERRIDE
Definition: data_source.h:535
daal::data_management::interface1::DataSource::loadDataBlock
size_t loadDataBlock() DAAL_C11_OVERRIDE
Definition: data_source.h:298
daal::data_management::interface1::CsvDataSource::getFeatureManager
FeatureManager & getFeatureManager()
Definition: csv_data_source.h:126
daal::services::interface1::SharedPtr::reset
void reset()
Definition: daal_shared_ptr.h:265
daal::data_management::interface1::CsvDataSource::CsvDataSource
CsvDataSource(DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable=DataSource::notAllocateNumericTable, DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext=DataSource::notDictionaryFromContext, size_t initialMaxRows=10)
Definition: csv_data_source.h:102
daal::services::ErrorIncorrectTypeOfInputNumericTable
Definition: error_indexes.h:117
daal::data_management::interface1::CsvDataSource::loadDataBlock
size_t loadDataBlock() DAAL_C11_OVERRIDE
Definition: csv_data_source.h:259
daal::services::daal_free
DAAL_EXPORT void daal_free(void *ptr)
daal::services::ErrorIncorrectDataRange
Definition: error_indexes.h:103
daal::data_management::interface1::CsvDataSource::loadDataBlock
size_t loadDataBlock(size_t maxRows, size_t rowOffset, size_t fullRows) DAAL_C11_OVERRIDE
Definition: csv_data_source.h:269
daal::data_management::interface1::NumericTableIface::doAllocate
Definition: numeric_table.h:313
daal::data_management::interface1::CsvDataSource::loadDataBlock
size_t loadDataBlock(size_t maxRows) DAAL_C11_OVERRIDE
Definition: csv_data_source.h:264
daal::data_management::interface1::NumericTable::getNumberOfRows
size_t getNumberOfRows() const
Definition: numeric_table.h:686
daal::data_management::interface1::BlockDescriptor< DAAL_DATA_TYPE >
daal::data_management::interface1::CsvDataSource::loadDataBlock
size_t loadDataBlock(size_t maxRows, NumericTable *nt) DAAL_C11_OVERRIDE
Definition: csv_data_source.h:210
daal::data_management::interface1::DenseNumericTableIface::releaseBlockOfRows
virtual services::Status releaseBlockOfRows(BlockDescriptor< double > &block)=0
daal::data_management::interface1::NumericTable::getDataLayout
StorageLayout getDataLayout() const DAAL_C11_OVERRIDE
Definition: numeric_table.h:708
daal::data_management::interface1::Dictionary::create
static services::SharedPtr< Dictionary > create(size_t nfeat, FeaturesEqual featuresEqual=notEqual, services::Status *stat=NULL)
Definition: data_dictionary.h:209
daal::data_management::interface1::CsvDataSource::FeatureManager
_featureManager FeatureManager
Definition: csv_data_source.h:86
daal::data_management::interface1::DataSource::setDictionary
services::Status setDictionary(DataSourceDictionary *dict) DAAL_C11_OVERRIDE
Definition: data_source.h:243
daal::data_management::interface1::HomogenNumericTable
Class that provides methods to access data stored as a contiguous array of homogeneous feature vector...
Definition: homogen_numeric_table.h:76
daal::data_management::interface1::CsvDataSource::getNumericTableNumberOfColumns
size_t getNumericTableNumberOfColumns() DAAL_C11_OVERRIDE
Definition: csv_data_source.h:132
daal::data_management::interface1::DenseNumericTableIface::getBlockOfRows
virtual services::Status getBlockOfRows(size_t vector_idx, size_t vector_num, ReadWriteMode rwflag, BlockDescriptor< double > &block)=0
daal::data_management::interface1::Dictionary
Class that represents a dictionary of a data set and provides methods to work with the data dictionar...
Definition: data_dictionary.h:184