Python* API Reference for Intel® Data Analytics Acceleration Library 2019

datasource_featureextraction.py

1 # file: datasource_featureextraction.py
2 #===============================================================================
3 # Copyright 2014-2018 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 #
17 # ! Content:
18 # ! Python example of using data source feature extraction
19 # !*****************************************************************************
20 
21 #
22 ## <a name = "DAAL-EXAMPLE-PY-DATASOURCE_FEATUREEXTRACTION"></a>
23 ## \example datasource_featureextraction.py
24 #
import os
import sys

from daal.data_management import FileDataSource, DataSourceIface, ColumnFilter, OneHotEncoder

# The shared example helpers (the "utils" module) are expected one directory
# above the folder that holds this script; put that directory on sys.path so
# the import below resolves no matter where the script is launched from.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable


# Input data set parameters.
# NOTE: this path is relative, so it is resolved against the current working
# directory rather than against the location of this script.
datasetFileName = "../data/batch/kmeans_dense.csv"

if __name__ == "__main__":

    # Initialize FileDataSource to retrieve the input data from a .csv file.
    # NOTE(review): doAllocateNumericTable presumably asks the data source to
    # allocate the resulting numeric table itself - confirm against the DAAL docs.
    dataSource = FileDataSource(datasetFileName, DataSourceIface.doAllocateNumericTable)

    # Create the data source dictionary from the .csv file
    dataSource.createDictionaryFromContext()

    # Keep only the 3 chosen columns of the .csv file
    validList = [1, 2, 5]

    colFilter = ColumnFilter()
    filterList = colFilter.list(validList)
    dataSource.getFeatureManager().addModifier(filterList)

    # Treat the column with index 1 as categorical and convert it into
    # 3 binary categorical features.
    # NOTE(review): whether index 1 refers to the original file columns or to
    # the filtered ones is not visible here - confirm against the DAAL docs.
    dataSource.getFeatureManager().addModifier(OneHotEncoder(1, 3))

    # Load data from the .csv file; the modifiers registered above were added
    # before this call.
    dataSource.loadDataBlock()

    # Print the result.
    # NOTE(review): the trailing 4 and 20 presumably limit how many rows and
    # columns are printed - confirm against utils.printNumericTable.
    table = dataSource.getNumericTable()
    printNumericTable(table, "Loaded data", 4, 20)

For more complete information about compiler optimizations, see our Optimization Notice.