Python* API Reference for Intel® Data Analytics Acceleration Library 2019 Update 5

custom_csv_feature_modifiers.py

1 # file: custom_csv_feature_modifiers.py
2 #===============================================================================
3 # Copyright 2014-2019 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 # ! Content:
17 # ! Python example of modifiers usage with file data source
18 # !*****************************************************************************
19 
20 #
21 
22 
23 #
24 
25 from daal.data_management import FileDataSource, CsvDataSourceOptions, modifiers
26 from daal.data_management.modifiers.csv import FeatureModifier
27 
28 import os, sys
29 utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
30 if utils_folder not in sys.path:
31  sys.path.insert(0, utils_folder)
32 from utils import printNumericTable
33 
# User-defined feature modifier that replaces every selected feature by its square
class MySquaringModifier(FeatureModifier):
    """Feature modifier whose output is the square of each input token."""

    def apply(self, tokens):
        """Square every token of every entry in ``tokens``.

        Each token is converted with ``float`` before being squared, so the
        result is a list of lists of floats with the same layout as the
        input.
        """
        squared_rows = []
        for row in tokens:
            values = [float(token) for token in row]
            squared_rows.append([value * value for value in values])
        return squared_rows
38 
39 
# User-defined feature modifier that selects max element among all features
class MyMaxFeatureModifier(FeatureModifier):
    """Feature modifier that collapses the selected features into their maximum."""

    def __init__(self):
        # NOTE(review): the meaning of the two arguments is defined by the
        # FeatureModifier base class, which is not visible here; presumably
        # the first one is the number of output features (one value per
        # entry is returned by apply) -- confirm against the DAAL API docs.
        super(MyMaxFeatureModifier, self).__init__(1, 4)

    def apply(self, tokens):
        """Return a one-element list with the numeric maximum for each entry.

        Tokens are converted with ``float`` *before* the maximum is taken.
        Taking ``max`` of the raw tokens first would compare text tokens
        lexicographically (e.g. "9" would beat "10") and produce a wrong
        result.

        Raises ``ValueError`` if an entry of ``tokens`` is empty or holds a
        token that cannot be converted to ``float``.
        """
        return [[max(float(token) for token in row)] for row in tokens]
48 
49 
if __name__ == "__main__":
    # Location of the CSV file to load
    input_path = "../data/batch/mixed_text_and_numbers.csv"

    # Flags controlling how the CSV data source is set up
    source_options = CsvDataSourceOptions(
        CsvDataSourceOptions.allocateNumericTable
        | CsvDataSourceOptions.createDictionaryFromContext
        | CsvDataSourceOptions.parseHeader
    )

    # CSV file data source built from the path and the options above
    data_source = FileDataSource(input_path, source_options)

    # The modifiers registered below define the columns of the output
    # numeric table, in registration order:
    # | Numeric1 | Numeric2 ^ 2 | Numeric5 ^ 2 | max(Numeric0, Numeric5) |
    feature_manager = data_source.getFeatureManager()
    feature_manager.addModifier(["Numeric1"], modifiers.csv.continuous())
    feature_manager.addModifier(["Numeric2", "Numeric5"], MySquaringModifier())
    feature_manager.addModifier(["Numeric0", "Numeric5"], MyMaxFeatureModifier())

    # Read and parse the CSV file, then print the resulting table
    data_source.loadDataBlock()
    printNumericTable(data_source.getNumericTable(), "Loaded numeric table:")

For more complete information about compiler optimizations, see our Optimization Notice.