Python* API Reference for Intel® Data Analytics Acceleration Library 2018 Update 3

basic_statistics.py

1 # file: basic_statistics.py
2 #===============================================================================
3 # Copyright 2014-2018 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 #
17 # ! Content:
18 # ! Python example of using basic statistics
19 # !*****************************************************************************
20 
21 #
22 ## <a name = "DAAL-EXAMPLE-PY-BASIC_STATISTICS"></a>
23 ## \example basic_statistics.py
24 #
25 
26 import os
27 import sys
28 import numpy as np
29 
30 from daal.data_management import HomogenNumericTable, NumericTableIface, FileDataSource, DataSourceIface
31 
32 utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
33 if utils_folder not in sys.path:
34  sys.path.insert(0, utils_folder)
35 from utils import printNumericTable
36 
37 
38 if __name__ == "__main__":
39 
40  print("Basic statistics example\n")
41 
42  # Input data set parameters
43  datasetFileName = "../data/batch/basic_statistics.csv"
44  data = np.array([(7.0, 3.0, 6.0, 2.0),
45  (1.0, 3.0, 0.0, 2.0),
46  (9.0, 2.0, 6.0, 2.0),
47  (3.0, 4.0, 7.0, 2.0),])
48 
49  # Initialize FileDataSource to retrieve the input data from a .csv file
50  dataSource = FileDataSource(datasetFileName, DataSourceIface.doAllocateNumericTable)
51 
52  dataSource.createDictionaryFromContext()
53  dataSource.loadDataBlock()
54  table = dataSource.getNumericTable()
55 
56  # Get basic statistics from the table. They were calculated inside DataSource for each column.
57  min = table.basicStatistics.get(NumericTableIface.minimum)
58  max = table.basicStatistics.get(NumericTableIface.maximum)
59  sum = table.basicStatistics.get(NumericTableIface.sum)
60  sumSquares = table.basicStatistics.get(NumericTableIface.sumSquares)
61 
62  # Print calculated basic statistics
63  printNumericTable(table, "Basic statistics of table:")
64  printNumericTable(min, "Minimum:")
65  printNumericTable(max, "Maximum:")
66  printNumericTable(sum, "Sum:")
67  printNumericTable(sumSquares, "SumSquares:")
68 
69  # Create NumericTable with the same data. But in this case basic statistics are not calculated.
70  dataTable = HomogenNumericTable(data)
71 
72  # Set basic statistics in the new NumericTable
73  dataTable.basicStatistics.set(NumericTableIface.minimum, min);
74  dataTable.basicStatistics.set(NumericTableIface.maximum, max);
75  dataTable.basicStatistics.set(NumericTableIface.sum, sum);
76  dataTable.basicStatistics.set(NumericTableIface.sumSquares, sumSquares);
77 
78  # Print basic statistics those were set
79  printNumericTable(dataTable, "New table:")
80  printNumericTable(dataTable.basicStatistics.get(NumericTableIface.minimum), "Minimum:")
81  printNumericTable(dataTable.basicStatistics.get(NumericTableIface.maximum), "Maximum:")
82  printNumericTable(dataTable.basicStatistics.get(NumericTableIface.sum), "Sum:")
83  printNumericTable(dataTable.basicStatistics.get(NumericTableIface.sumSquares), "SumSquares:")

For more complete information about compiler optimizations, see our Optimization Notice.