Python* API Reference for Intel® Data Analytics Acceleration Library 2018 Update 3

pca_transform_dense_batch.py

1 # file: pca_transform_dense_batch.py
2 #===============================================================================
3 # Copyright 2014-2018 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 #
17 # ! Content:
18 # ! Python example of PCA transformation algorithm.
19 # !*****************************************************************************
20 
21 #
22 
23 
24 #
25 
26 import os
27 import sys
28 import numpy as np
29 
30 import daal.algorithms.pca as pca
31 import daal.algorithms.pca.transform as pca_transform
32 from daal.data_management import DataSourceIface, FileDataSource
33 
# Locate the directory three levels above this script and make it importable,
# so that the shared `utils` helper module can be found when the example is
# run directly from its own folder.
_script_dir = os.path.dirname(__file__)
_examples_root = os.path.dirname(os.path.dirname(_script_dir))
utils_folder = os.path.realpath(os.path.abspath(_examples_root))
if utils_folder not in sys.path:
    # Put it first so this `utils` wins over any other module of that name.
    sys.path.insert(0, utils_folder)

from utils import printNumericTable
from daal.data_management import NumericTable

# Input data set parameters: CSV file path, relative to the working directory.
datasetName = os.path.join('..', 'data', 'batch', 'pca_transform.csv')
41 
def main():
    """Run PCA on the CSV data set and apply the PCA transformation to it.

    Steps:
      1. Load the whole ``datasetName`` file into a numeric table.
      2. Compute PCA (SVD method, float64), also requesting the means,
         variances and eigenvalues so they are exported for the transform.
      3. Print the eigenvalues, eigenvectors, means and variances.
      4. Feed the data, the eigenvectors and the exported ``dataForTransform``
         collection to the PCA transform algorithm, keeping two components,
         and print the transformed data.

    Everything is written to standard output via ``printNumericTable``;
    nothing is returned.
    """
    # Retrieve the input data
    dataSource = FileDataSource(
        datasetName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )
    dataSource.loadDataBlock()
    data = dataSource.getNumericTable()

    # Create the PCA algorithm and set its input data
    algorithm = pca.Batch(fptype=np.float64, method=pca.svdDense)
    algorithm.input.setDataset(pca.data, data)

    # Ask for the normalization parameters (mean and variance) and the
    # whitening parameter (eigenvalue) to be exported for the transform.
    # If whitening is not required, drop pca.eigenvalue from the mask: the
    # eigenvalues are still calculated in the pca.eigenvalues result table,
    # but are not passed to the dataForTransform collection.
    algorithm.parameter.resultsToCompute = pca.mean | pca.variance | pca.eigenvalue

    # Compute PCA
    res = algorithm.compute()

    # Output the eigenvalues and the basis (eigenvectors)
    printNumericTable(res.get(pca.eigenvalues), "Eigenvalues:")
    printNumericTable(res.get(pca.eigenvectors), "Eigenvectors:")

    # Output the tables that back the dataForTransform collection
    eigenvaluesT = res.get(pca.eigenvalues)
    printNumericTable(eigenvaluesT, "Eigenvalues kv:")

    meansT = res.get(pca.means)
    printNumericTable(meansT, "Means kv:")

    variancesT = res.get(pca.variances)
    printNumericTable(variancesT, "Variances kv:")

    # Create the PCA transformation algorithm
    tralgorithm = pca_transform.Batch(fptype=np.float64)

    # Set the number of principal components for the transformation
    tralgorithm.parameter.nComponents = 2

    # Set the data to be transformed
    tralgorithm.input.setTable(pca_transform.data, data)

    # Set the eigenvectors (the basis computed by PCA above)
    tralgorithm.input.setTable(pca_transform.eigenvectors, res.get(pca.eigenvectors))

    # Set the normalization/whitening data exported by PCA
    tralgorithm.input.setCollection(
        pca_transform.dataForTransform,
        res.getCollection(pca.dataForTransform),
    )

    # Compute the PCA transformation
    trres = tralgorithm.compute()

    # Use the pca_transform alias for the result id, like every other
    # transform identifier in this example.
    printNumericTable(trres.get(pca_transform.transformedData), "Transformed data:")


if __name__ == "__main__":
    main()

For more complete information about compiler optimizations, see our Optimization Notice.