Python* API Reference for Intel® Data Analytics Acceleration Library 2019

pca_metrics_dense_batch.py

1 # file: pca_metrics_dense_batch.py
2 #===============================================================================
3 # Copyright 2014-2018 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 ## <a name="DAAL-EXAMPLE-PY-PCA_METRICS_DENSE_BATCH"></a>
17 ## \example pca_metrics_dense_batch.py
18 
19 import os
20 import sys
21 
22 import daal.algorithms.pca as pca
23 import daal.algorithms.pca.quality_metric_set as quality_metric_set
24 from daal.algorithms.pca.quality_metric import explained_variance
25 from daal.data_management import (
26  DataSourceIface, FileDataSource, HomogenNumericTable, MergedNumericTable,
27  NumericTableIface, BlockDescriptor, readWrite
28 )
29 
# Resolve the directory one level above the folder holding this script and put
# it at the front of sys.path (if not already present) before importing `utils`.
utils_folder = os.path.dirname(os.path.dirname(__file__))
utils_folder = os.path.realpath(os.path.abspath(utils_folder))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
34 
# Input parameters: relative path of the CSV data set, the row count passed to
# loadDataBlock(), and the component count passed to the quality metric set.
datasetFileName = os.path.join('..', 'data', 'batch', 'pca_normalized.csv')
nVectors = 1000
nComponents = 5

# Shared state: eigenData is filled in by trainModel(), qmsResult by testPcaQuality().
qmsResult = eigenData = None
41 
def trainModel():
    # Runs batch PCA (SVD method) on the CSV data set and stores the resulting
    # eigenvalues in the module-level `eigenData`.
    global eigenData

    # CSV-backed data source; the two flags request numeric table allocation
    # and a dictionary built from the file context.
    csvSource = FileDataSource(
        datasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Pull nVectors rows from the input file
    csvSource.loadDataBlock(nVectors)

    # PCA algorithm using the SVD method, fed with the loaded table
    pcaAlgorithm = pca.Batch(method=pca.svdDense)
    pcaAlgorithm.input.setDataset(pca.data, csvSource.getNumericTable())

    # Run the computation and keep only the eigenvalues for the quality metrics
    eigenData = pcaAlgorithm.compute().get(pca.eigenvalues)
64 
def testPcaQuality():
    # Computes the PCA quality metric set from the module-level `eigenData`
    # and stores the result collection in the module-level `qmsResult`.
    global qmsResult

    # Quality metric set for PCA, configured with nComponents
    metricSet = quality_metric_set.Batch(nComponents)

    # Fetch the explained-variances input slot and down-cast it to its concrete type
    genericInput = metricSet.getInputDataCollection().getInput(
        quality_metric_set.explainedVariancesMetrics
    )
    varianceInput = explained_variance.Input.downCast(genericInput)

    # Feed the eigenvalues produced by trainModel()
    varianceInput.setInput(explained_variance.eigenvalues, eigenData)

    # Compute the metrics, then retrieve them
    metricSet.compute()
    qmsResult = metricSet.getResultCollection()
78 
def printResults():
    # Prints the explained-variance metrics held in the module-level `qmsResult`.
    print("Quality metrics for PCA")

    # Fetch the explained-variances result and down-cast it to its concrete type
    genericResult = qmsResult.getResult(quality_metric_set.explainedVariancesMetrics)
    varianceResult = explained_variance.Result.downCast(genericResult)

    def show(resultId, caption):
        # Print one table of the result under the given caption
        printNumericTable(varianceResult.getResult(resultId), caption)

    show(explained_variance.explainedVariances, "Explained variances:")
    show(explained_variance.explainedVariancesRatios, "Explained variances ratios:")
    show(explained_variance.noiseVariance, "Noise variance:")
85 
if __name__ == "__main__":
    # Pipeline order matters: each stage reads the global set by the previous one.
    for stage in (trainModel, testPcaQuality, printResults):
        stage()

For more complete information about compiler optimizations, see our Optimization Notice.