Python* API Reference for Intel® Data Analytics Acceleration Library 2018 Update 2

pca_cor_csr_distr.py

1 # file: pca_cor_csr_distr.py
2 #===============================================================================
3 # Copyright 2014-2018 Intel Corporation
4 # All Rights Reserved.
5 #
6 # If this software was obtained under the Intel Simplified Software License,
7 # the following terms apply:
8 #
9 # The source code, information and material ("Material") contained herein is
10 # owned by Intel Corporation or its suppliers or licensors, and title to such
11 # Material remains with Intel Corporation or its suppliers or licensors. The
12 # Material contains proprietary information of Intel or its suppliers and
13 # licensors. The Material is protected by worldwide copyright laws and treaty
14 # provisions. No part of the Material may be used, copied, reproduced,
15 # modified, published, uploaded, posted, transmitted, distributed or disclosed
16 # in any way without Intel's prior express written permission. No license under
17 # any patent, copyright or other intellectual property rights in the Material
18 # is granted to or conferred upon you, either expressly, by implication,
19 # inducement, estoppel or otherwise. Any license under such intellectual
20 # property rights must be express and approved by Intel in writing.
21 #
22 # Unless otherwise agreed by Intel in writing, you may not remove or alter this
23 # notice or any other notice embedded in Materials by Intel or Intel's
24 # suppliers or licensors in any way.
25 #
26 #
27 # If this software was obtained under the Apache License, Version 2.0 (the
28 # "License"), the following terms apply:
29 #
30 # You may not use this file except in compliance with the License. You may
31 # obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
32 #
33 #
34 # Unless required by applicable law or agreed to in writing, software
35 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
36 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
37 #
38 # See the License for the specific language governing permissions and
39 # limitations under the License.
40 #===============================================================================
41 
42 
43 
44 
45 import os
46 import sys
47 
48 import numpy as np
49 
50 from daal import step1Local, step2Master
51 from daal.algorithms import covariance
52 from daal.algorithms import pca
53 
54 utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))  # parent of this script's directory, added to sys.path so `utils` can be imported
55 if utils_folder not in sys.path:
56  sys.path.insert(0, utils_folder)
57 from utils import printNumericTable, createSparseTable
58 
59 DAAL_PREFIX = os.path.join('..', 'data')
60 
61 # Input data set parameters: one CSR file per block; nBlocks is used to index datasetFileNames and must not exceed its length
62 nBlocks = 4
63 datasetFileNames = [
64  os.path.join(DAAL_PREFIX, 'distributed', 'covcormoments_csr_1.csv'),
65  os.path.join(DAAL_PREFIX, 'distributed', 'covcormoments_csr_2.csv'),
66  os.path.join(DAAL_PREFIX, 'distributed', 'covcormoments_csr_3.csv'),
67  os.path.join(DAAL_PREFIX, 'distributed', 'covcormoments_csr_4.csv')
68 ]
69 
70 if __name__ == "__main__":
71 
72  # Create an algorithm for principal component analysis using the correlation method on the master node
73  masterAlgorithm = pca.Distributed(step2Master,fptype=np.float64)
74 
75  for i in range(nBlocks):
76  dataTable = createSparseTable(datasetFileNames[i])
77 
78  # Create an algorithm for principal component analysis using the correlation method on the local node
79  localAlgorithm = pca.Distributed(step1Local,fptype=np.float64)
80 
81  # Use the distributed covariance algorithm with the fastCSR (sparse data) method inside the local PCA algorithm
82  localAlgorithm.parameter.covariance = covariance.Distributed(step1Local, fptype=np.float64, method=covariance.fastCSR)
83 
84  # Set input objects for the algorithm
85  localAlgorithm.input.setDataset(pca.data, dataTable)
86 
87  # Compute partial estimates on local nodes
88  # Set local partial results as input for the master-node algorithm
89  masterAlgorithm.input.add(pca.partialResults, localAlgorithm.compute())
90 
91  # Use the distributed covariance algorithm with the fastCSR (sparse data) method inside the master-node PCA algorithm
92  masterAlgorithm.parameter.covariance = covariance.Distributed(step2Master, fptype=np.float64, method=covariance.fastCSR)
93 
94  # Merge the partial results on the master node, then finalize the PCA decomposition
95  masterAlgorithm.compute()
96 
97  result = masterAlgorithm.finalizeCompute()
98 
99  # Print the results
100  printNumericTable(result.get(pca.eigenvalues), "Eigenvalues:")
101  printNumericTable(result.get(pca.eigenvectors), "Eigenvectors:")

For more complete information about compiler optimizations, see our Optimization Notice.