Python* API Reference for Intel® Data Analytics Acceleration Library 2018 Update 3

kmeans_csr_distr.py

# file: kmeans_csr_distr.py
#===============================================================================
# Copyright 2014-2018 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#===============================================================================

"""K-Means clustering of CSR (sparse) data in the distributed processing mode.

The script emulates ``nBlocks`` nodes inside a single process:

1. Every block of data is loaded into a sparse table and a local
   initialization step (``step1Local``) produces a partial result that the
   master initialization step (``step2Master``) merges into the initial
   centroids.
2. For ``nIterations`` iterations every block computes a local K-Means
   partial result against the current centroids; the master step merges the
   partial results into updated centroids and the objective function value.
3. A batch K-Means run with zero iterations is executed per block with the
   final centroids to obtain the cluster assignments.

The assignments of the first block, the centroids and the objective function
value are printed at the end.
"""

import os
import sys

import daal.algorithms.kmeans as kmeans
import daal.algorithms.kmeans.init as init
from daal import step1Local, step2Master

# Make the parent directory of this script's directory importable so that the
# shared ``utils`` module can be found.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable, createSparseTable

DAAL_PREFIX = os.path.join('..', 'data')

# K-Means algorithm parameters
nClusters = 20
nIterations = 5
nBlocks = 4
nVectorsInBlock = 8000

# One input file per emulated node; the list must hold at least nBlocks entries.
dataFileNames = [
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv')
]

# Per-block input tables; the placeholders are filled in when the script runs.
dataTable = [None] * nBlocks

if __name__ == "__main__":

    masterAlgorithm = kmeans.Distributed(step2Master, nClusters, method=kmeans.lloydCSR)

    centroids = None
    # Stays None when nIterations is 0, i.e. when no master step is executed.
    objectiveFunction = None
    assignments = [None] * nBlocks

    # NOTE(review): the input tables are sparse (CSR) while the initialization
    # uses the dense method init.randomDense; confirm whether a CSR-specific
    # initialization method is intended here.
    masterInitAlgorithm = init.Distributed(step2Master, nClusters, method=init.randomDense)

    for i in range(nBlocks):

        # Read dataFileNames and create a numeric table to store the input data
        dataTable[i] = createSparseTable(dataFileNames[i])

        # Create an algorithm object for the local step of the centroid
        # initialization; it receives the total number of vectors and the
        # offset of this block within the whole data set
        localInit = init.Distributed(step1Local, nClusters, nBlocks * nVectorsInBlock,
                                     i * nVectorsInBlock, method=init.randomDense)

        localInit.input.set(init.data, dataTable[i])
        # Compute the local partial result and pass it to the master step
        masterInitAlgorithm.input.add(init.partialResults, localInit.compute())

    masterInitAlgorithm.compute()
    initResult = masterInitAlgorithm.finalizeCompute()
    centroids = initResult.get(init.centroids)

    for _ in range(nIterations):
        for i in range(nBlocks):
            # Create an algorithm object for the local step of K-Means.
            # The third argument used to be `it == nIterations`, which can
            # never be true inside `range(nIterations)`; pass False explicitly.
            # Assignments are obtained by the separate batch pass below.
            localAlgorithm = kmeans.Distributed(step1Local, nClusters, False, method=kmeans.lloydCSR)

            # Set the input data and the current centroids to the algorithm
            localAlgorithm.input.set(kmeans.data, dataTable[i])
            localAlgorithm.input.set(kmeans.inputCentroids, centroids)

            partialResult = localAlgorithm.compute()

            masterAlgorithm.input.add(kmeans.partialResults, partialResult)

        # Merge the partial results of all blocks on the master
        masterAlgorithm.compute()
        result = masterAlgorithm.finalizeCompute()

        centroids = result.get(kmeans.centroids)
        objectiveFunction = result.get(kmeans.objectiveFunction)

    for i in range(nBlocks):
        # Create a batch K-Means algorithm object with zero iterations: it is
        # used only to obtain assignments for the final centroids
        batchAlgorithm = kmeans.Batch(nClusters, 0, method=kmeans.lloydCSR)

        # Set the input data and the final centroids to the algorithm
        batchAlgorithm.input.set(kmeans.data, dataTable[i])
        batchAlgorithm.input.set(kmeans.inputCentroids, centroids)

        batchResult = batchAlgorithm.compute()

        assignments[i] = batchResult.get(kmeans.assignments)

    # Print the clusterization results
    printNumericTable(assignments[0], "First 10 cluster assignments from 1st node:", 10)
    printNumericTable(centroids, "First 10 dimensions of centroids:", 20, 10)
    if objectiveFunction is not None:
        printNumericTable(objectiveFunction, "Objective function value:")

For more complete information about compiler optimizations, see our Optimization Notice.