# pyDAAL K-Means: the clustering algorithm, its centroid-initialisation
# sub-package, and the two step identifiers passed to the Distributed
# constructors further down in this script.
import daal.algorithms.kmeans as kmeans
import daal.algorithms.kmeans.init as init
from daal import step1Local, step2Master
# Resolve the parent of the directory containing this script and make sure it
# is on sys.path, so modules living there can be imported by plain name.
utils_folder = os.path.realpath(
    os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
)
if utils_folder not in sys.path:
    # Prepend (rather than append) so this location is searched first.
    sys.path.insert(0, utils_folder)
# Helpers used below to load a sparse input table and to print result tables.
from utils import printNumericTable, createSparseTable
# Relative location of the example data sets ('..' is resolved against the
# working directory at the time the files are opened).
DAAL_PREFIX = os.path.join('..', 'data')

# Per-block row count; the main section multiplies it by the block count and
# by the block index when configuring the local initialisation step.
nVectorsInBlock = 8000
# Input file for each data block; indexed by block number in the main section.
# NOTE(review): the reviewed text contained only the four path expressions;
# the enclosing `dataFileNames = [ ... ]` was reconstructed from the way the
# name is indexed later in the script -- confirm against the upstream file.
# All four entries intentionally point at the same CSV, as in the original.
dataFileNames = [
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv')
]
# One placeholder slot per data block; each slot is overwritten with the
# loaded table in the main section.
dataTable = [0] * nBlocks
if __name__ == "__main__":
    # Step-2 (master) K-Means object; it is fed the partial results produced
    # by the per-block step-1 objects in every iteration below.
    masterAlgorithm = kmeans.Distributed(step2Master, nClusters, method=kmeans.lloydCSR)

    # One placeholder per block, filled with the final cluster assignments.
    assignments = [0] * nBlocks

    # ---- Centroid initialisation -------------------------------------------
    # NOTE(review): the tables come from createSparseTable and the clustering
    # itself uses lloydCSR, yet initialisation uses init.randomDense. Kept as
    # in the original -- confirm whether a CSR init method was intended.
    masterInitAlgorithm = init.Distributed(step2Master, nClusters, method=init.randomDense)

    for i in range(nBlocks):
        # Load this block's input file into a table.
        dataTable[i] = createSparseTable(dataFileNames[i])

        # Step-1 initialisation for this block. The two positional integers
        # are nBlocks * nVectorsInBlock and i * nVectorsInBlock (presumably
        # the total row count and this block's row offset -- confirm against
        # the pyDAAL init.Distributed signature).
        localInit = init.Distributed(
            step1Local,
            nClusters,
            nBlocks * nVectorsInBlock,
            i * nVectorsInBlock,
            method=init.randomDense,
        )
        localInit.input.set(init.data, dataTable[i])

        # Hand the block's partial result to the master initialiser.
        masterInitAlgorithm.input.add(init.partialResults, localInit.compute())

    masterInitAlgorithm.compute()
    res = masterInitAlgorithm.finalizeCompute()
    centroids = res.get(init.centroids)

    # ---- Iterative refinement ----------------------------------------------
    for _ in range(nIterations):
        for i in range(nBlocks):
            # The third positional argument was written `it == nIterations`,
            # which can never be true inside range(nIterations); the explicit
            # False states what was always passed. (Presumably this is the
            # "compute assignments" flag -- confirm; assignments are obtained
            # separately after the loop.)
            localAlgorithm = kmeans.Distributed(step1Local, nClusters, False, method=kmeans.lloydCSR)

            localAlgorithm.input.set(kmeans.data, dataTable[i])
            localAlgorithm.input.set(kmeans.inputCentroids, centroids)

            pres = localAlgorithm.compute()

            masterAlgorithm.input.add(kmeans.partialResults, pres)

        # Combine the blocks' partial results and take the updated centroids
        # as the starting point of the next iteration.
        masterAlgorithm.compute()
        result = masterAlgorithm.finalizeCompute()

        centroids = result.get(kmeans.centroids)
        objectiveFunction = result.get(kmeans.objectiveFunction)

    # ---- Final assignments -------------------------------------------------
    for i in range(nBlocks):
        # Batch run per block seeded with the final centroids. The positional
        # 0 is presumably the iteration count, so only assignments are
        # computed -- confirm against the pyDAAL kmeans.Batch signature.
        localAlgorithm = kmeans.Batch(nClusters, 0, method=kmeans.lloydCSR)

        localAlgorithm.input.set(kmeans.data, dataTable[i])
        localAlgorithm.input.set(kmeans.inputCentroids, centroids)

        res = localAlgorithm.compute()

        assignments[i] = res.get(kmeans.assignments)

    # ---- Report ------------------------------------------------------------
    printNumericTable(assignments[0], "First 10 cluster assignments from 1st node:", 10)
    printNumericTable(centroids, "First 10 dimensions of centroids:", 20, 10)
    printNumericTable(objectiveFunction, "Objective function value:")