47 import daal.algorithms.kmeans
as kmeans
48 import daal.algorithms.kmeans.init
as init
49 from daal
import step1Local, step2Master
# Make sure the parent of this script's directory is on sys.path before the
# `utils` import below is attempted.
utils_folder = os.path.dirname(os.path.dirname(__file__))
# abspath first, then realpath: same normalisation order as a nested call.
utils_folder = os.path.realpath(os.path.abspath(utils_folder))
if utils_folder not in sys.path:
    # Insert at the front so this folder is searched before other entries.
    sys.path.insert(0, utils_folder)
from utils import printNumericTable, createSparseTable
# Relative location of the example data directory; the input file paths
# elsewhere in this script are built on top of this prefix.
DAAL_PREFIX = os.path.join('..', 'data')

# Block size used when the initialization step is configured
# (presumably the number of observations held by each data block --
# confirm against the data files).
nVectorsInBlock = 8000
65 os.path.join(DAAL_PREFIX,
'batch',
'kmeans_csr.csv'),
66 os.path.join(DAAL_PREFIX,
'batch',
'kmeans_csr.csv'),
67 os.path.join(DAAL_PREFIX,
'batch',
'kmeans_csr.csv'),
68 os.path.join(DAAL_PREFIX,
'batch',
'kmeans_csr.csv')
# One placeholder entry per data block; each slot is overwritten with the
# block's sparse table when the script runs as __main__.
dataTable = [0 for _ in range(nBlocks)]
if __name__ == "__main__":
    # Single-process walk-through of distributed K-Means on CSR input:
    # a step1Local object is created for every one of the nBlocks data blocks
    # and a step2Master object collects the partial results they produce.

    # Master-side objects: one drives the clustering iterations, the other
    # produces the initial centroids.
    # NOTE(review): initialization uses init.randomDense while the clustering
    # steps use kmeans.lloydCSR -- confirm the dense initializer is intended
    # for these sparse tables.
    masterAlgorithm = kmeans.Distributed(step2Master, nClusters, method=kmeans.lloydCSR)
    masterInitAlgorithm = init.Distributed(step2Master, nClusters, method=init.randomDense)

    # Stage 1: load each block and hand its local initialization result to
    # the master initializer.
    for block in range(nBlocks):
        dataTable[block] = createSparseTable(dataFileNames[block])

        # The two size arguments are presumably the total number of rows
        # across all blocks and this block's starting row offset -- confirm
        # against the DAAL documentation.
        localInit = init.Distributed(
            step1Local,
            nClusters,
            nBlocks * nVectorsInBlock,
            block * nVectorsInBlock,
            method=init.randomDense,
        )
        localInit.input.set(init.data, dataTable[block])

        initPartial = localInit.compute()
        masterInitAlgorithm.input.add(init.partialResults, initPartial)

    masterInitAlgorithm.compute()
    initResult = masterInitAlgorithm.finalizeCompute()
    centroids = initResult.get(init.centroids)

    # Stage 2: run nIterations rounds; every round feeds the current
    # centroids to each block and lets the master merge the partial results
    # into updated centroids.
    for iteration in range(nIterations):
        # NOTE(review): inside range(nIterations) this comparison can never be
        # true, so the third constructor argument below is always False.
        # Kept unchanged to preserve behaviour -- confirm whether
        # `nIterations - 1` was intended.
        isPastLastIteration = iteration == nIterations

        for block in range(nBlocks):
            localAlgorithm = kmeans.Distributed(
                step1Local, nClusters, isPastLastIteration, method=kmeans.lloydCSR
            )
            localAlgorithm.input.set(kmeans.data, dataTable[block])
            localAlgorithm.input.set(kmeans.inputCentroids, centroids)

            masterAlgorithm.input.add(kmeans.partialResults, localAlgorithm.compute())

        masterAlgorithm.compute()
        roundResult = masterAlgorithm.finalizeCompute()

        # The centroids of this round become the input of the next one.
        centroids = roundResult.get(kmeans.centroids)
        objectiveFunction = roundResult.get(kmeans.objectiveFunction)

    # Stage 3: obtain per-block cluster assignments from the final centroids
    # using a batch algorithm constructed with 0 as its second argument
    # (presumably the iteration count, i.e. assignment only -- confirm).
    assignments = [0] * nBlocks
    for block in range(nBlocks):
        localAlgorithm = kmeans.Batch(nClusters, 0, method=kmeans.lloydCSR)
        localAlgorithm.input.set(kmeans.data, dataTable[block])
        localAlgorithm.input.set(kmeans.inputCentroids, centroids)

        batchResult = localAlgorithm.compute()
        assignments[block] = batchResult.get(kmeans.assignments)

    # Report the results.
    printNumericTable(assignments[0], "First 10 cluster assignments from 1st node:", 10)
    printNumericTable(centroids, "First 10 dimensions of centroids:", 20, 10)
    printNumericTable(objectiveFunction, "Objective function value:")