48 import daal.algorithms.kmeans
as kmeans
49 import daal.algorithms.kmeans.init
as init
50 from daal
import step1Local, step2Master
# Put the parent of this script's directory at the front of the module search
# path (once), so the ``utils`` module imported right after this can be found.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
55 from utils
import printNumericTable, createSparseTable
# Directory holding the example data sets, given as a relative path.
DAAL_PREFIX = os.path.join('..', 'data')

# Per-block size constant. The initialization step multiplies it by the block
# count and by the block index (presumably: total number of vectors and the
# offset of each block -- confirm against the DAAL documentation).
nVectorsInBlock = 8000
# Input file for each data block. All four entries point at the same CSR data
# set. The list is bound to ``dataFileNames`` because that is the name the
# loading loop indexes per block (``dataFileNames[i]``).
dataFileNames = [
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
]
# One slot per data block; each slot is replaced by that block's sparse table
# when the script runs.
dataTable = [0] * nBlocks

if __name__ == "__main__":
    # Step-2 (master) K-Means object. It is created once and receives the
    # partial results of every block on every iteration.
    masterAlgorithm = kmeans.Distributed(step2Master, nClusters, method=kmeans.lloydCSR)

    # Per-block cluster assignments, filled in by the final pass below.
    assignments = [0] * nBlocks

    # ---- Centroid initialization -------------------------------------------
    masterInitAlgorithm = init.Distributed(step2Master, nClusters, method=init.randomDense)

    for i in range(nBlocks):
        # Load this block's input file into a sparse table and keep it for the
        # later iteration and assignment passes.
        dataTable[i] = createSparseTable(dataFileNames[i])

        # Step-1 (local) initialization for this block. The two numeric
        # arguments are presumably the total number of vectors and this
        # block's starting offset -- confirm against the DAAL documentation.
        localInit = init.Distributed(
            step1Local,
            nClusters,
            nBlocks * nVectorsInBlock,
            i * nVectorsInBlock,
            method=init.randomDense
        )
        localInit.input.set(init.data, dataTable[i])

        # Feed the local partial result to the master initialization step.
        masterInitAlgorithm.input.add(init.partialResults, localInit.compute())

    masterInitAlgorithm.compute()
    res = masterInitAlgorithm.finalizeCompute()
    centroids = res.get(init.centroids)

    # ---- K-Means iterations ------------------------------------------------
    # NOTE: ``objectiveFunction`` is only bound inside this loop, so the final
    # print requires nIterations >= 1.
    for _ in range(nIterations):
        for table in dataTable:
            # The third argument used to be spelled ``it == nIterations``,
            # which is never true for ``it`` in ``range(nIterations)``. Pass
            # the value it always had, False, explicitly. Assignments are
            # produced by the dedicated pass after the iterations.
            localAlgorithm = kmeans.Distributed(step1Local, nClusters, False, method=kmeans.lloydCSR)

            localAlgorithm.input.set(kmeans.data, table)
            localAlgorithm.input.set(kmeans.inputCentroids, centroids)

            pres = localAlgorithm.compute()

            masterAlgorithm.input.add(kmeans.partialResults, pres)

        # Combine the partial results and take the updated centroids as the
        # starting point of the next iteration.
        masterAlgorithm.compute()
        result = masterAlgorithm.finalizeCompute()

        centroids = result.get(kmeans.centroids)
        objectiveFunction = result.get(kmeans.objectiveFunction)

    # ---- Cluster assignments -----------------------------------------------
    for i, table in enumerate(dataTable):
        # Batch K-Means with 0 as its second argument (presumably zero
        # iterations, i.e. only assign points to the final centroids --
        # confirm against the DAAL documentation).
        localAlgorithm = kmeans.Batch(nClusters, 0, method=kmeans.lloydCSR)

        localAlgorithm.input.set(kmeans.data, table)
        localAlgorithm.input.set(kmeans.inputCentroids, centroids)

        res = localAlgorithm.compute()

        assignments[i] = res.get(kmeans.assignments)

    # ---- Report ------------------------------------------------------------
    printNumericTable(assignments[0], "First 10 cluster assignments from 1st node:", 10)
    printNumericTable(centroids, "First 10 dimensions of centroids:", 20, 10)
    printNumericTable(objectiveFunction, "Objective function value:")