22 import daal.algorithms.kmeans
as kmeans
23 import daal.algorithms.kmeans.init
as init
24 from daal
import step1Local, step2Master
25 from daal.data_management
import FileDataSource, DataSourceIface
# Put the parent of this script's directory on the import path (once) so that
# the `utils` module imported just below can be resolved.
utils_folder = os.path.realpath(
    os.path.abspath(
        os.path.dirname(os.path.dirname(__file__))
    )
)
if utils_folder not in sys.path:
    # Insert at the front so this folder takes precedence during lookup.
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
# Root folder of the example data sets. This is a relative path, so it is
# resolved against the current working directory when the files are opened.
DAAL_PREFIX = os.path.join('..', 'data')

# Input CSV files, one per data block: the main section reads
# dataFileNames[i] when loading block i.
dataFileNames = [
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_1.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_2.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_3.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_4.csv'),
]

# Number of vectors per block. The main section multiplies it by the block
# count and by the block index when configuring centroid initialisation.
nVectorsInBlock = 2500
# One slot per data block. The zeros are only placeholders: the main section
# overwrites dataTable[i] with the numeric table loaded for block i.
# NOTE(review): nBlocks is not defined in the visible part of this script; it
# is expected to be a module-level setting defined above -- confirm.
dataTable = nBlocks * [0]
if __name__ == "__main__":
    # Distributed K-Means driver, run block by block in a single process:
    #   1. load every block and compute initial centroids (local -> master),
    #   2. refine the centroids for nIterations rounds (local -> master),
    #   3. compute per-block cluster assignments for the final centroids,
    #   4. print the results.
    #
    # NOTE(review): nClusters, nIterations and nBlocks are not defined in the
    # visible part of this script; they are expected to be module-level
    # settings defined above -- confirm.

    # Master-side algorithm; it receives the partial results of every block
    # on each refinement round.
    masterAlgorithm = kmeans.Distributed(
        step2Master, nClusters, method=kmeans.lloydDense
    )

    # Placeholders, overwritten per block in the assignment pass below.
    assignments = [0] * nBlocks

    # Stays None when nIterations is 0, i.e. when the refinement loop never
    # runs and therefore never produces a goal function value.
    goalFunction = None

    # ---- 1. Load the data and compute the initial centroids ----------------
    masterInitAlgorithm = init.Distributed(
        step2Master, nClusters, method=init.randomDense
    )
    for i in range(nBlocks):
        # Read block i from its CSV file into a numeric table.
        dataSource = FileDataSource(
            dataFileNames[i],
            DataSourceIface.doAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext,
        )
        dataSource.loadDataBlock()
        dataTable[i] = dataSource.getNumericTable()

        # Local initialisation step. The two numeric arguments are the total
        # vector count over all blocks and the index of this block's first
        # vector (presumably the "total rows" / "offset" parameters of the
        # DAAL init API -- confirm against the pydaal reference).
        localInit = init.Distributed(
            step1Local,
            nClusters,
            nBlocks * nVectorsInBlock,
            i * nVectorsInBlock,
            method=init.randomDense,
        )
        localInit.input.set(init.data, dataTable[i])
        res = localInit.compute()
        masterInitAlgorithm.input.add(init.partialResults, res)

    masterInitAlgorithm.compute()
    res = masterInitAlgorithm.finalizeCompute()
    centroids = res.get(init.centroids)

    # ---- 2. Refine the centroids --------------------------------------------
    for _ in range(nIterations):
        for i in range(nBlocks):
            # The third argument used to be `it == nIterations`, which can
            # never be true for `it in range(nIterations)`. Passing False
            # states that behaviour explicitly (presumably this is the DAAL
            # "assign" flag -- confirm); assignments are computed in a
            # dedicated pass after the loop instead.
            localAlgorithm = kmeans.Distributed(
                step1Local, nClusters, False, method=kmeans.lloydDense
            )
            localAlgorithm.input.set(kmeans.data, dataTable[i])
            localAlgorithm.input.set(kmeans.inputCentroids, centroids)

            pres = localAlgorithm.compute()
            masterAlgorithm.input.add(kmeans.partialResults, pres)

        masterAlgorithm.compute()
        result = masterAlgorithm.finalizeCompute()

        # The centroids of this round are the input of the next one.
        centroids = result.get(kmeans.centroids)
        goalFunction = result.get(kmeans.goalFunction)

    # ---- 3. Compute the assignments for the final centroids -----------------
    for i in range(nBlocks):
        # Batch run with 0 as second argument (presumably the iteration
        # count, so the centroids are left untouched -- confirm).
        localAlgorithm = kmeans.Batch(nClusters, 0, method=kmeans.lloydDense)
        localAlgorithm.input.set(kmeans.data, dataTable[i])
        localAlgorithm.input.set(kmeans.inputCentroids, centroids)

        res = localAlgorithm.compute()
        assignments[i] = res.get(kmeans.assignments)

    # ---- 4. Print the results ------------------------------------------------
    printNumericTable(
        assignments[0], "First 10 cluster assignments from 1st node:", 10
    )
    printNumericTable(centroids, "First 10 dimensions of centroids:", 20, 10)
    if goalFunction is not None:
        # Skipped only when no refinement round ran (nIterations == 0);
        # previously that case ended in a NameError here.
        printNumericTable(goalFunction, "Goal function value:")