import os
import sys

import daal.algorithms.kmeans as kmeans
import daal.algorithms.kmeans.init as init
from daal import step1Local, step2Master
from daal.data_management import FileDataSource, DataSourceIface
# Make the shared helper module importable: the directory one level above
# this script's own directory is resolved to a real absolute path and placed
# first on sys.path, unless it is already listed there.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
# This import has to follow the sys.path adjustment above.
from utils import printNumericTable
# Relative path of the data directory; every input file is located under it.
DAAL_PREFIX = os.path.join('..', 'data')

# Input CSV files, one per data block. The list is indexed by block number
# when the blocks are loaded, so its order defines the block order.
dataFileNames = [
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_1.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_2.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_3.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_4.csv'),
]
# Per-block row count. The local initialization step receives
# nBlocks * nVectorsInBlock and i * nVectorsInBlock derived from it
# (presumably the total row count and the block's starting offset -- confirm
# against the DAAL init.Distributed documentation).
nVectorsInBlock = 2500

# One placeholder slot per block; each slot is overwritten with the numeric
# table loaded for that block.
dataTable = [0] * nBlocks
if __name__ == "__main__":
    # Script entry point. Runs K-Means over nBlocks data blocks inside one
    # process, using the step1Local / step2Master algorithm objects:
    #   1. load every block and compute initial centroids,
    #   2. refine the centroids for nIterations rounds,
    #   3. compute cluster assignments per block from the final centroids,
    #   4. print assignments of the first block, centroids and goal function.

    # Master-step object; the same instance is used in every iteration.
    masterAlgorithm = kmeans.Distributed(step2Master, nClusters, method=kmeans.lloydDense)

    # One placeholder slot per block, filled in by the assignment pass.
    assignments = [0] * nBlocks

    # --- 1. Load the data and initialize the centroids -------------------
    masterInitAlgorithm = init.Distributed(step2Master, nClusters, method=init.randomDense)
    for i in range(nBlocks):
        dataSource = FileDataSource(
            dataFileNames[i], DataSourceIface.doAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext
        )
        dataSource.loadDataBlock()

        # Keep the table: the iteration and assignment passes reuse it.
        dataTable[i] = dataSource.getNumericTable()

        # Local initialization step for block i; its partial result is
        # handed to the master initialization step.
        localInit = init.Distributed(step1Local, nClusters, nBlocks * nVectorsInBlock, i * nVectorsInBlock, method=init.randomDense)
        localInit.input.set(init.data, dataTable[i])
        res = localInit.compute()
        masterInitAlgorithm.input.add(init.partialResults, res)

    # Combine the partial results of all blocks into the initial centroids.
    masterInitAlgorithm.compute()
    res = masterInitAlgorithm.finalizeCompute()
    centroids = res.get(init.centroids)

    # --- 2. Iterative refinement -----------------------------------------
    for it in range(nIterations):
        for i in range(nBlocks):
            # The third argument was written `it == nIterations`, which can
            # never be true inside range(nIterations); it is spelled here as
            # the constant it always evaluated to.
            localAlgorithm = kmeans.Distributed(step1Local, nClusters, False, method=kmeans.lloydDense)

            localAlgorithm.input.set(kmeans.data, dataTable[i])
            localAlgorithm.input.set(kmeans.inputCentroids, centroids)

            pres = localAlgorithm.compute()

            masterAlgorithm.input.add(kmeans.partialResults, pres)

        masterAlgorithm.compute()
        result = masterAlgorithm.finalizeCompute()

        # The updated centroids feed the next iteration's local steps.
        centroids = result.get(kmeans.centroids)
        goalFunction = result.get(kmeans.goalFunction)

    # --- 3. Cluster assignments for every block --------------------------
    for i in range(nBlocks):
        # Batch algorithm constructed with 0 as its second argument and
        # seeded with the final centroids; only its assignments are read.
        localAlgorithm = kmeans.Batch(nClusters, 0, method=kmeans.lloydDense)

        localAlgorithm.input.set(kmeans.data, dataTable[i])
        localAlgorithm.input.set(kmeans.inputCentroids, centroids)

        res = localAlgorithm.compute()

        assignments[i] = res.get(kmeans.assignments)

    # --- 4. Report ---------------------------------------------------------
    printNumericTable(assignments[0], "First 10 cluster assignments from 1st node:", 10)
    printNumericTable(centroids, "First 10 dimensions of centroids:", 20, 10)
    printNumericTable(goalFunction, "Goal function value:")