# Intel DAAL K-Means algorithm and its centroid-initialization sub-package.
import daal.algorithms.kmeans as kmeans
import daal.algorithms.kmeans.init as init
# Step identifiers passed as the first argument of the Distributed algorithms.
from daal import step1Local, step2Master
from daal.data_management import FileDataSource, DataSourceIface
# Make the `utils` module importable: the parent of this script's directory
# is prepended to sys.path unless it is already listed there.
# NOTE(review): relies on `os` and `sys` being imported earlier in the file;
# those imports are not visible in this excerpt -- confirm.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
# Root of the example data sets, relative to the current working directory.
DAAL_PREFIX = os.path.join('..', 'data')

# One CSV input file per data block; the main script indexes this list with
# the block number (dataFileNames[i]).
dataFileNames = [
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_1.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_2.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_3.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_4.csv'),
]

# NOTE(review): nClusters, nIterations and nBlocks are used by this script
# but their definitions are not visible in this excerpt -- they must be
# defined at this point, before dataTable is built. Confirm their values
# against the original example.

# Used together with nBlocks to compute the total row count and the per-block
# row offset handed to the local initialization step.
# NOTE(review): presumably the number of rows in each CSV file -- confirm.
nVectorsInBlock = 2500

# Per-block slots, filled with the loaded numeric tables by the main script.
dataTable = [0] * nBlocks
if __name__ == "__main__":
    # Step-2 (master) K-Means object; it receives the partial results of
    # every block and is re-used on each iteration.
    masterAlgorithm = kmeans.Distributed(step2Master, nClusters, method=kmeans.lloydDense)

    # Per-block slots for the final cluster assignments.
    assignments = [0] * nBlocks

    # ---- Stage 1: load every block and compute the initial centroids ----
    masterInitAlgorithm = init.Distributed(step2Master, nClusters, method=init.randomDense)
    for i in range(nBlocks):
        # Read the whole CSV file of block i into a numeric table.
        dataSource = FileDataSource(
            dataFileNames[i], DataSourceIface.doAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext
        )
        dataSource.loadDataBlock()
        dataTable[i] = dataSource.getNumericTable()

        # Step-1 (local) initialization for block i.
        # NOTE(review): the two integers after nClusters are presumably the
        # total row count over all blocks and this block's starting row
        # offset -- confirm against the DAAL API.
        localInit = init.Distributed(step1Local, nClusters, nBlocks * nVectorsInBlock, i * nVectorsInBlock, method=init.randomDense)
        localInit.input.set(init.data, dataTable[i])
        res = localInit.compute()
        masterInitAlgorithm.input.add(init.partialResults, res)

    # Merge the local initialization results into the starting centroids.
    masterInitAlgorithm.compute()
    res = masterInitAlgorithm.finalizeCompute()
    centroids = res.get(init.centroids)

    # ---- Stage 2: iterate local step / master step nIterations times ----
    for it in range(nIterations):
        for i in range(nBlocks):
            # The third argument used to be `it == nIterations`, which can
            # never be true inside range(nIterations); the literal False
            # keeps exactly that behaviour. Assignments are obtained by the
            # separate Batch pass in stage 3.
            # NOTE(review): presumably this is DAAL's assignFlag -- confirm.
            localAlgorithm = kmeans.Distributed(step1Local, nClusters, False, method=kmeans.lloydDense)

            # Feed block i and the current centroids to the local step.
            localAlgorithm.input.set(kmeans.data, dataTable[i])
            localAlgorithm.input.set(kmeans.inputCentroids, centroids)

            pres = localAlgorithm.compute()

            masterAlgorithm.input.add(kmeans.partialResults, pres)

        # Combine the partial results of all blocks for this iteration.
        masterAlgorithm.compute()
        result = masterAlgorithm.finalizeCompute()

        centroids = result.get(kmeans.centroids)
        goalFunction = result.get(kmeans.goalFunction)

    # ---- Stage 3: compute the assignments of every block ----
    for i in range(nBlocks):
        # NOTE(review): the 0 is presumably the iteration count, so the
        # final centroids are used unchanged -- confirm against the DAAL API.
        localAlgorithm = kmeans.Batch(nClusters, 0, method=kmeans.lloydDense)

        localAlgorithm.input.set(kmeans.data, dataTable[i])
        localAlgorithm.input.set(kmeans.inputCentroids, centroids)

        res = localAlgorithm.compute()

        assignments[i] = res.get(kmeans.assignments)

    # ---- Report ----
    printNumericTable(assignments[0], "First 10 cluster assignments from 1st node:", 10)
    printNumericTable(centroids, "First 10 dimensions of centroids:", 20, 10)
    printNumericTable(goalFunction, "Goal function value:")