from daal.algorithms import kmeans
import daal.algorithms.kmeans.init
from daal.data_management import (
    HomogenNumericTable, FileDataSource, DataSource, BlockDescriptor, readOnly
)
# Directory holding the example data sets, relative to the working directory.
DAAL_PREFIX = os.path.join('..', 'data')

# Input data set read by the script entry point.
datasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'kmeans_init_dense.csv')

# Assigned to the K-means algorithm's ``accuracyThreshold`` parameter.
cAccuracyThreshold = 0.01
def getSingleValue(pTbl, ntype):
    """Return the first element of the first row of a numeric table.

    Args:
        pTbl: Table object providing ``getBlockOfRows`` and
            ``releaseBlockOfRows``.
        ntype: Element type handed to ``BlockDescriptor``.

    Returns:
        Element 0 of the flattened array obtained for row 0.
    """
    block = BlockDescriptor(ntype=ntype)
    pTbl.getBlockOfRows(0, 1, readOnly, block)
    try:
        # flatten() yields a copy, so the value stays valid after the release.
        value = block.getArray().flatten()[0]
    finally:
        # Release the block even if reading the array fails.
        pTbl.releaseBlockOfRows(block)
    return value
def runKmeans(inputData, nClusters, method, methodName, oversamplingFactor=-1.0):
    """Initialize centroids with ``method``, run K-means, and print a report.

    Args:
        inputData: Data table passed to both the initialization step and the
            K-means algorithm.
        nClusters: Number of clusters requested from both steps.
        method: Initialization method constant from ``kmeans.init``.
        methodName: Human-readable name of ``method`` used in the output.
        oversamplingFactor: When positive, overrides the initialization
            parameter of the same name; otherwise the library value is kept.

    Returns:
        None. Parameters and results are written to standard output.

    Note:
        Reads the module-level names ``cAccuracyThreshold`` and
        ``nMaxIterations``; the latter is expected to be defined outside the
        section shown here -- TODO confirm.
    """
    # Step 1: compute the initial centroids.
    init = kmeans.init.Batch(nClusters, fptype=np.float32, method=method)
    init.input.set(kmeans.init.data, inputData)
    if oversamplingFactor > 0:
        init.parameter.oversamplingFactor = oversamplingFactor

    # Exactly one parameter line per run; the extra details are appended only
    # for the parallelPlusDense method.
    initMessage = "K-means init parameters: method = " + methodName
    if method == kmeans.init.parallelPlusDense:
        initMessage += (", oversamplingFactor = "
                        + str(init.parameter.oversamplingFactor)
                        + ", nRounds = " + str(init.parameter.nRounds))
    print(initMessage)

    centroids = init.compute().get(kmeans.init.centroids)

    # Step 2: run K-means from those centroids.
    algorithm = kmeans.Batch(nClusters, nMaxIterations)
    algorithm.input.set(kmeans.data, inputData)
    algorithm.input.set(kmeans.inputCentroids, centroids)
    algorithm.parameter.accuracyThreshold = cAccuracyThreshold
    print("K-means algorithm parameters: maxIterations = "
          + str(algorithm.parameter.maxIterations)
          + ", accuracyThreshold = "
          + str(algorithm.parameter.accuracyThreshold))
    res = algorithm.compute()

    # Step 3: report the objective function value and the iteration count.
    goalFunc = getSingleValue(res.get(kmeans.objectiveFunction), ntype=np.float32)
    nIterations = getSingleValue(res.get(kmeans.nIterations), ntype=np.intc)
    print("K-means algorithm results: Objective function value = "
          + str(goalFunc*1e-6)
          + "*1E+6, number of iterations = " + str(nIterations) + "\n")
if __name__ == "__main__":
    # Load the whole data set from the CSV file into a float32 table.
    inputData = HomogenNumericTable(ntype=np.float32)
    dataSource = FileDataSource(datasetFileName,
                                DataSource.notAllocateNumericTable,
                                DataSource.doDictionaryFromContext)
    dataSource.loadDataBlock(inputData)

    # One entry per run: (init method, display name[, oversamplingFactor]).
    # Entries without a third element use runKmeans' default factor.
    runs = (
        (kmeans.init.deterministicDense, "deterministicDense"),
        (kmeans.init.randomDense, "randomDense"),
        (kmeans.init.plusPlusDense, "plusPlusDense"),
        (kmeans.init.parallelPlusDense, "parallelPlusDense", 0.5),
        (kmeans.init.parallelPlusDense, "parallelPlusDense", 2.0),
    )
    # NOTE(review): nClusters is expected to be a module-level constant
    # defined outside the section shown here -- confirm.
    for runArgs in runs:
        runKmeans(inputData, nClusters, *runArgs)