55 from daal.algorithms
import kmeans
56 import daal.algorithms.kmeans.init
57 from daal.data_management
import HomogenNumericTable, FileDataSource, DataSource, BlockDescriptor, readOnly
# Directory holding the example data sets, expressed as a relative path.
DAAL_PREFIX = os.path.join('..', 'data')

# CSV input data set loaded by the script entry point.
datasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'kmeans_init_dense.csv')

# Value assigned to the K-means `accuracyThreshold` parameter in runKmeans.
# NOTE(review): `nClusters` and `nMaxIterations` are read further down in this
# file but are not defined in the visible text -- confirm that they are
# defined at module level alongside these constants.
cAccuracyThreshold = 0.01
def getSingleValue(pTbl, ntype):
    """Read one scalar out of a numeric table and return it.

    Args:
        pTbl: Table object providing ``getBlockOfRows`` and
            ``releaseBlockOfRows``.
        ntype: Element type handed to ``BlockDescriptor``.

    Returns:
        Element 0 of the flattened array obtained from the requested block.
    """
    block = BlockDescriptor(ntype=ntype)
    # presumably (row offset 0, one row) -- confirm against the table API
    pTbl.getBlockOfRows(0, 1, readOnly, block)
    try:
        value = block.getArray().flatten()[0]
    finally:
        # Release the block even if reading the array fails.
        pTbl.releaseBlockOfRows(block)
    # Callers use the result in arithmetic and string formatting, so the
    # value must actually be handed back.
    return value
def runKmeans(inputData, nClusters, method, methodName, oversamplingFactor = -1.0):
    """Initialize centroids with `method`, run K-means, and print a report.

    Args:
        inputData: Data table passed to both the initialization step and the
            K-means algorithm.
        nClusters: Number of clusters passed to both algorithm constructors.
        method: Initialization method identifier (a ``kmeans.init`` constant).
        methodName: Human-readable name of `method`, used only in the output.
        oversamplingFactor: Applied to the initialization parameters only
            when it is greater than zero; otherwise the library's existing
            value is left untouched.

    Reads the module-level names ``nMaxIterations`` and
    ``cAccuracyThreshold``. Prints to stdout and returns nothing.
    """
    # Initialization step: compute the starting centroids.
    init = kmeans.init.Batch(nClusters, fptype=np.float32, method=method)
    init.input.set(kmeans.init.data, inputData)
    if oversamplingFactor > 0:
        init.parameter.oversamplingFactor = oversamplingFactor

    # Emit exactly one summary line; the extra fields are reported only for
    # the parallelPlusDense method.
    initSummary = "K-means init parameters: method = " + methodName
    if method == kmeans.init.parallelPlusDense:
        initSummary += (
            ", oversamplingFactor = " + str(init.parameter.oversamplingFactor) +
            ", nRounds = " + str(init.parameter.nRounds)
        )
    print(initSummary)

    centroids = init.compute().get(kmeans.init.centroids)

    # Clustering step, seeded with the centroids computed above.
    algorithm = kmeans.Batch(nClusters, nMaxIterations)
    algorithm.input.set(kmeans.data, inputData)
    algorithm.input.set(kmeans.inputCentroids, centroids)
    algorithm.parameter.accuracyThreshold = cAccuracyThreshold
    print(
        "K-means algorithm parameters: maxIterations = " + str(algorithm.parameter.maxIterations) +
        ", accuracyThreshold = " + str(algorithm.parameter.accuracyThreshold)
    )
    res = algorithm.compute()

    # Report the objective function (scaled by 1e-6 for display) and the
    # iteration count.
    goalFunc = getSingleValue(res.get(kmeans.objectiveFunction), ntype=np.float32)
    nIterations = getSingleValue(res.get(kmeans.nIterations), ntype=np.intc)
    print(
        "K-means algorithm results: Objective function value = " + str(goalFunc*1e-6) +
        "*1E+6, number of iterations = " + str(nIterations) +
        "\n"
    )
if __name__ == "__main__":
    # The table is created here and filled by the data source below.
    inputData = HomogenNumericTable(ntype=np.float32)
    dataSource = FileDataSource(
        datasetFileName,
        DataSource.notAllocateNumericTable,
        DataSource.doDictionaryFromContext,
    )

    # Load the CSV contents into the table.
    dataSource.loadDataBlock(inputData)

    # One entry per run: (init method, display name, extra positional args).
    # The two parallelPlusDense runs differ only in the oversampling factor.
    initRuns = (
        (kmeans.init.deterministicDense, "deterministicDense", ()),
        (kmeans.init.randomDense, "randomDense", ()),
        (kmeans.init.plusPlusDense, "plusPlusDense", ()),
        (kmeans.init.parallelPlusDense, "parallelPlusDense", (0.5,)),
        (kmeans.init.parallelPlusDense, "parallelPlusDense", (2.0,)),
    )
    for initMethod, label, extraArgs in initRuns:
        runKmeans(inputData, nClusters, initMethod, label, *extraArgs)