Python* API Reference for Intel® Data Analytics Acceleration Library 2019 Update 5

svm_two_class_metrics_dense_batch.py

Deprecation Notice: With the introduction of daal4py, a package that supersedes PyDAAL, Intel is deprecating PyDAAL and will discontinue support starting with Intel® DAAL 2021 and Intel® Distribution for Python 2021. Until then, Intel will continue to provide compatible PyDAAL pip and conda packages for newer releases of Intel DAAL and make them available in open source. However, Intel will not add the new features of Intel DAAL to PyDAAL. Intel recommends that developers switch to and use daal4py.

Note: To find daal4py examples, refer to the daal4py documentation or browse the GitHub repository.

1 # file: svm_two_class_metrics_dense_batch.py
2 #===============================================================================
3 # Copyright 2014-2019 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 #
17 # ! Content:
18 # ! Python example of two-class support vector machine (SVM) quality metrics
19 # !
20 # !*****************************************************************************
21 
22 #
23 ## <a name="DAAL-EXAMPLE-PY-SVM_TWO_CLASS_QUALITY_METRIC_SET_BATCH"></a>
24 ## \example svm_two_class_metrics_dense_batch.py
25 #
26 
27 import os
28 import sys
29 
30 from daal.algorithms import kernel_function
31 from daal.algorithms.classifier.quality_metric import binary_confusion_matrix
32 from daal.algorithms import svm
33 from daal.algorithms import classifier
34 from daal.data_management import (
35  DataSourceIface, FileDataSource, readOnly, BlockDescriptor,
36  HomogenNumericTable, NumericTableIface, MergedNumericTable
37 )
38 
# Put the parent of this script's directory at the front of sys.path; the
# `utils` import below is expected to resolve from there.
utils_folder = os.path.dirname(os.path.dirname(__file__))
utils_folder = os.path.realpath(os.path.abspath(utils_folder))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTables, printNumericTable
43 
# Input data set parameters: both CSV files live under ../data/batch
DATA_PREFIX = os.path.join('..', 'data', 'batch')
trainDatasetFileName, testDatasetFileName = (
    os.path.join(DATA_PREFIX, csvName)
    for csvName in ('svm_two_class_train_dense.csv', 'svm_two_class_test_dense.csv')
)

# Number of feature columns read from each data set
nFeatures = 20

# Kernel function shared by the SVM training and prediction algorithms
kernel = kernel_function.linear.Batch()

# Results of the individual stages; filled in by trainModel(), testModel()
# and testModelQuality() respectively
trainingResult = predictionResult = qualityMetricSetResult = None

# Label tables compared by the quality metrics; filled in by
# testModelQuality() and testModel() respectively
predictedLabels = groundTruthLabels = None
61 
62 
def trainModel():
    """Train the SVM model on the training data set.

    Loads ``trainDatasetFileName`` into a feature table and a label table,
    runs ``svm.training.Batch`` with the module-level ``kernel`` and stores
    the outcome in the module-level ``trainingResult``.
    """
    global trainingResult

    # Tables that will receive the training features and their labels;
    # merging them lets a single load fill both from the same CSV file.
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    labels = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    featuresAndLabels = MergedNumericTable(features, labels)

    # FileDataSource<CSVFeatureManager> over the training .csv file
    csvSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )
    csvSource.loadDataBlock(featuresAndLabels)

    # Configure the SVM training algorithm
    trainer = svm.training.Batch()
    trainer.parameter.kernel = kernel
    trainer.parameter.cacheSize = 600000000

    # Hand the training data and the dependent values to the algorithm
    trainer.input.set(classifier.training.data, features)
    trainer.input.set(classifier.training.labels, labels)

    # Build the model; the result is kept for testModel()
    trainingResult = trainer.compute()
92 
def testModel():
    """Run SVM prediction on the test data set.

    Loads ``testDatasetFileName`` into a feature table and the module-level
    ``groundTruthLabels`` table, predicts with the model held in
    ``trainingResult`` and stores the outcome in the module-level
    ``predictionResult``. ``trainModel()`` must have run first.
    """
    global predictionResult, groundTruthLabels

    # Tables that will receive the test features and the expected labels;
    # merging them lets a single load fill both from the same CSV file.
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    groundTruthLabels = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    featuresAndLabels = MergedNumericTable(features, groundTruthLabels)

    # FileDataSource<CSVFeatureManager> over the test .csv file.
    # NOTE(review): this passes doAllocateNumericTable while trainModel()
    # passes notAllocateNumericTable for the same loading pattern - confirm
    # whether the difference is intentional.
    csvSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )
    csvSource.loadDataBlock(featuresAndLabels)

    # Configure the SVM prediction algorithm with the same kernel as training
    predictor = svm.prediction.Batch()
    predictor.parameter.kernel = kernel

    # Hand the test data and the trained model to the algorithm
    trainedModel = trainingResult.get(classifier.training.model)
    predictor.input.setTable(classifier.prediction.data, features)
    predictor.input.setModel(classifier.prediction.model, trainedModel)

    # compute() returns a Result from daal.algorithms.classifier.prediction
    predictionResult = predictor.compute()
122 
123 
def testModelQuality():
    """Compute the confusion-matrix quality metrics for the predictions.

    Extracts the predicted labels from ``predictionResult`` into the
    module-level ``predictedLabels``, compares them against
    ``groundTruthLabels`` and stores the metrics in the module-level
    ``qualityMetricSetResult``. ``testModel()`` must have run first.
    """
    global predictedLabels, qualityMetricSetResult

    # Labels produced by the prediction stage
    predictedLabels = predictionResult.get(classifier.prediction.prediction)

    # Quality metric set for the SVM algorithm and its confusion-matrix input
    metricSet = svm.quality_metric_set.Batch()
    inputCollection = metricSet.getInputDataCollection()
    confusionInput = inputCollection.getInput(svm.quality_metric_set.confusionMatrix)

    confusionInput.set(binary_confusion_matrix.predictedLabels, predictedLabels)
    confusionInput.set(binary_confusion_matrix.groundTruthLabels, groundTruthLabels)

    # compute() returns a ResultCollection from svm.quality_metric_set
    qualityMetricSetResult = metricSet.compute()
141 
142 
def printResults():
    """Print the classification results and the quality metrics.

    Reads the module-level ``groundTruthLabels``, ``predictedLabels`` and
    ``qualityMetricSetResult``, so ``testModelQuality()`` must have run
    first. Everything is written to standard output.
    """
    # Print the classification results
    printNumericTables(
        groundTruthLabels, predictedLabels,
        "Ground truth", "Classification results",
        "SVM classification results (first 20 observations):", 20, interval=15, flt64=False
    )

    # Print the quality metrics
    qualityMetricResult = qualityMetricSetResult.getResult(svm.quality_metric_set.confusionMatrix)
    printNumericTable(qualityMetricResult.get(binary_confusion_matrix.confusionMatrix), "Confusion matrix:")

    block = BlockDescriptor()
    qualityMetricsTable = qualityMetricResult.get(binary_confusion_matrix.binaryMetrics)
    qualityMetricsTable.getBlockOfRows(0, 1, readOnly, block)
    try:
        # Index the flattened metrics row with the binary_confusion_matrix
        # metric identifiers.
        qualityMetricsData = block.getArray().flatten()
        print("Accuracy: {0:.3f}".format(qualityMetricsData[binary_confusion_matrix.accuracy]))
        print("Precision: {0:.3f}".format(qualityMetricsData[binary_confusion_matrix.precision]))
        print("Recall: {0:.3f}".format(qualityMetricsData[binary_confusion_matrix.recall]))
        print("F-score: {0:.3f}".format(qualityMetricsData[binary_confusion_matrix.fscore]))
        print("Specificity: {0:.3f}".format(qualityMetricsData[binary_confusion_matrix.specificity]))
        print("AUC: {0:.3f}".format(qualityMetricsData[binary_confusion_matrix.AUC]))
    finally:
        # Release the block even if reading or printing a metric fails, so
        # the acquire/release pair always stays balanced.
        qualityMetricsTable.releaseBlockOfRows(block)
167 
if __name__ == "__main__":
    # Run the example stages in order; each stage reads module-level state
    # written by the stages before it.
    for stage in (trainModel, testModel, testModelQuality, printResults):
        stage()

For more complete information about compiler optimizations, see our Optimization Notice.