Python* API Reference for Intel® Data Analytics Acceleration Library 2019

svm_two_class_dense_batch.py

1 # file: svm_two_class_dense_batch.py
2 #===============================================================================
3 # Copyright 2014-2018 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 
17 
18 
19 import os
20 import sys
21 
22 from daal.algorithms.svm import training, prediction
23 from daal.algorithms import kernel_function, classifier
24 from daal.data_management import (
25  DataSourceIface, FileDataSource, HomogenNumericTable, MergedNumericTable, NumericTableIface
26 )
27 
# Put the directory one level above this script's folder at the front of
# sys.path so that the ``utils`` module imported below can be resolved.
utils_folder = os.path.realpath(
    os.path.abspath(
        os.path.dirname(os.path.dirname(__file__))
    )
)
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTables
32 
# Number of feature columns in each data set; the remaining column of every
# row is loaded into a separate one-column label table.
nFeatures = 20

# Directory that holds the batch-mode sample data (relative path)
DATA_PREFIX = os.path.join('..', 'data', 'batch')

# CSV files with the training and the testing observations
testDatasetFileName = os.path.join(DATA_PREFIX, 'svm_two_class_test_dense.csv')
trainDatasetFileName = os.path.join(DATA_PREFIX, 'svm_two_class_train_dense.csv')
40 
# Linear kernel function object, shared by the training and prediction stages
kernel = kernel_function.linear.Batch()

# Module-level state filled in by trainModel() and testModel():
#   trainingResult   - result object returned by the training algorithm
#   predictionResult - result object returned by the prediction algorithm
#   testGroundTruth  - table with the labels read from the test data set
trainingResult = predictionResult = testGroundTruth = None
48 
49 
def trainModel():
    """Train the SVM model on the training data set.

    Loads ``trainDatasetFileName`` into a feature table with ``nFeatures``
    columns and a one-column label table, runs the batch SVM training
    algorithm with the module-level ``kernel``, and stores the returned
    result in the global ``trainingResult``.
    """
    global trainingResult

    # CSV-backed data source for the training observations
    source = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature and label tables are created without allocated storage and
    # combined into one merged table that receives the loaded rows.
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    labels = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    featuresAndLabels = MergedNumericTable(features, labels)

    # Read the file contents into the merged table
    source.loadDataBlock(featuresAndLabels)

    # Set up the SVM training algorithm: kernel function and cache size
    trainer = training.Batch()
    trainer.parameter.kernel = kernel
    trainer.parameter.cacheSize = 600000000

    # Hand the observations and their labels to the algorithm
    trainer.input.set(classifier.training.data, features)
    trainer.input.set(classifier.training.labels, labels)

    # Run the training and keep its result for the prediction stage
    trainingResult = trainer.compute()
79 
80 
def testModel():
    """Run SVM prediction on the test data set using the trained model.

    Loads ``testDatasetFileName`` into a feature table with ``nFeatures``
    columns and a one-column label table, feeds the features together with
    the model taken from the global ``trainingResult`` to the batch SVM
    prediction algorithm, and stores the outcome in the global
    ``predictionResult``. The loaded labels are kept in the global
    ``testGroundTruth``.
    """
    global predictionResult, testGroundTruth

    # CSV-backed data source for the test observations
    source = FileDataSource(
        testDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature and label tables are created without allocated storage and
    # combined into one merged table that receives the loaded rows.
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    featuresAndLabels = MergedNumericTable(features, testGroundTruth)

    # Read the file contents into the merged table
    source.loadDataBlock(featuresAndLabels)

    # Set up the SVM prediction algorithm with the same kernel as training
    predictor = prediction.Batch()
    predictor.parameter.kernel = kernel

    # Provide the test observations and the previously trained model
    predictor.input.setTable(classifier.prediction.data, features)
    trainedModel = trainingResult.get(classifier.training.model)
    predictor.input.setModel(classifier.prediction.model, trainedModel)

    # Run the prediction, then fetch its result object
    predictor.compute()
    predictionResult = predictor.getResult()
112 
113 
def printResults():
    """Print the test labels next to the predicted values.

    Reads the globals ``testGroundTruth`` and ``predictionResult`` and passes
    them to ``printNumericTables`` with a row limit of 20.
    """
    predicted = predictionResult.get(classifier.prediction.prediction)

    printNumericTables(
        testGroundTruth,
        predicted,
        "Ground truth\t",
        "Classification results",
        "SVM classification results (first 20 observations):",
        20,
        flt64=False,
    )
121 
if __name__ == "__main__":
    # Run the example stages in order: train, predict, then report.
    for stage in (trainModel, testModel, printResults):
        stage()

For more complete information about compiler optimizations, see our Optimization Notice.