Python* API Reference for Intel® Data Analytics Acceleration Library 2018 Update 3

svm_multi_class_csr_batch.py

1 # file: svm_multi_class_csr_batch.py
2 #===============================================================================
3 # Copyright 2014-2018 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 ## <a name="DAAL-EXAMPLE-PY-SVM_MULTI_CLASS_CSR_BATCH"></a>
17 ## \example svm_multi_class_csr_batch.py
18 
19 import os
20 import sys
21 
22 from daal.algorithms.svm import training, prediction
23 from daal.algorithms import classifier, kernel_function, multi_class_classifier
24 from daal.data_management import DataSourceIface, FileDataSource
25 
# Put the parent of this script's directory at the front of sys.path so that
# the shared ``utils`` helper module (presumably located there) can be imported.
utils_folder = os.path.dirname(os.path.dirname(__file__))
utils_folder = os.path.realpath(os.path.abspath(utils_folder))
if not (utils_folder in sys.path):
    sys.path[:0] = [utils_folder]
from utils import printNumericTables, createSparseTable
30 
# Input data set parameters: training/testing data and label files, all
# resolved relative to the batch data directory.
data_dir = os.path.join('..', 'data', 'batch')
(
    trainDatasetFileName,
    trainLabelsFileName,
    testDatasetFileName,
    testLabelsFileName,
) = (
    os.path.join(data_dir, fileName)
    for fileName in (
        'svm_multi_class_train_csr.csv',
        'svm_multi_class_train_labels.csv',
        'svm_multi_class_test_csr.csv',
        'svm_multi_class_test_labels.csv',
    )
)

# Number of classes handed to the multi-class classifier algorithms
nClasses = 5

# SVM training and prediction algorithm objects; they are plugged into the
# multi-class classifier as its underlying training/prediction algorithms.
trainingAlg = training.Batch()
predictionAlg = prediction.Batch()

# Parameters for the SVM kernel function: linear kernel, fastCSR method
kernel = kernel_function.linear.Batch(method=kernel_function.linear.fastCSR)

# Shared state, initially empty: trainModel() stores trainingResult and
# testModel() stores predictionResult.
trainingResult = predictionResult = testGroundTruth = None
49 
50 
def trainModel():
    """Train the multi-class SVM model and store the outcome globally.

    Reads the training data via ``createSparseTable`` and the training labels
    via a ``FileDataSource``, runs ``multi_class_classifier.training.Batch``
    configured with the module-level SVM training/prediction algorithms, and
    assigns the computed result to the module-level ``trainingResult``.
    """
    global trainingResult

    # Data source for the label .csv file; the numeric table is allocated by
    # the source and its dictionary is taken from the file context.
    labelsSource = FileDataSource(
        trainLabelsFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Numeric table holding the training observations
    sparseTrainData = createSparseTable(trainDatasetFileName)

    # Pull the labels from the input file into the source's numeric table
    labelsSource.loadDataBlock()

    # Multi-class trainer that delegates to the SVM algorithms
    trainer = multi_class_classifier.training.Batch(nClasses)
    trainer.parameter.training = trainingAlg
    trainer.parameter.prediction = predictionAlg

    # Hand the training data set and the dependent values to the trainer
    trainer.input.set(classifier.training.data, sparseTrainData)
    labelsTable = labelsSource.getNumericTable()
    trainer.input.set(classifier.training.labels, labelsTable)

    # Build the multi-class SVM model and keep the algorithm results
    # (Result class from multi_class_classifier.training)
    trainingResult = trainer.compute()
79 
80 
def testModel():
    """Run multi-class SVM prediction on the test set.

    Reads the test data via ``createSparseTable``, feeds it together with the
    model taken from the module-level ``trainingResult`` into
    ``multi_class_classifier.prediction.Batch``, and assigns the computed
    result to the module-level ``predictionResult``. ``trainModel()`` has to
    run first so that ``trainingResult`` is populated.
    """
    global predictionResult

    # Numeric table holding the observations to classify
    sparseTestData = createSparseTable(testDatasetFileName)

    # Multi-class predictor that delegates to the SVM algorithms
    predictor = multi_class_classifier.prediction.Batch(nClasses)
    predictor.parameter.training = trainingAlg
    predictor.parameter.prediction = predictionAlg

    # Hand the testing data set and the trained model to the predictor
    predictor.input.setTable(classifier.prediction.data, sparseTestData)
    trainedModel = trainingResult.get(classifier.training.model)
    predictor.input.setModel(classifier.prediction.model, trainedModel)

    # Predict multi-class SVM values and keep the algorithm results
    # (Result class from classifier.prediction)
    predictionResult = predictor.compute()
100 
101 
def printResults():
    """Print ground-truth labels next to the predicted labels.

    Loads the test labels from ``testLabelsFileName`` through a
    ``FileDataSource``, stores the resulting numeric table in the module-level
    ``testGroundTruth``, and passes it together with the prediction table from
    the module-level ``predictionResult`` to ``printNumericTables``.
    ``testModel()`` has to run first so that ``predictionResult`` is populated.
    """
    # Without this declaration the assignment below would only create a local
    # name and the module-level testGroundTruth would stay None, unlike the
    # other shared results that trainModel()/testModel() publish.
    global testGroundTruth

    # Initialize FileDataSource to retrieve the test labels from a .csv file
    testLabelsDataSource = FileDataSource(
        testLabelsFileName, DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    # Retrieve the data from the input file
    testLabelsDataSource.loadDataBlock()
    testGroundTruth = testLabelsDataSource.getNumericTable()

    # 20 is passed as the row limit named in the printed header
    # ("first 20 observations"); flt64=False is forwarded to the helper as-is.
    printNumericTables(
        testGroundTruth, predictionResult.get(classifier.prediction.prediction),
        "Ground truth", "Classification results",
        "Multi-class SVM classification sample program results (first 20 observations):",
        20, flt64=False
    )
119 
if __name__ == "__main__":
    # Configure the SVM algorithms: cache size for training, and the same
    # kernel object for both training and prediction.
    trainingAlg.parameter.cacheSize = 100000000
    for svmAlg in (trainingAlg, predictionAlg):
        svmAlg.parameter.kernel = kernel

    # Run the example pipeline: train, predict, then report.
    for runStage in (trainModel, testModel, printResults):
        runStage()

For more complete information about compiler optimizations, see our Optimization Notice.