Python* API Reference for Intel® Data Analytics Acceleration Library 2018 Update 1

neural_net_dense_distr.py

1 # file: neural_net_dense_distr.py
2 #===============================================================================
3 # Copyright 2014-2017 Intel Corporation
4 # All Rights Reserved.
5 #
6 # If this software was obtained under the Intel Simplified Software License,
7 # the following terms apply:
8 #
9 # The source code, information and material ("Material") contained herein is
10 # owned by Intel Corporation or its suppliers or licensors, and title to such
11 # Material remains with Intel Corporation or its suppliers or licensors. The
12 # Material contains proprietary information of Intel or its suppliers and
13 # licensors. The Material is protected by worldwide copyright laws and treaty
14 # provisions. No part of the Material may be used, copied, reproduced,
15 # modified, published, uploaded, posted, transmitted, distributed or disclosed
16 # in any way without Intel's prior express written permission. No license under
17 # any patent, copyright or other intellectual property rights in the Material
18 # is granted to or conferred upon you, either expressly, by implication,
19 # inducement, estoppel or otherwise. Any license under such intellectual
20 # property rights must be express and approved by Intel in writing.
21 #
22 # Unless otherwise agreed by Intel in writing, you may not remove or alter this
23 # notice or any other notice embedded in Materials by Intel or Intel's
24 # suppliers or licensors in any way.
25 #
26 #
27 # If this software was obtained under the Apache License, Version 2.0 (the
28 # "License"), the following terms apply:
29 #
30 # You may not use this file except in compliance with the License. You may
31 # obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
32 #
33 #
34 # Unless required by applicable law or agreed to in writing, software
35 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
36 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
37 #
38 # See the License for the specific language governing permissions and
39 # limitations under the License.
40 #===============================================================================
41 
42 #
43 # ! Content:
44 # ! Python example of neural network training and scoring in the distributed processing mode
45 # !*****************************************************************************
46 
47 #
48 
49 
50 #
51 
52 import os
53 import sys
54 
55 import numpy as np
56 
57 from daal import step1Local, step2Master
58 from daal.algorithms.neural_networks import initializers
59 from daal.algorithms.neural_networks import layers
60 from daal.algorithms import optimization_solver
61 from daal.algorithms.neural_networks import prediction, training
62 from daal.data_management import NumericTable, HomogenNumericTable, readOnly, SubtensorDescriptor, HomogenTensor
63 
# The shared `utils` helper module is expected one directory above the folder
# that holds this script; put that directory at the front of sys.path (once)
# so the import below can resolve it.
utils_folder = os.path.realpath(
    os.path.abspath(
        os.path.dirname(os.path.dirname(__file__))
    )
)
if not (utils_folder in sys.path):
    sys.path.insert(0, utils_folder)
from utils import printTensors, readTensorFromCSV
68 
# Input data set parameters.
# Training data and ground truth are split into one .csv file per node;
# the test set is a single pair of files.
_distributedDataDir = os.path.join("..", "data", "distributed")
_batchDataDir = os.path.join("..", "data", "batch")

trainDatasetFileNames = [
    os.path.join(_distributedDataDir, "neural_network_train_dense_1.csv"),
    os.path.join(_distributedDataDir, "neural_network_train_dense_2.csv"),
    os.path.join(_distributedDataDir, "neural_network_train_dense_3.csv"),
    os.path.join(_distributedDataDir, "neural_network_train_dense_4.csv"),
]
trainGroundTruthFileNames = [
    os.path.join(_distributedDataDir, "neural_network_train_ground_truth_1.csv"),
    os.path.join(_distributedDataDir, "neural_network_train_ground_truth_2.csv"),
    os.path.join(_distributedDataDir, "neural_network_train_ground_truth_3.csv"),
    os.path.join(_distributedDataDir, "neural_network_train_ground_truth_4.csv"),
]

testDatasetFile = os.path.join(_batchDataDir, "neural_network_test.csv")
testGroundTruthFile = os.path.join(_batchDataDir, "neural_network_test_ground_truth.csv")

# Number of simulated nodes, the global mini-batch size, and the share of
# each mini-batch that a single node processes.
nNodes = 4
batchSize = 100
batchSizeLocal = batchSize // nNodes
89 
90 
def configureNet():
    """Build the training topology used by the master and by every local node.

    The topology is a linear chain of three fully-connected layers followed
    by a softmax cross-entropy loss layer.

    Returns:
        The populated ``training.Topology`` object.
    """
    secondLayerSize = 40

    def makeFullyConnected(layerSize, weightsRange, biasesRange):
        # Fully-connected layer whose weights and biases are both drawn from
        # uniform initializers built from the given (low, high) argument pairs.
        layer = layers.fullyconnected.Batch(layerSize)
        layer.parameter.weightsInitializer = initializers.uniform.Batch(*weightsRange)
        layer.parameter.biasesInitializer = initializers.uniform.Batch(*biasesRange)
        return layer

    # Layers are created in chain order, then the loss layer
    chainLayers = [
        makeFullyConnected(20, (-0.001, 0.001), (0, 0.5)),
        makeFullyConnected(secondLayerSize, (0.5, 1), (0.5, 1)),
        makeFullyConnected(2, (-0.005, 0.005), (0, 1)),
    ]
    chainLayers.append(layers.loss.softmax_cross.Batch())

    # Register every layer with the topology, remembering the returned ids
    topology = training.Topology()
    layerIds = [topology.add(layer) for layer in chainLayers]

    # Link each layer to its successor so the chain is fc1 -> fc2 -> fc3 -> loss
    for currentId, nextId in zip(layerIds, layerIds[1:]):
        topology.get(currentId).addNext(nextId)

    return topology
125 
126 
def getNextSubtensor(inputTensor, startPos, nElements):
    """Copy a range of `inputTensor` into a new float32 HomogenTensor.

    Args:
        inputTensor: Tensor to read from; it is only accessed read-only.
        startPos: Start position forwarded to ``getSubtensor``.
        nElements: Number of elements forwarded to ``getSubtensor``.
            NOTE(review): with an empty fixed-dimensions list this presumably
            selects `nElements` entries along the first dimension starting at
            `startPos` -- confirm against the DAAL Tensor documentation.

    Returns:
        A new ``HomogenTensor`` built from a NumPy copy of the requested data,
        so it stays valid after the subtensor block has been released.
    """
    subtensorBlock = SubtensorDescriptor(ntype=np.float32)
    inputTensor.getSubtensor([], startPos, nElements, readOnly, subtensorBlock)
    try:
        # np.array() copies, detaching the data from the block before release
        subtensorData = np.array(subtensorBlock.getArray(), dtype=np.float32)
    finally:
        # Always hand the block back, even if the copy above fails
        inputTensor.releaseSubtensor(subtensorBlock)

    return HomogenTensor(subtensorData, ntype=np.float32)
137 
138 
def initializeNetwork():
    """Load the per-node training data and set up master and local trainers.

    Returns:
        A tuple ``(net, netLocal, trainingData, trainingGroundTruth)`` where
        ``net`` is the master-step training algorithm, ``netLocal`` is a list
        with one local-step training algorithm per node, and the last two
        items are lists of the tensors read for each node.
    """
    # Read every node's training set and ground truth from its .csv files
    trainingData = []
    trainingGroundTruth = []
    for nodeId in range(nNodes):
        trainingData.append(readTensorFromCSV(trainDatasetFileNames[nodeId]))
        trainingGroundTruth.append(readTensorFromCSV(trainGroundTruthFileNames[nodeId], True))

    # Dimensions of one local mini-batch: those of the first node's data with
    # the leading dimension replaced by the local batch size
    batchDims = trainingData[0].getDimensions()
    batchDims[0] = batchSizeLocal

    # Stochastic gradient descent (SGD) solver handed to the master algorithm
    solver = optimization_solver.sgd.Batch(fptype=np.float32)
    solver.parameter.batchSize = batchSizeLocal

    # Master-side network: configure, create and initialize
    masterTopology = configureNet()
    net = training.Distributed(step2Master, solver)
    net.parameter.batchSize = batchSizeLocal
    net.initialize(batchDims, masterTopology)

    # Local-side networks: each node gets its own topology and its own
    # freshly initialized training model as the step-1 input model
    nodeTopologies = []
    netLocal = []
    for _ in range(nNodes):
        nodeTopology = configureNet()
        nodeTopologies.append(nodeTopology)

        nodeModel = training.Model()
        nodeModel.initialize_Float32(batchDims, nodeTopology)

        nodeNet = training.Distributed(step1Local)
        nodeNet.input.setStep1LocalInput(training.inputModel, nodeModel)
        nodeNet.parameter.batchSize = batchSizeLocal
        netLocal.append(nodeNet)

    return (net, netLocal, trainingData, trainingGroundTruth)
179 
180 
def trainModel(net, netLocal, trainingData, trainingGroundTruth):
    """Run distributed training and return the resulting prediction model.

    Args:
        net: Master-step training algorithm.
        netLocal: List of local-step training algorithms, one per node.
        trainingData: List of per-node training data tensors.
        trainingGroundTruth: List of per-node ground-truth tensors.

    Returns:
        The prediction model obtained from the finalized training result.
    """
    # SGD solver with a one-element learning-rate table, installed on the master
    solver = optimization_solver.sgd.Batch(fptype=np.float32)
    stepSize = 0.001
    solver.parameter.learningRateSequence = HomogenNumericTable(1, 1, NumericTable.doAllocate, stepSize)
    net.parameter.optimizationSolver = solver

    # The first node's sample count drives the loop; only complete local
    # batches are processed, so a trailing partial batch is skipped
    sampleCount = trainingData[0].getDimensions()[0]
    lastBatchStart = sampleCount - batchSizeLocal
    for offset in range(0, lastBatchStart + 1, batchSizeLocal):
        # Local step: every node processes its slice of the current batch
        for nodeId in range(nNodes):
            worker = netLocal[nodeId]

            dataBatch = getNextSubtensor(trainingData[nodeId], offset, batchSizeLocal)
            worker.input.setInput(training.data, dataBatch)
            truthBatch = getNextSubtensor(trainingGroundTruth[nodeId], offset, batchSizeLocal)
            worker.input.setInput(training.groundTruth, truthBatch)

            localResult = worker.compute()

            # Hand the node's partial result to the master algorithm
            net.input.add(training.partialResults, nodeId, localResult)

        # Master step: combine the partial results and fetch the new parameters
        net.compute()
        masterResult = net.getPartialResult().get(training.resultFromMaster)
        weightsAndBiases = masterResult.get(training.model).getWeightsAndBiases()

        # Push the updated weights and biases into every node's input model
        for nodeId in range(nNodes):
            nodeModel = netLocal[nodeId].input.getStep1LocalInput(training.inputModel)
            nodeModel.setWeightsAndBiases(weightsAndBiases)

    # Finalize on the master and extract the prediction model
    trainingResult = net.finalizeCompute()
    return trainingResult.get(training.model).getPredictionModel_Float32()
217 
218  # Retrieve training and prediction models of the neural network
219  return res.get(training.model).getPredictionModel_Float32()
220 
221 
def testModel(predictionModel):
    """Run batch prediction on the test data set with the given model.

    Args:
        predictionModel: Prediction model passed to the prediction algorithm.

    Returns:
        The object returned by the prediction algorithm's ``compute()``.
    """
    # Load the test observations from the .csv file into a tensor
    testData = readTensorFromCSV(testDatasetFile)

    # Prediction algorithm whose batch size is the size of the test data's
    # dimension 0, i.e. the whole test set is processed as one batch
    predictor = prediction.Batch()
    predictor.parameter.batchSize = testData.getDimensionSize(0)

    # Wire in the model and the data, then compute
    predictor.input.setModelInput(prediction.model, predictionModel)
    predictor.input.setTensorInput(prediction.data, testData)
    return predictor.compute()
238 
239 
def printResults(testGroundTruthFile, predictionResult):
    """Print the test ground truth next to the network's predictions.

    Args:
        testGroundTruthFile: Path of the .csv file holding the ground truth.
        predictionResult: Result object produced by the prediction algorithm.
    """
    # Load the expected values from the .csv file into a tensor
    expected = readTensorFromCSV(testGroundTruthFile)
    predicted = predictionResult.getResult(prediction.prediction)

    printTensors(
        expected,
        predicted,
        "Ground truth",
        "Neural network predictions: each class probability",
        "Neural network classification results (first 20 observations):",
        20,
    )
247 
248 
def main():
    """Initialize the networks, train, predict on the test set and print the results."""
    net, netLocal, trainingData, trainingGroundTruth = initializeNetwork()
    predictionModel = trainModel(net, netLocal, trainingData, trainingGroundTruth)
    predictionResult = testModel(predictionModel)
    printResults(testGroundTruthFile, predictionResult)


# Run the example only when this file is executed as a script
if __name__ == "__main__":
    main()

For more complete information about compiler optimizations, see our Optimization Notice.