Python* API Reference for Intel® Data Analytics Acceleration Library 2019

neural_net_dense_distr.py
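This example trains and scores a dense neural network in the distributed processing mode. Four local nodes (step1Local) each compute partial results on their own portion of the training data; a master node (step2Master) aggregates those partial results, updates the weights and biases, and distributes them back to the local nodes after every batch. The trained model is then used for prediction in the batch processing mode.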

# file: neural_net_dense_distr.py
#===============================================================================
# Copyright 2014-2018 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#===============================================================================

#
# ! Content:
# ! Python example of neural network training and scoring in the distributed processing mode
# !*****************************************************************************

#
## <a name="DAAL-EXAMPLE-PY-NEURAL_NET_DENSE_DISTR"></a>
## \example neural_net_dense_distr.py
#
import os
import sys

import numpy as np

from daal import step1Local, step2Master
from daal.algorithms.neural_networks import initializers
from daal.algorithms.neural_networks import layers
from daal.algorithms import optimization_solver
from daal.algorithms.neural_networks import prediction, training
from daal.data_management import NumericTable, HomogenNumericTable, readOnly, SubtensorDescriptor, HomogenTensor

utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printTensors, readTensorFromCSV

# Input data set parameters
trainDatasetFileNames = [
    os.path.join("..", "data", "distributed", "neural_network_train_dense_1.csv"),
    os.path.join("..", "data", "distributed", "neural_network_train_dense_2.csv"),
    os.path.join("..", "data", "distributed", "neural_network_train_dense_3.csv"),
    os.path.join("..", "data", "distributed", "neural_network_train_dense_4.csv")
]
trainGroundTruthFileNames = [
    os.path.join("..", "data", "distributed", "neural_network_train_ground_truth_1.csv"),
    os.path.join("..", "data", "distributed", "neural_network_train_ground_truth_2.csv"),
    os.path.join("..", "data", "distributed", "neural_network_train_ground_truth_3.csv"),
    os.path.join("..", "data", "distributed", "neural_network_train_ground_truth_4.csv")
]

testDatasetFile = os.path.join("..", "data", "batch", "neural_network_test.csv")
testGroundTruthFile = os.path.join("..", "data", "batch", "neural_network_test_ground_truth.csv")

nNodes = 4
batchSize = 100
batchSizeLocal = int(batchSize / nNodes)
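# With the values above, each of the 4 local nodes processes
# batchSizeLocal = 100 / 4 = 25 samples per training iteration,
# so one iteration consumes batchSize samples in total.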

def configureNet():
    # Create layers of the neural network
    # Create fully-connected layer and initialize layer parameters
    fullyConnectedLayer1 = layers.fullyconnected.Batch(20)
    fullyConnectedLayer1.parameter.weightsInitializer = initializers.uniform.Batch(-0.001, 0.001)
    fullyConnectedLayer1.parameter.biasesInitializer = initializers.uniform.Batch(0, 0.5)

    # Create fully-connected layer and initialize layer parameters
    fullyConnectedLayer2 = layers.fullyconnected.Batch(40)
    fullyConnectedLayer2.parameter.weightsInitializer = initializers.uniform.Batch(0.5, 1)
    fullyConnectedLayer2.parameter.biasesInitializer = initializers.uniform.Batch(0.5, 1)

    # Create fully-connected layer and initialize layer parameters
    fullyConnectedLayer3 = layers.fullyconnected.Batch(2)
    fullyConnectedLayer3.parameter.weightsInitializer = initializers.uniform.Batch(-0.005, 0.005)
    fullyConnectedLayer3.parameter.biasesInitializer = initializers.uniform.Batch(0, 1)

    # Create softmax cross-entropy loss layer
    softmaxCrossEntropyLayer = layers.loss.softmax_cross.Batch()

    # Create topology of the neural network
    topology = training.Topology()

    # Add layers to the topology of the neural network
    fc1 = topology.add(fullyConnectedLayer1)
    fc2 = topology.add(fullyConnectedLayer2)
    fc3 = topology.add(fullyConnectedLayer3)
    sm = topology.add(softmaxCrossEntropyLayer)
    topology.get(fc1).addNext(fc2)
    topology.get(fc2).addNext(fc3)
    topology.get(fc3).addNext(sm)

    return topology

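# The topology assembled in configureNet() is a linear chain
# (layer sizes taken from the constructor arguments above):
#     fullyconnected(20) -> fullyconnected(40) -> fullyconnected(2) -> softmax cross-entropy loss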

def getNextSubtensor(inputTensor, startPos, nElements):
    # Copy nElements rows of inputTensor, starting at row startPos,
    # into a new tensor that can be passed to the training algorithm
    subtensorBlock = SubtensorDescriptor(ntype=np.float32)
    inputTensor.getSubtensor([], startPos, nElements, readOnly, subtensorBlock)
    subtensorData = np.array(subtensorBlock.getArray(), dtype=np.float32)
    inputTensor.releaseSubtensor(subtensorBlock)

    return HomogenTensor(subtensorData, ntype=np.float32)

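# For example, with batchSizeLocal = 25 the training loop below requests
# getNextSubtensor(t, 0, 25), getNextSubtensor(t, 25, 25), and so on,
# producing one independent copy of the data per batch.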

def initializeNetwork():
    trainingData = [None] * nNodes
    trainingGroundTruth = [None] * nNodes
    # Read training data set from a .csv file and create tensors to store input data
    for node in range(nNodes):
        trainingData[node] = readTensorFromCSV(trainDatasetFileNames[node])
        trainingGroundTruth[node] = readTensorFromCSV(trainGroundTruthFileNames[node], True)

    sampleSize = trainingData[0].getDimensions()
    sampleSize[0] = batchSizeLocal

    # Create stochastic gradient descent (SGD) optimization solver algorithm
    sgdAlgorithm = optimization_solver.sgd.Batch(fptype=np.float32)
    sgdAlgorithm.parameter.batchSize = batchSizeLocal

    # Configure the neural network
    topologyMaster = configureNet()
    net = training.Distributed(step2Master, sgdAlgorithm)
    net.parameter.batchSize = batchSizeLocal

    # Initialize the neural network on master node
    net.initialize(sampleSize, topologyMaster)

    topology = [None] * nNodes
    netLocal = [None] * nNodes
    for node in range(nNodes):
        # Configure the neural network
        topology[node] = configureNet()

        # Pass a model from master node to the algorithms on local nodes
        trainingModel = training.Model()
        trainingModel.initialize_Float32(sampleSize, topology[node])

        netLocal[node] = training.Distributed(step1Local)
        netLocal[node].input.setStep1LocalInput(training.inputModel, trainingModel)

        # Set the batch size for the neural network training
        netLocal[node].parameter.batchSize = batchSizeLocal

    return (net, netLocal, trainingData, trainingGroundTruth)

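# Note that this example runs all four "nodes" inside a single process:
# one step2Master algorithm plus nNodes step1Local algorithms, each built
# from an identical topology and the same per-node batch size.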

def trainModel(net, netLocal, trainingData, trainingGroundTruth):
    # Create stochastic gradient descent (SGD) optimization solver algorithm
    sgdAlgorithm = optimization_solver.sgd.Batch(fptype=np.float32)

    # Set learning rate for the optimization solver used in the neural network
    learningRate = 0.001
    sgdAlgorithm.parameter.learningRateSequence = HomogenNumericTable(1, 1, NumericTable.doAllocate, learningRate)

    # Set the optimization solver for the neural network training
    net.parameter.optimizationSolver = sgdAlgorithm

    # Run the neural network training
    nSamples = trainingData[0].getDimensions()[0]
    for i in range(0, nSamples - batchSizeLocal + 1, batchSizeLocal):
        # Compute weights and biases for the batch of inputs on local nodes
        for node in range(nNodes):
            # Pass a training data set and dependent values to the algorithm
            netLocal[node].input.setInput(training.data, getNextSubtensor(trainingData[node], i, batchSizeLocal))
            netLocal[node].input.setInput(training.groundTruth, getNextSubtensor(trainingGroundTruth[node], i, batchSizeLocal))

            # Compute weights and biases on local node
            pres = netLocal[node].compute()

            # Pass computed weights and biases to the master algorithm
            net.input.add(training.partialResults, node, pres)

        # Update weights and biases on master node
        net.compute()
        wb = net.getPartialResult().get(training.resultFromMaster).get(training.model).getWeightsAndBiases()

        # Update weights and biases on local nodes
        for node in range(nNodes):
            netLocal[node].input.getStep1LocalInput(training.inputModel).setWeightsAndBiases(wb)

    # Finalize neural network training on the master node
    res = net.finalizeCompute()

    # Retrieve the prediction model of the neural network
    return res.get(training.model).getPredictionModel_Float32()

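# Each iteration of the outer loop above is one synchronous round: every local
# node trains on its next batchSizeLocal samples, the master combines the
# partial results into updated weights and biases, and those are written back
# into every local model before the next round. The loop assumes all nodes
# hold the same number of samples, since its bound is taken from trainingData[0].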

def testModel(predictionModel):
    # Read testing data set from a .csv file and create a tensor to store input data
    predictionData = readTensorFromCSV(testDatasetFile)

    # Create an algorithm to compute the neural network predictions
    net = prediction.Batch()

    # Set the batch size for the neural network prediction
    # (the entire test data set is processed as a single batch)
    net.parameter.batchSize = predictionData.getDimensionSize(0)

    # Set input objects for the prediction neural network
    net.input.setModelInput(prediction.model, predictionModel)
    net.input.setTensorInput(prediction.data, predictionData)

    # Run the neural network prediction and return result
    return net.compute()


def printResults(testGroundTruthFile, predictionResult):
    # Read testing ground truth from a .csv file and create a tensor to store the data
    predictionGroundTruth = readTensorFromCSV(testGroundTruthFile)

    printTensors(predictionGroundTruth, predictionResult.getResult(prediction.prediction),
                 "Ground truth", "Neural network predictions: each class probability",
                 "Neural network classification results (first 20 observations):", 20)


def main():
    init = initializeNetwork()
    predictionModel = trainModel(*init)
    predictionResult = testModel(predictionModel)
    printResults(testGroundTruthFile, predictionResult)


if __name__ == "__main__":
    main()
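The relative paths at the top of the file assume the script is launched from its own directory in the DAAL examples tree, with the data folder one level up. Running something like `python neural_net_dense_distr.py` from that directory should print the ground truth alongside the per-class probabilities for the first 20 test observations.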
