Python* API Reference for Intel® Data Analytics Acceleration Library 2018 Update 2

compressor.py

1 # file: compressor.py
2 #===============================================================================
3 # Copyright 2014-2018 Intel Corporation
4 # All Rights Reserved.
5 #
6 # If this software was obtained under the Intel Simplified Software License,
7 # the following terms apply:
8 #
9 # The source code, information and material ("Material") contained herein is
10 # owned by Intel Corporation or its suppliers or licensors, and title to such
11 # Material remains with Intel Corporation or its suppliers or licensors. The
12 # Material contains proprietary information of Intel or its suppliers and
13 # licensors. The Material is protected by worldwide copyright laws and treaty
14 # provisions. No part of the Material may be used, copied, reproduced,
15 # modified, published, uploaded, posted, transmitted, distributed or disclosed
16 # in any way without Intel's prior express written permission. No license under
17 # any patent, copyright or other intellectual property rights in the Material
18 # is granted to or conferred upon you, either expressly, by implication,
19 # inducement, estoppel or otherwise. Any license under such intellectual
20 # property rights must be express and approved by Intel in writing.
21 #
22 # Unless otherwise agreed by Intel in writing, you may not remove or alter this
23 # notice or any other notice embedded in Materials by Intel or Intel's
24 # suppliers or licensors in any way.
25 #
26 #
27 # If this software was obtained under the Apache License, Version 2.0 (the
28 # "License"), the following terms apply:
29 #
30 # You may not use this file except in compliance with the License. You may
31 # obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
32 #
33 #
34 # Unless required by applicable law or agreed to in writing, software
35 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
36 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
37 #
38 # See the License for the specific language governing permissions and
39 # limitations under the License.
40 #===============================================================================
41 
42 #
43 # ! Content:
44 # ! Python example of a compressor
45 # !
46 # !*****************************************************************************
47 
48 #
49 
52 
53 import os
54 import sys
55 
56 if sys.version[0] == '2':
57  import Queue as Queue
58 else:
59  import queue as Queue
60 
61 import numpy as np
62 
63 from daal.data_management import Compressor_Zlib, Decompressor_Zlib
64 
65 utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
66 if utils_folder not in sys.path:
67  sys.path.insert(0, utils_folder)
68 from utils import getCRC32, readTextFile
69 
# Maximum size of a data block
maxDataBlockSize = 16 * 1024

# Relative path of the input data set used by this example
datasetFileName = os.path.join('..', 'data', 'batch', 'logitboost_train.csv')

# FIFO queue through which compressed data blocks are sent and received
sendReceiveQueue = Queue.Queue()
76 
def getUncompressedDataBlock(sentDataStream, availableDataSize, blockSize=None):
    """Return the next not-yet-consumed block of the input stream.

    The stream is consumed front to back: the block starts at index
    ``sentDataStream.size - availableDataSize``.

    Args:
        sentDataStream: Array holding the whole input. It must support
            slicing and expose a ``size`` attribute (expected to be a
            one-dimensional NumPy array).
        availableDataSize: Number of trailing elements of ``sentDataStream``
            that have not been handed out yet.
        blockSize: Maximum number of elements in the returned block.
            Defaults to the module-level ``maxDataBlockSize``.

    Returns:
        A ``(block, remaining)`` tuple, where ``block`` is a slice of
        ``sentDataStream`` holding at most ``blockSize`` elements and
        ``remaining`` is the value to pass as ``availableDataSize`` on the
        next call. ``(None, None)`` is returned when no data is left.

    Raises:
        ValueError: If ``blockSize`` is not positive.
    """
    if blockSize is None:
        blockSize = maxDataBlockSize
    # A non-positive block size would never shrink the remaining size,
    # so a caller looping until ``None`` would spin forever.
    if blockSize <= 0:
        raise ValueError("blockSize must be positive")

    if availableDataSize <= 0:
        return (None, None)

    cur_pos = sentDataStream.size - availableDataSize
    # Either a full block or whatever is left at the tail of the stream
    takenSize = min(blockSize, availableDataSize)
    return (sentDataStream[cur_pos:cur_pos + takenSize], availableDataSize - takenSize)
86 
87 
def sendCompressedDataBlock(block):
    """Put a copy of ``block`` on ``sendReceiveQueue``.

    A copy is enqueued rather than ``block`` itself, so the queued data does
    not alias the caller's array (the caller in this file passes a view into
    a buffer that it reuses for the next compression step).
    """
    sendReceiveQueue.put(np.copy(block))
92 
93 
def receiveCompressedDataBlock():
    """Take the next compressed block from ``sendReceiveQueue``.

    Returns:
        A copy of the block at the head of the queue, or ``None`` when the
        queue is empty.
    """
    if not sendReceiveQueue.empty():
        # Dequeue the current compressed block and hand back a copy of it
        return np.copy(sendReceiveQueue.get())
    # End of the queue reached
    return None
100 
101 
def printCRC32(sentDataStream, receivedDataStream):
    """Print the checksums of both streams and a one-line verdict.

    The checksums come from ``getCRC32``. A size mismatch is reported in
    preference to a checksum mismatch; if neither occurs, an OK line is
    printed.

    Args:
        sentDataStream: Full input data; must expose ``size``.
        receivedDataStream: Full received data; must expose ``size``.
    """
    # Checksums over the complete input and the complete received data
    sentChecksum = getCRC32(sentDataStream)
    receivedChecksum = getCRC32(receivedDataStream)

    print("\nCompression example program results:\n")

    print("Input data checksum: 0x{:02X}".format(sentChecksum))
    print("Received data checksum: 0x{:02X}".format(receivedChecksum))

    # Pick the verdict first, then emit it with a single print call
    if sentDataStream.size != receivedDataStream.size:
        verdict = "ERROR: Received data size mismatches with the sent data size"
    elif sentChecksum != receivedChecksum:
        verdict = "ERROR: Received data CRC mismatches with the sent data CRC"
    else:
        verdict = "OK: Received data CRC matches with the sent data CRC"
    print(verdict)
119 
120 
if __name__ == "__main__":
    # Script flow: read the data set, compress it block by block into the
    # queue, decompress the queued blocks into a second buffer, then compare
    # the checksums of the two streams.

    # Load the input data from the data set file
    sentDataStream = readTextFile(datasetFileName)

    # Buffers: one reusable block for compressed output, one for all received data
    compressedDataBlock = np.empty(maxDataBlockSize, dtype=np.uint8)
    receivedDataStream = np.empty(sentDataStream.size, dtype=np.uint8)

    # ---- Compression side ----
    compressor = Compressor_Zlib()

    # Fetch the first block to compress
    uncompressedDataBlock, availableDataSize = getUncompressedDataBlock(
        sentDataStream, sentDataStream.size)
    while uncompressedDataBlock is not None:
        # Hand the block to compress over to the compressor
        compressor.setInputDataBlock(uncompressedDataBlock, 0)

        # One output buffer may be too small for the whole input block, so
        # keep running (at least once) while the compressor reports a full
        # output block.
        outputBlockFull = True
        while outputBlockFull:
            # Compress uncompressedDataBlock into compressedDataBlock
            compressor.run(compressedDataBlock, 0)

            # Send only the part of the buffer the compressor actually used
            usedSize = compressor.getUsedOutputDataBlockSize()
            sendCompressedDataBlock(compressedDataBlock[:usedSize])

            # A full output block means more output is pending for this input
            outputBlockFull = compressor.isOutputDataBlockFull()

        # Fetch the next block to compress
        uncompressedDataBlock, availableDataSize = getUncompressedDataBlock(
            sentDataStream, availableDataSize)

    # ---- Decompression side ----
    decompressor = Decompressor_Zlib()

    # Number of elements of receivedDataStream filled so far
    offset = 0

    # Drain the queue block by block
    compressedDataBlock = receiveCompressedDataBlock()
    while compressedDataBlock is not None:
        # Hand the compressed block over to the decompressor
        decompressor.setInputDataBlock(compressedDataBlock, 0)

        # Decompress the block into receivedDataStream starting at offset
        decompressor.run(receivedDataStream, offset)

        # Advance past the data that was just written
        offset += decompressor.getUsedOutputDataBlockSize()

        # Next compressed block, or None once the queue is empty
        compressedDataBlock = receiveCompressedDataBlock()

    # Compute and print checksums for sentDataStream and receivedDataStream
    printCRC32(sentDataStream, receivedDataStream)

For more complete information about compiler optimizations, see our Optimization Notice.