Python* API Reference for Intel® Data Analytics Acceleration Library 2019

compression_batch.py

1 # file: compression_batch.py
2 #===============================================================================
3 # Copyright 2014-2018 Intel Corporation.
4 #
5 # This software and the related documents are Intel copyrighted materials, and
6 # your use of them is governed by the express license under which they were
7 # provided to you (License). Unless the License provides otherwise, you may not
8 # use, modify, copy, publish, distribute, disclose or transmit this software or
9 # the related documents without Intel's prior written permission.
10 #
11 # This software and the related documents are provided as is, with no express
12 # or implied warranties, other than those that are expressly stated in the
13 # License.
14 #===============================================================================
15 
16 #
17 # ! Content:
18 # ! Python example of compression in the batch processing mode
19 # !
20 # !*****************************************************************************
21 
22 #
23 
24 
25 #
26 
27 import os
28 import sys
29 
30 import numpy as np
31 
32 from daal.data_management import Compressor_Zlib, Decompressor_Zlib, level9, DecompressionStream, CompressionStream
33 
34 utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
35 if utils_folder not in sys.path:
36  sys.path.insert(0, utils_folder)
37 from utils import getCRC32, readTextFile
38 
39 DATA_PREFIX = os.path.join('..', 'data', 'batch')
40 datasetFileName = os.path.join(DATA_PREFIX, 'logitboost_train.csv')
41 
42 
def printCRC32(rawData, deCompressedData):
    """Print CRC32 checksums of both buffers followed by a one-line verdict.

    Both arguments are passed to ``getCRC32`` and must expose a ``.size``
    attribute (NOTE(review): NumPy arrays in this example -- confirm for
    the value returned by ``readTextFile``).

    The verdict is decided in this order: a size mismatch is reported
    first, then a checksum mismatch, otherwise success. Nothing is returned.
    """
    # Both checksums are computed before any output is produced
    rawChecksum = getCRC32(rawData)
    restoredChecksum = getCRC32(deCompressedData)

    print("\nCompression example program results:\n")

    print("Raw data checksum: 0x{:02X}".format(rawChecksum))
    print("Decompressed data checksum: 0x{:02X}".format(restoredChecksum))

    # Pick exactly one verdict; the size comparison takes precedence
    if rawData.size != deCompressedData.size:
        verdict = "ERROR: Decompressed data size mismatches with the raw data size"
    elif rawChecksum != restoredChecksum:
        verdict = "ERROR: Decompressed data CRC mismatches with the raw data CRC"
    else:
        verdict = "OK: Decompressed data CRC matches with the raw data CRC"

    print(verdict)
62 
63 
def main():
    """Compress the dataset file with zlib, decompress it, and verify it.

    Reads ``datasetFileName``, pushes the data through a
    ``CompressionStream`` and then a ``DecompressionStream``, and hands the
    original and restored buffers to ``printCRC32`` for comparison.
    """
    # Load the input that is going to be compressed
    original = readTextFile(datasetFileName)

    # Set up the zlib compressor: gzHeader enabled, level set to level9
    zlibCompressor = Compressor_Zlib()
    zlibCompressor.parameter.gzHeader = True
    zlibCompressor.parameter.level = level9

    # Feed the raw data into a compression stream built on that compressor
    packStream = CompressionStream(zlibCompressor)
    packStream.push_back(original)

    # Size the output buffer from the stream, then copy the compressed bytes
    packedSize = packStream.getCompressedDataSize()
    packed = np.empty(packedSize, dtype=np.uint8)
    packStream.copyCompressedArray(packed)

    # Set up the matching zlib decompressor (gzHeader enabled here as well)
    zlibDecompressor = Decompressor_Zlib()
    zlibDecompressor.parameter.gzHeader = True

    # Feed the compressed bytes into a decompression stream
    unpackStream = DecompressionStream(zlibDecompressor)
    unpackStream.push_back(packed)

    # Size the output buffer from the stream, then copy the restored bytes
    restoredSize = unpackStream.getDecompressedDataSize()
    restored = np.empty(restoredSize, dtype=np.uint8)
    unpackStream.copyDecompressedArray(restored)

    # Report checksums of the original and the round-tripped data
    printCRC32(original, restored)


if __name__ == "__main__":
    main()

For more complete information about compiler optimizations, see our Optimization Notice.