Python* API Reference for Intel® Data Analytics Acceleration Library 2018 Update 2

datastructures_csr.py

1 # file: datastructures_csr.py
2 #===============================================================================
3 # Copyright 2014-2018 Intel Corporation
4 # All Rights Reserved.
5 #
6 # If this software was obtained under the Intel Simplified Software License,
7 # the following terms apply:
8 #
9 # The source code, information and material ("Material") contained herein is
10 # owned by Intel Corporation or its suppliers or licensors, and title to such
11 # Material remains with Intel Corporation or its suppliers or licensors. The
12 # Material contains proprietary information of Intel or its suppliers and
13 # licensors. The Material is protected by worldwide copyright laws and treaty
14 # provisions. No part of the Material may be used, copied, reproduced,
15 # modified, published, uploaded, posted, transmitted, distributed or disclosed
16 # in any way without Intel's prior express written permission. No license under
17 # any patent, copyright or other intellectual property rights in the Material
18 # is granted to or conferred upon you, either expressly, by implication,
19 # inducement, estoppel or otherwise. Any license under such intellectual
20 # property rights must be express and approved by Intel in writing.
21 #
22 # Unless otherwise agreed by Intel in writing, you may not remove or alter this
23 # notice or any other notice embedded in Materials by Intel or Intel's
24 # suppliers or licensors in any way.
25 #
26 #
27 # If this software was obtained under the Apache License, Version 2.0 (the
28 # "License"), the following terms apply:
29 #
30 # You may not use this file except in compliance with the License. You may
31 # obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
32 #
33 #
34 # Unless required by applicable law or agreed to in writing, software
35 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
36 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
37 #
38 # See the License for the specific language governing permissions and
39 # limitations under the License.
40 #===============================================================================
41 
42 
43 
44 
45 import os
46 import sys
47 
48 import numpy as np
49 
50 from daal.data_management import BlockDescriptor, CSRBlockDescriptor, CSRNumericTable, readOnly, readWrite
51 
# Put the parent of this script's directory at the front of sys.path so the
# `utils` module imported just below can be resolved from there.
utils_folder = os.path.dirname(os.path.dirname(__file__))
utils_folder = os.path.realpath(os.path.abspath(utils_folder))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printArray
56 
57 
if __name__ == "__main__":
    # Walk-through of CSRNumericTable: build a 5x5 sparse table, read a few
    # rows as a dense block, overwrite the non-zero values of the same rows
    # through a CSR block, then read the rows densely again to show the change.

    print("Compressed sparse rows (CSR) numeric table example\n")

    nObservations = 5
    nFeatures = 5
    firstReadRow = 1
    nRead = 3

    # Example of using CSR numeric table.
    # The index arrays below are one-based: column indices run from 1 to
    # nFeatures, rowOffsets starts at 1 and its last entry is len(values) + 1.
    values = np.array([1, -1, -3, -2, 5, 4, 6, 4, -4, 2, 7, 8, -5], dtype=np.float64)
    colIndices = np.array([1, 2, 4, 1, 2, 3, 4, 5, 1, 3, 4, 2, 5], dtype=np.uint64)
    rowOffsets = np.array([1, 4, 6, 9, 12, 14], dtype=np.uint64)

    dataTable = CSRNumericTable(values, colIndices, rowOffsets, nFeatures, nObservations)

    # Read block of rows in dense format
    block = BlockDescriptor(ntype=np.float64)
    dataTable.getBlockOfRows(firstReadRow, nRead, readOnly, block)
    print(str(block.getNumberOfRows()) + " rows are read\n")
    printArray(
        block.getArray(), nFeatures, block.getNumberOfRows(), block.getNumberOfColumns(),
        "Print 3 rows from CSR data array as dense double array:"
    )
    dataTable.releaseBlockOfRows(block)

    # Read block of rows in CSR format and write into it.
    # The keyword is `ntype` (as for BlockDescriptor above); float64 matches the
    # dtype of `values`, so nothing is narrowed when the block is written back.
    # NOTE(review): the original requested np.float32 via a misspelled keyword
    # (`ntpye`) -- confirm float64 is the intended element type here.
    csrBlock = CSRBlockDescriptor(ntype=np.float64)
    dataTable.getSparseBlock(firstReadRow, nRead, readWrite, csrBlock)
    # Query the descriptor only after getSparseBlock has filled it.
    num_cols = csrBlock.getNumberOfColumns()
    valuesBlock = csrBlock.getBlockValues()
    nValuesInBlock = csrBlock.getDataSize()
    printArray(valuesBlock, nValuesInBlock, 1, num_cols, "Values in 3 rows from CSR data array:")
    printArray(
        csrBlock.getBlockColumnIndices(), nValuesInBlock, 1, num_cols,
        "Columns indices in 3 rows from CSR data array:", flt64=False
    )
    printArray(
        csrBlock.getBlockRowIndices(), nRead + 1, 1, num_cols,
        "Rows offsets in 3 rows from CSR data array:", flt64=False
    )

    # Overwrite every stored value of the block with -1, -2, -3, ...
    for i in range(nValuesInBlock):
        valuesBlock[i] = -(1.0 + i)

    # The block was acquired with readWrite; presumably the release is what
    # pushes the modified values back into the table -- the dense re-read
    # below is there to show the result.
    dataTable.releaseSparseBlock(csrBlock)

    # Read block of rows in dense format
    dataTable.getBlockOfRows(firstReadRow, nRead, readOnly, block)
    print(str(block.getNumberOfRows()) + " rows are read\n")
    printArray(
        block.getArray(), nFeatures, block.getNumberOfRows(), block.getNumberOfColumns(),
        "Print 3 rows from CSR data array as dense double array:"
    )
    dataTable.releaseBlockOfRows(block)

For more complete information about compiler optimizations, see our Optimization Notice.