Intel® Math Kernel Library 2018 Developer Reference - C
The following sample program shows how to employ internal OpenMP* threading in Intel MKL for FFT computation.
To specify the number of threads inside Intel MKL, use the following settings:
set MKL_NUM_THREADS = 1 for one-threaded mode;
set MKL_NUM_THREADS = 4 for multi-threaded mode.
#include "mkl_dfti.h"

// Sample: one 2D single-precision real FFT (200 x 100 points) computed
// in place through a single descriptor. Threading, if any, happens inside
// Intel MKL and is controlled by MKL_NUM_THREADS.
//
// Returns 0 when every DFTI call reports DFTI_NO_ERROR, 1 otherwise.
int main(void)
{
    // NOTE(review): for an in-place real transform the conjugate-even
    // result may need more room than the 200 x 100 input occupies --
    // confirm the required padding against the storage-scheme documentation.
    float x[200][100];
    DFTI_DESCRIPTOR_HANDLE fft;
    MKL_LONG len[2] = {200, 100};
    MKL_LONG status;

    // initialize x

    status = DftiCreateDescriptor(&fft, DFTI_SINGLE, DFTI_REAL, 2, len);
    if (status != DFTI_NO_ERROR) {
        // No descriptor was created, so there is nothing to free.
        return 1;
    }

    status = DftiCommitDescriptor(fft);
    if (status == DFTI_NO_ERROR) {
        // Only compute with a descriptor that committed successfully.
        status = DftiComputeForward(fft, x);
    }

    // Release the descriptor on both the success and the failure path.
    DftiFreeDescriptor(&fft);

    return (status == DFTI_NO_ERROR) ? 0 : 1;
}
The following two examples, “Using Parallel Mode with Multiple Descriptors Initialized in a Parallel Region” and “Using Parallel Mode with Multiple Descriptors Initialized in One Thread”, illustrate a parallel customer program in which each descriptor instance is used by only one thread.
Specify the number of OpenMP threads for Example “Using Parallel Mode with Multiple Descriptors Initialized in a Parallel Region” like this:
set MKL_NUM_THREADS = 1 for Intel MKL to work in the single-threaded mode (recommended);
set OMP_NUM_THREADS = 4 for the customer program to work in the multi-threaded mode.
Note that this example can be restructured so that the customer program is single-threaded while Intel MKL runs in parallel mode internally. To achieve this, set the parameter DFTI_NUMBER_OF_TRANSFORMS = 4 and the corresponding parameter DFTI_INPUT_DISTANCE = 5000 (the 50 × 100 = 5000 elements that separate the start of one transform's data from the next).
#include "mkl_dfti.h"
#include <omp.h>

// Element count of a true array (not a pointer). Fully parenthesized so
// the expansion is safe inside larger expressions.
#define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0]))

// Sample: four independent 2D complex FFTs, one per OpenMP thread. Each
// loop iteration creates, commits, uses and frees its own descriptor, so
// no descriptor is ever shared between threads.
//
// Returns 0 when every DFTI call in every iteration reports DFTI_NO_ERROR,
// 1 otherwise.
int main(void)
{
    // 4 OMP threads, each does 2D FFT 50x100 points
    MKL_Complex8 x[4][50][100];
    int nth = (int)ARRAY_LEN(x);
    MKL_LONG len[2] = {(MKL_LONG)ARRAY_LEN(x[0]), (MKL_LONG)ARRAY_LEN(x[0][0])};
    int failed = 0;
    int th;

    // assume x is initialized and do 2D FFTs
    // Each thread records failure in its private copy of `failed`; the
    // copies are OR-ed together when the loop ends.
#pragma omp parallel for shared(len, x) reduction(|:failed)
    for (th = 0; th < nth; th++) {
        DFTI_DESCRIPTOR_HANDLE myFFT;
        MKL_LONG status;

        status = DftiCreateDescriptor(&myFFT, DFTI_SINGLE, DFTI_COMPLEX, 2, len);
        if (status != DFTI_NO_ERROR) {
            // Nothing was created for this iteration; skip commit/compute/free.
            failed = 1;
            continue;
        }

        status = DftiCommitDescriptor(myFFT);
        if (status == DFTI_NO_ERROR) {
            status = DftiComputeForward(myFFT, x[th]);
        }

        DftiFreeDescriptor(&myFFT);

        if (status != DFTI_NO_ERROR) {
            failed = 1;
        }
    }

    return failed ? 1 : 0;
}
Specify the number of OpenMP threads for Example “Using Parallel Mode with Multiple Descriptors Initialized in One Thread” like this:
set MKL_NUM_THREADS = 1 for Intel MKL to work in the single-threaded mode (obligatory);
set OMP_NUM_THREADS = 4 for the customer program to work in the multi-threaded mode.
#include "mkl_dfti.h"
#include <omp.h>

// Element count of a true array (not a pointer). Fully parenthesized so
// the expansion is safe inside larger expressions.
#define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0]))

// Sample: four 2D complex FFTs whose descriptors are all created and
// committed serially by one thread; only the compute step runs in an
// OpenMP parallel loop, with each thread using its own descriptor.
//
// Returns 0 when every DFTI call reports DFTI_NO_ERROR, 1 otherwise.
int main(void)
{
    // 4 OMP threads, each does 2D FFT 50x100 points
    MKL_Complex8 x[4][50][100];
    int nth = (int)ARRAY_LEN(x);
    MKL_LONG len[2] = {(MKL_LONG)ARRAY_LEN(x[0]), (MKL_LONG)ARRAY_LEN(x[0][0])};
    DFTI_DESCRIPTOR_HANDLE FFT[ARRAY_LEN(x)];
    MKL_LONG status;
    int created = 0; // number of descriptors successfully created so far
    int failed = 0;
    int th;

    // Create the descriptors one by one, stopping at the first failure so
    // that `created` counts exactly the handles that must be freed later.
    while (created < nth) {
        status = DftiCreateDescriptor(&FFT[created], DFTI_SINGLE, DFTI_COMPLEX, 2, len);
        if (status != DFTI_NO_ERROR) {
            failed = 1;
            break;
        }
        created++;
    }

    // Commit only if every descriptor exists.
    for (th = 0; !failed && th < nth; th++) {
        status = DftiCommitDescriptor(FFT[th]);
        if (status != DFTI_NO_ERROR) {
            failed = 1;
        }
    }

    if (!failed) {
        // assume x is initialized and do 2D FFTs
        // Per-thread failure flags are OR-ed together when the loop ends.
#pragma omp parallel for shared(FFT, x) reduction(|:failed)
        for (th = 0; th < nth; th++) {
            if (DftiComputeForward(FFT[th], x[th]) != DFTI_NO_ERROR) {
                failed = 1;
            }
        }
    }

    // Free exactly the descriptors that were created, on every path.
    for (th = 0; th < created; th++) {
        DftiFreeDescriptor(&FFT[th]);
    }

    return failed ? 1 : 0;
}
The following Example “Using Parallel Mode with a Common Descriptor” illustrates a parallel customer program with a common descriptor used in several threads.
#include "mkl_dfti.h"
#include <omp.h>

// Element count of a true array (not a pointer). Fully parenthesized so
// the expansion is safe inside larger expressions.
#define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0]))

// Sample: four 2D complex FFTs computed in an OpenMP parallel loop through
// one common descriptor that is created and committed before the loop and
// freed after it.
//
// Returns 0 when every DFTI call reports DFTI_NO_ERROR, 1 otherwise.
int main(void)
{
    // 4 OMP threads, each does 2D FFT 50x100 points
    MKL_Complex8 x[4][50][100];
    int nth = (int)ARRAY_LEN(x);
    MKL_LONG len[2] = {(MKL_LONG)ARRAY_LEN(x[0]), (MKL_LONG)ARRAY_LEN(x[0][0])};
    DFTI_DESCRIPTOR_HANDLE FFT;
    MKL_LONG status;
    int failed = 0;
    int th;

    status = DftiCreateDescriptor(&FFT, DFTI_SINGLE, DFTI_COMPLEX, 2, len);
    if (status != DFTI_NO_ERROR) {
        // No descriptor was created, so there is nothing to free.
        return 1;
    }

    status = DftiCommitDescriptor(FFT);
    if (status != DFTI_NO_ERROR) {
        failed = 1;
    } else {
        // assume x is initialized and do 2D FFTs
        // The threads only compute with the committed descriptor; per-thread
        // failure flags are OR-ed together when the loop ends.
#pragma omp parallel for shared(FFT, x) reduction(|:failed)
        for (th = 0; th < nth; th++) {
            if (DftiComputeForward(FFT, x[th]) != DFTI_NO_ERROR) {
                failed = 1;
            }
        }
    }

    // Release the descriptor on both the success and the failure path.
    DftiFreeDescriptor(&FFT);

    return failed ? 1 : 0;
}