Intel® Math Kernel Library 2018 Developer Reference - C

Examples of Using OpenMP* Threading for FFT Computation

The following sample program shows how to employ internal OpenMP* threading in Intel MKL for FFT computation.

To specify the number of threads inside Intel MKL, use the following settings:

set MKL_NUM_THREADS = 1 for one-threaded mode;

set MKL_NUM_THREADS = 4 for multi-threaded mode.

Using Intel MKL Internal Threading Mode (C Example)

 
#include "mkl_dfti.h"
 
int main ()
 
{
    float x[200][100];
    DFTI_DESCRIPTOR_HANDLE fft;
    MKL_LONG len[2] = {200, 100};
    // initialize x
    DftiCreateDescriptor ( &fft, DFTI_SINGLE, DFTI_REAL, 2, len );
    DftiCommitDescriptor ( fft );
    DftiComputeForward ( fft, x );
    DftiFreeDescriptor ( &fft );
    return 0;
}
 

The following Example “Using Parallel Mode with Multiple Descriptors Initialized in a Parallel Region” and Example “Using Parallel Mode with Multiple Descriptors Initialized in One Thread” illustrate a parallel customer program with each descriptor instance used only in a single thread.

Specify the number of OpenMP threads for Example “Using Parallel Mode with Multiple Descriptors Initialized in a Parallel Region” like this:

set MKL_NUM_THREADS = 1 for Intel MKL to work in the single-threaded mode (recommended);

set OMP_NUM_THREADS = 4 for the customer program to work in the multi-threaded mode.

Using Parallel Mode with Multiple Descriptors Initialized in a Parallel Region

Note that in this example, the program can be transformed to become single-threaded at the customer level but using parallel mode within Intel MKL. To achieve this, you need to set the parameter DFTI_NUMBER_OF_TRANSFORMS = 4 and to set the corresponding parameter DFTI_INPUT_DISTANCE = 5000.

#include "mkl_dfti.h"
#include <omp.h>
#define ARRAY_LEN(a) sizeof(a)/sizeof(a[0])
int main ()
{
    // 4 OMP threads, each does 2D FFT 50x100 points
    MKL_Complex8 x[4][50][100];
    int nth = ARRAY_LEN(x);
    MKL_LONG len[2] = {ARRAY_LEN(x[0]), ARRAY_LEN(x[0][0])};
    int th;
    // assume x is initialized and do 2D FFTs
#pragma omp parallel for shared(len, x)
    for (th = 0; th < nth; th++)
    {
        DFTI_DESCRIPTOR_HANDLE myFFT;

        DftiCreateDescriptor (&myFFT, DFTI_SINGLE, DFTI_COMPLEX, 2, len);
        DftiCommitDescriptor (myFFT);
        DftiComputeForward (myFFT, x[th]);
        DftiFreeDescriptor (&myFFT);
    }
    return 0;
}

Specify the number of OpenMP threads for Example “Using Parallel Mode with Multiple Descriptors Initialized in One Thread” like this:

set MKL_NUM_THREADS = 1 for Intel MKL to work in the single-threaded mode (obligatory);

set OMP_NUM_THREADS = 4 for the customer program to work in the multi-threaded mode.

Using Parallel Mode with Multiple Descriptors Initialized in One Thread

#include "mkl_dfti.h"
#include <omp.h>
#define ARRAY_LEN(a) sizeof(a)/sizeof(a[0])
int main ()
{
    // 4 OMP threads, each does 2D FFT 50x100 points
    MKL_Complex8 x[4][50][100];
    int nth = ARRAY_LEN(x);
    MKL_LONG len[2] = {ARRAY_LEN(x[0]), ARRAY_LEN(x[0][0])};
    DFTI_DESCRIPTOR_HANDLE FFT[ARRAY_LEN(x)];
    int th;

    for (th = 0; th < nth; th++)
        DftiCreateDescriptor (&FFT[th], DFTI_SINGLE, DFTI_COMPLEX, 2, len);
    for (th = 0; th < nth; th++)
        DftiCommitDescriptor (FFT[th]);
    // assume x is initialized and do 2D FFTs
#pragma omp parallel for shared(FFT, x)
    for (th = 0; th < nth; th++)
        DftiComputeForward (FFT[th], x[th]);
    for (th = 0; th < nth; th++)
        DftiFreeDescriptor (&FFT[th]);
    return 0;
}

Using Parallel Mode with a Common Descriptor

The following Example “Using Parallel Mode with a Common Descriptor” illustrates a parallel customer program with a common descriptor used in several threads.

#include "mkl_dfti.h"
#include <omp.h>
#define ARRAY_LEN(a) sizeof(a)/sizeof(a[0])
int main ()
{
    // 4 OMP threads, each does 2D FFT 50x100 points
    MKL_Complex8 x[4][50][100];
    int nth = ARRAY_LEN(x);
    MKL_LONG len[2] = {ARRAY_LEN(x[0]), ARRAY_LEN(x[0][0])};
    DFTI_DESCRIPTOR_HANDLE FFT;
    int th;

    DftiCreateDescriptor (&FFT, DFTI_SINGLE, DFTI_COMPLEX, 2, len);
    DftiCommitDescriptor (FFT);
    // assume x is initialized and do 2D FFTs
#pragma omp parallel for shared(FFT, x)
    for (th = 0; th < nth; th++)
        DftiComputeForward (FFT, x[th]);
    DftiFreeDescriptor (&FFT);
    return 0;
}