/*******************************************************************************
* Copyright 2011-2018 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************/
/*
! Content:
! An example of using Intel(R) MKL DFTI configuration parameter
! DFTI_CONJUGATE_EVEN_STORAGE. The parameter defines layout of complex data in
! the backward domain of real-to-complex FFT
! (when DFTI_FORWARD_DOMAIN==DFTI_REAL).
!
! Values:
! DFTI_COMPLEX_REAL (default for 1d and 2d transforms, not recommended)
! represent the complex data by real and imaginary parts packed in a real
! array as defined by DFTI_PACKED_FORMAT configuration parameter.
! This example shows usage of this default setting.
!
! DFTI_COMPLEX_COMPLEX (recommented, default for 3d and higher rank FFTs)
! represent the complex data by complex elements. For the recommended use
! of DFTI_CONJUGATE_EVEN_STORAGE see examples of real DFT.
!
! Note: DFTI_COMPLEX_COMPLEX is recommended value for
! DFTI_CONJUGATE_EVEN_STORAGE configuration.
!
!****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>
#include "mkl_service.h"
#include "mkl_dfti.h"
typedef MKL_Complex16 (*packed_t)(double *x,MKL_LONG N1,MKL_LONG N2,
MKL_LONG n1,MKL_LONG n2);
static MKL_Complex16 ccs(double *x,MKL_LONG N1,MKL_LONG N2,
MKL_LONG n1,MKL_LONG n2);
static MKL_Complex16 pack(double *x,MKL_LONG N1,MKL_LONG N2,
MKL_LONG n1,MKL_LONG n2);
static MKL_Complex16 perm(double *x,MKL_LONG N1,MKL_LONG N2,
MKL_LONG n1,MKL_LONG n2);
static void init(double *x, MKL_LONG N1, MKL_LONG N2, MKL_LONG S1, MKL_LONG S2,
MKL_LONG H1, MKL_LONG H2);
static int verify(packed_t p, double *x, MKL_LONG N1, MKL_LONG N2,
MKL_LONG H1, MKL_LONG H2);
/* Define the format to printf MKL_LONG values */
#if !defined(MKL_ILP64)
#define LI "%li"
#else
#define LI "%lli"
#endif
int main(void)
{
/* Sizes of 2D transform */
MKL_LONG N[2] = { 16, 32 };
/* Arbitrary harmonic to verify FFT */
MKL_LONG H[] = { 1, -1 };
/* Execution status */
MKL_LONG status = 0;
/* Pointer to input/output data */
double *x;
/* Strides define the data layout for forward and backward domains */
MKL_LONG strides[3];
DFTI_DESCRIPTOR_HANDLE hand = 0;
char version[DFTI_VERSION_LENGTH];
DftiGetValue(0, DFTI_VERSION, version); /* get DFTI version */
printf("%s\n", version);
printf("Example config_conjugate_even_storage\n");
printf("Real-to-complex in-place 2D FFT\n");
printf("Configuration parameters:\n");
printf(" DFTI_PRECISION = DFTI_DOUBLE\n");
printf(" DFTI_FORWARD_DOMAIN = DFTI_REAL\n");
printf(" DFTI_DIMENSION = 2\n");
printf(" DFTI_LENGTHS = { "LI", "LI" }\n", N[0], N[1]);
printf(" DFTI_CONJUGATE_EVEN_STORAGE = DFTI_COMPLEX_REAL\n");
printf("Allocate array x of size (N1+2)*(N2+2),"
" sufficient space for all packed formats\n");
x = mkl_malloc(sizeof(double) * (N[0]+2) * (N[1]+2), 64);
if (x == NULL) goto failed;
printf("===== Configure descriptor for CCS format =====\n");
printf("Create DFTI descriptor\n");
status = DftiCreateDescriptor(&hand, DFTI_DOUBLE, DFTI_REAL, 2, N);
if (status != DFTI_NO_ERROR) goto failed;
/* This may be skipped because COMPLEX_REAL is default for 2D real FFT */
printf("Set configuration: real elements in conjugate-even domain\n");
status = DftiSetValue(hand,DFTI_CONJUGATE_EVEN_STORAGE,DFTI_COMPLEX_REAL);
if (status != DFTI_NO_ERROR) goto failed;
/* This may be skipped because CCS is default for 2D real FFT */
printf("Set DFTI_PACKED_FORMAT = DFTI_CCS_FORMAT\n");
status = DftiSetValue(hand, DFTI_PACKED_FORMAT, DFTI_CCS_FORMAT);
if (status != DFTI_NO_ERROR) goto failed;
strides[2] = 1;
strides[1] = N[1]+2;
strides[0] = 0;
printf("Set input/output strides = { "LI", "LI", "LI" }\n",
strides[0], strides[1], strides[2]);
status = DftiSetValue(hand, DFTI_INPUT_STRIDES, strides);
if (status != DFTI_NO_ERROR) goto failed;
/*
* Output strides are ignored for real-to-complex in-place FFT
* with DFTI_COMPLEX_REAL storage.
*
* status = DftiSetValue(hand, DFTI_OUTPUT_STRIDES, strides);
* if (status != DFTI_NO_ERROR) goto failed;
*/
printf("Commit DFTI descriptor\n");
status = DftiCommitDescriptor(hand);
if (status != DFTI_NO_ERROR) goto failed;
printf("Initialize input\n");
init(x, N[0], N[1], strides[1], strides[2], H[0], H[1]);
printf("Compute forward transform\n");
status = DftiComputeForward(hand, x);
if (status != DFTI_NO_ERROR) goto failed;
printf("Verify the result in CCS format\n");
status = verify(ccs, x, N[0], N[1], H[0], H[1]);
if (status != 0) goto failed;
printf("===== Configure descriptor for PACK format =====\n");
printf("Set DFTI_PACKED_FORMAT = DFTI_PACK_FORMAT\n");
status = DftiSetValue(hand, DFTI_PACKED_FORMAT, DFTI_PACK_FORMAT);
if (status != DFTI_NO_ERROR) goto failed;
strides[2] = 1;
strides[1] = N[1];
strides[0] = 0;
printf("Set input/output strides = { "LI", "LI", "LI" }\n",
strides[0], strides[1], strides[2]);
status = DftiSetValue(hand, DFTI_INPUT_STRIDES, strides);
if (status != DFTI_NO_ERROR) goto failed;
/*
* Output strides are ignored for real-to-complex in-place FFT
* with DFTI_COMPLEX_REAL storage.
*
* status = DftiSetValue(hand, DFTI_OUTPUT_STRIDES, strides);
* if (status != DFTI_NO_ERROR) goto failed;
*/
printf("Commit DFTI descriptor\n");
status = DftiCommitDescriptor(hand);
if (status != DFTI_NO_ERROR) goto failed;
printf("Initialize input\n");
init(x, N[0], N[1], strides[1], strides[2], H[0], H[1]);
printf("Compute forward transform\n");
status = DftiComputeForward(hand, x);
if (status != DFTI_NO_ERROR) goto failed;
printf("Verify the result in PACK format\n");
status = verify(pack, x, N[0], N[1], H[0], H[1]);
if (status != 0) goto failed;
printf("===== Configure descriptor for PERM format =====\n");
printf("Set DFTI_PACKED_FORMAT = DFTI_PERM_FORMAT\n");
status = DftiSetValue(hand, DFTI_PACKED_FORMAT, DFTI_PERM_FORMAT);
if (status != DFTI_NO_ERROR) goto failed;
strides[2] = 1;
strides[1] = N[1];
strides[0] = 0;
printf("Set input/output strides = { "LI", "LI", "LI" }\n",
strides[0], strides[1], strides[2]);
status = DftiSetValue(hand, DFTI_INPUT_STRIDES, strides);
if (status != DFTI_NO_ERROR) goto failed;
/*
* Output strides are ignored for real-to-complex in-place FFT
* with DFTI_COMPLEX_REAL storage.
*
* status = DftiSetValue(hand, DFTI_OUTPUT_STRIDES, strides);
* if (status != DFTI_NO_ERROR) goto failed;
*/
printf("Commit DFTI descriptor\n");
status = DftiCommitDescriptor(hand);
if (status != DFTI_NO_ERROR) goto failed;
printf("Initialize input\n");
init(x, N[0], N[1], strides[1], strides[2], H[0], H[1]);
printf("Compute forward transform\n");
status = DftiComputeForward(hand, x);
if (status != DFTI_NO_ERROR) goto failed;
printf("Verify the result in PERM format\n");
status = verify(perm, x, N[0], N[1], H[0], H[1]);
if (status != 0) goto failed;
cleanup:
printf("Free DFTI descriptor\n");
DftiFreeDescriptor(&hand);
printf("Free data array\n");
mkl_free(x);
printf("TEST %s\n", (status == 0) ? "PASSED" : "FAILED");
return status;
failed:
printf(" ERROR, status = "LI"\n", status);
status = 1;
goto cleanup;
}
/* Compute (K*L)%M accurately */
static double moda(MKL_LONG K, MKL_LONG L, MKL_LONG M)
{
return (double)(((long long)K * L) % M);
}
/* Initialize array x(N) to produce unit peaks at x(H) and x(N-H) */
static void init(double *x, MKL_LONG N1, MKL_LONG N2, MKL_LONG S1, MKL_LONG S2,
MKL_LONG H1, MKL_LONG H2)
{
double TWOPI = 6.2831853071795864769, phase, factor;
MKL_LONG n1, n2, index;
factor = (2*(N1-H1)%N1==0 && 2*(N2-H2)%N2==0) ? 1.0 : 2.0;
for (n1 = 0; n1 < N1; n1++)
{
for (n2 = 0; n2 < N2; n2++)
{
phase = moda(n1,H1,N1) / N1;
phase += moda(n2,H2,N2) / N2;
index = n1*S1 + n2*S2;
x[index] = factor * cos( TWOPI * phase ) / (N1*N2);
}
}
}
/* Verify that x has unit peak at H */
static int verify(packed_t packed,double *x,MKL_LONG N1,MKL_LONG N2,
MKL_LONG H1,MKL_LONG H2)
{
double err, errthr, maxerr;
MKL_LONG n1, n2;
/*
* Note, this simple error bound doesn't take into account error of
* input data
*/
errthr = 2.5 * log( (double)N1*N2 ) / log(2.0) * DBL_EPSILON;
printf(" Check if err is below errthr %.3lg\n", errthr);
maxerr = 0;
for (n1 = 0; n1 < N1; n1++)
{
for (n2 = 0; n2 < N2/2+1; n2++)
{
double re_exp = 0, im_exp = 0, re_got, im_got;
MKL_Complex16 x_got;
if ((( n1-H1)%N1==0 && ( n2-H2)%N2==0) ||
((-n1-H1)%N1==0 && (-n2-H2)%N2==0))
{
re_exp = 1.0;
}
x_got = packed(x, N1, N2, n1, n2);
re_got = x_got.real;
im_got = x_got.imag;
err = fabs(re_got - re_exp) + fabs(im_got - im_exp);
if (err > maxerr) maxerr = err;
if (!(err < errthr))
{
printf(" x["LI"]["LI"]: ",n1,n2);
printf(" expected (%.17lg,%.17lg), ",re_exp,im_exp);
printf(" got (%.17lg,%.17lg), ",re_got,im_got);
printf(" err %.3lg\n", err);
printf(" Verification FAILED\n");
return 1;
}
}
}
printf(" Verified, maximum error was %.3lg\n", maxerr);
return 0;
}
/*
* Fetch x(n1,n2) from the result of N1-by-N2 real FFT CCS-packed in x(N1+2,N2+2).
* Note: x should be embedded in a matrix at least x(N1+2,N2+2).
* Assume n1=0:N1-1, n2=0:N2-1.
*/
static MKL_Complex16 ccs(double *y,MKL_LONG N1,MKL_LONG N2,
MKL_LONG n1,MKL_LONG n2)
{
MKL_Complex16 res;
if (n1==0)
{
if (n2 <= N2/2)
{
res.real = y[2*n2+0];
res.imag = y[2*n2+1];
}
else
{
res.real = y[2*(N2-n2)+0];
res.imag = -y[2*(N2-n2)+1];
}
}
else if (n2==0)
{
if (n1 <= N1/2)
{
res.real = y[(2*n1+0)*(N2+2)];
res.imag = y[(2*n1+1)*(N2+2)];
}
else
{
res.real = y[(2*(N1-n1)+0)*(N2+2)];
res.imag = -y[(2*(N1-n1)+1)*(N2+2)];
}
}
else if (n2 == N2-n2)
{
if (n1 <= N1/2)
{
res.real = y[(2*n1+0)*(N2+2) + 2*(N2/2)];
res.imag = y[(2*n1+1)*(N2+2) + 2*(N2/2)];
}
else
{
res.real = y[(2*(N1-n1)+0)*(N2+2) + 2*(N2/2)];
res.imag = -y[(2*(N1-n1)+1)*(N2+2) + 2*(N2/2)];
}
}
else if (n2 <= N2/2)
{
res.real = y[n1*(N2+2)+2*n2+0];
res.imag = y[n1*(N2+2)+2*n2+1];
}
else
{
res.real = y[(N1-n1)*(N2+2)+2*(N2-n2)+0];
res.imag = -y[(N1-n1)*(N2+2)+2*(N2-n2)+1];
}
return res;
}
/*
* Fetch x(n1,n2) from the result of N1-by-N2 real FFT PACK-packed in x(N1,N2).
* Assume n1=0:N1-1, n2=0:N2-1.
*/
static MKL_Complex16 pack(double *y,MKL_LONG N1,MKL_LONG N2,
MKL_LONG n1,MKL_LONG n2)
{
MKL_Complex16 res;
if (n1==0)
{
if (n2 == 0)
{
res.real = y[0];
res.imag = 0;
}
else if (n2 == N2-n2)
{
res.real = y[2*n2-1];
res.imag = 0;
}
else if (n2 <= N2/2)
{
res.real = y[2*n2-1];
res.imag = y[2*n2-0];
}
else
{
res.real = y[2*(N2-n2)-1];
res.imag = -y[2*(N2-n2)-0];
}
}
else if (n2==0)
{
if (n1 == N1-n1)
{
res.real = y[(N1-1)*N2];
res.imag = 0;
}
else if (n1 <= N1/2)
{
res.real = y[(2*n1-1)*N2];
res.imag = y[(2*n1-0)*N2];
}
else
{
res.real = y[(2*(N1-n1)-1)*N2];
res.imag = -y[(2*(N1-n1)-0)*N2];
}
}
else if (n2 == N2-n2)
{
if (n1 == N1-n1)
{
res.real = y[N1*N2 - 1];
res.imag = 0;
}
else if (n1 <= N1/2)
{
res.real = y[(2*n1 - 1)*N2 + N2-1];
res.imag = y[(2*n1 - 0)*N2 + N2-1];
}
else
{
res.real = y[(2*(N1-n1) - 1)*N2 + N2-1];
res.imag = -y[(2*(N1-n1) - 0)*N2 + N2-1];
}
}
else if (n2 <= N2/2)
{
res.real = y[n1*N2+2*n2-1];
res.imag = y[n1*N2+2*n2-0];
}
else
{
res.real = y[(N1-n1)*N2+2*(N2-n2) - 1];
res.imag = -y[(N1-n1)*N2+2*(N2-n2) - 0];
}
return res;
}
/*
* Fetch x(n1,n2) from the result of N1-by-N2 real FFT PERM-packed in x(N1,N2).
* Assume n1=0:N1-1, n2=0:N2-1.
*/
static MKL_Complex16 perm(double *y,MKL_LONG N1,MKL_LONG N2,
MKL_LONG n1,MKL_LONG n2)
{
MKL_Complex16 res;
if (n1==0)
{
if (n2 == 0)
{
res.real = y[0];
res.imag = 0;
}
else if (n2 == N2-n2)
{
res.real = y[1];
res.imag = 0;
}
else if (n2 <= N2/2)
{
res.real = y[2*n2+0 - N2%2];
res.imag = y[2*n2+1 - N2%2];
}
else
{
res.real = y[2*(N2-n2)+0 - N2%2];
res.imag = -y[2*(N2-n2)+1 - N2%2];
}
}
else if (n2==0)
{
if (n1 == N1-n1)
{
res.real = y[N2];
res.imag = 0;
}
else if (n1 <= N1/2)
{
res.real = y[(2*n1+0 - N1%2)*N2];
res.imag = y[(2*n1+1 - N1%2)*N2];
}
else
{
res.real = y[(2*(N1-n1)+0 - N1%2)*N2];
res.imag = -y[(2*(N1-n1)+1 - N1%2)*N2];
}
}
else if (n2 == N2-n2)
{
if (n1 == N1-n1)
{
res.real = y[N2 + 1];
res.imag = 0;
}
else if (n1 <= N1/2)
{
res.real = y[(2*n1+0 - N1%2)*N2 + 1];
res.imag = y[(2*n1+1 - N1%2)*N2 + 1];
}
else
{
res.real = y[(2*(N1-n1)+0 - N1%2)*N2 + 1];
res.imag = -y[(2*(N1-n1)+1 - N1%2)*N2 + 1];
}
}
else if (n2 <= N2/2)
{
res.real = y[n1*N2+2*n2+0 - N2%2];
res.imag = y[n1*N2+2*n2+1 - N2%2];
}
else
{
res.real = y[(N1-n1)*N2+2*(N2-n2)+0 - N2%2];
res.imag = -y[(N1-n1)*N2+2*(N2-n2)+1 - N2%2];
}
return res;
}