Intel® C++ Compiler 19.0 Developer Guide and Reference
Example 4 demonstrates a linearized 2D stencil that uses accessors with embedded offsets and calls methods on the primitive.
#include <sdlt/sdlt.h>
// Ordinary C++ value type describing one image pixel as three float
// color channels. Provides copy construction, copy assignment,
// channel-wise addition and scaling of all channels by a scalar.
struct RGBs
{
    float red;
    float green;
    float blue;

    // Default construction performs no initialization of the channels.
    RGBs() {}

    // Copy construction: takes each channel from iOther.
    RGBs(const RGBs &iOther)
        : red{iOther.red}, green{iOther.green}, blue{iOther.blue}
    {
    }

    // Copy assignment: overwrites each channel with the one from iOther
    // and returns a reference to this object.
    RGBs & operator =(const RGBs &iOther)
    {
        this->red = iOther.red;
        this->green = iOther.green;
        this->blue = iOther.blue;
        return *this;
    }

    // Returns a new pixel whose channels are the channel-wise sums of
    // this pixel and iOther; neither operand is modified.
    RGBs operator + (const RGBs &iOther) const
    {
        RGBs result(*this);
        result.red += iOther.red;
        result.green += iOther.green;
        result.blue += iOther.blue;
        return result;
    }

    // Returns a new pixel with every channel multiplied by iScalar;
    // this pixel is not modified.
    RGBs operator * (float iScalar) const
    {
        RGBs result(*this);
        result.red *= iScalar;
        result.green *= iScalar;
        result.blue *= iScalar;
        return result;
    }
};
// Describes RGBs and its data members (red, green, blue) to SDLT. RGBs is
// used below as the element type of sdlt::soa1d_container.
// NOTE(review): presumably this declaration is required before a struct can
// be stored in SDLT containers and must list every data member - confirm
// against the SDLT documentation.
SDLT_PRIMITIVE(RGBs, red, green, blue)
// Number of extra elements added on each side of the image in both
// dimensions so the stencil can read neighbors without boundary checks.
constexpr int StencilHaloSize{1};
// Width and height of the unpadded image.
constexpr int width{1920};
constexpr int height{1080};
// Placeholder standing in for image loading: takes an accessor by value
// and intentionally does nothing with it.
template <class AccessorT>
void loadImageStub(AccessorT /*unused*/)
{
}
// Placeholder standing in for image saving: takes an accessor by value
// and intentionally does nothing with it.
template <class AccessorT>
void saveImageStub(AccessorT /*unused*/)
{
}
// Performs average color filtering: every interior output pixel becomes the
// mean of the corresponding input pixel and its left, right, above and below
// neighbors. The image is stored linearized (row-major) in 1D SDLT
// containers, padded by StencilHaloSize on every side.
//
// Returns 0. Standard C++ requires main to return int; `void main` is a
// non-portable extension that several compilers reject.
int main()
{
    // We are padding +-StencilHaloSize so we can avoid boundary conditions.
    const int paddedWidth = width + 2 * StencilHaloSize;
    const int paddedHeight = height + 2 * StencilHaloSize;
    const int elementCount = paddedWidth * paddedHeight;

    sdlt::soa1d_container<RGBs> inputImage(elementCount);
    sdlt::soa1d_container<RGBs> outputImage(elementCount);

    loadImageStub(inputImage.access());

    SDLT_INLINE_BLOCK
    {
        const int endOfY = StencilHaloSize + height;
        const int endOfX = StencilHaloSize + width;

        // Only the unpadded interior is visited; the halo elements of
        // outputImage are never written by this loop nest.
        for (int y = StencilHaloSize; y < endOfY; ++y)
        {
            // Embed offsets into the accessors to get to the correct row,
            // so the inner loop can index with the column alone.
            auto prevRow = inputImage.const_access((y - 1) * paddedWidth);
            auto curRow = inputImage.const_access(y * paddedWidth);
            auto nextRow = inputImage.const_access((y + 1) * paddedWidth);
            auto outputRow = outputImage.access(y * paddedWidth);

            #pragma omp simd
            for (int ix = StencilHaloSize; ix < endOfX; ++ix)
            {
                sdlt::linear_index x(ix);

                const RGBs color1 = curRow[x - 1];
                const RGBs color2 = curRow[x];
                const RGBs color3 = curRow[x + 1];
                const RGBs color4 = prevRow[x];
                const RGBs color5 = nextRow[x];

                // Despite looking like AOS code, compiler is able to create
                // privatized instances and call inlinable methods on the
                // objects, keeping the algorithm at a very high level.
                const RGBs sumOfColors = color1 + color2 + color3 + color4 + color5;
                const RGBs averageColor = sumOfColors * (1.0f / 5.0f);

                outputRow[x] = averageColor;
            }
        }
    }

    saveImageStub(outputImage.access());
    return 0;
}