Tutorial 2. Basic Kernel Programming
This is the kernel program for linear filtering. This example shows how to obtain thread IDs when using the media walker, and how to use the CM matrix type and its select operation.
#include <cm/cm.h>
// Linear filter kernel: replaces each pixel by the average of its 3x3
// neighborhood, independently for each of the R, G and B channels:
//
//   I(x,y) = [I(x-1, y-1) + I(x-1, y) + I(x-1, y+1) +
//             I(x,   y-1) + I(x,   y) + I(x,   y+1) +
//             I(x+1, y-1) + I(x+1, y) + I(x+1, y+1)] / 9
//
// One thread produces a block of 6 rows by 8 pixels. The data is R8G8B8
// (3 bytes per pixel), so that block is a 6x24 matrix of uchar elements.
//
// _GENX_MAIN_ marks this function as a kernel entry point.
//   ibuf - input surface
//   obuf - output surface
extern "C" _GENX_MAIN_ void
linear(SurfaceIndex ibuf, SurfaceIndex obuf)
{
    // Block geometry, expressed in uchar elements.
    const int kPixelBytes = 3;   // one R8G8B8 pixel
    const int kOutRows    = 6;
    const int kOutCols    = 24;  // 8 pixels * 3 bytes
    const int kInRows     = 8;   // output rows plus one row above and below
    const int kInCols     = 32;  // 30 columns would be enough for the math,
                                 // but block-read only reads multiples of dwords

    matrix<uchar, kInRows, kInCols>   tile;    // input neighborhood
    matrix<float, kOutRows, kOutCols> acc;     // running sum of the 9 taps
    matrix<uchar, kOutRows, kOutCols> result;  // filtered block

    // With the media walker the thread ids come from these intrinsics
    // rather than from per-thread kernel arguments.
    const uint block_x = get_thread_origin_x();
    const uint block_y = get_thread_origin_y();

    // Surface coordinates of this thread's block (x in bytes, y in rows).
    const uint surf_x = block_x * kOutCols;
    const uint surf_y = block_y * kOutRows;

    // 2D media-block read of the input neighborhood.
    read(ibuf, surf_x, surf_y, tile);

    // Accumulate the nine 6x24 sub-blocks of the tile. In
    // select<H, VS, W, HS>(row, col): H is the height, VS the vertical
    // stride, W the width, HS the horizontal stride, and (row, col) is the
    // top-left corner. Horizontal neighbors are one pixel (3 bytes) apart.
    // Every partial sum is an integer no larger than 9 * 255, which float
    // represents exactly, so the order of the additions is irrelevant.

    // tile row offset 0
    acc  = tile.select<kOutRows, 1, kOutCols, 1>(0, 0);
    acc += tile.select<kOutRows, 1, kOutCols, 1>(0, kPixelBytes);
    acc += tile.select<kOutRows, 1, kOutCols, 1>(0, 2 * kPixelBytes);

    // tile row offset 1
    acc += tile.select<kOutRows, 1, kOutCols, 1>(1, 0);
    acc += tile.select<kOutRows, 1, kOutCols, 1>(1, kPixelBytes);
    acc += tile.select<kOutRows, 1, kOutCols, 1>(1, 2 * kPixelBytes);

    // tile row offset 2
    acc += tile.select<kOutRows, 1, kOutCols, 1>(2, 0);
    acc += tile.select<kOutRows, 1, kOutCols, 1>(2, kPixelBytes);
    acc += tile.select<kOutRows, 1, kOutCols, 1>(2, 2 * kPixelBytes);

    // Approximate the division by 9 with a multiplication, which is faster.
    // The assignment implicitly converts the float products to uchar.
    result = acc * 0.111f;

    // 2D media-block write of the filtered block, at the same surface
    // origin that was used for the read.
    write(obuf, surf_x, surf_y, result);
}