/*
 * This sample implements a separable convolution filter
 * of a 2D signal with a gaussian kernel.
 *
 * This file is the host-side driver: it builds a normalized gaussian
 * kernel, fills an input image with random values and runs the reference
 * row pass followed by the reference column pass.
 */

// NOTE(review): the header names were missing from the reviewed copy of this
// file. These four cover every library call used below (malloc/free/rand,
// printf/fprintf, expf); confirm the list against the original source.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

// The radius must be parenthesized: an unparenthesized `32/8` silently breaks
// any expression that puts an operator with equal or higher precedence next
// to the macro, e.g. `(float)KERNEL_RADIUS` would become `((float)32) / 8`.
#define KERNEL_RADIUS (32 / 8)
// Number of kernel taps: the centre tap plus KERNEL_RADIUS taps on each side.
#define      KERNEL_W (2 * KERNEL_RADIUS + 1)



////////////////////////////////////////////////////////////////////////////////
// Reference convolution (defined in another translation unit)
////////////////////////////////////////////////////////////////////////////////
extern "C" void convolutionRow(
    float *h_Result,
    float *h_Data,
    float *h_Kernel,
    int dataW,
    int dataH,
    int kernelR
);

extern "C" void convolutionColumn(
    float *h_Result,
    float *h_Data,
    float *h_Kernel,
    int dataW,
    int dataH,
    int kernelR
);



////////////////////////////////////////////////////////////////////////////////
// Data configuration
////////////////////////////////////////////////////////////////////////////////
const int      DATA_W = 4096;
const int      DATA_H = 4096;
// Buffer sizes in bytes.
const int   DATA_SIZE = DATA_W * DATA_H * sizeof(float);
const int KERNEL_SIZE = KERNEL_W * sizeof(float);



////////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////////
// Builds the kernel and the random input, then runs convolutionRow() from
// h_DataA into h_DataB and convolutionColumn() from h_DataB back into h_DataA
// (argument roles follow the parameter names of the prototypes above).
// Command-line arguments are ignored.
// Returns EXIT_SUCCESS, or EXIT_FAILURE if a host buffer cannot be allocated.
int main(int argc, char **argv){
    (void)argc;
    (void)argv;

    printf("%i x %i\n", DATA_W, DATA_H);
    printf("%i\n", KERNEL_W);
    // The operand is a size_t, so it needs %zu; passing it to %d is undefined.
    printf("line size %zu bytes\n", DATA_W * sizeof(float));
    printf("kernel size %d bytes \n", KERNEL_SIZE);

    printf("Initializing data...\n");
    float *h_Kernel = (float *)malloc(KERNEL_SIZE);
    float *h_DataA  = (float *)malloc(DATA_SIZE);
    float *h_DataB  = (float *)malloc(DATA_SIZE);
    if(h_Kernel == NULL || h_DataA == NULL || h_DataB == NULL){
        fprintf(stderr, "Failed to allocate host buffers\n");
        // free(NULL) is a no-op, so releasing all three unconditionally is safe.
        free(h_DataB);
        free(h_DataA);
        free(h_Kernel);
        return EXIT_FAILURE;
    }

    // Sample exp(-d^2 / 2) with d running from -1 to +1 across the taps,
    // then normalize so that the taps sum to 1.
    float kernelSum = 0;
    for(int i = 0; i < KERNEL_W; i++){
        float dist = (float)(i - KERNEL_RADIUS) / (float)KERNEL_RADIUS;
        h_Kernel[i] = expf(- dist * dist / 2);
        kernelSum += h_Kernel[i];
    }
    for(int i = 0; i < KERNEL_W; i++){
        h_Kernel[i] /= kernelSum;
    }

    // Input image: pseudo-random values in [0, 1]; rand() is left unseeded.
    for(int i = 0; i < DATA_W * DATA_H; i++){
        h_DataA[i] = (float)rand() / (float)RAND_MAX;
    }

    printf("...running convolutionRow()\n");
    convolutionRow(
        h_DataB,
        h_DataA,
        h_Kernel,
        DATA_W,
        DATA_H,
        KERNEL_RADIUS
    );

    printf("...running convolutionColumn()\n");
    convolutionColumn(
        h_DataA,
        h_DataB,
        h_Kernel,
        DATA_W,
        DATA_H,
        KERNEL_RADIUS
    );

    printf("Shutting down...\n");
    free(h_DataB);
    free(h_DataA);
    free(h_Kernel);

    return EXIT_SUCCESS;
}