Subversion

gpucv

[/] [experimental/] [trunk/] [gpucv/] [src/] [plugin/] [cvgcu/] [image_processing/] [gradients_edges_corners/] [sobel.filter.cu] - Rev 567

Compare with Previous - Blame


//CVG_LicenseBegin==============================================================
//
//	Copyright@ Institut TELECOM 2005
//		http://www.institut-telecom.fr/en_accueil.html
//	
//	This software is a GPU accelerated library for computer-vision. It 
//	supports an OPENCV-like extensible interface for easily porting OPENCV 
//	applications.
//	
//	Contacts :
//		patrick.horain@it-sudparis.eu
//		gpucv-developers@picoforge.int-evry.fr
//	
//	Project's Home Page :
//		https://picoforge.int-evry.fr/cgi-bin/twiki/view/Gpucv/Web/WebHome
//	
//	This software is governed by the CeCILL-B license under French law and
//	abiding by the rules of distribution of free software.  You can  use, 
//	modify and/ or redistribute the software under the terms of the CeCILL-B
//	license as circulated by CEA, CNRS and INRIA at the following URL
//	"http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html". 
//	
//================================================================CVG_LicenseEnd



//Other Licenses:
//Some operators are inspired from the CUDA SDK, see corresponding license terms.
//	CUDA SDK:
//	- CudaSobelIpl() based on Sobel.
//==============================================================================
#include <cvgcu/config.h>
#include <GPUCVCuda/gpucv_wrapper_c.h>

#if _GPUCV_COMPILE_CUDA
#include <GPUCVCuda/base_kernels/tpl_convolutions.kernels.h>
#include <assert.h>
#include <cutil_inline.h>
#include <cvgcu/image_processing/filter_color_conv/cvgcu_convol_filter.kernel.h>
////////////////////////////////////////////////////////////////////////////////
// Convolution kernel storage
////////////////////////////////////////////////////////////////////////////////
//__constant__ unsigned char c_Kernel[KERNEL_LENGTH];


template <typename TPLSrc, typename TPLDst>
void convolutionRowsGPU(
    TPLSrc *d_Src,
    TPLDst *d_Dst,
    int imageW,
    int imageH
){
    assert( ROWS_BLOCKDIM_X * ROWS_HALO_STEPS >= KERNEL_RADIUS );
    assert( imageW % (ROWS_RESULT_STEPS * ROWS_BLOCKDIM_X) == 0 );
    assert( imageH % ROWS_BLOCKDIM_Y == 0 );

    dim3 blocks(imageW / (ROWS_RESULT_STEPS * ROWS_BLOCKDIM_X), imageH / ROWS_BLOCKDIM_Y);
    dim3 threads(ROWS_BLOCKDIM_X, ROWS_BLOCKDIM_Y);

    convolutionRowsKernel<<<blocks, threads>>>(
        d_Src,
                d_Dst,
        imageW,
        imageH,
        imageW
    );
    cutilCheckMsg("convolutionRowsKernel() execution failed\n");
} 

template <typename TPLSrc, typename TPLDst>
void convolutionColumnsGPU(
    TPLSrc *d_Src,
    TPLDst *d_Dst,
    int imageW,
    int imageH
){
    assert( COLUMNS_BLOCKDIM_Y * COLUMNS_HALO_STEPS >= KERNEL_RADIUS );
    assert( imageW % COLUMNS_BLOCKDIM_X == 0 );
    assert( imageH % (COLUMNS_RESULT_STEPS * COLUMNS_BLOCKDIM_Y) == 0 );

    dim3 blocks(imageW / COLUMNS_BLOCKDIM_X, imageH / (COLUMNS_RESULT_STEPS * COLUMNS_BLOCKDIM_Y));
    dim3 threads(COLUMNS_BLOCKDIM_X, COLUMNS_BLOCKDIM_Y);

    convolutionColumnsKernel<<<blocks, threads>>>(
        d_Src,
                d_Dst,
        imageW,
        imageH,
        imageW
    );
    cutilCheckMsg("convolutionColumnsKernel() execution failed\n");
}



/**
\return false if convolution kernel has not been calculated
\param src => Kernel will be faster if type is 32S/32U/32F
\param dst => type must be 16S/16U/32S/32U/32F.
\note In fact, all type are supported but results and performances are unknown for 8U/8S/...
*/
_GPUCV_CVGCU_EXPORT_CU
bool gcuSobel(void* src, void* dst, int xorder, int yorder, int aperture_size)
{
        unsigned int width		= gcuGetWidth(dst);
        unsigned int height		= gcuGetHeight(dst);
        
        //prepare input/ouput========
        void* d_src	= gcuPreProcess(src, GCU_INPUT, CU_MEMORYTYPE_DEVICE);
        void* d_dst = gcuPreProcess(dst, GCU_OUTPUT, CU_MEMORYTYPE_DEVICE);
        
    float *h_Kernel_Horiz = (float *)malloc(KERNEL_LENGTH * sizeof(float));
        float *h_Kernel_Vert = (float *)malloc(KERNEL_LENGTH * sizeof(float));
        
        //use float temporary image
        void *d_Buffer = NULL;
        cudaMalloc((void **)&d_Buffer , width * height * sizeof(float));
        //=====================

        if(aperture_size==-1)//Scharr filter
        {
                h_Kernel_Vert[0] = 3;
                h_Kernel_Vert[1] = 10;
                h_Kernel_Vert[2] = 3;

                h_Kernel_Horiz[0] = -1;
                h_Kernel_Horiz[1] = 0;
                h_Kernel_Horiz[2] = 1;	
        }
        else if ((xorder==0) && (yorder==1) && (aperture_size==3))
        {
                h_Kernel_Vert[0] = 1;
                h_Kernel_Vert[1] = 2;
                h_Kernel_Vert[2] = 1;

                h_Kernel_Horiz[0] = 1;
                h_Kernel_Horiz[1] = 0;
                h_Kernel_Horiz[2] = -1;
        }
        else if ((xorder==1) && (yorder==0) && (aperture_size==3))
        {
                h_Kernel_Vert[0] = 1;
                h_Kernel_Vert[1] = 0;
                h_Kernel_Vert[2] = -1;

                h_Kernel_Horiz[0] = 1;
                h_Kernel_Horiz[1] = 2;
                h_Kernel_Horiz[2] = 1;
        }
        else
        {
                cutilSafeCall( cudaFree(d_Buffer ) );
                free(h_Kernel_Horiz);
                free(h_Kernel_Vert);

                //clean input/output
                gcuPostProcess(dst);
                gcuPostProcess(src);
                return false;
        }

        cudaMemcpyToSymbol(c_Kernel_H, h_Kernel_Horiz, KERNEL_LENGTH * sizeof(float));
        cudaMemcpyToSymbol(c_Kernel_V, h_Kernel_Vert, KERNEL_LENGTH * sizeof(float));


        #define GCU_SOBEL_SWITCH_FCT(CHANNELS, SRC_TYPE, DST_TYPE)\
                convolutionRowsGPU<SRC_TYPE, float>((SRC_TYPE*)d_src,(float*)d_Buffer,width,height);\
                convolutionColumnsGPU<float, DST_TYPE>((float*)d_Buffer,(DST_TYPE*)d_dst,width,height);

        //run kernels for any kind of format...some format might not be used, but it is easier to mange all of them with 
        //this macro than set them manually...
        GCU_MULTIPLEX_CONVERT_1CHANNELS_ALLSRCFORMAT(GCU_SOBEL_SWITCH_FCT, 1, gcuGetGLDepth(src),gcuGetGLDepth(dst));
        
        
        gcudaThreadSynchronize();
    cutilSafeCall( cudaFree(d_Buffer ) );
    free(h_Kernel_Horiz);
        free(h_Kernel_Vert);

        //clean input/output
        gcuPostProcess(dst);
        gcuPostProcess(src);
        return true;
}
//=========================================================
#endif//_GPUCV_COMPILE_CUDA

Powered by WebSVN v1.61