要把OpenCV的源码改写成CUDA,那么在改写成并行计算之前,我们需要保证CUDA C中(特别是CUDA中的核函数)能够支持OpenCV定义的类型,否则我们只有重写。
所以在将OpenCV源码改写成CUDA并行计算之前,我首先将OpenCV源码改写成了普通的C语言版本------这里的改写指的是:一些结构体的重写、Mat数组和普通一维二维数组的转换等等,具体情况下面的代码将见分晓。
这次改写的是ComputeGradient函数
注意:这里保留了Mat img,因为这里到时候移植到CUDA C中要改写成PtrstepSZ变量,这里就不费功夫把他改成数组了。
#include <cv.h> #include <highgui.h> #include <cxcore.h> #include <ml.h> #include <math.h> #include <iostream> #include <fstream> #include"stdio.h" using namespace std; using namespace cv; static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI); static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI); static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI); static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI); enum { BLOCK_SIZE = 1024 }; int nbins = 9; #define IMG_W 64 #define IMG_H 128 //********************************************************************************************************** static void FastAtan2_32f(const float *Y, const float *X, float *angle, int len, bool angleInDegrees=true ) { int i = 0; float scale = angleInDegrees ? 1 : (float)(CV_PI/180); for( ; i < len; i++ ) { float x = X[i], y = Y[i]; //cout << "x: " << x << " "<< " y: " << y << endl; float ax = std::abs(x), ay = std::abs(y); float a, c, c2; if( ax >= ay ) { c = ay/(ax + (float)DBL_EPSILON); c2 = c*c; a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c; } else { c = ax/(ay + (float)DBL_EPSILON); c2 = c*c; a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c; } if( x < 0 ) a = 180.f - a; if( y < 0 ) a = 360.f - a; angle[i] = (float)(a*scale); //cout << " angle: " << angle[i] << endl; } } static void Magnitude_32f(const float* x, const float* y, float* mag, int len) { int i = 0; for( ; i < len; i++ ) { float x0 = x[i], y0 = y[i]; mag[i] = std::sqrt(x0*x0 + y0*y0); //cout << "mag: " << mag[i] << endl; } } void mycartToPolar( InputArray src1, InputArray src2, OutputArray dst1, OutputArray dst2, bool angleInDegrees ) { Mat X = src1.getMat(), Y = src2.getMat(); int type = X.type(), depth = X.depth(), cn = X.channels(); CV_Assert( X.size == Y.size && type == Y.type() && (depth == CV_32F || depth == CV_64F)); dst1.create( X.dims, X.size, type ); dst2.create( X.dims, X.size, type ); Mat Mag = dst1.getMat(), Angle = dst2.getMat(); const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0}; uchar* ptrs[4]; NAryMatIterator it(arrays, ptrs); cv::AutoBuffer<float> _buf; float* buf[2] = {0, 0}; int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn); //cout << "it size: " << it.size << endl; //cout << "blocksize: " << blockSize << endl; //cout << "BLOCK_SIZE: " << BLOCK_SIZE << endl; //cout << "total : " << total << endl; size_t esz1 = X.elemSize1(); //cout << "esz: " << esz1 << endl; //cout << "nplanes: " << it.nplanes << endl; if( depth == CV_64F ) { _buf.allocate(blockSize*2); buf[0] = _buf; buf[1] = buf[0] + blockSize; } //cout << total << endl; //cout << blockSize << endl; for( size_t i = 0; i < it.nplanes; i++, ++it ) { for( j = 0; j < total; j += blockSize ) { int len = std::min(total - j, blockSize); if( depth == CV_32F ) { const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1]; float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3]; Magnitude_32f( x, y, mag, len ); FastAtan2_32f( y, x, angle, len, angleInDegrees ); } ptrs[0] += len*esz1; ptrs[1] += len*esz1; ptrs[2] += len*esz1; ptrs[3] += len*esz1; } } } void newcartToPolar( float* src1, float* src2, float* dst1, float* dst2, int length, bool angleInDegrees ) { //dst1 = (float*)malloc(sizeof(float) * length); //dst2 = (float*)malloc(sizeof(float) * length); int j, k, total = length, blockSize = MIN(total, (BLOCK_SIZE)); size_t esz1 = 4; size_t nplanes = 1; float* Dx = src1; float* Dy = src2; float* Mag = dst1; float* Angle = dst2; //for(int k = 0; k < IMG_W; k ++) //{ //cout << Dx[k] << endl; //} //cout << total << endl; //cout << blockSize << endl; for( size_t i = 0; i < nplanes; i++ ) { for( j = 0; j < total; j += blockSize ) { int len = std::min(total - j, blockSize); Magnitude_32f( Dx, Dy, Mag, len ); FastAtan2_32f( Dy, Dx, Angle, len, angleInDegrees ); Dx += len; Dy += len; Mag += len; Angle += len; } } } //**************************************************************************************************** void newcomputeGradient(const Mat& img, Mat& grad, Mat& qangle, Size paddingTL, Size paddingBR) { CV_Assert( img.type() == CV_8U || img.type() == CV_8UC3 ); Size gradsize(img.cols + paddingTL.width + paddingBR.width, img.rows + paddingTL.height + paddingBR.height); //grad.create(gradsize, CV_32FC2); // <magnitude*(1-alpha), magnitude*alpha> //qangle.create(gradsize, CV_8UC2); // [0..nbins-1] - quantized gradient orientation Size wholeSize; Point roiofs; img.locateROI(wholeSize, roiofs); int i, x, y; int cn = img.channels(); Mat_<float> _lut(1, 256); const float* lut = &_lut(0,0); //if( gammaCorrection ) for( i = 0; i < 256; i++ ) _lut(0,i) = std::sqrt((float)i); //else //for( i = 0; i < 256; i++ ) // _lut(0,i) = (float)i; AutoBuffer<int> mapbuf(gradsize.width + gradsize.height + 4); int* xmap = (int*)mapbuf + 1; int* ymap = xmap + gradsize.width + 2; const int borderType = (int)BORDER_REFLECT_101; for( x = -1; x < gradsize.width + 1; x++ ) { xmap[x] = borderInterpolate(x - paddingTL.width + roiofs.x, wholeSize.width, borderType) - roiofs.x; } for( y = -1; y < gradsize.height + 1; y++ ) { ymap[y] = borderInterpolate(y - paddingTL.height + roiofs.y, wholeSize.height, borderType) - roiofs.y; //cout << ymap[y] << endl; } // x- & y- derivatives for the whole row int width = gradsize.width; AutoBuffer<float> _dbuf(width*4); float* dbuf = _dbuf; Mat Dx(1, width, CV_32F, dbuf); Mat Dy(1, width, CV_32F, dbuf + width); Mat Mag(1, width, CV_32F, dbuf + width*2); Mat Angle(1, width, CV_32F, dbuf + width*3); //cout << ymap[0] << " " << xmap[0] << endl; int _nbins = nbins; float angleScale = (float)(_nbins/CV_PI); //cout << ymap[128] << endl; for( y = 0; y < gradsize.height; y++ ) { const uchar* imgPtr = img.data + img.step*ymap[y]; const uchar* prevPtr = img.data + img.step*ymap[y-1]; const uchar* nextPtr = img.data + img.step*ymap[y+1]; float* gradPtr = (float*)grad.ptr(y); uchar* qanglePtr = (uchar*)qangle.ptr(y); //cout << ymap[y] << endl; //cout << y << "!" <<endl; //cout << "_______________"<< endl; for( x = 0; x < width; x++ ) { int x1 = xmap[x]; dbuf[x] = (float)(lut[imgPtr[xmap[x+1]]] - lut[imgPtr[xmap[x-1]]]); dbuf[width + x] = (float)(lut[nextPtr[x1]] - lut[prevPtr[x1]]); //if(y == 127) //{ // cout << dbuf[x] << " " << dbuf[width + x] << endl; //} } //cout << y << "@" <<endl; mycartToPolar( Dx, Dy, Mag, Angle, false ); //cartToPolar( Dx, Dy, Mag, Angle, false ); //for(int k = 0; k < Mag.cols; k++) //{ //cout << Mag.at<float>(0, k) << endl; //} for( x = 0; x < width; x++ ) { float mag = dbuf[x+width*2], angle = dbuf[x+width*3]*angleScale - 0.5f; //cout << mag << "," << angle << endl; int hidx = cvFloor(angle); angle -= hidx; gradPtr[x*2] = mag*(1.f - angle); gradPtr[x*2+1] = mag*angle; //cout << gradPtr[x*2] << "," << gradPtr[x*2+1] << endl; //cout << gradPtr[x*2] << " " << gradPtr[x*2+1] << endl; if( hidx < 0 ) hidx += _nbins; else if( hidx >= _nbins ) hidx -= _nbins; assert( (unsigned)hidx < (unsigned)_nbins ); qanglePtr[x*2] = (uchar)hidx; hidx++; hidx &= hidx < _nbins ? -1 : 0; qanglePtr[x*2+1] = (uchar)hidx; } //cout << y << "#" <<endl; } //cout << y; } //**************************************************************************************************** int myborderInterpolate( int p, int len) { if( (unsigned)p < (unsigned)len ) //cout << "fuck" << endl ; else { int delta = 1; if( len == 1 ) return 0; do { if( p < 0 ) p = -p - 1 + delta; else p = len - 1 - (p - len) - delta; } while( (unsigned)p >= (unsigned)len ); } return p; } //**************************************************************************************************** //**************************************************************************************************** void mycomputeGradient(const Mat& img, float** grad1, float** grad2, uchar** qangle1, uchar** qangle2) { int grad_w = img.cols; int grad_h = img.rows; int i, x, y; //int cn = img.channels(); //Mat_<float> _lut(1, 256); float lut[256]; for( i = 0; i < 256; i++ ) lut[i] = std::sqrt((float)i); //int xmap[IMG_W + 1]; //int ymap[IMG_H + 1]; int* xmap = (int*)malloc(sizeof(int)*(grad_w+1)); int* ymap = (int*)malloc(sizeof(int)*(grad_h+1)); int xmapT = 1, ymapT = 1; for( x = -1; x < grad_w + 1; x++ ) { if(x == -1) { xmapT = myborderInterpolate(x, grad_w); continue; } xmap[x] = myborderInterpolate(x, grad_w); //cout << xmap[-1]; } for( y = -1; y < grad_h + 1; y++ ) { if(y == -1) { ymapT = myborderInterpolate(y, grad_h); continue; } ymap[y] = myborderInterpolate(y, grad_h); //cout << ymap[y] << endl; } //cout << ymap[-1] << endl; // x- & y- derivatives for the whole row //float Dx[IMG_W]; //float Dy[IMG_W]; //float Mag[IMG_W]; //float Angle[IMG_W]; float* Dx = (float*)malloc(sizeof(float*)*grad_w); float* Dy = (float*)malloc(sizeof(float*)*grad_w); float* Mag = (float*)malloc(sizeof(float*)*grad_w); float* Angle = (float*)malloc(sizeof(float*)*grad_w); float angleScale = (float)(nbins/CV_PI); cout << xmapT << " " << ymapT << endl; cout << ymap[128] << " " << xmap[64] << endl; for( y = 0; y < grad_h; y++ ) { for( x = 0; x < grad_w; x++ ) { int x1 = xmap[x]; int a1 = (int)img.at<uchar>(ymap[y], xmap[x+1]); //int a2 = (int)img.at<uchar>(ymap[y], xmap[x-1]); int a3 = (int)img.at<uchar>(ymap[y+1], x1); //int a4 = (int)img.at<uchar>(ymap[y-1], x1); int a2, a4; if(x == 0) { a2 = (int)img.at<uchar>(ymap[y], xmapT); } else { a2 = (int)img.at<uchar>(ymap[y], xmap[x-1]); } if(y == 0) { a4 = (int)img.at<uchar>(ymapT, x1); } else { a4 = (int)img.at<uchar>(ymap[y-1], x1); } Dx[x] = (float)(lut[a1] - lut[a2]); Dy[x] = (float)(lut[a3] - lut[a4]); } newcartToPolar( Dx, Dy, Mag, Angle, grad_w, false); for( x = 0; x < grad_w; x++ ) { float mag = Mag[x], angle = Angle[x]*angleScale - 0.5f; int hidx = floor(angle); angle -= hidx; grad1[y][x] = mag*(1.f - angle); grad2[y][x] = mag*angle; //cout << grad1[y][x] << "," << grad2[y][x] << endl; if( hidx < 0 ) hidx += nbins; else if( hidx >= nbins ) hidx -= nbins; assert( (unsigned)hidx < (unsigned)nbins ); qangle1[y][x] = (uchar)hidx; hidx++; hidx &= hidx < nbins ? -1 : 0; qangle2[y][x] = (uchar)hidx; } } free(xmap); free(ymap); free(Dx); free(Dy); free(Mag); free(Angle); } //**************************************************************************************************** int main() { Mat src = imread("test.png"); if (src.empty()) { cout << "Can not load the image" << endl; return 0; } //缩放 Mat testImage; //resize(src, testImage, Size(64, 128)); cvtColor(src, testImage, CV_BGR2GRAY); //测试图片提取HOG特征 HOGDescriptor hog(cvSize(64, 128), cvSize(16, 16), cvSize(8, 8), cvSize(8, 8), 9); vector<float> descriptors; Mat grad, qangle; Mat grad3, qangle3; Size gradsize(testImage.cols, testImage.rows); //grad.create(gradsize, CV_32FC2); // <magnitude*(1-alpha), magnitude*alpha> //qangle.create(gradsize, CV_8UC2); // [0..nbins-1] - quantized gradient orientation grad3.create(gradsize, CV_32FC2); // <magnitude*(1-alpha), magnitude*alpha> qangle3.create(gradsize, CV_8UC2); // [0..nbins-1] - quantized gradient orientation //double t = (double)getTickCount(); //hog.computeGradient(testImage, grad, qangle); newcomputeGradient(testImage, grad3, qangle3, cvSize(0, 0), cvSize(0, 0)); //t = ((double)getTickCount() - t)/getTickFrequency(); //cout << " t : " << t * 1000 << " ms" <<endl; //cout << "grad_w: " << grad.cols << endl; //cout << "grad_h: " << grad.rows << endl; int w = testImage.cols; int h = testImage.rows; float** grad1 = (float**)malloc(sizeof(float*) * h); for(int i = 0; i < h; i ++) { grad1[i] = (float*)malloc(sizeof(float) * w); } float** grad2 = (float**)malloc(sizeof(float*) * h); for(int i = 0; i < h; i ++) { grad2[i] = (float*)malloc(sizeof(float) * w); } uchar** qangle1 = (uchar**)malloc(sizeof(uchar*) * h); for(int i = 0; i < h; i ++) { qangle1[i] = (uchar*)malloc(sizeof(uchar) * w); } uchar** qangle2 = (uchar**)malloc(sizeof(uchar*) * h); for(int i = 0; i < h; i ++) { qangle2[i] = (uchar*)malloc(sizeof(uchar) * w); } cout << "0000" << endl; mycomputeGradient(testImage, grad1, grad2, qangle1, qangle2); cout << "1111" << endl; //cout << grad.channels() << endl; for(int i = 0; i < h; i ++) { float* gradPtr = (float*)grad3.ptr(i); uchar* qanglePtr = (uchar*)qangle3.ptr(i); for(int j = 0; j < w; j ++) { //cout << gradPtr[2*j] << " _ " << gradPtr[2*j +1] << " "; //cout << grad1[i][j] << " _ " << grad2[i][j] << endl; if(gradPtr[2*j] == grad1[i][j] && gradPtr[2*j +1] == grad2[i][j]) { cout << "ok" << endl; } else { cout << i << "," << j << endl;; } //cout << grad.at<float>(i, j) << endl; //cout << grad1[i][j] << endl; /*float a = grad.at<float>(i, j); float b = grad1.at<float>(i, j); if(a == b) { cout << "nice+(" << j << "," << i << ")" << endl; } else { cout << "*"; }*/ } } //hog.compute(testImage, descriptors); //cout << "HOG dimensions:" << descriptors.size() << endl; getchar(); return 0; }
上述是源码和改写函数共存的一个测试函数,旨在使二者输出结果相同,即改写成功。
newcomputeGradient函数为源码函数(这里写出来是为了测试输出更加方便)
mycomputeGradient函数为改写后的函数
当然这附加的改写函数是newcartToPolar函数,源码是mycartToPolar函数
可以看出,大部分的修改是Mat转化成数组的修改。
大家一定很好奇,为什么我要把函数名字给改了,这是因为这些函数名早已被OpenCV定义好了,具体定义在cxcore头文件中,如果包含了该头文件,那么就不能重名,意味着我需要改函数名;否则,最直接了当的办法就是把这个头文件给删除!
这里还需要注意:
源码中会出现xmap[-1]、ymap[-1]这么一些神奇的东西。虽然改写成普通数组编译时不会出错的,但是,这个-1实际上表示的是数组上一个地址的内存单元,这是未定义的,所以盲目使用,可能会出错,就比如说:
我定义一个int变量,紧接着我给xmap[-1] = 1;很可能这个-1指向的内存单元就是前面所定义的变量,这样的错误是很难发现的!
所以这里我定义了xmapT,ymapT变量,意思就是避免上述错误!
关于borderInterpolate函数,源码实在opencv_core的filter.cpp中
关于FastAtan2_32f,Magnitude_32f,cartToPolar函数,源码均在opencv_core的mathfuncs.cpp中
相关推荐
《闵大荒之旅(三)---- 抄抄改改opencv之GPUvsCPU》这篇博文主要探讨了在计算机视觉领域中,OpenCV库的GPU加速与CPU执行的对比。OpenCV是一个广泛使用的开源计算机视觉和机器学习库,它支持多种平台,并且提供了...
微信小程序源码-户外旅游小程序.zip微信小程序源码-户外旅游小程序.zip微信小程序源码-户外旅游小程序.zip微信小程序源码-户外旅游小程序.zip微信小程序源码-户外旅游小程序.zip微信小程序源码-户外旅游小程序.zip...
拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾柴网-源码拾...
更新运动步数--源码 更新运动步数--源码 更新运动步数--源码 更新运动步数--源码 更新运动步数--源码 更新运动步数--源码 更新运动步数--源码 更新运动步数--源码 更新运动步数--源码 更新运动步数--源码 更新运动步...
5--[少儿编程奇幻之旅-整装待发].zip源码scratch2.0 3.0编程项目源文件源码案例素材源代码5--[少儿编程奇幻之旅-整装待发].zip源码scratch2.0 3.0编程项目源文件源码案例素材源代码5--[少儿编程奇幻之旅-整装待发]....
这个“lwip移植-源码工程”很可能是为了在特定硬件平台上实现lwIP网络功能的代码库,可能包含了针对该平台的适配层和必要的配置文件。 lwIP协议栈的核心组件包括以下几个部分: 1. **IP层**:负责处理IP数据包的...
4--[少儿编程奇幻之旅-绘制飞船].zip源码scratch2.0 3.0编程项目源文件源码案例素材源代码4--[少儿编程奇幻之旅-绘制飞船].zip源码scratch2.0 3.0编程项目源文件源码案例素材源代码4--[少儿编程奇幻之旅-绘制飞船]....
4--[少儿编程奇幻之旅-侦测目标].zip源码scratch2.0 3.0编程项目源文件源码案例素材源代码4--[少儿编程奇幻之旅-侦测目标].zip源码scratch2.0 3.0编程项目源文件源码案例素材源代码4--[少儿编程奇幻之旅-侦测目标]....
5--[少儿编程奇幻之旅-烟花庆祝].zip源码scratch2.0 3.0编程项目源文件源码案例素材源代码5--[少儿编程奇幻之旅-烟花庆祝].zip源码scratch2.0 3.0编程项目源文件源码案例素材源代码5--[少儿编程奇幻之旅-烟花庆祝]....
5--[少儿编程奇幻之旅-收集莫桑石].zip源码scratch2.0 3.0编程项目源文件源码案例素材源代码5--[少儿编程奇幻之旅-收集莫桑石].zip源码scratch2.0 3.0编程项目源文件源码案例素材源代码5--[少儿编程奇幻之旅-收集...
微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-...
Python实战游戏源码- 中国象棋Python实战游戏源码- 中国象棋Python实战游戏源码- 中国象棋Python实战游戏源码- 中国象棋Python实战游戏源码- 中国象棋Python实战游戏源码- 中国象棋Python实战游戏源码- 中国象棋...
微信小程序源码-毕业设计期末大作业课程设计源码 微信小程序源码-毕业设计期末大作业课程设计源码 微信小程序源码-毕业设计期末大作业课程设计源码 微信小程序源码-毕业设计期末大作业课程设计源码 微信小程序源码-...
微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-...
微信小程序源码-毕业设计期末大作业课程设计源码 微信小程序源码-毕业设计期末大作业课程设计源码 微信小程序源码-毕业设计期末大作业课程设计源码 微信小程序源码-毕业设计期末大作业课程设计源码 微信小程序源码-...
微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-...
微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-...
微信小程序源码-毕业设计期末大作业课程设计源码 微信小程序源码-毕业设计期末大作业课程设计源码 微信小程序源码-毕业设计期末大作业课程设计源码 微信小程序源码-毕业设计期末大作业课程设计源码 微信小程序源码-...
keil-MDK开发环境uCOS-III移植到STM32上源码,keil-MDK开发环境uCOS-III移植到STM32上源码,开发者著作keil-MDK开发环境uCOS-III移植到STM32上源码,keil-MDK开发环境uCOS-III移植到STM32上源码,开发者著作
微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-毕业设计期末大作业前端项目源码 微信小程序源码-...