PARP Research Group University of Murcia, Spain


examples/OpenCV/siftDetector/hess/sift.cpp

00001 /*
00002   Functions for detecting SIFT image features.
00003   
00004   For more information, refer to:
00005   
00006   Lowe, D.  Distinctive image features from scale-invariant keypoints.
00007   <EM>International Journal of Computer Vision, 60</EM>, 2 (2004),
00008   pp.91--110.
00009   
00010   Copyright (C) 2006-2007  Rob Hess <hess@eecs.oregonstate.edu>
00011 
00012   Note: The SIFT algorithm is patented in the United States and cannot be
00013   used in commercial products without a license from the University of
00014   British Columbia.  For more information, refer to the file LICENSE.ubc
00015   that accompanied this distribution.
00016 
00017   @version 1.1.1-20070913
00018 */
00019 
00020 #include "sift.h"
00021 #include "imgfeatures.h"
00022 #include "utils.h"
00023 
00024 #include <cxcore.h>
00025 #include <cv.h>
00026 
00027 /************************* Local Function Prototypes *************************/
00028 
00029 IplImage* create_init_img( IplImage*, int, double );
00030 IplImage* convert_to_gray32( IplImage* );
00031 IplImage*** build_gauss_pyr( IplImage*, int, int, double );
00032 IplImage* downsample( IplImage* );
00033 IplImage*** build_dog_pyr( IplImage***, int, int );
00034 CvSeq* scale_space_extrema( IplImage***, int, int, double, int, CvMemStorage*);
00035 int is_extremum( IplImage***, int, int, int, int );
00036 struct feature* interp_extremum( IplImage***, int, int, int, int, int, double);
00037 void interp_step( IplImage***, int, int, int, int, double*, double*, double* );
00038 CvMat* deriv_3D( IplImage***, int, int, int, int );
00039 CvMat* hessian_3D( IplImage***, int, int, int, int );
00040 double interp_contr( IplImage***, int, int, int, int, double, double, double );
00041 struct feature* new_feature( void );
00042 int is_too_edge_like( IplImage*, int, int, int );
00043 void calc_feature_scales( CvSeq*, double, int );
00044 void adjust_for_img_dbl( CvSeq* );
00045 void calc_feature_oris( CvSeq*, IplImage*** );
00046 double* ori_hist( IplImage*, int, int, int, int, double );
00047 int calc_grad_mag_ori( IplImage*, int, int, double*, double* );
00048 void smooth_ori_hist( double*, int );
00049 double dominant_ori( double*, int );
00050 void add_good_ori_features( CvSeq*, double*, int, double, struct feature* );
00051 struct feature* clone_feature( struct feature* );
00052 void compute_descriptors( CvSeq*, IplImage***, int, int );
00053 double*** descr_hist( IplImage*, int, int, double, double, int, int );
00054 void interp_hist_entry( double***, double, double, double, double, int, int);
00055 void hist_to_descr( double***, int, int, struct feature* );
00056 void normalize_descr( struct feature* );
00057 int feature_cmp( void*, void*, void* );
00058 void release_descr_hist( double****, int );
00059 void release_pyr( IplImage****, int, int );
00060 
00061 
00062 /*********************** Functions prototyped in sift.h **********************/
00063 
00064 
00075 int sift_features( IplImage* img, struct feature** feat )
00076 {
00077   return _sift_features( img, feat, SIFT_INTVLS, SIFT_SIGMA, SIFT_CONTR_THR,
00078                          SIFT_CURV_THR, SIFT_IMG_DBL, SIFT_DESCR_WIDTH,
00079                          SIFT_DESCR_HIST_BINS );
00080 }
00081 
00082 
00083 
00109 int _sift_features( IplImage* img, struct feature** feat, int intvls,
00110                     double sigma, double contr_thr, int curv_thr,
00111                     int img_dbl, int descr_width, int descr_hist_bins )
00112 {
00113   IplImage* init_img;
00114   IplImage*** gauss_pyr, *** dog_pyr;
00115   CvMemStorage* storage;
00116   CvSeq* features;
00117   int octvs, i, n = 0;
00118   
00119   /* check arguments */
00120   if( ! img )
00121     fatal_error( "NULL pointer error, %s, line %d",  __FILE__, __LINE__ );
00122   if( ! feat )
00123     fatal_error( "NULL pointer error, %s, line %d",  __FILE__, __LINE__ );
00124 
00125   /* build scale space pyramid; smallest dimension of top level is ~4 pixels */
00126   init_img = create_init_img( img, img_dbl, sigma );
00127   octvs = log( MIN( init_img->width, init_img->height ) ) / log(2) - 2;
00128   gauss_pyr = build_gauss_pyr( init_img, octvs, intvls, sigma );
00129   dog_pyr = build_dog_pyr( gauss_pyr, octvs, intvls );
00130   
00131   storage = cvCreateMemStorage( 0 );
00132   features = scale_space_extrema( dog_pyr, octvs, intvls, contr_thr,
00133                                   curv_thr, storage );
00134   calc_feature_scales( features, sigma, intvls );
00135   if( img_dbl )
00136     adjust_for_img_dbl( features );
00137   calc_feature_oris( features, gauss_pyr );
00138   compute_descriptors( features, gauss_pyr, descr_width, descr_hist_bins );
00139 
00140   /* sort features by decreasing scale and move from CvSeq to array */
00141   cvSeqSort( features, (CvCmpFunc)feature_cmp, NULL );
00142   n = features->total;
00143   *feat = (struct feature *) calloc( n, sizeof(struct feature) );
00144   *feat = (struct feature *) cvCvtSeqToArray( features, *feat, CV_WHOLE_SEQ );
00145   for( i = 0; i < n; i++ )
00146     {
00147       free( (*feat)[i].feature_data );
00148       (*feat)[i].feature_data = NULL;
00149     }
00150   
00151   cvReleaseMemStorage( &storage );
00152   cvReleaseImage( &init_img );
00153   release_pyr( &gauss_pyr, octvs, intvls + 3 );
00154   release_pyr( &dog_pyr, octvs, intvls + 2 );
00155   return n;
00156 }
00157 
00158 
00159 /************************ Functions prototyped here **************************/
00160 
00161 /*
00162   Converts an image to 8-bit grayscale and Gaussian-smooths it.  The image is
00163   optionally doubled in size prior to smoothing.
00164 
00165   @param img input image
00166   @param img_dbl if true, image is doubled in size prior to smoothing
00167   @param sigma total std of Gaussian smoothing
00168 */
00169 IplImage* create_init_img( IplImage* img, int img_dbl, double sigma )
00170 {
00171   IplImage* gray, * dbl;
00172   double sig_diff;
00173 
00174   gray = convert_to_gray32( img );
00175   if( img_dbl )
00176     {
00177       sig_diff = sqrt( sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4 );
00178       dbl = cvCreateImage( cvSize( img->width*2, img->height*2 ),
00179                            IPL_DEPTH_32F, 1 );
00180       cvResize( gray, dbl, CV_INTER_CUBIC );
00181       cvSmooth( dbl, dbl, CV_GAUSSIAN, 0, 0, sig_diff, sig_diff );
00182       cvReleaseImage( &gray );
00183       return dbl;
00184     }
00185   else
00186     {
00187       sig_diff = sqrt( sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA );
00188       cvSmooth( gray, gray, CV_GAUSSIAN, 0, 0, sig_diff, sig_diff );
00189       return gray;
00190     }
00191 }
00192 
00193 
00194 
00195 /*
00196   Converts an image to 32-bit grayscale
00197 
00198   @param img a 3-channel 8-bit color (BGR) or 8-bit gray image
00199 
00200   @return Returns a 32-bit grayscale image
00201 */
00202 IplImage* convert_to_gray32( IplImage* img )
00203 {
00204   IplImage* gray8, * gray32;
00205 
00206   gray8 = cvCreateImage( cvGetSize(img), IPL_DEPTH_8U, 1 );
00207   gray32 = cvCreateImage( cvGetSize(img), IPL_DEPTH_32F, 1 );
00208 
00209   if( img->nChannels == 1 )
00210     gray8 = (IplImage *) cvClone( img );
00211   else
00212     cvCvtColor( img, gray8, CV_RGB2GRAY );
00213   cvConvertScale( gray8, gray32, 1.0 / 255.0, 0 );
00214 
00215   cvReleaseImage( &gray8 );
00216   return gray32;
00217 }
00218 
00219 
00220 
00221 /*
00222   Builds Gaussian scale space pyramid from an image
00223 
00224   @param base base image of the pyramid
00225   @param octvs number of octaves of scale space
00226   @param intvls number of intervals per octave
00227   @param sigma amount of Gaussian smoothing per octave
00228 
00229   @return Returns a Gaussian scale space pyramid as an octvs x (intvls + 3)
00230     array
00231 */
00232 IplImage*** build_gauss_pyr( IplImage* base, int octvs,
00233                              int intvls, double sigma )
00234 {
00235   IplImage*** gauss_pyr;
00236   const int _intvls = intvls;
00237   double sig[_intvls+3], sig_total, sig_prev, k;
00238   int i, o;
00239 
00240   gauss_pyr = (IplImage ***) calloc( octvs, sizeof( IplImage** ) );
00241   for( i = 0; i < octvs; i++ )
00242     gauss_pyr[i] = (IplImage **) calloc( intvls + 3, sizeof( IplImage *) );
00243 
00244   /*
00245     precompute Gaussian sigmas using the following formula:
00246 
00247     \sigma_{total}^2 = \sigma_{i}^2 + \sigma_{i-1}^2
00248   */
00249   sig[0] = sigma;
00250   k = pow( 2.0, 1.0 / intvls );
00251   for( i = 1; i < intvls + 3; i++ )
00252     {
00253       sig_prev = pow( k, i - 1 ) * sigma;
00254       sig_total = sig_prev * k;
00255       sig[i] = sqrt( sig_total * sig_total - sig_prev * sig_prev );
00256     }
00257 
00258   for( o = 0; o < octvs; o++ )
00259     for( i = 0; i < intvls + 3; i++ )
00260       {
00261         if( o == 0  &&  i == 0 )
00262           gauss_pyr[o][i] = cvCloneImage(base);
00263 
00264         /* base of new octvave is halved image from end of previous octave */
00265         else if( i == 0 )
00266           gauss_pyr[o][i] = downsample( gauss_pyr[o-1][intvls] );
00267           
00268         /* blur the current octave's last image to create the next one */
00269         else
00270           {
00271             gauss_pyr[o][i] = cvCreateImage( cvGetSize(gauss_pyr[o][i-1]),
00272                                              IPL_DEPTH_32F, 1 );
00273             cvSmooth( gauss_pyr[o][i-1], gauss_pyr[o][i],
00274                       CV_GAUSSIAN, 0, 0, sig[i], sig[i] );
00275           }
00276       }
00277 
00278   return gauss_pyr;
00279 }
00280 
00281 
00282 
00283 /*
00284   Downsamples an image to a quarter of its size (half in each dimension)
00285   using nearest-neighbor interpolation
00286 
00287   @param img an image
00288 
00289   @return Returns an image whose dimensions are half those of img
00290 */
00291 IplImage* downsample( IplImage* img )
00292 {
00293   IplImage* smaller = cvCreateImage( cvSize(img->width / 2, img->height / 2),
00294                                      img->depth, img->nChannels );
00295   cvResize( img, smaller, CV_INTER_NN );
00296 
00297   return smaller;
00298 }
00299 
00300 
00301 
00302 /*
00303   Builds a difference of Gaussians scale space pyramid by subtracting adjacent
00304   intervals of a Gaussian pyramid
00305 
00306   @param gauss_pyr Gaussian scale-space pyramid
00307   @param octvs number of octaves of scale space
00308   @param intvls number of intervals per octave
00309 
00310   @return Returns a difference of Gaussians scale space pyramid as an
00311     octvs x (intvls + 2) array
00312 */
00313 IplImage*** build_dog_pyr( IplImage*** gauss_pyr, int octvs, int intvls )
00314 {
00315   IplImage*** dog_pyr;
00316   int i, o;
00317 
00318   dog_pyr = (IplImage ***) calloc( octvs, sizeof( IplImage** ) );
00319   for( i = 0; i < octvs; i++ )
00320     dog_pyr[i] = (IplImage **) calloc( intvls + 2, sizeof(IplImage*) );
00321 
00322   for( o = 0; o < octvs; o++ )
00323     for( i = 0; i < intvls + 2; i++ )
00324       {
00325         dog_pyr[o][i] = cvCreateImage( cvGetSize(gauss_pyr[o][i]),
00326                                        IPL_DEPTH_32F, 1 );
00327         cvSub( gauss_pyr[o][i+1], gauss_pyr[o][i], dog_pyr[o][i], NULL );
00328       }
00329 
00330   return dog_pyr;
00331 }
00332 
00333 
00334 
00335 /*
00336   Detects features at extrema in DoG scale space.  Bad features are discarded
00337   based on contrast and ratio of principal curvatures.
00338 
00339   @param dog_pyr DoG scale space pyramid
00340   @param octvs octaves of scale space represented by dog_pyr
00341   @param intvls intervals per octave
00342   @param contr_thr low threshold on feature contrast
00343   @param curv_thr high threshold on feature ratio of principal curvatures
00344   @param storage memory storage in which to store detected features
00345 
00346   @return Returns an array of detected features whose scales, orientations,
00347     and descriptors are yet to be determined.
00348 */
00349 CvSeq* scale_space_extrema( IplImage*** dog_pyr, int octvs, int intvls,
00350                             double contr_thr, int curv_thr,
00351                             CvMemStorage* storage )
00352 {
00353   CvSeq* features;
00354   double prelim_contr_thr = 0.5 * contr_thr / intvls;
00355   struct feature* feat;
00356   struct detection_data* ddata;
00357   int o, i, r, c;
00358 
00359   features = cvCreateSeq( 0, sizeof(CvSeq), sizeof(struct feature), storage );
00360   for( o = 0; o < octvs; o++ )
00361     for( i = 1; i <= intvls; i++ )
00362       for(r = SIFT_IMG_BORDER; r < dog_pyr[o][0]->height-SIFT_IMG_BORDER; r++)
00363         for(c = SIFT_IMG_BORDER; c < dog_pyr[o][0]->width-SIFT_IMG_BORDER; c++)
00364           /* perform preliminary check on contrast */
00365           if( ABS( pixval32f( dog_pyr[o][i], r, c ) ) > prelim_contr_thr )
00366             if( is_extremum( dog_pyr, o, i, r, c ) )
00367               {
00368                 feat = interp_extremum(dog_pyr, o, i, r, c, intvls, contr_thr);
00369                 if( feat )
00370                   {
00371                     ddata = feat_detection_data( feat );
00372                     if( ! is_too_edge_like( dog_pyr[ddata->octv][ddata->intvl],
00373                                             ddata->r, ddata->c, curv_thr ) )
00374                       {
00375                         cvSeqPush( features, feat );
00376                       }
00377                     else
00378                       free( ddata );
00379                     free( feat );
00380                   }
00381               }
00382   
00383   return features;
00384 }
00385 
00386 
00387 
00388 /*
00389   Determines whether a pixel is a scale-space extremum by comparing it to it's
00390   3x3x3 pixel neighborhood.
00391 
00392   @param dog_pyr DoG scale space pyramid
00393   @param octv pixel's scale space octave
00394   @param intvl pixel's within-octave interval
00395   @param r pixel's image row
00396   @param c pixel's image col
00397 
00398   @return Returns 1 if the specified pixel is an extremum (max or min) among
00399     it's 3x3x3 pixel neighborhood.
00400 */
00401 int is_extremum( IplImage*** dog_pyr, int octv, int intvl, int r, int c )
00402 {
00403   double val = pixval32f( dog_pyr[octv][intvl], r, c );
00404   int i, j, k;
00405 
00406   /* check for maximum */
00407   if( val > 0 )
00408     {
00409       for( i = -1; i <= 1; i++ )
00410         for( j = -1; j <= 1; j++ )
00411           for( k = -1; k <= 1; k++ )
00412             if( val < pixval32f( dog_pyr[octv][intvl+i], r + j, c + k ) )
00413               return 0;
00414     }
00415 
00416   /* check for minimum */
00417   else
00418     {
00419       for( i = -1; i <= 1; i++ )
00420         for( j = -1; j <= 1; j++ )
00421           for( k = -1; k <= 1; k++ )
00422             if( val > pixval32f( dog_pyr[octv][intvl+i], r + j, c + k ) )
00423               return 0;
00424     }
00425 
00426   return 1;
00427 }
00428 
00429 
00430 
00431 /*
00432   Interpolates a scale-space extremum's location and scale to subpixel
00433   accuracy to form an image feature.  Rejects features with low contrast.
00434   Based on Section 4 of Lowe's paper.  
00435 
00436   @param dog_pyr DoG scale space pyramid
00437   @param octv feature's octave of scale space
00438   @param intvl feature's within-octave interval
00439   @param r feature's image row
00440   @param c feature's image column
00441   @param intvls total intervals per octave
00442   @param contr_thr threshold on feature contrast
00443 
00444   @return Returns the feature resulting from interpolation of the given
00445     parameters or NULL if the given location could not be interpolated or
00446     if contrast at the interpolated loation was too low.  If a feature is
00447     returned, its scale, orientation, and descriptor are yet to be determined.
00448 */
00449 struct feature* interp_extremum( IplImage*** dog_pyr, int octv, int intvl,
00450                                  int r, int c, int intvls, double contr_thr )
00451 {
00452   struct feature* feat;
00453   struct detection_data* ddata;
00454   double xi, xr, xc, contr;
00455   int i = 0;
00456   
00457   while( i < SIFT_MAX_INTERP_STEPS )
00458     {
00459       interp_step( dog_pyr, octv, intvl, r, c, &xi, &xr, &xc );
00460       if( ABS( xi ) < 0.5  &&  ABS( xr ) < 0.5  &&  ABS( xc ) < 0.5 )
00461         break;
00462       
00463       c += cvRound( xc );
00464       r += cvRound( xr );
00465       intvl += cvRound( xi );
00466       
00467       if( intvl < 1  ||
00468           intvl > intvls  ||
00469           c < SIFT_IMG_BORDER  ||
00470           r < SIFT_IMG_BORDER  ||
00471           c >= dog_pyr[octv][0]->width - SIFT_IMG_BORDER  ||
00472           r >= dog_pyr[octv][0]->height - SIFT_IMG_BORDER )
00473         {
00474           return NULL;
00475         }
00476       
00477       i++;
00478     }
00479   
00480   /* ensure convergence of interpolation */
00481   if( i >= SIFT_MAX_INTERP_STEPS )
00482     return NULL;
00483   
00484   contr = interp_contr( dog_pyr, octv, intvl, r, c, xi, xr, xc );
00485   if( ABS( contr ) < contr_thr / intvls )
00486     return NULL;
00487 
00488   feat = new_feature();
00489   ddata = feat_detection_data( feat );
00490   feat->img_pt.x = feat->x = ( c + xc ) * pow( 2.0, octv );
00491   feat->img_pt.y = feat->y = ( r + xr ) * pow( 2.0, octv );
00492   ddata->r = r;
00493   ddata->c = c;
00494   ddata->octv = octv;
00495   ddata->intvl = intvl;
00496   ddata->subintvl = xi;
00497 
00498   return feat;
00499 }
00500 
00501 
00502 
00503 /*
00504   Performs one step of extremum interpolation.  Based on Eqn. (3) in Lowe's
00505   paper.
00506 
00507   @param dog_pyr difference of Gaussians scale space pyramid
00508   @param octv octave of scale space
00509   @param intvl interval being interpolated
00510   @param r row being interpolated
00511   @param c column being interpolated
00512   @param xi output as interpolated subpixel increment to interval
00513   @param xr output as interpolated subpixel increment to row
00514   @param xc output as interpolated subpixel increment to col
00515 */
00516 
00517 void interp_step( IplImage*** dog_pyr, int octv, int intvl, int r, int c,
00518                   double* xi, double* xr, double* xc )
00519 {
00520   CvMat* dD, * H, * H_inv, X;
00521   double x[3] = { 0 };
00522   
00523   dD = deriv_3D( dog_pyr, octv, intvl, r, c );
00524   H = hessian_3D( dog_pyr, octv, intvl, r, c );
00525   H_inv = cvCreateMat( 3, 3, CV_64FC1 );
00526   cvInvert( H, H_inv, CV_SVD );
00527   cvInitMatHeader( &X, 3, 1, CV_64FC1, x, CV_AUTOSTEP );
00528   cvGEMM( H_inv, dD, -1, NULL, 0, &X, 0 );
00529   
00530   cvReleaseMat( &dD );
00531   cvReleaseMat( &H );
00532   cvReleaseMat( &H_inv );
00533 
00534   *xi = x[2];
00535   *xr = x[1];
00536   *xc = x[0];
00537 }
00538 
00539 
00540 
00541 /*
00542   Computes the partial derivatives in x, y, and scale of a pixel in the DoG
00543   scale space pyramid.
00544 
00545   @param dog_pyr DoG scale space pyramid
00546   @param octv pixel's octave in dog_pyr
00547   @param intvl pixel's interval in octv
00548   @param r pixel's image row
00549   @param c pixel's image col
00550 
00551   @return Returns the vector of partial derivatives for pixel I
00552     { dI/dx, dI/dy, dI/ds }^T as a CvMat*
00553 */
00554 CvMat* deriv_3D( IplImage*** dog_pyr, int octv, int intvl, int r, int c )
00555 {
00556   CvMat* dI;
00557   double dx, dy, ds;
00558 
00559   dx = ( pixval32f( dog_pyr[octv][intvl], r, c+1 ) -
00560          pixval32f( dog_pyr[octv][intvl], r, c-1 ) ) / 2.0;
00561   dy = ( pixval32f( dog_pyr[octv][intvl], r+1, c ) -
00562          pixval32f( dog_pyr[octv][intvl], r-1, c ) ) / 2.0;
00563   ds = ( pixval32f( dog_pyr[octv][intvl+1], r, c ) -
00564          pixval32f( dog_pyr[octv][intvl-1], r, c ) ) / 2.0;
00565   
00566   dI = cvCreateMat( 3, 1, CV_64FC1 );
00567   cvmSet( dI, 0, 0, dx );
00568   cvmSet( dI, 1, 0, dy );
00569   cvmSet( dI, 2, 0, ds );
00570 
00571   return dI;
00572 }
00573 
00574 
00575 
00576 /*
00577   Computes the 3D Hessian matrix for a pixel in the DoG scale space pyramid.
00578 
00579   @param dog_pyr DoG scale space pyramid
00580   @param octv pixel's octave in dog_pyr
00581   @param intvl pixel's interval in octv
00582   @param r pixel's image row
00583   @param c pixel's image col
00584 
00585   @return Returns the Hessian matrix (below) for pixel I as a CvMat*
00586 
00587   / Ixx  Ixy  Ixs \ <BR>
00588   | Ixy  Iyy  Iys | <BR>
00589   \ Ixs  Iys  Iss /
00590 */
00591 CvMat* hessian_3D( IplImage*** dog_pyr, int octv, int intvl, int r, int c )
00592 {
00593   CvMat* H;
00594   double v, dxx, dyy, dss, dxy, dxs, dys;
00595   
00596   v = pixval32f( dog_pyr[octv][intvl], r, c );
00597   dxx = ( pixval32f( dog_pyr[octv][intvl], r, c+1 ) + 
00598           pixval32f( dog_pyr[octv][intvl], r, c-1 ) - 2 * v );
00599   dyy = ( pixval32f( dog_pyr[octv][intvl], r+1, c ) +
00600           pixval32f( dog_pyr[octv][intvl], r-1, c ) - 2 * v );
00601   dss = ( pixval32f( dog_pyr[octv][intvl+1], r, c ) +
00602           pixval32f( dog_pyr[octv][intvl-1], r, c ) - 2 * v );
00603   dxy = ( pixval32f( dog_pyr[octv][intvl], r+1, c+1 ) -
00604           pixval32f( dog_pyr[octv][intvl], r+1, c-1 ) -
00605           pixval32f( dog_pyr[octv][intvl], r-1, c+1 ) +
00606           pixval32f( dog_pyr[octv][intvl], r-1, c-1 ) ) / 4.0;
00607   dxs = ( pixval32f( dog_pyr[octv][intvl+1], r, c+1 ) -
00608           pixval32f( dog_pyr[octv][intvl+1], r, c-1 ) -
00609           pixval32f( dog_pyr[octv][intvl-1], r, c+1 ) +
00610           pixval32f( dog_pyr[octv][intvl-1], r, c-1 ) ) / 4.0;
00611   dys = ( pixval32f( dog_pyr[octv][intvl+1], r+1, c ) -
00612           pixval32f( dog_pyr[octv][intvl+1], r-1, c ) -
00613           pixval32f( dog_pyr[octv][intvl-1], r+1, c ) +
00614           pixval32f( dog_pyr[octv][intvl-1], r-1, c ) ) / 4.0;
00615   
00616   H = cvCreateMat( 3, 3, CV_64FC1 );
00617   cvmSet( H, 0, 0, dxx );
00618   cvmSet( H, 0, 1, dxy );
00619   cvmSet( H, 0, 2, dxs );
00620   cvmSet( H, 1, 0, dxy );
00621   cvmSet( H, 1, 1, dyy );
00622   cvmSet( H, 1, 2, dys );
00623   cvmSet( H, 2, 0, dxs );
00624   cvmSet( H, 2, 1, dys );
00625   cvmSet( H, 2, 2, dss );
00626 
00627   return H;
00628 }
00629 
00630 
00631 
00632 /*
00633   Calculates interpolated pixel contrast.  Based on Eqn. (3) in Lowe's
00634   paper.
00635 
00636   @param dog_pyr difference of Gaussians scale space pyramid
00637   @param octv octave of scale space
00638   @param intvl within-octave interval
00639   @param r pixel row
00640   @param c pixel column
00641   @param xi interpolated subpixel increment to interval
00642   @param xr interpolated subpixel increment to row
00643   @param xc interpolated subpixel increment to col
00644 
00645   @param Returns interpolated contrast.
00646 */
00647 double interp_contr( IplImage*** dog_pyr, int octv, int intvl, int r,
00648                      int c, double xi, double xr, double xc )
00649 {
00650   CvMat* dD, X, T;
00651   double t[1], x[3] = { xc, xr, xi };
00652 
00653   cvInitMatHeader( &X, 3, 1, CV_64FC1, x, CV_AUTOSTEP );
00654   cvInitMatHeader( &T, 1, 1, CV_64FC1, t, CV_AUTOSTEP );
00655   dD = deriv_3D( dog_pyr, octv, intvl, r, c );
00656   cvGEMM( dD, &X, 1, NULL, 0, &T,  CV_GEMM_A_T );
00657   cvReleaseMat( &dD );
00658 
00659   return pixval32f( dog_pyr[octv][intvl], r, c ) + t[0] * 0.5;
00660 }
00661 
00662 
00663 
00664 /*
00665   Allocates and initializes a new feature
00666 
00667   @return Returns a pointer to the new feature
00668 */
00669 struct feature* new_feature( void )
00670 {
00671   struct feature* feat;
00672   struct detection_data* ddata;
00673 
00674   feat = (struct feature *) malloc( sizeof( struct feature ) );
00675   memset( feat, 0, sizeof( struct feature ) );
00676   ddata = (struct detection_data *) malloc( sizeof( struct detection_data ) );
00677   memset( ddata, 0, sizeof( struct detection_data ) );
00678   feat->feature_data = ddata;
00679   feat->type = FEATURE_LOWE;
00680 
00681   return feat;
00682 }
00683 
00684 
00685 
00686 /*
00687   Determines whether a feature is too edge like to be stable by computing the
00688   ratio of principal curvatures at that feature.  Based on Section 4.1 of
00689   Lowe's paper.
00690 
00691   @param dog_img image from the DoG pyramid in which feature was detected
00692   @param r feature row
00693   @param c feature col
00694   @param curv_thr high threshold on ratio of principal curvatures
00695 
00696   @return Returns 0 if the feature at (r,c) in dog_img is sufficiently
00697     corner-like or 1 otherwise.
00698 */
00699 int is_too_edge_like( IplImage* dog_img, int r, int c, int curv_thr )
00700 {
00701   double d, dxx, dyy, dxy, tr, det;
00702 
00703   /* principal curvatures are computed using the trace and det of Hessian */
00704   d = pixval32f(dog_img, r, c);
00705   dxx = pixval32f( dog_img, r, c+1 ) + pixval32f( dog_img, r, c-1 ) - 2 * d;
00706   dyy = pixval32f( dog_img, r+1, c ) + pixval32f( dog_img, r-1, c ) - 2 * d;
00707   dxy = ( pixval32f(dog_img, r+1, c+1) - pixval32f(dog_img, r+1, c-1) -
00708           pixval32f(dog_img, r-1, c+1) + pixval32f(dog_img, r-1, c-1) ) / 4.0;
00709   tr = dxx + dyy;
00710   det = dxx * dyy - dxy * dxy;
00711 
00712   /* negative determinant -> curvatures have different signs; reject feature */
00713   if( det <= 0 )
00714     return 1;
00715 
00716   if( tr * tr / det < ( curv_thr + 1.0 )*( curv_thr + 1.0 ) / curv_thr )
00717     return 0;
00718   return 1;
00719 }
00720 
00721 
00722 
00723 /*
00724   Calculates characteristic scale for each feature in an array.
00725 
00726   @param features array of features
00727   @param sigma amount of Gaussian smoothing per octave of scale space
00728   @param intvls intervals per octave of scale space
00729 */
00730 void calc_feature_scales( CvSeq* features, double sigma, int intvls )
00731 {
00732   struct feature* feat;
00733   struct detection_data* ddata;
00734   double intvl;
00735   int i, n;
00736 
00737   n = features->total;
00738   for( i = 0; i < n; i++ )
00739     {
00740       feat = CV_GET_SEQ_ELEM( struct feature, features, i );
00741       ddata = feat_detection_data( feat );
00742       intvl = ddata->intvl + ddata->subintvl;
00743       feat->scl = sigma * pow( 2.0, ddata->octv + intvl / intvls );
00744       ddata->scl_octv = sigma * pow( 2.0, intvl / intvls );
00745     }
00746 }
00747 
00748 
00749 
00750 /*
00751   Halves feature coordinates and scale in case the input image was doubled
00752   prior to scale space construction.
00753 
00754   @param features array of features
00755 */
00756 void adjust_for_img_dbl( CvSeq* features )
00757 {
00758   struct feature* feat;
00759   int i, n;
00760 
00761   n = features->total;
00762   for( i = 0; i < n; i++ )
00763     {
00764       feat = CV_GET_SEQ_ELEM( struct feature, features, i );
00765       feat->x /= 2.0;
00766       feat->y /= 2.0;
00767       feat->scl /= 2.0;
00768       feat->img_pt.x /= 2.0;
00769       feat->img_pt.y /= 2.0;
00770     }
00771 }
00772 
00773 
00774 
00775 /*
00776   Computes a canonical orientation for each image feature in an array.  Based
00777   on Section 5 of Lowe's paper.  This function adds features to the array when
00778   there is more than one dominant orientation at a given feature location.
00779 
00780   @param features an array of image features
00781   @param gauss_pyr Gaussian scale space pyramid
00782 */
00783 void calc_feature_oris( CvSeq* features, IplImage*** gauss_pyr )
00784 {
00785   struct feature* feat;
00786   struct detection_data* ddata;
00787   double* hist;
00788   double omax;
00789   int i, j, n = features->total;
00790 
00791   for( i = 0; i < n; i++ )
00792     {
00793       feat = (struct feature *) malloc( sizeof( struct feature ) );
00794       cvSeqPopFront( features, feat );
00795       ddata = feat_detection_data( feat );
00796       hist = ori_hist( gauss_pyr[ddata->octv][ddata->intvl],
00797                        ddata->r, ddata->c, SIFT_ORI_HIST_BINS,
00798                        cvRound( SIFT_ORI_RADIUS * ddata->scl_octv ),
00799                        SIFT_ORI_SIG_FCTR * ddata->scl_octv );
00800       for( j = 0; j < SIFT_ORI_SMOOTH_PASSES; j++ )
00801         smooth_ori_hist( hist, SIFT_ORI_HIST_BINS );
00802       omax = dominant_ori( hist, SIFT_ORI_HIST_BINS );
00803       add_good_ori_features( features, hist, SIFT_ORI_HIST_BINS,
00804                              omax * SIFT_ORI_PEAK_RATIO, feat );
00805       free( ddata );
00806       free( feat );
00807       free( hist );
00808     }
00809 }
00810 
00811 
00812 
00813 /*
00814   Computes a gradient orientation histogram at a specified pixel.
00815 
00816   @param img image
00817   @param r pixel row
00818   @param c pixel col
00819   @param n number of histogram bins
00820   @param rad radius of region over which histogram is computed
00821   @param sigma std for Gaussian weighting of histogram entries
00822 
00823   @return Returns an n-element array containing an orientation histogram
00824     representing orientations between 0 and 2 PI.
00825 */
00826 double* ori_hist( IplImage* img, int r, int c, int n, int rad, double sigma)
00827 {
00828   double* hist;
00829   double mag, ori, w, exp_denom, PI2 = CV_PI * 2.0;
00830   int bin, i, j;
00831 
00832   hist = (double *) calloc( n, sizeof( double ) );
00833   exp_denom = 2.0 * sigma * sigma;
00834   for( i = -rad; i <= rad; i++ )
00835     for( j = -rad; j <= rad; j++ )
00836       if( calc_grad_mag_ori( img, r + i, c + j, &mag, &ori ) )
00837         {
00838           w = exp( -( i*i + j*j ) / exp_denom );
00839           bin = cvRound( n * ( ori + CV_PI ) / PI2 );
00840           bin = ( bin < n )? bin : 0;
00841           hist[bin] += w * mag;
00842         }
00843 
00844   return hist;
00845 }
00846 
00847 
00848 
00849 /*
00850   Calculates the gradient magnitude and orientation at a given pixel.
00851 
00852   @param img image
00853   @param r pixel row
00854   @param c pixel col
00855   @param mag output as gradient magnitude at pixel (r,c)
00856   @param ori output as gradient orientation at pixel (r,c)
00857 
00858   @return Returns 1 if the specified pixel is a valid one and sets mag and
00859     ori accordingly; otherwise returns 0
00860 */
00861 int calc_grad_mag_ori( IplImage* img, int r, int c, double* mag, double* ori )
00862 {
00863   double dx, dy;
00864 
00865   if( r > 0  &&  r < img->height - 1  &&  c > 0  &&  c < img->width - 1 )
00866     {
00867       dx = pixval32f( img, r, c+1 ) - pixval32f( img, r, c-1 );
00868       dy = pixval32f( img, r-1, c ) - pixval32f( img, r+1, c );
00869       *mag = sqrt( dx*dx + dy*dy );
00870       *ori = atan2( dy, dx );
00871       return 1;
00872     }
00873 
00874   else
00875     return 0;
00876 }
00877 
00878 
00879 
/*
  Smooths an orientation histogram in place with the circular kernel
  [0.25, 0.5, 0.25]: the first and last bins are treated as neighbors.

  @param hist an orientation histogram
  @param n number of bins
*/
void smooth_ori_hist( double* hist, int n )
{
  /* unsmoothed copies of values that are overwritten before they are needed */
  double first = hist[0];
  double left = hist[n-1];
  int b;

  for( b = 0; b < n; b++ ) {
    double center = hist[b];
    double right = ( b + 1 == n ) ? first : hist[b+1];

    hist[b] = 0.25 * left + 0.5 * center + 0.25 * right;
    left = center;
  }
}
00900 
00901 
00902 
/*
  Finds the magnitude of the dominant orientation in a histogram

  @param hist an orientation histogram; must hold at least one bin
  @param n number of bins

  @return Returns the value of the largest bin in hist
*/
double dominant_ori( double* hist, int n )
{
  double omax;
  int i;

  /* only the peak value is returned, so its bin index is not tracked */
  omax = hist[0];
  for( i = 1; i < n; i++ )
    if( hist[i] > omax )
      omax = hist[i];
  return omax;
}
00926 
00927 
00928 
/*
  Interpolates a histogram peak from left (l), center (c), and right (r)
  values.  Evaluates to the offset, relative to the center bin, of the
  vertex of the parabola through the three values.

  Each argument is evaluated more than once, so pass side-effect-free
  expressions.  The denominator l - 2c + r must be nonzero.
*/
#define interp_hist_peak( l, c, r ) \
  ( ( (l) - (r) ) * 0.5 / ( (l) - 2.0 * (c) + (r) ) )
00933 
00934 
00935 
00936 /*
00937   Adds features to an array for every orientation in a histogram greater than
00938   a specified threshold.
00939 
00940   @param features new features are added to the end of this array
00941   @param hist orientation histogram
00942   @param n number of bins in hist
00943   @param mag_thr new features are added for entries in hist greater than this
00944   @param feat new features are clones of this with different orientations
00945 */
00946 void add_good_ori_features( CvSeq* features, double* hist, int n,
00947                             double mag_thr, struct feature* feat )
00948 {
00949   struct feature* new_feat;
00950   double bin, PI2 = CV_PI * 2.0;
00951   int l, r, i;
00952 
00953   for( i = 0; i < n; i++ )
00954     {
00955       l = ( i == 0 )? n - 1 : i-1;
00956       r = ( i + 1 ) % n;
00957       
00958       if( hist[i] > hist[l]  &&  hist[i] > hist[r]  &&  hist[i] >= mag_thr )
00959         {
00960           bin = i + interp_hist_peak( hist[l], hist[i], hist[r] );
00961           bin = ( bin < 0 )? n + bin : ( bin >= n )? bin - n : bin;
00962           new_feat = clone_feature( feat );
00963           new_feat->ori = ( ( PI2 * bin ) / n ) - CV_PI;
00964           cvSeqPush( features, new_feat );
00965           free( new_feat );
00966         }
00967     }
00968 }
00969 
00970 
00971 
00972 /*
00973   Makes a deep copy of a feature
00974 
00975   @param feat feature to be cloned
00976 
00977   @return Returns a deep copy of feat
00978 */
00979 struct feature* clone_feature( struct feature* feat )
00980 {
00981   struct feature* new_feat;
00982   struct detection_data* ddata;
00983 
00984   new_feat = new_feature();
00985   ddata = feat_detection_data( new_feat );
00986   memcpy( new_feat, feat, sizeof( struct feature ) );
00987   memcpy( ddata, feat_detection_data(feat), sizeof( struct detection_data ) );
00988   new_feat->feature_data = ddata;
00989 
00990   return new_feat;
00991 }
00992 
00993 
00994 
00995 /*
00996   Computes feature descriptors for features in an array.  Based on Section 6
00997   of Lowe's paper.
00998 
00999   @param features array of features
01000   @param gauss_pyr Gaussian scale space pyramid
01001   @param d width of 2D array of orientation histograms
01002   @param n number of bins per orientation histogram
01003 */
01004 void compute_descriptors( CvSeq* features, IplImage*** gauss_pyr, int d, int n)
01005 {
01006   struct feature* feat;
01007   struct detection_data* ddata;
01008   double*** hist;
01009   int i, k = features->total;
01010 
01011   for( i = 0; i < k; i++ )
01012     {
01013       feat = CV_GET_SEQ_ELEM( struct feature, features, i );
01014       ddata = feat_detection_data( feat );
01015       hist = descr_hist( gauss_pyr[ddata->octv][ddata->intvl], ddata->r,
01016                          ddata->c, feat->ori, ddata->scl_octv, d, n );
01017       hist_to_descr( hist, d, n, feat );
01018       release_descr_hist( &hist, d );
01019     }
01020 }
01021 
01022 
01023 
01024 /*
01025   Computes the 2D array of orientation histograms that form the feature
01026   descriptor.  Based on Section 6.1 of Lowe's paper.
01027 
01028   @param img image used in descriptor computation
01029   @param r row coord of center of orientation histogram array
01030   @param c column coord of center of orientation histogram array
01031   @param ori canonical orientation of feature whose descr is being computed
01032   @param scl scale relative to img of feature whose descr is being computed
01033   @param d width of 2d array of orientation histograms
01034   @param n bins per orientation histogram
01035 
01036   @return Returns a d x d array of n-bin orientation histograms.
01037 */
01038 double*** descr_hist( IplImage* img, int r, int c, double ori,
01039                       double scl, int d, int n )
01040 {
01041   double*** hist;
01042   double cos_t, sin_t, hist_width, exp_denom, r_rot, c_rot, grad_mag,
01043     grad_ori, w, rbin, cbin, obin, bins_per_rad, PI2 = 2.0 * CV_PI;
01044   int radius, i, j;
01045 
01046   hist = (double ***) calloc( d, sizeof( double** ) );
01047   for( i = 0; i < d; i++ )
01048     {
01049       hist[i] = (double **) calloc( d, sizeof( double* ) );
01050       for( j = 0; j < d; j++ )
01051         hist[i][j] = (double *) calloc( n, sizeof( double ) );
01052     }
01053   
01054   cos_t = cos( ori );
01055   sin_t = sin( ori );
01056   bins_per_rad = n / PI2;
01057   exp_denom = d * d * 0.5;
01058   hist_width = SIFT_DESCR_SCL_FCTR * scl;
01059   radius = hist_width * sqrt(2) * ( d + 1.0 ) * 0.5 + 0.5;
01060   for( i = -radius; i <= radius; i++ )
01061     for( j = -radius; j <= radius; j++ )
01062       {
01063         /*
01064           Calculate sample's histogram array coords rotated relative to ori.
01065           Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e.
01066           r_rot = 1.5) have full weight placed in row 1 after interpolation.
01067         */
01068         c_rot = ( j * cos_t - i * sin_t ) / hist_width;
01069         r_rot = ( j * sin_t + i * cos_t ) / hist_width;
01070         rbin = r_rot + d / 2 - 0.5;
01071         cbin = c_rot + d / 2 - 0.5;
01072         
01073         if( rbin > -1.0  &&  rbin < d  &&  cbin > -1.0  &&  cbin < d )
01074           if( calc_grad_mag_ori( img, r + i, c + j, &grad_mag, &grad_ori ))
01075             {
01076               grad_ori -= ori;
01077               while( grad_ori < 0.0 )
01078                 grad_ori += PI2;
01079               while( grad_ori >= PI2 )
01080                 grad_ori -= PI2;
01081               
01082               obin = grad_ori * bins_per_rad;
01083               w = exp( -(c_rot * c_rot + r_rot * r_rot) / exp_denom );
01084               interp_hist_entry( hist, rbin, cbin, obin, grad_mag * w, d, n );
01085             }
01086       }
01087 
01088   return hist;
01089 }
01090 
01091 
01092 
/*
  Interpolates an entry into the array of orientation histograms that form
  the feature descriptor.

  The entry is spread over up to 8 bins: the two nearest rows, columns, and
  orientations.  Along each dimension a bin receives weight 1 - t, where t
  is the distance from the bin in bin units.  Rows and columns outside
  [0,d) are dropped; the orientation index wraps around modulo n.

  @param hist 2D array of orientation histograms
  @param rbin sub-bin row coordinate of entry
  @param cbin sub-bin column coordinate of entry
  @param obin sub-bin orientation coordinate of entry
  @param mag size of entry
  @param d width of 2D array of orientation histograms
  @param n number of bins per orientation histogram
*/
void interp_hist_entry( double*** hist, double rbin, double cbin,
                        double obin, double mag, int d, int n )
{
  int r_lo = cvFloor( rbin );
  int c_lo = cvFloor( cbin );
  int o_lo = cvFloor( obin );
  double frac_r = rbin - r_lo;
  double frac_c = cbin - c_lo;
  double frac_o = obin - o_lo;
  double w_r, w_rc;
  double* bins;
  int dr, dc, dor, row, col;

  for( dr = 0; dr < 2; dr++ )
    {
      row = r_lo + dr;
      if( row < 0  ||  row >= d )
        continue;
      w_r = mag * ( dr ? frac_r : 1.0 - frac_r );

      for( dc = 0; dc < 2; dc++ )
        {
          col = c_lo + dc;
          if( col < 0  ||  col >= d )
            continue;
          w_rc = w_r * ( dc ? frac_c : 1.0 - frac_c );
          bins = hist[row][col];

          for( dor = 0; dor < 2; dor++ )
            bins[( o_lo + dor ) % n] +=
              w_rc * ( dor ? frac_o : 1.0 - frac_o );
        }
    }
}
01149 
01150 
01151 
01152 /*
01153   Converts the 2D array of orientation histograms into a feature's descriptor
01154   vector.
01155   
01156   @param hist 2D array of orientation histograms
01157   @param d width of hist
01158   @param n bins per histogram
01159   @param feat feature into which to store descriptor
01160 */
01161 void hist_to_descr( double*** hist, int d, int n, struct feature* feat )
01162 {
01163   int int_val, i, r, c, o, k = 0;
01164 
01165   for( r = 0; r < d; r++ )
01166     for( c = 0; c < d; c++ )
01167       for( o = 0; o < n; o++ )
01168         feat->descr[k++] = hist[r][c][o];
01169 
01170   feat->d = k;
01171   normalize_descr( feat );
01172   for( i = 0; i < k; i++ )
01173     if( feat->descr[i] > SIFT_DESCR_MAG_THR )
01174       feat->descr[i] = SIFT_DESCR_MAG_THR;
01175   normalize_descr( feat );
01176 
01177   /* convert floating-point descriptor to integer valued descriptor */
01178   for( i = 0; i < k; i++ )
01179     {
01180       int_val = SIFT_INT_DESCR_FCTR * feat->descr[i];
01181       feat->descr[i] = MIN( 255, int_val );
01182     }
01183 }
01184 
01185 
01186 
01187 /*
01188   Normalizes a feature's descriptor vector to unitl length
01189 
01190   @param feat feature
01191 */
01192 void normalize_descr( struct feature* feat )
01193 {
01194   double cur, len_inv, len_sq = 0.0;
01195   int i, d = feat->d;
01196 
01197   for( i = 0; i < d; i++ )
01198     {
01199       cur = feat->descr[i];
01200       len_sq += cur*cur;
01201     }
01202   len_inv = 1.0 / sqrt( len_sq );
01203   for( i = 0; i < d; i++ )
01204     feat->descr[i] *= len_inv;
01205 }
01206 
01207 
01208 
01209 /*
01210   Compares features for a decreasing-scale ordering.  Intended for use with
01211   CvSeqSort
01212 
01213   @param feat1 first feature
01214   @param feat2 second feature
01215   @param param unused
01216 
01217   @return Returns 1 if feat1's scale is greater than feat2's, -1 if vice versa,
01218     and 0 if their scales are equal
01219 */
01220 int feature_cmp( void* feat1, void* feat2, void* param )
01221 {
01222   struct feature* f1 = (struct feature*) feat1;
01223   struct feature* f2 = (struct feature*) feat2;
01224 
01225   if( f1->scl < f2->scl )
01226     return 1;
01227   if( f1->scl > f2->scl )
01228     return -1;
01229   return 0;
01230 }
01231 
01232 
01233 
/*
  De-allocates memory held by a descriptor histogram and sets the caller's
  pointer to NULL.

  Safe to call with a NULL hist, with *hist already NULL (e.g. a second
  release of the same array), and on an array whose rows were only
  partially allocated.

  @param hist pointer to a 2D array of orientation histograms
  @param d width of hist
*/
void release_descr_hist( double**** hist, int d )
{
  int i, j;

  if( hist == NULL  ||  *hist == NULL )
    return;

  for( i = 0; i < d; i++)
    {
      /* a row that was never allocated has no histograms to free */
      if( (*hist)[i] == NULL )
        continue;
      for( j = 0; j < d; j++ )
        free( (*hist)[i][j] );
      free( (*hist)[i] );
    }
  free( *hist );
  *hist = NULL;
}
01253 
01254 
01255 /*
01256   De-allocates memory held by a scale space pyramid
01257 
01258   @param pyr scale space pyramid
01259   @param octvs number of octaves of scale space
01260   @param n number of images per octave
01261 */
01262 void release_pyr( IplImage**** pyr, int octvs, int n )
01263 {
01264   int i, j;
01265   for( i = 0; i < octvs; i++ )
01266     {
01267       for( j = 0; j < n; j++ )
01268         cvReleaseImage( &(*pyr)[i][j] );
01269       free( (*pyr)[i] );
01270     }
01271   free( *pyr );
01272   *pyr = NULL;
01273 }



QVision framework. PARP research group, copyright 2007, 2008.