MVE - Multi-View Environment mve-devel
Loading...
Searching...
No Matches
sift.cc
Go to the documentation of this file.
1/*
2 * Copyright (C) 2015, Simon Fuhrmann
3 * TU Darmstadt - Graphics, Capture and Massively Parallel Computing
4 * All rights reserved.
5 *
6 * This software may be modified and distributed under the terms
7 * of the BSD 3-Clause license. See the LICENSE.txt file for details.
8 */
9
10#include <iostream>
11#include <fstream>
12#include <stdexcept>
13
14#include "util/timer.h"
15#include "math/functions.h"
16#include "math/matrix.h"
17#include "math/matrix_tools.h"
18#include "mve/image_io.h"
19#include "mve/image_tools.h"
20#include "sfm/sift.h"
21
23
24Sift::Sift (Options const& options)
25 : options(options)
26{
27 if (this->options.min_octave < -1
28 || this->options.min_octave > this->options.max_octave)
29 throw std::invalid_argument("Invalid octave range");
30
31 if (this->options.contrast_threshold < 0.0f)
32 this->options.contrast_threshold = 0.02f
33 / static_cast<float>(this->options.num_samples_per_octave);
34
35 if (this->options.debug_output)
36 this->options.verbose_output = true;
37}
38
39/* ---------------------------------------------------------------- */
40
41void
43{
44 util::ClockTimer timer, total_timer;
45
46 /*
47 * Creates the scale space representation of the image by
48 * sampling the scale space and computing the DoG images.
49 * See Section 3, 3.2 and 3.3 in SIFT article.
50 */
51 if (this->options.verbose_output)
52 {
53 std::cout << "SIFT: Creating "
54 << (this->options.max_octave - this->options.min_octave)
55 << " octaves (" << this->options.min_octave << " to "
56 << this->options.max_octave << ")..." << std::endl;
57 }
58 timer.reset();
59 this->create_octaves();
60 if (this->options.debug_output)
61 {
62 std::cout << "SIFT: Creating octaves took "
63 << timer.get_elapsed() << "ms." << std::endl;
64 }
65
66 /*
67 * Detects local extrema in the DoG function as described in Section 3.1.
68 */
69 if (this->options.debug_output)
70 {
71 std::cout << "SIFT: Detecting local extrema..." << std::endl;
72 }
73 timer.reset();
74 this->extrema_detection();
75 if (this->options.debug_output)
76 {
77 std::cout << "SIFT: Detected " << this->keypoints.size()
78 << " keypoints, took " << timer.get_elapsed() << "ms." << std::endl;
79 }
80
81 /*
82 * Accurate keypoint localization and filtering.
83 * According to Section 4 in SIFT article.
84 */
85 if (this->options.debug_output)
86 {
87 std::cout << "SIFT: Localizing and filtering keypoints..." << std::endl;
88 }
89 timer.reset();
91 if (this->options.debug_output)
92 {
93 std::cout << "SIFT: Retained " << this->keypoints.size() << " stable "
94 << "keypoints, took " << timer.get_elapsed() << "ms." << std::endl;
95 }
96
97 /*
98 * Difference of Gaussian images are not needed anymore.
99 */
100 for (std::size_t i = 0; i < this->octaves.size(); ++i)
101 this->octaves[i].dog.clear();
102
103 /*
104 * Generate the list of keypoint descriptors.
105 * See Section 5 and 6 in the SIFT article.
106 * This list can in general be larger than the amount of keypoints,
107 * since for each keypoint several descriptors may be created.
108 */
109 if (this->options.verbose_output)
110 {
111 std::cout << "SIFT: Generating keypoint descriptors..." << std::endl;
112 }
113 timer.reset();
114 this->descriptor_generation();
115 if (this->options.debug_output)
116 {
117 std::cout << "SIFT: Generated " << this->descriptors.size()
118 << " descriptors, took " << timer.get_elapsed() << "ms."
119 << std::endl;
120 }
121 if (this->options.verbose_output)
122 {
123 std::cout << "SIFT: Generated " << this->descriptors.size()
124 << " descriptors from " << this->keypoints.size() << " keypoints,"
125 << " took " << total_timer.get_elapsed() << "ms." << std::endl;
126 }
127
128 /* Free memory. */
129 this->octaves.clear();
130}
131
132/* ---------------------------------------------------------------- */
133
134void
136{
137 if (img->channels() != 1 && img->channels() != 3)
138 throw std::invalid_argument("Gray or color image expected");
139
140 this->orig = mve::image::byte_to_float_image(img);
141 if (img->channels() == 3)
142 {
143 this->orig = mve::image::desaturate<float>
144 (this->orig, mve::image::DESATURATE_AVERAGE);
145 }
146}
147
148/* ---------------------------------------------------------------- */
149
150void
152{
153 if (img->channels() != 1 && img->channels() != 3)
154 throw std::invalid_argument("Gray or color image expected");
155
156 if (img->channels() == 3)
157 {
158 this->orig = mve::image::desaturate<float>
160 }
161 else
162 {
163 this->orig = img->duplicate();
164 }
165}
166
167/* ---------------------------------------------------------------- */
168
169void
171{
172 this->octaves.clear();
173
174 /*
175 * Create octave -1. The original image is assumed to have blur
176 * sigma = 0.5. The double size image therefore has sigma = 1.
177 */
178 if (this->options.min_octave < 0)
179 {
180 //std::cout << "Creating octave -1..." << std::endl;
182 = mve::image::rescale_double_size_supersample<float>(this->orig);
183 this->add_octave(img, this->options.inherent_blur_sigma * 2.0f,
184 this->options.base_blur_sigma);
185 }
186
187 /*
188 * Prepare image for the first positive octave by downsampling.
189 * This code is executed only if min_octave > 0.
190 */
191 mve::FloatImage::ConstPtr img = this->orig;
192 for (int i = 0; i < this->options.min_octave; ++i)
193 img = mve::image::rescale_half_size_gaussian<float>(img);
194
195 /*
196 * Create new octave from 'img', then subsample octave image where
197 * sigma is doubled to get a new base image for the next octave.
198 */
199 float img_sigma = this->options.inherent_blur_sigma;
200 for (int i = std::max(0, this->options.min_octave);
201 i <= this->options.max_octave; ++i)
202 {
203 //std::cout << "Creating octave " << i << "..." << std::endl;
204 this->add_octave(img, img_sigma, this->options.base_blur_sigma);
205 img = mve::image::rescale_half_size_gaussian<float>(img);
206 img_sigma = this->options.base_blur_sigma;
207 }
208}
209
210/* ---------------------------------------------------------------- */
211
212void
214 float has_sigma, float target_sigma)
215{
216 /*
217 * First, bring the provided image to the target blur.
218 * Since L * g(sigma1) * g(sigma2) = L * g(sqrt(sigma1^2 + sigma2^2)),
219 * we need to blur with sigma = sqrt(target_sigma^2 - has_sigma^2).
220 */
221 float sigma = std::sqrt(MATH_POW2(target_sigma) - MATH_POW2(has_sigma));
222 //std::cout << "Pre-blurring image to sigma " << target_sigma << " (has "
223 // << has_sigma << ", blur = " << sigma << ")..." << std::endl;
224 mve::FloatImage::Ptr base = (target_sigma > has_sigma
225 ? mve::image::blur_gaussian<float>(image, sigma)
226 : image->duplicate());
227
228 /* Create the new octave and add initial image. */
229 this->octaves.push_back(Octave());
230 Octave& oct = this->octaves.back();
231 oct.img.push_back(base);
232
233 /* 'k' is the constant factor between the scales in scale space. */
234 float const k = std::pow(2.0f, 1.0f / this->options.num_samples_per_octave);
235 sigma = target_sigma;
236
237 /* Create other (s+2) samples of the octave to get a total of (s+3). */
238 for (int i = 1; i < this->options.num_samples_per_octave + 3; ++i)
239 {
240 /* Calculate the blur sigma the image will get. */
241 float sigmak = sigma * k;
242 float blur_sigma = std::sqrt(MATH_POW2(sigmak) - MATH_POW2(sigma));
243
244 /* Blur the image to create a new scale space sample. */
245 //std::cout << "Blurring image to sigma " << sigmak << " (has " << sigma
246 // << ", blur = " << blur_sigma << ")..." << std::endl;
247 mve::FloatImage::Ptr img = mve::image::blur_gaussian<float>
248 (base, blur_sigma);
249 oct.img.push_back(img);
250
251 /* Create the Difference of Gaussian image (DoG). */
252 mve::FloatImage::Ptr dog = mve::image::subtract<float>(img, base);
253 oct.dog.push_back(dog);
254
255 /* Update previous image and sigma for next round. */
256 base = img;
257 sigma = sigmak;
258 }
259}
260
261/* ---------------------------------------------------------------- */
262
263void
265{
266 /* Delete previous keypoints. */
267 this->keypoints.clear();
268
269 /* Detect keypoints in each octave... */
270 for (std::size_t i = 0; i < this->octaves.size(); ++i)
271 {
272 Octave const& oct(this->octaves[i]);
273 /* In each octave, take three subsequent DoG images and detect. */
274 for (int s = 0; s < (int)oct.dog.size() - 2; ++s)
275 {
276 mve::FloatImage::ConstPtr samples[3] =
277 { oct.dog[s + 0], oct.dog[s + 1], oct.dog[s + 2] };
278 this->extrema_detection(samples, static_cast<int>(i)
279 + this->options.min_octave, s);
280 }
281 }
282}
283
284/* ---------------------------------------------------------------- */
285
286std::size_t
288{
289 int const w = s[1]->width();
290 int const h = s[1]->height();
291
292 /* Offsets for the 9-neighborhood w.r.t. center pixel. */
293 int noff[9] = { -1 - w, 0 - w, 1 - w, -1, 0, 1, -1 + w, 0 + w, 1 + w };
294
295 /*
296 * Iterate over all pixels in s[1], and check if pixel is maximum
297 * (or minumum) in its 27-neighborhood.
298 */
299 int detected = 0;
300 int off = w;
301 for (int y = 1; y < h - 1; ++y, off += w)
302 for (int x = 1; x < w - 1; ++x)
303 {
304 int idx = off + x;
305
306 bool largest = true;
307 bool smallest = true;
308 float center_value = s[1]->at(idx);
309 for (int l = 0; (largest || smallest) && l < 3; ++l)
310 for (int i = 0; (largest || smallest) && i < 9; ++i)
311 {
312 if (l == 1 && i == 4) // Skip center pixel
313 continue;
314 if (s[l]->at(idx + noff[i]) >= center_value)
315 largest = false;
316 if (s[l]->at(idx + noff[i]) <= center_value)
317 smallest = false;
318 }
319
320 /* Skip non-maximum values. */
321 if (!smallest && !largest)
322 continue;
323
324 /* Yummy. Add detected scale space extremum. */
325 Keypoint kp;
326 kp.octave = oi;
327 kp.x = static_cast<float>(x);
328 kp.y = static_cast<float>(y);
329 kp.sample = static_cast<float>(si);
330 this->keypoints.push_back(kp);
331 detected += 1;
332 }
333
334 return detected;
335}
336
337/* ---------------------------------------------------------------- */
338
339void
341{
342 /*
343 * Iterate over all keypoints, accurately localize minima and maxima
344 * in the DoG function by fitting a quadratic Taylor polynomial
345 * around the keypoint.
346 */
347
348 int num_singular = 0;
349 int num_keypoints = 0; // Write iterator
350 for (std::size_t i = 0; i < this->keypoints.size(); ++i)
351 {
352 /* Copy keypoint. */
353 Keypoint kp(this->keypoints[i]);
354
355 /* Get corresponding octave and DoG images. */
356 Octave const& oct(this->octaves[kp.octave - this->options.min_octave]);
357 int sample = static_cast<int>(kp.sample);
359 { oct.dog[sample + 0], oct.dog[sample + 1], oct.dog[sample + 2] };
360
361 /* Shorthand for image width and height. */
362 int const w = dogs[0]->width();
363 int const h = dogs[0]->height();
364 /* The integer and floating point location of the keypoints. */
365 int ix = static_cast<int>(kp.x);
366 int iy = static_cast<int>(kp.y);
367 int is = static_cast<int>(kp.sample);
368 float fx, fy, fs;
369 /* The first and second order derivatives. */
370 float Dx, Dy, Ds;
371 float Dxx, Dyy, Dss;
372 float Dxy, Dxs, Dys;
373
374 /*
375 * Locate the keypoint using second order Taylor approximation.
376 * The procedure might get iterated around a neighboring pixel if
377 * the accurate keypoint is off by >0.6 from the center pixel.
378 */
379# define AT(S,OFF) (dogs[S]->at(px + OFF))
380 for (int j = 0; j < 5; ++j)
381 {
382 std::size_t px = iy * w + ix;
383
384 /* Compute first and second derivatives. */
385 Dx = (AT(1,1) - AT(1,-1)) * 0.5f;
386 Dy = (AT(1,w) - AT(1,-w)) * 0.5f;
387 Ds = (AT(2,0) - AT(0,0)) * 0.5f;
388
389 Dxx = AT(1,1) + AT(1,-1) - 2.0f * AT(1,0);
390 Dyy = AT(1,w) + AT(1,-w) - 2.0f * AT(1,0);
391 Dss = AT(2,0) + AT(0,0) - 2.0f * AT(1,0);
392
393 Dxy = (AT(1,1+w) + AT(1,-1-w) - AT(1,-1+w) - AT(1,1-w)) * 0.25f;
394 Dxs = (AT(2,1) + AT(0,-1) - AT(2,-1) - AT(0,1)) * 0.25f;
395 Dys = (AT(2,w) + AT(0,-w) - AT(2,-w) - AT(0,w)) * 0.25f;
396
397 /* Setup the Hessian matrix. */
399 A[0] = Dxx; A[1] = Dxy; A[2] = Dxs;
400 A[3] = Dxy; A[4] = Dyy; A[5] = Dys;
401 A[6] = Dxs; A[7] = Dys; A[8] = Dss;
402
403 /* Compute determinant to detect singular matrix. */
404 float detA = math::matrix_determinant(A);
405 if (MATH_EPSILON_EQ(detA, 0.0f, 1e-15f))
406 {
407 num_singular += 1;
408 fx = fy = fs = 0.0f; // FIXME: Handle this case?
409 break;
410 }
411
412 /* Invert the matrix to get the accurate keypoint. */
413 A = math::matrix_inverse(A, detA);
414 math::Vec3f b(-Dx, -Dy, -Ds);
415 b = A * b;
416 fx = b[0]; fy = b[1]; fs = b[2];
417
418 /* Check if accurate location is far away from pixel center. */
419 int dx = (fx > 0.6f && ix < w-2) * 1 + (fx < -0.6f && ix > 1) * -1;
420 int dy = (fy > 0.6f && iy < h-2) * 1 + (fy < -0.6f && iy > 1) * -1;
421
422 /* If the accurate location is closer to another pixel,
423 * repeat localization around the other pixel. */
424 if (dx != 0 || dy != 0)
425 {
426 ix += dx;
427 iy += dy;
428 continue;
429 }
430
431 /* Accurate location looks good. */
432 break;
433 }
434
435 /* Calcualte function value D(x) at accurate keypoint x. */
436 float val = dogs[1]->at(ix, iy, 0) + 0.5f * (Dx * fx + Dy * fy + Ds * fs);
437
438 /* Calcualte edge response score Tr(H)^2 / Det(H), see Section 4.1. */
439 float hessian_trace = Dxx + Dyy;
440 float hessian_det = Dxx * Dyy - MATH_POW2(Dxy);
441 float hessian_score = MATH_POW2(hessian_trace) / hessian_det;
442 float score_thres = MATH_POW2(this->options.edge_ratio_threshold + 1.0f)
443 / this->options.edge_ratio_threshold;
444
445 /*
446 * Set accurate final keypoint location.
447 */
448 kp.x = (float)ix + fx;
449 kp.y = (float)iy + fy;
450 kp.sample = (float)is + fs;
451
452 /*
453 * Discard keypoints with:
454 * 1. low contrast (value of DoG function at keypoint),
455 * 2. negative hessian determinant (curvatures with different sign),
456 * Note that negative score implies negative determinant.
457 * 3. large edge response (large hessian score),
458 * 4. unstable keypoint accurate locations,
459 * 5. keypoints beyond the scale space boundary.
460 */
461 if (std::abs(val) < this->options.contrast_threshold
462 || hessian_score < 0.0f || hessian_score > score_thres
463 || std::abs(fx) > 1.5f || std::abs(fy) > 1.5f || std::abs(fs) > 1.0f
464 || kp.sample < -1.0f
465 || kp.sample > (float)this->options.num_samples_per_octave
466 || kp.x < 0.0f || kp.x > (float)(w - 1)
467 || kp.y < 0.0f || kp.y > (float)(h - 1))
468 {
469 //std::cout << " REJECTED!" << std::endl;
470 continue;
471 }
472
473 /* Keypoint is accepted, copy to write iter and advance. */
474 this->keypoints[num_keypoints] = kp;
475 num_keypoints += 1;
476 }
477
478 /* Limit vector size to number of accepted keypoints. */
479 this->keypoints.resize(num_keypoints);
480
481 if (this->options.debug_output && num_singular > 0)
482 {
483 std::cout << "SIFT: Warning: " << num_singular
484 << " singular matrices detected!" << std::endl;
485 }
486}
487
488/* ---------------------------------------------------------------- */
489
490void
492{
493 if (this->octaves.empty())
494 throw std::runtime_error("Octaves not available!");
495 if (this->keypoints.empty())
496 return;
497
498 this->descriptors.clear();
499 this->descriptors.reserve(this->keypoints.size() * 3 / 2);
500
501 /*
502 * Keep a buffer of S+3 gradient and orientation images for the current
503 * octave. Once the octave is changed, these images are recomputed.
504 * To ensure efficiency, the octave index must always increase, never
505 * decrease, which is enforced during the algorithm.
506 */
507 int octave_index = this->keypoints[0].octave;
508 Octave* octave = &this->octaves[octave_index - this->options.min_octave];
509 this->generate_grad_ori_images(octave);
510
511 /* Walk over all keypoints and compute descriptors. */
512 for (std::size_t i = 0; i < this->keypoints.size(); ++i)
513 {
514 Keypoint const& kp(this->keypoints[i]);
515
516 /* Generate new gradient and orientation images if octave changed. */
517 if (kp.octave > octave_index)
518 {
519 /* Clear old octave gradient and orientation images. */
520 if (octave)
521 {
522 octave->grad.clear();
523 octave->ori.clear();
524 }
525 /* Setup new octave gradient and orientation images. */
526 octave_index = kp.octave;
527 octave = &this->octaves[octave_index - this->options.min_octave];
528 this->generate_grad_ori_images(octave);
529 }
530 else if (kp.octave < octave_index)
531 {
532 throw std::runtime_error("Decreasing octave index!");
533 }
534
535 /* Orientation assignment. This returns multiple orientations. */
536 std::vector<float> orientations;
537 orientations.reserve(8);
538 this->orientation_assignment(kp, octave, orientations);
539
540 /* Feature vector extraction. */
541 for (std::size_t j = 0; j < orientations.size(); ++j)
542 {
543 Descriptor desc;
544 float const scale_factor = std::pow(2.0f, kp.octave);
545 desc.x = scale_factor * (kp.x + 0.5f) - 0.5f;
546 desc.y = scale_factor * (kp.y + 0.5f) - 0.5f;
547 desc.scale = this->keypoint_absolute_scale(kp);
548 desc.orientation = orientations[j];
549 if (this->descriptor_assignment(kp, desc, octave))
550 this->descriptors.push_back(desc);
551 }
552 }
553}
554
555/* ---------------------------------------------------------------- */
556
557void
559{
560 octave->grad.clear();
561 octave->grad.reserve(octave->img.size());
562 octave->ori.clear();
563 octave->ori.reserve(octave->img.size());
564
565 int const width = octave->img[0]->width();
566 int const height = octave->img[0]->height();
567
568 //std::cout << "Generating gradient and orientation images..." << std::endl;
569 for (std::size_t i = 0; i < octave->img.size(); ++i)
570 {
571 mve::FloatImage::ConstPtr img = octave->img[i];
572 mve::FloatImage::Ptr grad = mve::FloatImage::create(width, height, 1);
573 mve::FloatImage::Ptr ori = mve::FloatImage::create(width, height, 1);
574
575 int image_iter = width + 1;
576 for (int y = 1; y < height - 1; ++y, image_iter += 2)
577 for (int x = 1; x < width - 1; ++x, ++image_iter)
578 {
579 float m1x = img->at(image_iter - 1);
580 float p1x = img->at(image_iter + 1);
581 float m1y = img->at(image_iter - width);
582 float p1y = img->at(image_iter + width);
583 float dx = 0.5f * (p1x - m1x);
584 float dy = 0.5f * (p1y - m1y);
585
586 float atan2f = std::atan2(dy, dx);
587 grad->at(image_iter) = std::sqrt(dx * dx + dy * dy);
588 ori->at(image_iter) = atan2f < 0.0f
589 ? atan2f + MATH_PI * 2.0f : atan2f;
590 }
591 octave->grad.push_back(grad);
592 octave->ori.push_back(ori);
593 }
594}
595
596/* ---------------------------------------------------------------- */
597
598void
600 Octave const* octave, std::vector<float>& orientations)
601{
602 int const nbins = 36;
603 float const nbinsf = static_cast<float>(nbins);
604
605 /* Prepare 36-bin histogram. */
606 float hist[nbins];
607 std::fill(hist, hist + nbins, 0.0f);
608
609 /* Integral x and y coordinates and closest scale sample. */
610 int const ix = static_cast<int>(kp.x + 0.5f);
611 int const iy = static_cast<int>(kp.y + 0.5f);
612 int const is = static_cast<int>(math::round(kp.sample));
613 float const sigma = this->keypoint_relative_scale(kp);
614
615 /* Images with its dimension for the keypoint. */
616 mve::FloatImage::ConstPtr grad(octave->grad[is + 1]);
617 mve::FloatImage::ConstPtr ori(octave->ori[is + 1]);
618 int const width = grad->width();
619 int const height = grad->height();
620
621 /*
622 * Compute window size 'win', the full window has 2 * win + 1 pixel.
623 * The factor 3 makes the window large enough such that the gaussian
624 * has very little weight beyond the window. The value 1.5 is from
625 * the SIFT paper. If the window goes beyond the image boundaries,
626 * the keypoint is discarded.
627 */
628 float const sigma_factor = 1.5f;
629 int win = static_cast<int>(sigma * sigma_factor * 3.0f);
630 if (ix < win || ix + win >= width || iy < win || iy + win >= height)
631 return;
632
633 /* Center of keypoint index. */
634 int center = iy * width + ix;
635 float const dxf = kp.x - static_cast<float>(ix);
636 float const dyf = kp.y - static_cast<float>(iy);
637 float const maxdist = static_cast<float>(win*win) + 0.5f;
638
639 /* Populate histogram over window, intersected with (1,1), (w-2,h-2). */
640 for (int dy = -win; dy <= win; ++dy)
641 {
642 int const yoff = dy * width;
643 for (int dx = -win; dx <= win; ++dx)
644 {
645 /* Limit to circular window (centered at accurate keypoint). */
646 float const dist = MATH_POW2(dx-dxf) + MATH_POW2(dy-dyf);
647 if (dist > maxdist)
648 continue;
649
650 float gm = grad->at(center + yoff + dx); // gradient magnitude
651 float go = ori->at(center + yoff + dx); // gradient orientation
652 float weight = math::gaussian_xx(dist, sigma * sigma_factor);
653 int bin = static_cast<int>(nbinsf * go / (2.0f * MATH_PI));
654 bin = math::clamp(bin, 0, nbins - 1);
655 hist[bin] += gm * weight;
656 }
657 }
658
659 /* Smooth histogram. */
660 for (int i = 0; i < 6; ++i)
661 {
662 float first = hist[0];
663 float prev = hist[nbins - 1];
664 for (int j = 0; j < nbins - 1; ++j)
665 {
666 float current = hist[j];
667 hist[j] = (prev + current + hist[j + 1]) / 3.0f;
668 prev = current;
669 }
670 hist[nbins - 1] = (prev + hist[nbins - 1] + first) / 3.0f;
671 }
672
673 /* Find maximum element. */
674 float maxh = *std::max_element(hist, hist + nbins);
675
676 /* Find peaks within 80% of max element. */
677 for (int i = 0; i < nbins; ++i)
678 {
679 float h0 = hist[(i + nbins - 1) % nbins];
680 float h1 = hist[i];
681 float h2 = hist[(i + 1) % nbins];
682
683 /* These peaks must be a local maximum! */
684 if (h1 <= 0.8f * maxh || h1 <= h0 || h1 <= h2)
685 continue;
686
687 /*
688 * Quadratic interpolation to find accurate maximum.
689 * f(x) = ax^2 + bx + c, f(-1) = h0, f(0) = h1, f(1) = h2
690 * --> a = 1/2 (h0 - 2h1 + h2), b = 1/2 (h2 - h0), c = h1.
691 * x = f'(x) = 2ax + b = 0 --> x = -1/2 * (h2 - h0) / (h0 - 2h1 + h2)
692 */
693 float x = -0.5f * (h2 - h0) / (h0 - 2.0f * h1 + h2);
694 float o = 2.0f * MATH_PI * (x + (float)i + 0.5f) / nbinsf;
695 orientations.push_back(o);
696 }
697}
698
699/* ---------------------------------------------------------------- */
700
701bool
703 Octave const* octave)
704{
705 /*
706 * The final feature vector has size PXB * PXB * OHB.
707 * The following constants should not be changed yet, as the
708 * (PXB^2 * OHB = 128) element feature vector is still hard-coded.
709 */
710 //int const PIX = 16; // Descriptor region with 16x16 pixel
711 int const PXB = 4; // Pixel bins with 4x4 bins
712 int const OHB = 8; // Orientation histogram with 8 bins
713
714 /* Integral x and y coordinates and closest scale sample. */
715 int const ix = static_cast<int>(kp.x + 0.5f);
716 int const iy = static_cast<int>(kp.y + 0.5f);
717 int const is = static_cast<int>(math::round(kp.sample));
718 float const dxf = kp.x - static_cast<float>(ix);
719 float const dyf = kp.y - static_cast<float>(iy);
720 float const sigma = this->keypoint_relative_scale(kp);
721
722 /* Images with its dimension for the keypoint. */
723 mve::FloatImage::ConstPtr grad(octave->grad[is + 1]);
724 mve::FloatImage::ConstPtr ori(octave->ori[is + 1]);
725 int const width = grad->width();
726 int const height = grad->height();
727
728 /* Clear feature vector. */
729 desc.data.fill(0.0f);
730
731 /* Rotation constants given by descriptor orientation. */
732 float const sino = std::sin(desc.orientation);
733 float const coso = std::cos(desc.orientation);
734
735 /*
736 * Compute window size.
737 * Each spacial bin has an extension of 3 * sigma (sigma is the scale
738 * of the keypoint). For interpolation we need another half bin at
739 * both ends in each dimension. And since the window can be arbitrarily
740 * rotated, we need to multiply with sqrt(2). The window size is:
741 * 2W = sqrt(2) * 3 * sigma * (PXB + 1).
742 */
743 float const binsize = 3.0f * sigma;
744 int win = MATH_SQRT2 * binsize * (float)(PXB + 1) * 0.5f;
745 if (ix < win || ix + win >= width || iy < win || iy + win >= height)
746 return false;
747
748 /*
749 * Iterate over the window, intersected with the image region
750 * from (1,1) to (w-2, h-2) since gradients/orientations are
751 * not defined at the boundary pixels. Add all samples to the
752 * corresponding bin.
753 */
754 int const center = iy * width + ix; // Center pixel at KP location
755 for (int dy = -win; dy <= win; ++dy)
756 {
757 int const yoff = dy * width;
758 for (int dx = -win; dx <= win; ++dx)
759 {
760 /* Get pixel gradient magnitude and orientation. */
761 float const mod = grad->at(center + yoff + dx);
762 float const angle = ori->at(center + yoff + dx);
763 float theta = angle - desc.orientation;
764 if (theta < 0.0f)
765 theta += 2.0f * MATH_PI;
766
767 /* Compute fractional coordinates w.r.t. the window. */
768 float const winx = (float)dx - dxf;
769 float const winy = (float)dy - dyf;
770
771 /*
772 * Compute normalized coordinates w.r.t. bins. The window
773 * coordinates are rotated around the keypoint. The bins are
774 * chosen such that 0 is the coordinate of the first bins center
775 * in each dimension. In other words, (0,0,0) is the coordinate
776 * of the first bin center in the three dimensional histogram.
777 */
778 float binoff = (float)(PXB - 1) / 2.0f;
779 float binx = (coso * winx + sino * winy) / binsize + binoff;
780 float biny = (-sino * winx + coso * winy) / binsize + binoff;
781 float bint = theta * (float)OHB / (2.0f * MATH_PI) - 0.5f;
782
783 /* Compute circular window weight for the sample. */
784 float gaussian_sigma = 0.5f * (float)PXB;
785 float gaussian_weight = math::gaussian_xx
786 (MATH_POW2(binx - binoff) + MATH_POW2(biny - binoff),
787 gaussian_sigma);
788
789 /* Total contribution of the sample in the histogram is now: */
790 float contrib = mod * gaussian_weight;
791
792 /*
793 * Distribute values into bins (using trilinear interpolation).
794 * Each sample is inserted into 8 bins. Some of these bins may
795 * not exist, because the sample is outside the keypoint window.
796 */
797 int bxi[2] = { (int)std::floor(binx), (int)std::floor(binx) + 1 };
798 int byi[2] = { (int)std::floor(biny), (int)std::floor(biny) + 1 };
799 int bti[2] = { (int)std::floor(bint), (int)std::floor(bint) + 1 };
800
801 float weights[3][2] = {
802 { (float)bxi[1] - binx, 1.0f - ((float)bxi[1] - binx) },
803 { (float)byi[1] - biny, 1.0f - ((float)byi[1] - biny) },
804 { (float)bti[1] - bint, 1.0f - ((float)bti[1] - bint) }
805 };
806
807 // Wrap around orientation histogram
808 if (bti[0] < 0)
809 bti[0] += OHB;
810 if (bti[1] >= OHB)
811 bti[1] -= OHB;
812
813 /* Iterate the 8 bins and add weighted contrib to each. */
814 int const xstride = OHB;
815 int const ystride = OHB * PXB;
816 for (int y = 0; y < 2; ++y)
817 for (int x = 0; x < 2; ++x)
818 for (int t = 0; t < 2; ++t)
819 {
820 if (bxi[x] < 0 || bxi[x] >= PXB
821 || byi[y] < 0 || byi[y] >= PXB)
822 continue;
823
824 int idx = bti[t] + bxi[x] * xstride + byi[y] * ystride;
825 desc.data[idx] += contrib * weights[0][x]
826 * weights[1][y] * weights[2][t];
827 }
828 }
829 }
830
831 /* Normalize the feature vector. */
832 desc.data.normalize();
833
834 /* Truncate descriptor values to 0.2. */
835 for (int i = 0; i < PXB * PXB * OHB; ++i)
836 desc.data[i] = std::min(desc.data[i], 0.2f);
837
838 /* Normalize once again. */
839 desc.data.normalize();
840
841 return true;
842}
843
844/* ---------------------------------------------------------------- */
845
846/*
847 * The scale of a keypoint is: scale = sigma0 * 2^(octave + (s+1)/S).
848 * sigma0 is the initial blur (1.6), octave the octave index of the
849 * keypoint (-1, 0, 1, ...) and scale space sample s in [-1,S+1] where
850 * S is the amount of samples per octave. Since the initial blur 1.6
851 * corresponds to scale space sample -1, we add 1 to the scale index.
852 */
853
854float
856{
857 return this->options.base_blur_sigma * std::pow(2.0f,
858 (kp.sample + 1.0f) / this->options.num_samples_per_octave);
859}
860
861float
863{
864 return this->options.base_blur_sigma * std::pow(2.0f,
865 kp.octave + (kp.sample + 1.0f) / this->options.num_samples_per_octave);
866}
867
868/* ---------------------------------------------------------------- */
869
870void
871Sift::load_lowe_descriptors (std::string const& filename, Descriptors* result)
872{
873 std::ifstream in(filename.c_str());
874 if (!in.good())
875 throw std::runtime_error("Cannot open descriptor file");
876
877 int num_descriptors;
878 int num_dimensions;
879 in >> num_descriptors >> num_dimensions;
880 if (num_descriptors > 100000 || num_dimensions != 128)
881 {
882 in.close();
883 throw std::runtime_error("Invalid number of descriptors/dimensions");
884 }
885 result->clear();
886 result->reserve(num_descriptors);
887 for (int i = 0; i < num_descriptors; ++i)
888 {
889 Sift::Descriptor descriptor;
890 in >> descriptor.y >> descriptor.x
891 >> descriptor.scale >> descriptor.orientation;
892 for (int j = 0; j < 128; ++j)
893 in >> descriptor.data[j];
894 descriptor.data.normalize();
895 result->push_back(descriptor);
896 }
897
898 if (!in.good())
899 {
900 result->clear();
901 in.close();
902 throw std::runtime_error("Error while reading descriptors");
903 }
904
905 in.close();
906}
907
Matrix class for arbitrary dimensions and types.
Definition matrix.h:54
Vector class for arbitrary dimensions and types.
Definition vector.h:87
Vector< T, N > & fill(T const &value)
Fills all vector elements with the given value.
Definition vector.h:381
Vector< T, N > & normalize(void)
Normalizes self and returns reference to self.
Definition vector.h:448
std::shared_ptr< Image< T > > Ptr
Definition image.h:42
std::shared_ptr< Image< T > const > ConstPtr
Definition image.h:43
static Ptr create(void)
Smart pointer image constructor.
Definition image.h:191
void orientation_assignment(Keypoint const &kp, Octave const *octave, std::vector< float > &orientations)
Definition sift.cc:599
void set_image(mve::ByteImage::ConstPtr img)
Sets the input image.
Definition sift.cc:135
void keypoint_localization(void)
Definition sift.cc:340
void descriptor_generation(void)
Definition sift.cc:491
void process(void)
Starts the SIFT keypoint detection and descriptor extraction.
Definition sift.cc:42
float keypoint_absolute_scale(Keypoint const &kp)
Definition sift.cc:862
void extrema_detection(void)
Definition sift.cc:264
void set_float_image(mve::FloatImage::ConstPtr img)
Sets the input image.
Definition sift.cc:151
static void load_lowe_descriptors(std::string const &filename, Descriptors *result)
Helper function that creates SIFT descriptors from David Lowe's SIFT descriptor files.
Definition sift.cc:871
void create_octaves(void)
Definition sift.cc:170
std::vector< Descriptor > Descriptors
Definition sift.h:153
void generate_grad_ori_images(Octave *octave)
Definition sift.cc:558
bool descriptor_assignment(Keypoint const &kp, Descriptor &desc, Octave const *octave)
Definition sift.cc:702
void add_octave(mve::FloatImage::ConstPtr image, float has_sigma, float target_sigma)
Definition sift.cc:213
float keypoint_relative_scale(Keypoint const &kp)
Definition sift.cc:855
Simple timer class to take execution times.
Definition timer.h:64
std::size_t get_elapsed(void) const
Definition timer.h:139
void reset(void)
Definition timer.h:115
#define MATH_EPSILON_EQ(x, v, eps)
Definition defines.h:96
#define MATH_SQRT2
Definition defines.h:49
#define MATH_PI
Definition defines.h:42
#define MATH_POW2(x)
Definition defines.h:68
std::size_t first
Definition mesh_info.cc:46
T const & clamp(T const &v, T const &min=T(0), T const &max=T(1))
Returns value 'v' clamped to the interval specified by 'min' and 'max'.
Definition functions.h:204
T round(T const &x)
Removes the fractional part of the value to the closest integer.
Definition functions.h:70
Matrix< T, N, N > matrix_inverse(Matrix< T, N, N > const &mat)
Calculates the inverse of the given matrix.
T matrix_determinant(Matrix< T, N, N > const &mat)
Calculates the determinant of the given matrix.
T gaussian_xx(T const &xx, T const &sigma)
Gaussian function that expects x to be squared.
Definition functions.h:49
FloatImage::Ptr byte_to_float_image(ByteImage::ConstPtr image)
Converts a given byte image to a float image.
@ DESATURATE_AVERAGE
(R + G + B) * 1/3
#define SFM_NAMESPACE_END
Definition defines.h:14
#define SFM_NAMESPACE_BEGIN
Definition defines.h:13
#define AT(S, OFF)
Representation of the SIFT descriptor.
Definition sift.h:138
math::Vector< float, 128 > data
The descriptor data, elements are unsigned in [0.0, 1.0].
Definition sift.h:148
float scale
The scale (or sigma value) of the keypoint.
Definition sift.h:144
float orientation
The orientation of the image keypoint in [0, 2PI].
Definition sift.h:146
float x
The sub-pixel x-coordinate of the image keypoint.
Definition sift.h:140
float y
The sub-pixel y-coordinate of the image keypoint.
Definition sift.h:142
Representation of a SIFT keypoint.
Definition sift.h:121
float x
Keypoint x-coordinate.
Definition sift.h:127
float y
Keypoint y-coordinate.
Definition sift.h:129
int octave
Octave index of the keypoint.
Definition sift.h:123
float sample
Sample index.
Definition sift.h:125
Representation of a SIFT octave.
Definition sift.h:184
ImageVector grad
S+3 gradient images.
Definition sift.h:188
ImageVector dog
S+2 difference of gaussian images.
Definition sift.h:187
ImageVector ori
S+3 orientation images.
Definition sift.h:189
ImageVector img
S+3 images per octave.
Definition sift.h:186
SIFT options.
Definition sift.h:49
float edge_ratio_threshold
Sets the edge threshold to eliminate edge responses.
Definition sift.h:84
bool debug_output
Produce even more messages on the console.
Definition sift.h:108
float base_blur_sigma
Sets the amount of desired base blur before constructing the octaves.
Definition sift.h:92
bool verbose_output
Produce status messages on the console.
Definition sift.h:103
float inherent_blur_sigma
Sets the inherent blur sigma in the input image.
Definition sift.h:98
float contrast_threshold
Sets contrast threshold, i.e.
Definition sift.h:77
int max_octave
Sets the maximum octave.
Definition sift.h:70
int num_samples_per_octave
Sets the amount of samples per octave.
Definition sift.h:56
int min_octave
Sets the minimum octave ID.
Definition sift.h:64