From 60ad0d3344abf69ba19af7fc6d30e0cbd399cffc Mon Sep 17 00:00:00 2001 From: NovokshanovE Date: Wed, 18 Mar 2026 13:28:50 +0300 Subject: [PATCH] Solution 1 --- src/phg/matching/descriptor_matcher.cpp | 70 +++++++++++++- src/phg/matching/flann_matcher.cpp | 33 ++++++- src/phg/sfm/homography.cpp | 106 ++++++++++++++++++++- src/phg/sfm/panorama_stitcher.cpp | 34 ++++++- src/phg/sift/sift.cpp | 120 +++++++++++++++++++++++- tests/test_matching.cpp | 4 +- 6 files changed, 357 insertions(+), 10 deletions(-) diff --git a/src/phg/matching/descriptor_matcher.cpp b/src/phg/matching/descriptor_matcher.cpp index f4bcd871..352a8b7b 100644 --- a/src/phg/matching/descriptor_matcher.cpp +++ b/src/phg/matching/descriptor_matcher.cpp @@ -1,5 +1,6 @@ #include "descriptor_matcher.h" +#include #include #include "flann_factory.h" @@ -7,8 +8,15 @@ void phg::DescriptorMatcher::filterMatchesRatioTest(const std::vector &filtered_matches) { filtered_matches.clear(); - - throw std::runtime_error("not implemented yet"); + const float ratio = 0.7f; + for (const auto &m : matches) { + if (m.size() < 2) { + continue; + } + if (m[0].distance < ratio * m[1].distance) { + filtered_matches.push_back(m[0]); + } + } } @@ -73,4 +81,62 @@ void phg::DescriptorMatcher::filterMatchesClusters(const std::vector // // метч остается, если левое и правое множества первых total_neighbors соседей в радиусах поиска(radius2_query, radius2_train) имеют как минимум consistent_matches общих элементов // // TODO заполнить filtered_matches + std::shared_ptr index_params = flannKdTreeIndexParams(1); + std::shared_ptr search_params = flannKsTreeSearchParams(32); + + std::shared_ptr index_query = flannKdTreeIndex(points_query, index_params); + std::shared_ptr index_train = flannKdTreeIndex(points_train, index_params); + + cv::Mat indices_query(n_matches, total_neighbours, CV_32SC1); + cv::Mat distances2_query(n_matches, total_neighbours, CV_32FC1); + cv::Mat indices_train(n_matches, total_neighbours, CV_32SC1); + cv::Mat distances2_train(n_matches, total_neighbours, CV_32FC1); + + index_query->knnSearch(points_query, indices_query, distances2_query, total_neighbours, *search_params); + index_train->knnSearch(points_train, indices_train, distances2_train, total_neighbours, *search_params); + + float radius2_query, radius2_train; + { + std::vector max_dists2_query(n_matches); + std::vector max_dists2_train(n_matches); + for (int i = 0; i < n_matches; ++i) { + max_dists2_query[i] = distances2_query.at(i, total_neighbours - 1); + max_dists2_train[i] = distances2_train.at(i, total_neighbours - 1); + } + + int median_pos = n_matches / 2; + std::nth_element(max_dists2_query.begin(), max_dists2_query.begin() + median_pos, max_dists2_query.end()); + std::nth_element(max_dists2_train.begin(), max_dists2_train.begin() + median_pos, max_dists2_train.end()); + + radius2_query = max_dists2_query[median_pos] * radius_limit_scale * radius_limit_scale; + radius2_train = max_dists2_train[median_pos] * radius_limit_scale * radius_limit_scale; + } + + for (int i = 0; i < n_matches; ++i) { + std::vector neigh_q; + std::vector neigh_t; + + neigh_q.reserve(total_neighbours); + neigh_t.reserve(total_neighbours); + + for (size_t j = 0; j < total_neighbours; ++j) { + if (distances2_query.at(i, (int)j) <= radius2_query) { + neigh_q.push_back(indices_query.at(i, (int)j)); + } + if (distances2_train.at(i, (int)j) <= radius2_train) { + neigh_t.push_back(indices_train.at(i, (int)j)); + } + } + + int common = 0; + for (int vq : neigh_q) { + if (std::find(neigh_t.begin(), neigh_t.end(), vq) != neigh_t.end()) { + ++common; + } + } + + if (common >= (int)consistent_matches) { + filtered_matches.push_back(matches[i]); + } + } } diff --git a/src/phg/matching/flann_matcher.cpp b/src/phg/matching/flann_matcher.cpp index 9e9f5180..775ec773 100644 --- a/src/phg/matching/flann_matcher.cpp +++ b/src/phg/matching/flann_matcher.cpp @@ -1,4 +1,6 @@ #include +#include +#include #include "flann_matcher.h" #include "flann_factory.h" @@ -8,6 +10,8 @@ phg::FlannMatcher::FlannMatcher() // параметры для приближенного поиска // index_params = flannKdTreeIndexParams(TODO); // search_params = flannKsTreeSearchParams(TODO); + index_params = flannKdTreeIndexParams(4); + search_params = flannKsTreeSearchParams(32); } void phg::FlannMatcher::train(const cv::Mat &train_desc) @@ -17,5 +21,32 @@ void phg::FlannMatcher::train(const cv::Mat &train_desc) void phg::FlannMatcher::knnMatch(const cv::Mat &query_desc, std::vector> &matches, int k) const { - throw std::runtime_error("not implemented yet"); + if (!flann_index) { + throw std::runtime_error("FlannMatcher:: knnMatch : matcher is not trained"); + } + if (k <= 0) { + throw std::runtime_error("FlannMatcher:: knnMatch : k must be > 0"); + } + if (query_desc.empty()) { + matches.clear(); + return; + } + + cv::Mat indices(query_desc.rows, k, CV_32SC1); + cv::Mat distances2(query_desc.rows, k, CV_32FC1); + flann_index->knnSearch(query_desc, indices, distances2, k, *search_params); + + matches.resize(query_desc.rows); + for (int i = 0; i < query_desc.rows; ++i) { + matches[i].clear(); + matches[i].reserve(k); + for (int j = 0; j < k; ++j) { + cv::DMatch m; + m.queryIdx = i; + m.trainIdx = indices.at(i, j); + m.imgIdx = 0; + m.distance = std::sqrt(std::max(0.f, distances2.at(i, j))); + matches[i].push_back(m); + } + } } diff --git a/src/phg/sfm/homography.cpp b/src/phg/sfm/homography.cpp index 5cbc780c..5307190e 100644 --- a/src/phg/sfm/homography.cpp +++ b/src/phg/sfm/homography.cpp @@ -2,6 +2,9 @@ #include #include +#include +#include +#include namespace { @@ -86,6 +89,8 @@ namespace { // 8 elements of matrix + free term as needed by gauss routine // A.push_back({TODO}); // A.push_back({TODO}); + A.push_back({x0, y0, w0, 0.0, 0.0, 0.0, -x1 * x0, -x1 * y0, x1 * w0}); + A.push_back({0.0, 0.0, 0.0, x0, y0, w0, -y1 * x0, -y1 * y0, y1 * w0}); } int res = gauss(A, H); @@ -219,6 +224,83 @@ namespace { // } // // return best_H; + const int n_matches = points_lhs.size(); + if (n_matches < 4) { + throw std::runtime_error("estimateHomographyRANSAC : too few points"); + } + + const int n_trials = 1000; + + const int n_samples = 4; + uint64_t seed = 1; + const double reprojection_error_threshold_px = 2; + + int best_support = 0; + cv::Mat best_H; + + std::vector sample; + for (int i_trial = 0; i_trial < n_trials; ++i_trial) { + randomSample(sample, n_matches, n_samples, &seed); + + cv::Mat H; + try { + H = estimateHomography4Points(points_lhs[sample[0]], points_lhs[sample[1]], points_lhs[sample[2]], points_lhs[sample[3]], + points_rhs[sample[0]], points_rhs[sample[1]], points_rhs[sample[2]], points_rhs[sample[3]]); + } catch (...) { + continue; + } + + int support = 0; + for (int i_point = 0; i_point < n_matches; ++i_point) { + try { + cv::Point2d proj = phg::transformPoint(points_lhs[i_point], H); + if (cv::norm(proj - cv::Point2d(points_rhs[i_point])) < reprojection_error_threshold_px) { + ++support; + } + } catch (const std::exception &e) + { + std::cerr << e.what() << std::endl; + } + } + + if (support > best_support) { + best_support = support; + best_H = H; + + std::cout << "estimateHomographyRANSAC : support: " << best_support << "/" << n_matches << std::endl; + + if (best_support == n_matches) { + break; + } + } + } + + std::cout << "estimateHomographyRANSAC : best support: " << best_support << "/" << n_matches << std::endl; + + if (best_support == 0) { + throw std::runtime_error("estimateHomographyRANSAC : failed to estimate homography"); + } + + std::vector in_lhs; + std::vector in_rhs; + in_lhs.reserve(n_matches); + in_rhs.reserve(n_matches); + for (int i_point = 0; i_point < n_matches; ++i_point) { + cv::Point2d proj = phg::transformPoint(points_lhs[i_point], best_H); + if (cv::norm(proj - cv::Point2d(points_rhs[i_point])) < reprojection_error_threshold_px) { + in_lhs.push_back(points_lhs[i_point]); + in_rhs.push_back(points_rhs[i_point]); + } + } + + if ((int)in_lhs.size() >= 4) { + cv::Mat refined_H = cv::findHomography(in_lhs, in_rhs, 0); + if (!refined_H.empty()) { + return refined_H; + } + } + + return best_H; } } @@ -238,7 +320,29 @@ cv::Mat phg::findHomographyCV(const std::vector &points_lhs, const // таким преобразованием внутри занимается функции cv::perspectiveTransform и cv::warpPerspective cv::Point2d phg::transformPoint(const cv::Point2d &pt, const cv::Mat &T) { - throw std::runtime_error("not implemented yet"); + if (T.rows != 3 || T.cols != 3) { + throw std::runtime_error("transformPoint : invalid matrix size"); + } + + cv::Mat T64; + if (T.type() == CV_64F) { + T64 = T; + } else { + T.convertTo(T64, CV_64F); + } + + const double x = pt.x; + const double y = pt.y; + + const double tx = T64.at(0, 0) * x + T64.at(0, 1) * y + T64.at(0, 2); + const double ty = T64.at(1, 0) * x + T64.at(1, 1) * y + T64.at(1, 2); + const double tw = T64.at(2, 0) * x + T64.at(2, 1) * y + T64.at(2, 2); + + if (std::abs(tw) < 1e-12) { + throw std::runtime_error("transformPoint : invalid homogeneous coordinate"); + } + + return {tx / tw, ty / tw}; } cv::Point2d phg::transformPointCV(const cv::Point2d &pt, const cv::Mat &T) { diff --git a/src/phg/sfm/panorama_stitcher.cpp b/src/phg/sfm/panorama_stitcher.cpp index 8d76939b..7287d4f7 100644 --- a/src/phg/sfm/panorama_stitcher.cpp +++ b/src/phg/sfm/panorama_stitcher.cpp @@ -23,7 +23,39 @@ cv::Mat phg::stitchPanorama(const std::vector &imgs, { // здесь надо посчитать вектор Hs // при этом можно обойтись n_images - 1 вызовами функтора homography_builder - throw std::runtime_error("not implemented yet"); + if ((int)parent.size() != n_images) { + throw std::runtime_error("stitchPanorama: parent.size() != imgs.size()"); + } + + std::vector state(n_images, 0); + + std::function build = [&](int i) { + if (state[i] == 2) { + return; + } + if (state[i] == 1) { + throw std::runtime_error("stitchPanorama: cycle in parent graph"); + } + state[i] = 1; + + if (parent[i] == -1) { + Hs[i] = cv::Mat::eye(3, 3, CV_64FC1); + } else { + int p = parent[i]; + if (p < 0 || p >= n_images) { + throw std::runtime_error("stitchPanorama: invalid parent id"); + } + build(p); + cv::Mat H_to_parent = homography_builder(imgs[i], imgs[p]); + Hs[i] = Hs[p] * H_to_parent; + } + + state[i] = 2; + }; + + for (int i = 0; i < n_images; ++i) { + build(i); + } } bbox2 bbox; diff --git a/src/phg/sift/sift.cpp b/src/phg/sift/sift.cpp index 72047711..c3983e82 100755 --- a/src/phg/sift/sift.cpp +++ b/src/phg/sift/sift.cpp @@ -111,13 +111,16 @@ std::vector phg::buildOctaves(const cv::Mat& img, const phg:: // TODO double sigma_layer = sigma0 * корень из двух нужной степени, чтобы при i==s получали удвоение базового блюра; // // вычтем sigma0 чтобы размыть ровно до нужной суммарной сигмы // TODO sigma_layer = ... (вычитаем как в sigma base); - // cv::GaussianBlur(oct.layers[0], oct.layers[i], cv::Size(), sigma_layer, sigma_layer); + double sigma_layer = sigma0 * std::pow(2.0, (double)i / s); + sigma_layer = std::sqrt(sigma_layer * sigma_layer - sigma0 * sigma0); + cv::GaussianBlur(oct.layers[0], oct.layers[i], cv::Size(), sigma_layer, sigma_layer); } // подготавливаем базовый слой для следующей октавы if (o + 1 < n_octaves) { // используется в opencv, формула для пересчета ключевых точек: pt_upscaled = 2^o * pt_downscaled // TODO cv::resize(даунскейлим текущий слой в два раза, без интерполяции, просто сабсепмлинг); + cv::resize(oct.layers[s], base, cv::Size(oct.layers[s].cols / 2, oct.layers[s].rows / 2), 0, 0, cv::INTER_NEAREST); // можно использовать и downsample2x_avg(oct.layers[s]), это позволяет потом заапскейлить слои обратно до оригинального разрешения без сдвига // но потребуется везде изменить формулу для пересчета ключевых точек: pt_upscaled = (pt_downscaled + 0.5) * 2^o - 0.5 @@ -139,6 +142,9 @@ std::vector phg::buildDoG(const std::vector phg::findScaleSpaceExtrema(const std::vector keypoints; - for (int o = 0; o < (int)dog.size(); o++) { int real_octave = o + first_octave; @@ -208,16 +213,42 @@ std::vector phg::findScaleSpaceExtrema(const std::vector phg::findScaleSpaceExtrema(const std::vector(yi, xi + 1) + cL.at(yi, xi - 1) - 2.f * resp_center; + dyy = cL.at(yi + 1, xi) + cL.at(yi - 1, xi) - 2.f * resp_center; + dss = nL.at(yi, xi) + pL.at(yi, xi) - 2.f * resp_center; + dxy = (cL.at(yi + 1, xi + 1) - cL.at(yi + 1, xi - 1) - cL.at(yi - 1, xi + 1) + cL.at(yi - 1, xi - 1)) * 0.25f; + dxs = (nL.at(yi, xi + 1) - nL.at(yi, xi - 1) - pL.at(yi, xi + 1) + pL.at(yi, xi - 1)) * 0.25f; + dys = (nL.at(yi + 1, xi) - nL.at(yi - 1, xi) - pL.at(yi + 1, xi) + pL.at(yi - 1, xi)) * 0.25f; // float dxx = cL.at(yi, xi + 1) + cL.at(yi, xi - 1) - 2.f * resp_center; // float dyy = TODO; // float dss = TODO; @@ -273,6 +310,21 @@ std::vector phg::findScaleSpaceExtrema(const std::vector= (r + 1.f) * (r + 1.f) * det) + break; // float trace = //TODO ; // = lambda1 + lambda2 // float det = // TODO ; // = lambda1 * lambda2 // if (det <= 0) @@ -412,6 +464,41 @@ std::vector phg::computeOrientations(const std::vector(py, px + 1) - img.at(py, px - 1); + float gy = img.at(py + 1, px) - img.at(py - 1, px); + + float mag = std::sqrt(gx * gx + gy * gy); + float angle = std::atan2(gy, gx); // [-pi, pi] + + float angle_deg = angle * 180.f / (float)CV_PI; + if (angle_deg < 0.f) + angle_deg += 360.f; + + // гауссово взвешивание голоса точки с затуханием к краям + float weight = std::exp(-(dx * dx + dy * dy) / (2.f * sigma_win * sigma_win)); + if (!params.enable_orientation_gaussian_weighting) { + weight = 1.f; + } + + // голосуем в гистограмме направлений. находим два ближайших бина и гладко распределяем голос между ними + // в таком случае, голос попавший близко к границе между бинами, проголосует поровну за оба бина + float bin = angle_deg * n_bins / 360.f; + if (bin >= n_bins) + bin -= n_bins; + int bin0 = (int)bin; + int bin1 = (bin0 + 1) % n_bins; + + float frac = bin - bin0; + if (!params.enable_orientation_bin_interpolation) { + frac = 0.f; + } + + histogram[bin0] += mag * weight * (1.f - frac); + histogram[bin1] += mag * weight * frac; } } @@ -448,7 +535,7 @@ std::vector phg::computeOrientations(const std::vector a = (left + right - 2 * center) / 2 - // f(1) - f(-1) = 2b -> b = (right - left) / 2 +// f(1) - f(-1) = 2b -> b = (right - left) / 2 // float offset = TODO; // if (!params.enable_orientation_subpixel_localization) { @@ -464,6 +551,24 @@ std::vector phg::computeOrientations(const std::vector 1e-7f) ? (0.5f * (left - right) / denom) : 0.f; + if (!params.enable_orientation_subpixel_localization) { + offset = 0.f; + } + offset = std::max(-1.f, std::min(1.f, offset)); + + float bin_real = i + offset; + if (bin_real < 0.f) + bin_real += n_bins; + if (bin_real >= n_bins) + bin_real -= n_bins; + + float angle = bin_real * 360.f / n_bins; + + cv::KeyPoint new_kp = kp; + new_kp.angle = angle; + oriented_kpts.push_back(new_kp); } } } @@ -579,6 +684,11 @@ std::pair> phg::computeDescriptors(const std: // weight = 1.f; // } // float weighted_mag = mag * weight; + float weight = std::exp(-(rot_x * rot_x + rot_y * rot_y) / (2.f * sigma_desc * sigma_desc)); + if (!params.enable_descriptor_gaussian_weighting) { + weight = 1.f; + } + float weighted_mag = mag * weight; if (params.enable_descriptor_bin_interpolation) { // размажем вклад weighted_mag по пространственным бинам и по бинам гистограммок трилинейной интерполяцией @@ -611,6 +721,8 @@ std::pair> phg::computeDescriptors(const std: // int idx = TODO; // desc[idx] += TODO; + int idx = (iy * n_spatial_bins + ix) * n_orient_bins + io; + desc[idx] += weighted_mag * wx * wy * wo; } } } @@ -623,6 +735,8 @@ std::pair> phg::computeDescriptors(const std: // TODO uncomment // int idx = (iy_nearest * n_spatial_bins + ix_nearest) * n_orient_bins + io_nearest; // desc[idx] += weighted_mag; + int idx = (iy_nearest * n_spatial_bins + ix_nearest) * n_orient_bins + io_nearest; + desc[idx] += weighted_mag; } } } diff --git a/tests/test_matching.cpp b/tests/test_matching.cpp index adaac65e..5c7a61b5 100644 --- a/tests/test_matching.cpp +++ b/tests/test_matching.cpp @@ -19,8 +19,8 @@ // TODO enable both toggles for testing custom detector & matcher -#define ENABLE_MY_DESCRIPTOR 0 -#define ENABLE_MY_MATCHING 0 +#define ENABLE_MY_DESCRIPTOR 1 +#define ENABLE_MY_MATCHING 1 #define ENABLE_GPU_BRUTEFORCE_MATCHER 0 // TODO disable for local testing but do not commit