Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libs/3rdparty/libgtest/googletest/src/gtest-death-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#include "gtest/gtest-death-test.h"

#include <cstdint>
#include <utility>

#include "gtest/internal/gtest-port.h"
Expand Down
4 changes: 4 additions & 0 deletions libs/gpu/libgpu/opencl/enum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <libutils/string_utils.h>
#include <algorithm>
#include <CL/cl.h>
#include <CL/cl_ext.h>
#include <iostream>
#include "enum.h"

Expand Down Expand Up @@ -32,6 +33,9 @@ bool OpenCLEnum::enumPlatforms()

// Get OpenCL platform count
ciErrNum = clGetPlatformIDs (0, NULL, &num_platforms);
if (ciErrNum == CL_PLATFORM_NOT_FOUND_KHR) {
return true;
}
if (ciErrNum != CL_SUCCESS) {
std::cerr << "clGetPlatformIDs failed: " << ocl::errorString(ciErrNum) << std::endl;
return false;
Expand Down
43 changes: 40 additions & 3 deletions src/phg/matching/bruteforce_matcher_gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,41 @@

#define BF_MATCHER_GPU_VERBOSE 0

bool phg::BruteforceMatcherGPU::isAvailable(std::string *reason, bool require_non_cpu_device)
{
const std::vector<gpu::Device> devices = gpu::enumDevices();

bool has_opencl_device = false;
bool has_non_cpu_opencl_device = false;
for (const gpu::Device &device : devices) {
if (!device.supports_opencl) {
continue;
}

has_opencl_device = true;
if (!device.is_cpu) {
has_non_cpu_opencl_device = true;
break;
}
}

if (require_non_cpu_device ? has_non_cpu_opencl_device : has_opencl_device) {
return true;
}

if (!reason) {
return false;
}

if (!has_opencl_device) {
*reason = "no OpenCL platforms/devices detected; install an OpenCL ICD/runtime and verify with clinfo";
} else {
*reason = "only CPU OpenCL devices are available; the GPU matcher benchmark needs a non-CPU OpenCL device";
}

return false;
}

void phg::BruteforceMatcherGPU::train(const cv::Mat &train_desc)
{
if (train_desc.rows < 2) {
Expand All @@ -26,6 +61,11 @@ void phg::BruteforceMatcherGPU::knnMatch(const cv::Mat &query_desc,
std::vector<std::vector<cv::DMatch>> &matches,
int k) const
{
std::string availability_reason;
if (!isAvailable(&availability_reason)) {
throw std::runtime_error("BruteforceMatcherGPU:: knnMatch : " + availability_reason);
}

if (!train_desc_ptr) {
throw std::runtime_error("BruteforceMatcher:: knnMatch : matcher is not trained");
}
Expand All @@ -37,9 +77,6 @@ void phg::BruteforceMatcherGPU::knnMatch(const cv::Mat &query_desc,
std::cout << "BruteforceMatcher::knnMatch : n query desc : " << query_desc.rows << ", n train desc : " << train_desc_ptr->rows << std::endl;

gpu::Device device = gpu::chooseDevice(BF_MATCHER_GPU_VERBOSE);
if (!device.supports_opencl) {
throw std::runtime_error("No OpenCL device found");
}

gpu::Context context;
context.init(device.device_id_opencl);
Expand Down
2 changes: 2 additions & 0 deletions src/phg/matching/bruteforce_matcher_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ namespace phg {

struct BruteforceMatcherGPU : DescriptorMatcher {

static bool isAvailable(std::string *reason = nullptr, bool require_non_cpu_device = false);

void train(const cv::Mat &train_desc) override;

void knnMatch(const cv::Mat &query_desc, std::vector<std::vector<cv::DMatch>> &matches, int k) const override;
Expand Down
28 changes: 22 additions & 6 deletions src/phg/matching/cl/bruteforce_matcher.cl
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,23 @@ __kernel void bruteforce_matcher(__global const float* train,
// храним два лучших сопоставления для каждого дескриптора-query:
__local uint res_train_idx_local[KEYPOINTS_PER_WG * 2];
__local float res_distance2_local[KEYPOINTS_PER_WG * 2]; // храним квадраты чтобы не считать корень до самого последнего момента
__local float dist2_for_reduction[NDIM];
// заполняем текущие лучшие дистанции большими значениями
if (dim_id < KEYPOINTS_PER_WG * 2) {
res_distance2_local[dim_id] = FLT_MAX; // полагаемся на то что res_distance2_local размера KEYPOINTS_PER_WG*2==4*2<dim_id<=NDIM==128
res_train_idx_local[dim_id] = UINT_MAX;
}

// грузим 4 дескриптора-query (для каждого из четырех дескрипторов каждый поток грузит значение своей размерности dim_id)
// TODO: т.е. надо прогрузить в query_local все KEYPOINTS_PER_WG=4 дескриптора из query (начиная с индекса query_id0) (а если часть из них выходит за пределы n_query_desc - грузить нули)
for (int query_local_i = 0; query_local_i < KEYPOINTS_PER_WG; ++query_local_i) {
const unsigned int query_id = query_id0 + query_local_i;
if (query_id < n_query_desc) {
query_local[query_local_i * NDIM + dim_id] = query[query_id * NDIM + dim_id];
} else {
query_local[query_local_i * NDIM + dim_id] = 0.0f;
}
}

barrier(CLK_LOCAL_MEM_FENCE); // дожидаемся прогрузки наших дескрипторов-запросов

Expand All @@ -43,13 +53,15 @@ __kernel void bruteforce_matcher(__global const float* train,
// до дескриптора-train в глобальной памяти (#train_idx)

// TODO посчитать квадрат расстояния по нашей размерности (dim_id) и сохранить его в нашу ячейку в dist2_for_reduction
const float diff = train_value_dim - query_local[query_local_i * NDIM + dim_id];
dist2_for_reduction[dim_id] = diff * diff;

barrier(CLK_LOCAL_MEM_FENCE);
// TODO суммируем редукцией все что есть в dist2_for_reduction
int step = NDIM / 2;
while (step > 0) {
if (dim_id < step) {
// TODO
dist2_for_reduction[dim_id] += dist2_for_reduction[dim_id + step];
}
barrier(CLK_LOCAL_MEM_FENCE);
step /= 2;
Expand All @@ -63,13 +75,17 @@ __kernel void bruteforce_matcher(__global const float* train,
#define SECOND_BEST_INDEX 1

// пытаемся улучшить самое лучшее сопоставление для локального дескриптора
if (dist2 <= res_distance2_local[query_local_i * 2 + BEST_INDEX]) {
if (dist2 < res_distance2_local[query_local_i * 2 + BEST_INDEX]) {
// не забываем что прошлое лучшее сопоставление теперь стало вторым по лучшевизне (на данный момент)
res_distance2_local[query_local_i * 2 + SECOND_BEST_INDEX] = res_distance2_local[query_local_i * 2 + BEST_INDEX];
res_train_idx_local[query_local_i * 2 + SECOND_BEST_INDEX] = res_train_idx_local[query_local_i * 2 + BEST_INDEX];
// TODO заменяем нашим (dist2, train_idx) самое лучшее сопоставление для локального дескриптора
} else if (dist2 <= res_distance2_local[query_local_i * 2 + SECOND_BEST_INDEX]) { // может мы улучшили хотя бы второе по лучшевизне сопоставление?
res_distance2_local[query_local_i * 2 + BEST_INDEX] = dist2;
res_train_idx_local[query_local_i * 2 + BEST_INDEX] = train_idx;
} else if (dist2 < res_distance2_local[query_local_i * 2 + SECOND_BEST_INDEX]) { // может мы улучшили хотя бы второе по лучшевизне сопоставление?
// TODO заменяем второе по лучшевизне сопоставление для локального дескриптора
res_distance2_local[query_local_i * 2 + SECOND_BEST_INDEX] = dist2;
res_train_idx_local[query_local_i * 2 + SECOND_BEST_INDEX] = train_idx;
}
}
}
Expand All @@ -82,9 +98,9 @@ __kernel void bruteforce_matcher(__global const float* train,

int query_id = query_id0 + query_local_i;
if (query_id < n_query_desc) {
res_train_idx[query_id * 2 + k] = // TODO
res_query_idx[query_id * 2 + k] = // TODO хм, не масло масленное ли?
res_distance [query_id * 2 + k] = // TODO не забудьте извлечь корень
res_train_idx[query_id * 2 + k] = res_train_idx_local[query_local_i * 2 + k];
res_query_idx[query_id * 2 + k] = query_id;
res_distance [query_id * 2 + k] = sqrt(res_distance2_local[query_local_i * 2 + k]);
}
}
}
127 changes: 83 additions & 44 deletions src/phg/matching/descriptor_matcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,20 @@
#include "flann_factory.h"

/// Keeps only the matches that pass the best/second-best distance ratio test.
///
/// @param matches           k-NN candidates per query descriptor. Entries with fewer
///                          than two candidates are dropped, since there is no second
///                          neighbour to compare against.
///                          NOTE(review): assumes candidates are ordered best-first — confirm with the matchers.
/// @param filtered_matches  Output; cleared first, then filled with the best candidate
///                          of every entry that passes the test.
void phg::DescriptorMatcher::filterMatchesRatioTest(const std::vector<std::vector<cv::DMatch>> &matches,
                                                    std::vector<cv::DMatch> &filtered_matches)
{
    // The best match must be strictly closer than this fraction of the runner-up distance.
    constexpr float ratio_thresh = 0.75f;

    filtered_matches.clear();
    filtered_matches.reserve(matches.size());

    for (const std::vector<cv::DMatch> &knn_match : matches) {
        if (knn_match.size() < 2) {
            continue;
        }

        const cv::DMatch &best = knn_match[0];
        const cv::DMatch &second = knn_match[1];
        if (best.distance < ratio_thresh * second.distance) {
            filtered_matches.push_back(best);
        }
    }
}


Expand All @@ -19,8 +28,8 @@ void phg::DescriptorMatcher::filterMatchesClusters(const std::vector<cv::DMatch>
{
filtered_matches.clear();

const size_t total_neighbours = 5; // total number of neighbours to test (including candidate)
const size_t consistent_matches = 3; // minimum number of consistent matches (including candidate)
const size_t total_neighbours = 7; // total number of neighbours to test (including candidate)
const size_t consistent_matches = 4; // minimum number of consistent matches (including candidate)
const float radius_limit_scale = 2.f; // limit search radius by scaled median

const int n_matches = matches.size();
Expand All @@ -32,45 +41,75 @@ void phg::DescriptorMatcher::filterMatchesClusters(const std::vector<cv::DMatch>
cv::Mat points_query(n_matches, 2, CV_32FC1);
cv::Mat points_train(n_matches, 2, CV_32FC1);
for (int i = 0; i < n_matches; ++i) {
points_query.at<cv::Point2f>(i) = keypoints_query[matches[i].queryIdx].pt;
points_train.at<cv::Point2f>(i) = keypoints_train[matches[i].trainIdx].pt;
const cv::Point2f pt_query = keypoints_query[matches[i].queryIdx].pt;
const cv::Point2f pt_train = keypoints_train[matches[i].trainIdx].pt;

points_query.at<float>(i, 0) = pt_query.x;
points_query.at<float>(i, 1) = pt_query.y;
points_train.at<float>(i, 0) = pt_train.x;
points_train.at<float>(i, 1) = pt_train.y;
}
//
// // размерность всего 2, так что точное KD-дерево
// std::shared_ptr<cv::flann::IndexParams> index_params = flannKdTreeIndexParams(TODO);
// std::shared_ptr<cv::flann::SearchParams> search_params = flannKsTreeSearchParams(TODO);
//
// std::shared_ptr<cv::flann::Index> index_query = flannKdTreeIndex(points_query, index_params);
// std::shared_ptr<cv::flann::Index> index_train = flannKdTreeIndex(points_train, index_params);
//
// // для каждой точки найти total neighbors ближайших соседей
// cv::Mat indices_query(n_matches, total_neighbours, CV_32SC1);
// cv::Mat distances2_query(n_matches, total_neighbours, CV_32FC1);
// cv::Mat indices_train(n_matches, total_neighbours, CV_32SC1);
// cv::Mat distances2_train(n_matches, total_neighbours, CV_32FC1);
//
// index_query->knnSearch(points_query, indices_query, distances2_query, total_neighbours, *search_params);
// index_train->knnSearch(points_train, indices_train, distances2_train, total_neighbours, *search_params);
//
// // оценить радиус поиска для каждой картинки
// // NB: radius2_query, radius2_train: квадраты радиуса!
// float radius2_query, radius2_train;
// {
// std::vector<double> max_dists2_query(n_matches);
// std::vector<double> max_dists2_train(n_matches);
// for (int i = 0; i < n_matches; ++i) {
// max_dists2_query[i] = distances2_query.at<float>(i, total_neighbours - 1);
// max_dists2_train[i] = distances2_train.at<float>(i, total_neighbours - 1);
// }
//
// int median_pos = n_matches / 2;
// std::nth_element(max_dists2_query.begin(), max_dists2_query.begin() + median_pos, max_dists2_query.end());
// std::nth_element(max_dists2_train.begin(), max_dists2_train.begin() + median_pos, max_dists2_train.end());
//
// radius2_query = max_dists2_query[median_pos] * radius_limit_scale * radius_limit_scale;
// radius2_train = max_dists2_train[median_pos] * radius_limit_scale * radius_limit_scale;
// }
//
std::shared_ptr<cv::flann::IndexParams> index_params = flannKdTreeIndexParams(1);
std::shared_ptr<cv::flann::SearchParams> search_params = flannKsTreeSearchParams(128);

std::shared_ptr<cv::flann::Index> index_query = flannKdTreeIndex(points_query, index_params);
std::shared_ptr<cv::flann::Index> index_train = flannKdTreeIndex(points_train, index_params);

// для каждой точки найти total neighbors ближайших соседей
cv::Mat indices_query(n_matches, total_neighbours, CV_32SC1);
cv::Mat distances2_query(n_matches, total_neighbours, CV_32FC1);
cv::Mat indices_train(n_matches, total_neighbours, CV_32SC1);
cv::Mat distances2_train(n_matches, total_neighbours, CV_32FC1);

index_query->knnSearch(points_query, indices_query, distances2_query, total_neighbours, *search_params);
index_train->knnSearch(points_train, indices_train, distances2_train, total_neighbours, *search_params);

// оценить радиус поиска для каждой картинки
// NB: radius2_query, radius2_train: квадраты радиуса!
float radius2_query, radius2_train;
{
std::vector<double> max_dists2_query(n_matches);
std::vector<double> max_dists2_train(n_matches);
for (int i = 0; i < n_matches; ++i) {
max_dists2_query[i] = distances2_query.at<float>(i, total_neighbours - 1);
max_dists2_train[i] = distances2_train.at<float>(i, total_neighbours - 1);
}

int median_pos = n_matches / 2;
std::nth_element(max_dists2_query.begin(), max_dists2_query.begin() + median_pos, max_dists2_query.end());
std::nth_element(max_dists2_train.begin(), max_dists2_train.begin() + median_pos, max_dists2_train.end());

radius2_query = max_dists2_query[median_pos] * radius_limit_scale * radius_limit_scale;
radius2_train = max_dists2_train[median_pos] * radius_limit_scale * radius_limit_scale;
}

// метч остается, если левое и правое множества первых total_neighbors соседей в радиусах поиска(radius2_query, radius2_train) имеют как минимум consistent_matches общих элементов
// // TODO заполнить filtered_matches
filtered_matches.reserve(matches.size());
for (int i = 0; i < n_matches; ++i) {
std::vector<int> neigh_query;
std::vector<int> neigh_train;

neigh_query.reserve(total_neighbours);
neigh_train.reserve(total_neighbours);

for (size_t j = 0; j < total_neighbours; ++j) {
if (distances2_query.at<float>(i, j) <= radius2_query) {
neigh_query.push_back(indices_query.at<int>(i, j));
}
if (distances2_train.at<float>(i, j) <= radius2_train) {
neigh_train.push_back(indices_train.at<int>(i, j));
}
}

int n_consistent = 0;
for (int idx_q : neigh_query) {
if (std::find(neigh_train.begin(), neigh_train.end(), idx_q) != neigh_train.end()) {
++n_consistent;
}
}

if (n_consistent >= static_cast<int>(consistent_matches)) {
filtered_matches.push_back(matches[i]);
}
}
}
34 changes: 31 additions & 3 deletions src/phg/matching/flann_matcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
/// Sets up the index and search parameters used for approximate nearest-neighbour search.
phg::FlannMatcher::FlannMatcher()
{
    // NOTE(review): 4 is presumably the number of KD-trees and 32 the search-check
    // budget — confirm against the helpers declared in flann_factory.h.
    constexpr int index_arg = 4;
    constexpr int search_arg = 32;

    index_params = flannKdTreeIndexParams(index_arg);
    search_params = flannKsTreeSearchParams(search_arg);
}

void phg::FlannMatcher::train(const cv::Mat &train_desc)
Expand All @@ -17,5 +17,33 @@ void phg::FlannMatcher::train(const cv::Mat &train_desc)

/// Finds the k nearest train descriptors for every query descriptor via the FLANN index.
///
/// @param query_desc  Query descriptors, one per row. An empty matrix yields empty `matches`.
/// @param matches     Output; resized to one entry per query row, each holding k candidates
///                    in the order returned by the index search.
/// @param k           Number of neighbours to retrieve per query; must be positive.
/// @throws std::runtime_error if the matcher has no index yet or k is not positive.
void phg::FlannMatcher::knnMatch(const cv::Mat &query_desc, std::vector<std::vector<cv::DMatch>> &matches, int k) const
{
    if (!flann_index) {
        throw std::runtime_error("FlannMatcher:: knnMatch : matcher is not trained");
    }
    if (k <= 0) {
        throw std::runtime_error("FlannMatcher:: knnMatch : k must be positive");
    }

    if (query_desc.empty()) {
        matches.clear();
        return;
    }

    const int n_queries = query_desc.rows;

    cv::Mat indices(n_queries, k, CV_32SC1);
    cv::Mat distances2(n_queries, k, CV_32FC1);
    flann_index->knnSearch(query_desc, indices, distances2, k, *search_params);

    matches.resize(n_queries);
    for (int qi = 0; qi < n_queries; ++qi) {
        std::vector<cv::DMatch> &dst = matches[qi];
        dst.clear();
        dst.reserve(k);
        for (int ki = 0; ki < k; ++ki) {
            cv::DMatch match;
            match.imgIdx = 0;
            match.queryIdx = qi;
            match.trainIdx = indices.at<int>(qi, ki);
            // NOTE(review): the search output is treated as squared distances (hence
            // the sqrt) — confirm against the distance type used by the index.
            match.distance = std::sqrt(distances2.at<float>(qi, ki));
            dst.push_back(match);
        }
    }
}
Loading
Loading