diff --git a/CMakeLists.txt b/CMakeLists.txt index a8ca5d83..79ab30b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,15 +5,13 @@ include_directories("${PROJECT_BINARY_DIR}") -project(hello_world) - set(SOURCE_EXE main.cpp) -set(SOURCE_LIB1 sift_test.cpp) -set(SOURCE_LIB2 sift_1b.cpp) +set(SOURCE_LIB sift_1b.cpp) + +add_library(sift_test STATIC ${SOURCE_LIB}) -add_library(sift_test STATIC ${SOURCE_LIB1} ${SOURCE_LIB2}) -add_executable(main ${SOURCE_EXE}) +add_executable(main ${SOURCE_EXE}) SET( CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0" ) target_link_libraries(main sift_test) diff --git a/L2space.h b/L2space.h deleted file mode 100644 index a70a583c..00000000 --- a/L2space.h +++ /dev/null @@ -1,233 +0,0 @@ - -#pragma once -#ifdef _MSC_VER -#include -#include - -#define __builtin_popcount(t) __popcnt(t) -#else -#include -#endif -#define USE_AVX -#if defined(__GNUC__) -#define PORTABLE_ALIGN32 __attribute__((aligned(32))) -#else -#define PORTABLE_ALIGN32 __declspec(align(32)) -#endif - - -#include "hnswlib.h" -namespace hnswlib { - using namespace std; - - static float - L2Sqr(const void *pVect1, const void *pVect2, const void *qty_ptr) - { - //return *((float *)pVect2); - size_t qty = *((size_t *)qty_ptr); - float res = 0; - for (int i = 0; i < qty; i++) { - float t = ((float*)pVect1)[i] - ((float*)pVect2)[i]; - res += t*t; - } - return (res); - - }; - - - static float - L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) - { - float *pVect1 = (float *)pVect1v; - float *pVect2 = (float *)pVect2v; - size_t qty = *((size_t *)qty_ptr); - float PORTABLE_ALIGN32 TmpRes[8]; - #ifdef USE_AVX - size_t qty16 = qty >> 4; - - const float *pEnd1 = pVect1 + (qty16 << 4); - - __m256 diff, v1, v2; - __m256 sum = _mm256_set1_ps(0); - - while (pVect1 < pEnd1) { - v1 = _mm256_loadu_ps(pVect1); - pVect1 += 8; - v2 = _mm256_loadu_ps(pVect2); - pVect2 += 8; - diff = _mm256_sub_ps(v1, v2); - sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); - - v1 = _mm256_loadu_ps(pVect1); - pVect1 += 8; - v2 = _mm256_loadu_ps(pVect2); - pVect2 += 8; - diff = _mm256_sub_ps(v1, v2); - sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); - } - - _mm256_store_ps(TmpRes, sum); - float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7]; - - return (res); - #else - // size_t qty4 = qty >> 2; - size_t qty16 = qty >> 4; - - const float *pEnd1 = pVect1 + (qty16 << 4); - // const float* pEnd2 = pVect1 + (qty4 << 2); - // const float* pEnd3 = pVect1 + qty; - - __m128 diff, v1, v2; - __m128 sum = _mm_set1_ps(0); - - while (pVect1 < pEnd1) { - //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0); - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - diff = _mm_sub_ps(v1, v2); - sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); - - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - diff = _mm_sub_ps(v1, v2); - sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); - - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - diff = _mm_sub_ps(v1, v2); - sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); - - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - diff = _mm_sub_ps(v1, v2); - sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); - } - _mm_store_ps(TmpRes, sum); - float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; - - return (res); - #endif - }; - static float PORTABLE_ALIGN32 TmpRes[8]; - static float - L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) - { - float *pVect1 = (float *)pVect1v; - float *pVect2 = (float *)pVect2v; - size_t qty = *((size_t *)qty_ptr); - - - // size_t qty4 = qty >> 2; - size_t qty16 = qty >> 2; - - const float *pEnd1 = pVect1 + (qty16 << 2); - - __m128 diff, v1, v2; - __m128 sum = _mm_set1_ps(0); - - while (pVect1 < pEnd1) { - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - diff = _mm_sub_ps(v1, v2); - sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); - } - _mm_store_ps(TmpRes, sum); - float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; - - return (res); - }; - - class L2Space : public SpaceInterface { - - DISTFUNC fstdistfunc_; - size_t data_size_; - size_t dim_; - public: - L2Space(size_t dim) { - fstdistfunc_ = L2Sqr; - if (dim % 4 == 0) - fstdistfunc_ = L2SqrSIMD4Ext; - if (dim % 16==0) - fstdistfunc_ = L2SqrSIMD16Ext; - /*else{ - throw runtime_error("Data type not supported!"); - }*/ - dim_ = dim; - data_size_ = dim * sizeof(float); - } - - size_t get_data_size() { - return data_size_; - } - DISTFUNC get_dist_func() { - return fstdistfunc_; - } - void *get_dist_func_param() { - return &dim_; - } - - }; - static int - L2SqrI(const void * __restrict pVect1, const void * __restrict pVect2, const void * __restrict qty_ptr) - { - - size_t qty = *((size_t *)qty_ptr); - int res = 0; - unsigned char *a = (unsigned char*)pVect1; - unsigned char *b = (unsigned char*)pVect2; - /*for (int i = 0; i < qty; i++) { - int t = int((a)[i]) - int((b)[i]); - res += t*t; - }*/ - - qty = qty >> 2; - for (int i = 0; i < qty; i++) { - - res += ((*a) - (*b))*((*a) - (*b)); a++; b++; - res += ((*a) - (*b))*((*a) - (*b)); a++; b++; - res += ((*a) - (*b))*((*a) - (*b)); a++; b++; - res += ((*a) - (*b))*((*a) - (*b)); a++; b++; - - - } - - return (res); - - }; - class L2SpaceI : public SpaceInterface { - - DISTFUNC fstdistfunc_; - size_t data_size_; - size_t dim_; - public: - L2SpaceI(size_t dim) { - fstdistfunc_ = L2SqrI; - dim_ = dim; - data_size_ = dim * sizeof(unsigned char); - } - - size_t get_data_size() { - return data_size_; - } - DISTFUNC get_dist_func() { - return fstdistfunc_; - } - void *get_dist_func_param() { - return &dim_; - } - - }; - - -}; \ No newline at end of file diff --git a/README.md b/README.md index bdfbfd6e..3d86d3b3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ # Hnsw - Approximate nearest neighbor search Hnsw paper code for the 200M SIFT experiment. +NEW: Added simple python bindings with incremental construction(reusing some nmslib code, only L2 is now supported) + + +####Test reproduction steps: To download and extract the bigann dataset: ```bash python3 download_bigann.py @@ -18,6 +22,37 @@ To run the test on 200M SIFT subset: The size of the bigann subset (in millions) is controlled by the variable **subset_size_milllions** hardcoded in **sift_1b.cpp**. + +#### Python bindings example +```python +import hnswlib +import numpy as np + +dim = 128 +num_elements = 10000 + +data=np.float32(np.random.random((num_elements,dim))) +data_labels=np.arange(num_elements) + +p = hnswlib.Index(space='l2', dim=dim) #Only l2 is supported currently +p.init_index(max_elements=10000, ef_construction=200, M=16) + +# Element insertion be called several times: +int_labels = p.add_items(data, data_labels) + + +p.set_ef(50) +# Query dataset, k - number of closest elements +# Return numpy arrays +labels, distances = p.knn_query(data, k=1) + +``` +To compile run: +```bash +cd python_bindings +python3 setup.py install +``` + The repo contrains some parts of the Non-Metric Space Library's code https://github.com/searchivarius/nmslib References: diff --git a/brutoforce.h b/brutoforce.h deleted file mode 100644 index fff60c13..00000000 --- a/brutoforce.h +++ /dev/null @@ -1,63 +0,0 @@ -#pragma once -#include -namespace hnswlib { - template class BruteforceSearch : public AlgorithmInterface { - public: - BruteforceSearch(SpaceInterface *s) { - - } - BruteforceSearch(SpaceInterface *s, size_t maxElements) { - maxelements_ = maxElements; - - data_size_= s->get_data_size(); - fstdistfunc_ = s->get_dist_func(); - dist_func_param_ = s->get_dist_func_param(); - cout << data_size_ << "\n"; - size_per_element_ = data_size_ + sizeof(labeltype); - data_=(char *)malloc(maxElements*size_per_element_); - cur_element_count = 0; - } - ~BruteforceSearch() { - free(data_); - } - char *data_; - size_t maxelements_; - size_t cur_element_count; - size_t size_per_element_; - - size_t data_size_; - DISTFUNC fstdistfunc_; - void *dist_func_param_; - - void addPoint(void *datapoint, labeltype label) { - - if (cur_element_count >= maxelements_) - { - cout << "The number of elements exceeds the specified limit\n"; - throw exception(); - }; - memcpy(data_ + size_per_element_*cur_element_count+ data_size_, &label, sizeof(labeltype)); - memcpy(data_ + size_per_element_*cur_element_count, datapoint, data_size_); - cur_element_count++; - }; - std::priority_queue< std::pair< dist_t, labeltype >> searchKnn(void *query_data,int k) { - std::priority_queue< std::pair< dist_t, labeltype >> topResults; - for (int i = 0; i < k; i++) { - dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_*i, dist_func_param_); - topResults.push(std::pair(dist, *((labeltype*)(data_ + size_per_element_*i + data_size_)))); - } - dist_t lastdist= topResults.top().first; - for (int i = k; i < cur_element_count; i++) { - dist_t dist=fstdistfunc_(query_data, data_ + size_per_element_*i, dist_func_param_); - if(dist < lastdist) { - topResults.push(std::pair(dist,*((labeltype*) (data_ + size_per_element_*i + data_size_)))); - if (topResults.size() > k) - topResults.pop(); - lastdist = topResults.top().first; - } - - } - return topResults; - }; - }; -} diff --git a/hnswalg.h b/hnswalg.h deleted file mode 100644 index ab30563e..00000000 --- a/hnswalg.h +++ /dev/null @@ -1,693 +0,0 @@ -#pragma once -#include "visited_list_pool.h" -#include "hnswlib.h" -#include -#include -#include -#include -#include -#include -template -void writeBinaryPOD(std::ostream& out, const T& podRef) { - out.write((char*)&podRef, sizeof(T)); -} - -template -static void readBinaryPOD(std::istream& in, T& podRef) { - in.read((char*)&podRef, sizeof(T)); -} - -#define DEBUG_LIB 1 -namespace hnswlib { - typedef unsigned int tableint; - typedef unsigned int linklistsizeint; - template class HierarchicalNSW : public AlgorithmInterface { - public: - HierarchicalNSW(SpaceInterface *s) { - - } - HierarchicalNSW(SpaceInterface *s, const string &location, bool nmslib = false) { - LoadIndex(location, s); - } - HierarchicalNSW(SpaceInterface *s, size_t maxElements, size_t M = 16, size_t efConstruction = 200) : - ll_locks(maxElements), elementLevels(maxElements) { - maxelements_ = maxElements; - - data_size_ = s->get_data_size(); - fstdistfunc_ = s->get_dist_func(); - dist_func_param_ = s->get_dist_func_param(); - M_ = M; - maxM_ = M_; - maxM0_ = M_ * 2; - efConstruction_ = efConstruction; - ef_ = 7; - - - size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint); - size_data_per_element_ = size_links_level0_ + data_size_ + sizeof(labeltype); - offsetData_ = size_links_level0_; - label_offset_ = size_links_level0_ + data_size_; - offsetLevel0_ = 0; - cout << offsetData_ << "\t" << label_offset_ << "\n"; - cout << size_links_level0_ << "\t" << data_size_ << "\t" << sizeof(labeltype) << "\n"; - - data_level0_memory_ = (char *)malloc(maxelements_*size_data_per_element_); - - size_t predicted_size_per_element = size_data_per_element_ + sizeof(void*) + 8 + 8 + 2 * 8; - cout << "size_mb=" << maxelements_*(predicted_size_per_element) / (1000 * 1000) << "\n"; - cur_element_count = 0; - - visitedlistpool = new VisitedListPool(1, maxElements); - - - - //initializations for special treatment of the first node - enterpoint_node = -1; - maxlevel_ = -1; - - linkLists_ = (char **)malloc(sizeof(void *) * maxelements_); - size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint); - mult_ = 1 / log(1.0 * M_); - revSize_ = 1.0 / mult_; - } - ~HierarchicalNSW() { - - free(data_level0_memory_); - for (tableint i = 0; i < cur_element_count; i++) { - if (elementLevels[i] > 0) - free(linkLists_[i]); - } - free(linkLists_); - delete visitedlistpool; - } - size_t maxelements_; - size_t cur_element_count; - size_t size_data_per_element_; - size_t size_links_per_element_; - - size_t M_; - size_t maxM_; - size_t maxM0_; - size_t efConstruction_; - int delaunay_type_; - double mult_, revSize_; - int maxlevel_; - - - VisitedListPool *visitedlistpool; - mutex cur_element_count_guard_; - mutex MaxLevelGuard_; - vector ll_locks; - tableint enterpoint_node; - - size_t dist_calc; - size_t size_links_level0_; - size_t offsetData_, offsetLevel0_; - - - char *data_level0_memory_; - char **linkLists_; - vector elementLevels; - - - - size_t data_size_; - size_t label_offset_; - DISTFUNC fstdistfunc_; - void *dist_func_param_; - std::default_random_engine generator = std::default_random_engine(100); - - inline labeltype getExternalLabel(tableint internal_id) { - return *((labeltype *)(data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_)); - } - inline labeltype *getExternalLabeLp(tableint internal_id) { - return (labeltype *)(data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_); - } - inline char *getDataByInternalId(tableint internal_id) { - return (data_level0_memory_ + internal_id * size_data_per_element_ + offsetData_); - } - int getRandomLevel(double revSize) - { - std::uniform_real_distribution distribution(0.0, 1.0); - double r = -log(distribution(generator)) * revSize; - //cout << revSize; - return (int)r; - } - - std::priority_queue< std::pair< dist_t, tableint >> searchBaseLayer(tableint ep, void *datapoint, int layer) { - VisitedList *vl = visitedlistpool->getFreeVisitedList(); - vl_type *massVisited = vl->mass; - vl_type currentV = vl->curV; - - std::priority_queue< std::pair< dist_t, tableint >> topResults; - std::priority_queue< std::pair< dist_t, tableint >> candidateSet; - dist_t dist = fstdistfunc_(datapoint, getDataByInternalId(ep), dist_func_param_); - - topResults.emplace(dist, ep); - candidateSet.emplace(-dist, ep); - massVisited[ep] = currentV; - dist_t lowerBound = dist; - - while (!candidateSet.empty()) { - - std::pair curr_el_pair = candidateSet.top(); - - if ((-curr_el_pair.first) > lowerBound) { - break; - } - candidateSet.pop(); - - tableint curNodeNum = curr_el_pair.second; - - unique_lock lock(ll_locks[curNodeNum]); - - int *data;// = (int *)(linkList0_ + curNodeNum * size_links_per_element0_); - if (layer == 0) - data = (int *)(data_level0_memory_ + curNodeNum * size_data_per_element_ + offsetLevel0_); - else - data = (int *)(linkLists_[curNodeNum] + (layer - 1) * size_links_per_element_); - int size = *data; - tableint *datal = (tableint *)(data + 1); - _mm_prefetch((char *)(massVisited + *(data + 1)), _MM_HINT_T0); - _mm_prefetch((char *)(massVisited + *(data + 1) + 64), _MM_HINT_T0); - _mm_prefetch(getDataByInternalId(*datal), _MM_HINT_T0); - _mm_prefetch(getDataByInternalId(*(datal + 1)), _MM_HINT_T0); - - for (int j = 0; j < size; j++) { - tableint tnum = *(datal + j); - _mm_prefetch((char *)(massVisited + *(datal + j + 1)), _MM_HINT_T0); - _mm_prefetch(getDataByInternalId(*(datal + j + 1)), _MM_HINT_T0); - if (!(massVisited[tnum] == currentV)) { - massVisited[tnum] = currentV; - char *currObj1 = (getDataByInternalId(tnum)); - - dist_t dist = fstdistfunc_(datapoint, currObj1, dist_func_param_); - if (topResults.top().first > dist || topResults.size() < efConstruction_) { - candidateSet.emplace(-dist, tnum); - _mm_prefetch(getDataByInternalId(candidateSet.top().second), _MM_HINT_T0); - topResults.emplace(dist, tnum); - if (topResults.size() > efConstruction_) { - topResults.pop(); - } - lowerBound = topResults.top().first; - } - } - } - } - visitedlistpool->releaseVisitedList(vl); - - return topResults; - } - struct CompareByFirst { - constexpr bool operator()(pair const & a, - pair const & b) const noexcept - { - return a.first < b.first; - } - }; - std::priority_queue< std::pair< dist_t, tableint >, vector>, CompareByFirst> searchBaseLayerST(tableint ep, void *datapoint, size_t ef) { - VisitedList *vl = visitedlistpool->getFreeVisitedList(); - vl_type *massVisited = vl->mass; - vl_type currentV = vl->curV; - - std::priority_queue< std::pair< dist_t, tableint >, vector>, CompareByFirst> topResults; - std::priority_queue< std::pair< dist_t, tableint >, vector>, CompareByFirst> candidateSet; - dist_t dist = fstdistfunc_(datapoint, getDataByInternalId(ep), dist_func_param_); - dist_calc++; - topResults.emplace(dist, ep); - candidateSet.emplace(-dist, ep); - massVisited[ep] = currentV; - dist_t lowerBound = dist; - - while (!candidateSet.empty()) { - - std::pair curr_el_pair = candidateSet.top(); - - if ((-curr_el_pair.first) > lowerBound) { - break; - } - candidateSet.pop(); - - tableint curNodeNum = curr_el_pair.second; - int *data = (int *)(data_level0_memory_ + curNodeNum * size_data_per_element_ + offsetLevel0_); - int size = *data; - _mm_prefetch((char *)(massVisited + *(data + 1)), _MM_HINT_T0); - _mm_prefetch((char *)(massVisited + *(data + 1) + 64), _MM_HINT_T0); - _mm_prefetch(data_level0_memory_ + (*(data + 1)) * size_data_per_element_ + offsetData_, _MM_HINT_T0); - _mm_prefetch((char *)(data + 2), _MM_HINT_T0); - - for (int j = 1; j <= size; j++) { - int tnum = *(data + j); - _mm_prefetch((char *)(massVisited + *(data + j + 1)), _MM_HINT_T0); - _mm_prefetch(data_level0_memory_ + (*(data + j + 1)) * size_data_per_element_ + offsetData_, _MM_HINT_T0);//////////// - if (!(massVisited[tnum] == currentV)) { - - massVisited[tnum] = currentV; - - char *currObj1 = (getDataByInternalId(tnum)); - dist_t dist = fstdistfunc_(datapoint, currObj1, dist_func_param_); - dist_calc++; - if (topResults.top().first > dist || topResults.size() < ef) { - candidateSet.emplace(-dist, tnum); - _mm_prefetch(data_level0_memory_ + candidateSet.top().second * size_data_per_element_ + offsetLevel0_,/////////// - _MM_HINT_T0);//////////////////////// - - topResults.emplace(dist, tnum); - - if (topResults.size() > ef) { - topResults.pop(); - } - lowerBound = topResults.top().first; - } - } - } - } - - visitedlistpool->releaseVisitedList(vl); - return topResults; - } - void getNeighborsByHeuristic2(std::priority_queue< std::pair< dist_t, tableint>> &topResults, const int NN) - { - if (topResults.size() < NN) { - return; - } - std::priority_queue< std::pair< dist_t, tableint>> resultSet; - std::priority_queue< std::pair< dist_t, tableint>> templist; - vector> returnlist; - while (topResults.size() > 0) { - resultSet.emplace(-topResults.top().first, topResults.top().second); - topResults.pop(); - } - - while (resultSet.size()) { - if (returnlist.size() >= NN) - break; - std::pair< dist_t, tableint> curen = resultSet.top(); - dist_t dist_to_query = -curen.first; - resultSet.pop(); - bool good = true; - for (std::pair< dist_t, tableint> curen2 : returnlist) { - dist_t curdist = - fstdistfunc_(getDataByInternalId(curen2.second), getDataByInternalId(curen.second), dist_func_param_);; - if (curdist < dist_to_query) { - good = false; - break; - } - } - if (good) { - returnlist.push_back(curen); - } - - - } - - for (std::pair< dist_t, tableint> curen2 : returnlist) { - - topResults.emplace(-curen2.first, curen2.second); - } - } - linklistsizeint *get_linklist0(tableint cur_c) { - return (linklistsizeint *)(data_level0_memory_ + cur_c * size_data_per_element_ + offsetLevel0_); - }; - linklistsizeint *get_linklist(tableint cur_c, int level) { - return (linklistsizeint *)(linkLists_[cur_c] + (level - 1) * size_links_per_element_); - }; - - void mutuallyConnectNewElement(void *datapoint, tableint cur_c, std::priority_queue< std::pair< dist_t, tableint >> topResults, int level) { - - size_t Mcurmax = level ? maxM_ : maxM0_; - getNeighborsByHeuristic2(topResults, M_); - while (topResults.size() > M_) { - throw exception(); - topResults.pop(); - } - vector rez; - rez.reserve(M_); - while (topResults.size() > 0) { - rez.push_back(topResults.top().second); - topResults.pop(); - } - { - linklistsizeint *ll_cur; - if (level == 0) - ll_cur = (linklistsizeint *)(data_level0_memory_ + cur_c * size_data_per_element_ + offsetLevel0_); - else - ll_cur = (linklistsizeint *)(linkLists_[cur_c] + (level - 1) * size_links_per_element_); - if (*ll_cur) { - cout << *ll_cur << "\n"; - cout << elementLevels[cur_c] << "\n"; - cout << level << "\n"; - throw runtime_error("Should be blank"); - } - *ll_cur = rez.size(); - tableint *data = (tableint *)(ll_cur + 1); - - - for (int idx = 0; idx < rez.size(); idx++) { - if (data[idx]) - throw runtime_error("Should be blank"); - if (level > elementLevels[rez[idx]]) - throw runtime_error("Bad level"); - - data[idx] = rez[idx]; - } - } - for (int idx = 0; idx < rez.size(); idx++) { - - unique_lock lock(ll_locks[rez[idx]]); - - if (rez[idx] == cur_c) - throw runtime_error("Connection to the same element"); - linklistsizeint *ll_other; - if (level == 0) - ll_other = (linklistsizeint *)(data_level0_memory_ + rez[idx] * size_data_per_element_ + offsetLevel0_); - else - ll_other = (linklistsizeint *)(linkLists_[rez[idx]] + (level - 1) * size_links_per_element_); - if (level > elementLevels[rez[idx]]) - throw runtime_error("Bad level"); - int sz_link_list_other = *ll_other; - - if (sz_link_list_other > Mcurmax || sz_link_list_other < 0) - throw runtime_error("Bad sz_link_list_other"); - - if (sz_link_list_other < Mcurmax) { - tableint *data = (tableint *)(ll_other + 1); - data[sz_link_list_other] = cur_c; - *ll_other = sz_link_list_other + 1; - } - else { - // finding the "weakest" element to replace it with the new one - tableint *data = (tableint *)(ll_other + 1); - dist_t d_max = fstdistfunc_(getDataByInternalId(cur_c), getDataByInternalId(rez[idx]), dist_func_param_); - // Heuristic: - std::priority_queue< std::pair< dist_t, tableint>> candidates; - candidates.emplace(d_max, cur_c); - - for (int j = 0; j < sz_link_list_other; j++) { - candidates.emplace(fstdistfunc_(getDataByInternalId(data[j]), getDataByInternalId(rez[idx]), dist_func_param_), data[j]); - } - - getNeighborsByHeuristic2(candidates, Mcurmax); - - int indx = 0; - while (candidates.size() > 0) { - data[indx] = candidates.top().second; - candidates.pop(); - indx++; - } - *ll_other = indx; - // Nearest K: - /*int indx = -1; - for (int j = 0; j < sz_link_list_other; j++) { - dist_t d = fstdistfunc_(getDataByInternalId(data[j]), getDataByInternalId(rez[idx]), dist_func_param_); - if (d > d_max) { - indx = j; - d_max = d; - } - } - if (indx >= 0) { - data[indx] = cur_c; - } */ - } - - } - } - mutex global; - size_t ef_; - void setEf(size_t ef) { - ef_ = ef; - } - void addPoint(void *datapoint, labeltype label, int level = -1) { - - tableint cur_c = 0; - { - unique_lock lock(cur_element_count_guard_); - if (cur_element_count >= maxelements_) - { - cout << "The number of elements exceeds the specified limit\n"; - throw runtime_error("The number of elements exceeds the specified limit"); - }; - cur_c = cur_element_count; - cur_element_count++; - } - unique_lock lock_el(ll_locks[cur_c]); - int curlevel = getRandomLevel(mult_); - if (level > 0) - curlevel = level; - elementLevels[cur_c] = curlevel; - - - - - unique_lock templock(global); - int maxlevelcopy = maxlevel_; - if (curlevel <= maxlevelcopy) - templock.unlock(); - tableint currObj = enterpoint_node; - - - memset(data_level0_memory_ + cur_c * size_data_per_element_ + offsetLevel0_, 0, size_data_per_element_); - // Initialisation of the data and label - memcpy(getExternalLabeLp(cur_c), &label, sizeof(labeltype)); - memcpy(getDataByInternalId(cur_c), datapoint, data_size_); - - - if (curlevel) { - linkLists_[cur_c] = (char*)malloc(size_links_per_element_*curlevel); - memset(linkLists_[cur_c], 0, size_links_per_element_*curlevel); - } - if (currObj != -1) { - - - - - if (curlevel < maxlevelcopy) { - - dist_t curdist = fstdistfunc_(datapoint, getDataByInternalId(currObj), dist_func_param_); - for (int level = maxlevelcopy; level > curlevel; level--) { - - - - bool changed = true; - while (changed) { - changed = false; - int *data; - unique_lock lock(ll_locks[currObj]); - data = (int *)(linkLists_[currObj] + (level - 1) * size_links_per_element_); - int size = *data; - tableint *datal = (tableint *)(data + 1); - for (int i = 0; i < size; i++) { - tableint cand = datal[i]; - if (cand<0 || cand>maxelements_) - throw runtime_error("cand error"); - dist_t d = fstdistfunc_(datapoint, getDataByInternalId(cand), dist_func_param_); - if (d < curdist) { - curdist = d; - currObj = cand; - changed = true; - } - } - } - } - } - - for (int level = min(curlevel, maxlevelcopy); level >= 0; level--) { - if (level > maxlevelcopy || level < 0) - throw runtime_error("Level error"); - - std::priority_queue< std::pair< dist_t, tableint >> topResults = searchBaseLayer(currObj, datapoint, level); - mutuallyConnectNewElement(datapoint, cur_c, topResults, level); - } - - - } - - else { - // Do nothing for the first element - enterpoint_node = 0; - maxlevel_ = curlevel; - - } - - //Releasing lock for the maximum level - if (curlevel > maxlevelcopy) { - enterpoint_node = cur_c; - maxlevel_ = curlevel; - } - }; - std::priority_queue< std::pair< dist_t, labeltype >> searchKnn(void *query_data, int k) { - tableint currObj = enterpoint_node; - dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node), dist_func_param_); - dist_calc++; - for (int level = maxlevel_; level > 0; level--) { - bool changed = true; - while (changed) { - changed = false; - int *data; - data = (int *)(linkLists_[currObj] + (level - 1) * size_links_per_element_); - int size = *data; - tableint *datal = (tableint *)(data + 1); - for (int i = 0; i < size; i++) { - tableint cand = datal[i]; - if (cand<0 || cand>maxelements_) - throw runtime_error("cand error"); - dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); - dist_calc++; - if (d < curdist) { - curdist = d; - currObj = cand; - changed = true; - } - } - } - } - - - std::priority_queue< std::pair< dist_t, tableint >, vector>, CompareByFirst> topResults = searchBaseLayerST(currObj, query_data, ef_); - std::priority_queue< std::pair< dist_t, labeltype >> results; - while (topResults.size() > k) { - topResults.pop(); - } - while (topResults.size() > 0) { - std::pair rez = topResults.top(); - results.push(std::pair(rez.first, getExternalLabel(rez.second))); - topResults.pop(); - } - return results; - }; - - std::priority_queue< std::pair< dist_t, tableint>> searchKnnInternal(void *query_data, int k) { - tableint currObj = enterpoint_node; - dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node), dist_func_param_); - dist_calc++; - for (int level = maxlevel_; level > 0; level--) { - bool changed = true; - while (changed) { - changed = false; - int *data; - data = (int *)(linkLists_[currObj] + (level - 1) * size_links_per_element_); - int size = *data; - tableint *datal = (tableint *)(data + 1); - for (int i = 0; i < size; i++) { - tableint cand = datal[i]; - if (cand<0 || cand>maxelements_) - throw runtime_error("cand error"); - dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); - dist_calc++; - if (d < curdist) { - curdist = d; - currObj = cand; - changed = true; - } - } - } - } - - //std::priority_queue< std::pair< dist_t, tableint >> topResults = searchBaseLayer(currObj, query_data, 0); - std::priority_queue< std::pair< dist_t, tableint >> topResults = searchBaseLayerST(currObj, query_data, ef_); - while (topResults.size() > k) { - topResults.pop(); - } - return topResults; - }; - void SaveIndex(const string &location) - { - - cout << "Saving index to " << location.c_str() << "\n"; - std::ofstream output(location, std::ios::binary); - streampos position; - - writeBinaryPOD(output, offsetLevel0_); - writeBinaryPOD(output, maxelements_); - writeBinaryPOD(output, cur_element_count); - writeBinaryPOD(output, size_data_per_element_); - writeBinaryPOD(output, label_offset_); - writeBinaryPOD(output, offsetData_); - writeBinaryPOD(output, maxlevel_); - writeBinaryPOD(output, enterpoint_node); - writeBinaryPOD(output, maxM_); - - writeBinaryPOD(output, maxM0_); - writeBinaryPOD(output, M_); - writeBinaryPOD(output, mult_); - writeBinaryPOD(output, efConstruction_); - - output.write(data_level0_memory_, maxelements_*size_data_per_element_); - - for (size_t i = 0; i < maxelements_; i++) { - unsigned int linkListSize = elementLevels[i] > 0 ? size_links_per_element_*elementLevels[i] : 0; - writeBinaryPOD(output, linkListSize); - if (linkListSize) - output.write(linkLists_[i], linkListSize); - } - output.close(); - } - - void LoadIndex(const string &location, SpaceInterface *s) - { - - - //cout << "Loading index from " << location; - std::ifstream input(location, std::ios::binary); - streampos position; - - readBinaryPOD(input, offsetLevel0_); - readBinaryPOD(input, maxelements_); - readBinaryPOD(input, cur_element_count); - readBinaryPOD(input, size_data_per_element_); - readBinaryPOD(input, label_offset_); - readBinaryPOD(input, offsetData_); - readBinaryPOD(input, maxlevel_); - readBinaryPOD(input, enterpoint_node); - - readBinaryPOD(input, maxM_); - readBinaryPOD(input, maxM0_); - readBinaryPOD(input, M_); - readBinaryPOD(input, mult_); - readBinaryPOD(input, efConstruction_); - cout << efConstruction_ << "\n"; - - - - data_size_ = s->get_data_size(); - fstdistfunc_ = s->get_dist_func(); - dist_func_param_ = s->get_dist_func_param(); - - data_level0_memory_ = (char *)malloc(maxelements_*size_data_per_element_); - input.read(data_level0_memory_, maxelements_*size_data_per_element_); - - - size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint); - - visitedlistpool = new VisitedListPool(1, maxelements_); - - - linkLists_ = (char **)malloc(sizeof(void *) * maxelements_); - cout << maxelements_ << "\n"; - elementLevels = vector(maxelements_); - revSize_ = 1.0 / mult_; - ef_ = 10; - for (size_t i = 0; i < maxelements_; i++) { - unsigned int linkListSize; - readBinaryPOD(input, linkListSize); - if (linkListSize == 0) { - elementLevels[i] = 0; - - linkLists_[i] = nullptr; - } - else { - elementLevels[i] = linkListSize / size_links_per_element_; - linkLists_[i] = (char *)malloc(linkListSize); - input.read(linkLists_[i], linkListSize); - } - } - - - input.close(); - size_t predicted_size_per_element = size_data_per_element_ + sizeof(void*) + 8 + 8 + 2 * 8; - cout << "Loaded index, predicted size=" << maxelements_*(predicted_size_per_element) / (1000 * 1000) << "\n"; - return; - } - }; - -} diff --git a/hnswlib.h b/hnswlib.h deleted file mode 100644 index d9cd79da..00000000 --- a/hnswlib.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once -#ifdef _MSC_VER -#include -#include - -#define __builtin_popcount(t) __popcnt(t) - -#endif - -typedef size_t labeltype; - -#if defined(__GNUC__) -#define PORTABLE_ALIGN32 __attribute__((aligned(32))) -#else -#define PORTABLE_ALIGN32 __declspec(align(32)) -#endif -namespace hnswlib { - //typedef void *labeltype; - //typedef float(*DISTFUNC) (const void *, const void *, const void *); - template - using DISTFUNC = MTYPE(*) (const void *, const void *, const void *); - - - - template class AlgorithmInterface { - public: - //virtual void addPoint(void *, labeltype) = 0; - virtual std::priority_queue< std::pair< dist_t, labeltype >> searchKnn(void *,int) = 0; - }; - template - class SpaceInterface { - public: - //virtual void search(void *); - virtual size_t get_data_size() = 0; - virtual DISTFUNC get_dist_func() = 0; - virtual void *get_dist_func_param() = 0; - - }; -} -#include "L2space.h" -#include "brutoforce.h" -#include "hnswalg.h" \ No newline at end of file diff --git a/hnswlib/L2space.h b/hnswlib/L2space.h new file mode 100644 index 00000000..abb27389 --- /dev/null +++ b/hnswlib/L2space.h @@ -0,0 +1,248 @@ + +#pragma once +#ifdef _MSC_VER +#include +#include + +#define __builtin_popcount(t) __popcnt(t) +#else + +#include + +#endif +#define USE_AVX +#if defined(__GNUC__) +#define PORTABLE_ALIGN32 __attribute__((aligned(32))) +#else +#define PORTABLE_ALIGN32 __declspec(align(32)) +#endif + + +#include "hnswlib.h" + +namespace hnswlib { + using namespace std; + + static float + L2Sqr(const void *pVect1, const void *pVect2, const void *qty_ptr) { + //return *((float *)pVect2); + size_t qty = *((size_t *) qty_ptr); + float res = 0; + for (int i = 0; i < qty; i++) { + float t = ((float *) pVect1)[i] - ((float *) pVect2)[i]; + res += t * t; + } + return (res); + + }; + + + static float + L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + float *pVect1 = (float *) pVect1v; + float *pVect2 = (float *) pVect2v; + size_t qty = *((size_t *) qty_ptr); + float PORTABLE_ALIGN32 TmpRes[8]; +#ifdef USE_AVX + size_t qty16 = qty >> 4; + + const float *pEnd1 = pVect1 + (qty16 << 4); + + __m256 diff, v1, v2; + __m256 sum = _mm256_set1_ps(0); + + while (pVect1 < pEnd1) { + v1 = _mm256_loadu_ps(pVect1); + pVect1 += 8; + v2 = _mm256_loadu_ps(pVect2); + pVect2 += 8; + diff = _mm256_sub_ps(v1, v2); + sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); + + v1 = _mm256_loadu_ps(pVect1); + pVect1 += 8; + v2 = _mm256_loadu_ps(pVect2); + pVect2 += 8; + diff = _mm256_sub_ps(v1, v2); + sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); + } + + _mm256_store_ps(TmpRes, sum); + float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7]; + + return (res); +#else + // size_t qty4 = qty >> 2; + size_t qty16 = qty >> 4; + + const float *pEnd1 = pVect1 + (qty16 << 4); + // const float* pEnd2 = pVect1 + (qty4 << 2); + // const float* pEnd3 = pVect1 + qty; + + __m128 diff, v1, v2; + __m128 sum = _mm_set1_ps(0); + + while (pVect1 < pEnd1) { + //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0); + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + } + _mm_store_ps(TmpRes, sum); + float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; + + return (res); +#endif + }; + + + static float + L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + float PORTABLE_ALIGN32 TmpRes[8]; + float *pVect1 = (float *) pVect1v; + float *pVect2 = (float *) pVect2v; + size_t qty = *((size_t *) qty_ptr); + + + // size_t qty4 = qty >> 2; + size_t qty16 = qty >> 2; + + const float *pEnd1 = pVect1 + (qty16 << 2); + + __m128 diff, v1, v2; + __m128 sum = _mm_set1_ps(0); + + while (pVect1 < pEnd1) { + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + } + _mm_store_ps(TmpRes, sum); + float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; + + return (res); + }; + + class L2Space : public SpaceInterface { + + DISTFUNC fstdistfunc_; + size_t data_size_; + size_t dim_; + public: + L2Space(size_t dim) { + fstdistfunc_ = L2Sqr; + if (dim % 4 == 0) + fstdistfunc_ = L2SqrSIMD4Ext; + if (dim % 16 == 0) + fstdistfunc_ = L2SqrSIMD16Ext; + /*else{ + throw runtime_error("Data type not supported!"); + }*/ + dim_ = dim; + data_size_ = dim * sizeof(float); + } + + size_t get_data_size() { + return data_size_; + } + + DISTFUNC get_dist_func() { + return fstdistfunc_; + } + + void *get_dist_func_param() { + return &dim_; + } + + }; + + static int + L2SqrI(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) { + + size_t qty = *((size_t *) qty_ptr); + int res = 0; + unsigned char *a = (unsigned char *) pVect1; + unsigned char *b = (unsigned char *) pVect2; + /*for (int i = 0; i < qty; i++) { + int t = int((a)[i]) - int((b)[i]); + res += t*t; + }*/ + + qty = qty >> 2; + for (size_t i = 0; i < qty; i++) { + + res += ((*a) - (*b)) * ((*a) - (*b)); + a++; + b++; + res += ((*a) - (*b)) * ((*a) - (*b)); + a++; + b++; + res += ((*a) - (*b)) * ((*a) - (*b)); + a++; + b++; + res += ((*a) - (*b)) * ((*a) - (*b)); + a++; + b++; + + + } + + return (res); + + }; + + class L2SpaceI : public SpaceInterface { + + DISTFUNC fstdistfunc_; + size_t data_size_; + size_t dim_; + public: + L2SpaceI(size_t dim) { + fstdistfunc_ = L2SqrI; + dim_ = dim; + data_size_ = dim * sizeof(unsigned char); + } + + size_t get_data_size() { + return data_size_; + } + + DISTFUNC get_dist_func() { + return fstdistfunc_; + } + + void *get_dist_func_param() { + return &dim_; + } + + }; + + +}; \ No newline at end of file diff --git a/hnswlib/brutoforce.h b/hnswlib/brutoforce.h new file mode 100644 index 00000000..15c0e5dd --- /dev/null +++ b/hnswlib/brutoforce.h @@ -0,0 +1,71 @@ +#pragma once + +#include + +namespace hnswlib { + template + class BruteforceSearch : public AlgorithmInterface { + public: + BruteforceSearch(SpaceInterface *s) { + + } + + BruteforceSearch(SpaceInterface *s, size_t maxElements) { + maxelements_ = maxElements; + + data_size_ = s->get_data_size(); + fstdistfunc_ = s->get_dist_func(); + dist_func_param_ = s->get_dist_func_param(); + cout << data_size_ << "\n"; + size_per_element_ = data_size_ + sizeof(labeltype); + data_ = (char *) malloc(maxElements * size_per_element_); + cur_element_count = 0; + } + + ~BruteforceSearch() { + free(data_); + } + + char *data_; + size_t maxelements_; + size_t cur_element_count; + size_t size_per_element_; + + size_t data_size_; + DISTFUNC fstdistfunc_; + void *dist_func_param_; + + void addPoint(void *datapoint, labeltype label) { + + if (cur_element_count >= maxelements_) { + cout << "The number of elements exceeds the specified limit\n"; + throw exception(); + }; + memcpy(data_ + size_per_element_ * cur_element_count + data_size_, &label, sizeof(labeltype)); + memcpy(data_ + size_per_element_ * cur_element_count, datapoint, data_size_); + cur_element_count++; + }; + + std::priority_queue> searchKnn(void *query_data, int k) { + std::priority_queue> topResults; + for (int i = 0; i < k; i++) { + dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_); + topResults.push(std::pair(dist, *((labeltype *) (data_ + size_per_element_ * i + + data_size_)))); + } + dist_t lastdist = topResults.top().first; + for (int i = k; i < cur_element_count; i++) { + dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_); + if (dist < lastdist) { + topResults.push(std::pair(dist, *((labeltype *) (data_ + size_per_element_ * i + + data_size_)))); + if (topResults.size() > k) + topResults.pop(); + lastdist = topResults.top().first; + } + + } + return topResults; + }; + }; +} diff --git a/hnswlib/hnswalg.h b/hnswlib/hnswalg.h new file mode 100644 index 00000000..3d03e05f --- /dev/null +++ b/hnswlib/hnswalg.h @@ -0,0 +1,730 @@ +#pragma once + +#include "visited_list_pool.h" +#include "hnswlib.h" +#include +#include +#include +#include +#include +#include +#include +#include +template +void writeBinaryPOD(std::ostream &out, const T &podRef) { + out.write((char *) &podRef, sizeof(T)); +} + +template +static void readBinaryPOD(std::istream &in, T &podRef) { + in.read((char *) &podRef, sizeof(T)); +} + + +namespace hnswlib { + typedef unsigned int tableint; + typedef unsigned int linklistsizeint; + + template + class HierarchicalNSW : public AlgorithmInterface { + public: + + HierarchicalNSW(SpaceInterface *s) { + + } + + HierarchicalNSW(SpaceInterface *s, const string &location, bool nmslib = false) { + loadIndex(location, s); + } + + HierarchicalNSW(SpaceInterface *s, size_t max_elements, size_t M = 16, size_t ef_construction = 200, + bool allow_removal = false) : + link_list_locks_(max_elements), element_levels_(max_elements) { + max_elements_ = max_elements; + + + data_size_ = s->get_data_size(); + fstdistfunc_ = s->get_dist_func(); + dist_func_param_ = s->get_dist_func_param(); + M_ = M; + maxM_ = M_; + maxM0_ = M_ * 2; + ef_construction_ = ef_construction; + ef_ = 7; + + + + size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint); + size_data_per_element_ = size_links_level0_ + data_size_ + sizeof(labeltype); + offsetData_ = size_links_level0_; + label_offset_ = size_links_level0_ + data_size_; + offsetLevel0_ = 0; + + data_level0_memory_ = (char *) malloc(max_elements_ * size_data_per_element_); + if (data_level0_memory_ == nullptr) + throw new std::runtime_error("Not enough memory"); + + cur_element_count = 0; + + visited_list_pool_ = new VisitedListPool(1, max_elements); + + + + //initializations for special treatment of the first node + enterpoint_node_ = -1; + maxlevel_ = -1; + + linkLists_ = (char **) malloc(sizeof(void *) * max_elements_); + size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint); + mult_ = 1 / log(1.0 * M_); + revSize_ = 1.0 / mult_; + } + + struct CompareByFirst { + constexpr bool operator()(pair const &a, + pair const &b) const noexcept { + return a.first < b.first; + } + }; + + ~HierarchicalNSW() { + + free(data_level0_memory_); + for (tableint i = 0; i < cur_element_count; i++) { + if (element_levels_[i] > 0) + free(linkLists_[i]); + } + free(linkLists_); + delete visited_list_pool_; + } + + size_t max_elements_; + size_t cur_element_count; + size_t size_data_per_element_; + size_t size_links_per_element_; + + size_t M_; + size_t maxM_; + size_t maxM0_; + size_t ef_construction_; + + double mult_, revSize_; + int maxlevel_; + + + VisitedListPool *visited_list_pool_; + mutex cur_element_count_guard_; + + vector link_list_locks_; + tableint enterpoint_node_; + + //size_t dist_calc_counter_; + std::atomic dist_calc_counter_; + size_t size_links_level0_; + size_t offsetData_, offsetLevel0_; + + + char *data_level0_memory_; + char **linkLists_; + vector element_levels_; + + + size_t data_size_; + size_t label_offset_; + DISTFUNC fstdistfunc_; + void *dist_func_param_; + + std::default_random_engine level_generator_ = std::default_random_engine(100); + + inline labeltype getExternalLabel(tableint internal_id) { + return *((labeltype *) (data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_)); + } + + inline labeltype *getExternalLabeLp(tableint internal_id) { + return (labeltype *) (data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_); + } + + inline char *getDataByInternalId(tableint internal_id) { + return (data_level0_memory_ + internal_id * size_data_per_element_ + offsetData_); + } + + int getRandomLevel(double reverse_size) { + std::uniform_real_distribution distribution(0.0, 1.0); + double r = -log(distribution(level_generator_)) * reverse_size; + return (int) r; + } + + std::priority_queue, vector>, CompareByFirst> + searchBaseLayer(tableint enterpoint_id, void *data_point, int layer) { + VisitedList *vl = visited_list_pool_->getFreeVisitedList(); + vl_type *visited_array = vl->mass; + vl_type visited_array_tag = vl->curV; + + std::priority_queue, vector>, CompareByFirst> top_candidates; + std::priority_queue, vector>, CompareByFirst> candidateSet; + dist_t dist = fstdistfunc_(data_point, getDataByInternalId(enterpoint_id), dist_func_param_); + + top_candidates.emplace(dist, enterpoint_id); + candidateSet.emplace(-dist, enterpoint_id); + visited_array[enterpoint_id] = visited_array_tag; + dist_t lowerBound = dist; + + while (!candidateSet.empty()) { + + std::pair curr_el_pair = candidateSet.top(); + + if ((-curr_el_pair.first) > lowerBound) { + break; + } + candidateSet.pop(); + + tableint curNodeNum = curr_el_pair.second; + + unique_lock lock(link_list_locks_[curNodeNum]); + + int *data;// = (int *)(linkList0_ + curNodeNum * size_links_per_element0_); + if (layer == 0) + data = (int *) (data_level0_memory_ + curNodeNum * size_data_per_element_ + offsetLevel0_); + else + data = (int *) (linkLists_[curNodeNum] + (layer - 1) * size_links_per_element_); + int size = *data; + tableint *datal = (tableint *) (data + 1); + _mm_prefetch((char *) (visited_array + *(data + 1)), _MM_HINT_T0); + _mm_prefetch((char *) (visited_array + *(data + 1) + 64), _MM_HINT_T0); + _mm_prefetch(getDataByInternalId(*datal), _MM_HINT_T0); + _mm_prefetch(getDataByInternalId(*(datal + 1)), _MM_HINT_T0); + + for (int j = 0; j < size; j++) { + tableint candidate_id = *(datal + j); + _mm_prefetch((char *) (visited_array + *(datal + j + 1)), _MM_HINT_T0); + _mm_prefetch(getDataByInternalId(*(datal + j + 1)), _MM_HINT_T0); + if (visited_array[candidate_id] == visited_array_tag) continue; + visited_array[candidate_id] = visited_array_tag; + char *currObj1 = (getDataByInternalId(candidate_id)); + + dist_t dist1 = fstdistfunc_(data_point, currObj1, dist_func_param_); + if (top_candidates.top().first > dist1 || top_candidates.size() < ef_construction_) { + candidateSet.emplace(-dist1, candidate_id); + _mm_prefetch(getDataByInternalId(candidateSet.top().second), _MM_HINT_T0); + top_candidates.emplace(dist1, candidate_id); + if (top_candidates.size() > ef_construction_) { + top_candidates.pop(); + } + lowerBound = top_candidates.top().first; + } + } + } + visited_list_pool_->releaseVisitedList(vl); + + return top_candidates; + } + + std::priority_queue, vector>, CompareByFirst> + searchBaseLayerST(tableint ep_id, void *data_point, size_t ef) { + VisitedList *vl = visited_list_pool_->getFreeVisitedList(); + vl_type *visited_array = vl->mass; + vl_type visited_array_tag = vl->curV; + + std::priority_queue, vector>, CompareByFirst> top_candidates; + std::priority_queue, vector>, CompareByFirst> candidate_set; + dist_t dist = fstdistfunc_(data_point, getDataByInternalId(ep_id), dist_func_param_); + dist_calc_counter_++; + top_candidates.emplace(dist, ep_id); + candidate_set.emplace(-dist, ep_id); + visited_array[ep_id] = visited_array_tag; + dist_t lower_bound = dist; + + while (!candidate_set.empty()) { + + std::pair current_node_pair = candidate_set.top(); + + if ((-current_node_pair.first) > lower_bound) { + break; + } + candidate_set.pop(); + + tableint current_node_id = current_node_pair.second; + int *data = (int *) (data_level0_memory_ + current_node_id * size_data_per_element_ + offsetLevel0_); + int size = *data; + _mm_prefetch((char *) (visited_array + *(data + 1)), _MM_HINT_T0); + _mm_prefetch((char *) (visited_array + *(data + 1) + 64), _MM_HINT_T0); + _mm_prefetch(data_level0_memory_ + (*(data + 1)) * size_data_per_element_ + offsetData_, _MM_HINT_T0); + _mm_prefetch((char *) (data + 2), _MM_HINT_T0); + + for (int j = 1; j <= size; j++) { + int candidate_id = *(data + j); + _mm_prefetch((char *) (visited_array + *(data + j + 1)), _MM_HINT_T0); + _mm_prefetch(data_level0_memory_ + (*(data + j + 1)) * size_data_per_element_ + offsetData_, + _MM_HINT_T0);//////////// + if (!(visited_array[candidate_id] == visited_array_tag)) { + + visited_array[candidate_id] = visited_array_tag; + + char *currObj1 = (getDataByInternalId(candidate_id)); + dist_t dist = fstdistfunc_(data_point, currObj1, dist_func_param_); + dist_calc_counter_++; + if (top_candidates.top().first > dist || top_candidates.size() < ef) { + candidate_set.emplace(-dist, candidate_id); + _mm_prefetch(data_level0_memory_ + candidate_set.top().second * size_data_per_element_ + + offsetLevel0_,/////////// + _MM_HINT_T0);//////////////////////// + + top_candidates.emplace(dist, candidate_id); + + if (top_candidates.size() > ef) { + top_candidates.pop(); + } + lower_bound = top_candidates.top().first; + } + } + } + } + + visited_list_pool_->releaseVisitedList(vl); + return top_candidates; + } + + void getNeighborsByHeuristic2( + std::priority_queue, vector>, CompareByFirst> &top_candidates, + const int M) { + if (top_candidates.size() < M) { + return; + } + std::priority_queue> queue_closest; + vector> return_list; + while (top_candidates.size() > 0) { + queue_closest.emplace(-top_candidates.top().first, top_candidates.top().second); + top_candidates.pop(); + } + + while (queue_closest.size()) { + if (return_list.size() >= M) + break; + std::pair curent_pair = queue_closest.top(); + dist_t dist_to_query = -curent_pair.first; + queue_closest.pop(); + bool good = true; + for (std::pair second_pair : return_list) { + dist_t curdist = + fstdistfunc_(getDataByInternalId(second_pair.second), + getDataByInternalId(curent_pair.second), + dist_func_param_);; + if (curdist < dist_to_query) { + good = false; + break; + } + } + if (good) { + return_list.push_back(curent_pair); + } + + + } + + for (std::pair curent_pair : return_list) { + + top_candidates.emplace(-curent_pair.first, curent_pair.second); + } + } + + + linklistsizeint *get_linklist0(tableint internal_id) { + return (linklistsizeint *) (data_level0_memory_ + internal_id * size_data_per_element_ + offsetLevel0_); + }; + + linklistsizeint *get_linklist0(tableint internal_id, char *data_level0_memory_) { + return (linklistsizeint *) (data_level0_memory_ + internal_id * size_data_per_element_ + offsetLevel0_); + }; + + linklistsizeint *get_linklist(tableint internal_id, int level) { + return (linklistsizeint *) (linkLists_[internal_id] + (level - 1) * size_links_per_element_); + }; + + void mutuallyConnectNewElement(void *data_point, tableint cur_c, + std::priority_queue, vector>, CompareByFirst> top_candidates, + int level) { + + size_t Mcurmax = level ? maxM_ : maxM0_; + getNeighborsByHeuristic2(top_candidates, M_); + if (top_candidates.size() > M_) + throw runtime_error("Should be not be more than M_ candidates returned by the heuristic"); + + vector selectedNeighbors; + selectedNeighbors.reserve(M_); + while (top_candidates.size() > 0) { + selectedNeighbors.push_back(top_candidates.top().second); + top_candidates.pop(); + } + { + linklistsizeint *ll_cur; + if (level == 0) + ll_cur = get_linklist0(cur_c); + else + ll_cur = get_linklist(cur_c, level); + + if (*ll_cur) { + cout << *ll_cur << "\n"; + cout << element_levels_[cur_c] << "\n"; + cout << level << "\n"; + throw runtime_error("The newly inserted element should have blank link list"); + } + *ll_cur = selectedNeighbors.size(); + tableint *data = (tableint *) (ll_cur + 1); + + + for (int idx = 0; idx < selectedNeighbors.size(); idx++) { + if (data[idx]) + throw runtime_error("Possible memory corruption"); + if (level > element_levels_[selectedNeighbors[idx]]) + throw runtime_error("Trying to make a link on a non-existent level"); + + data[idx] = selectedNeighbors[idx]; + + } + } + for (int idx = 0; idx < selectedNeighbors.size(); idx++) { + + unique_lock lock(link_list_locks_[selectedNeighbors[idx]]); + + + linklistsizeint *ll_other; + if (level == 0) + ll_other = get_linklist0(selectedNeighbors[idx]); + else + ll_other = get_linklist(selectedNeighbors[idx], level); + int sz_link_list_other = *ll_other; + + + if (sz_link_list_other > Mcurmax || sz_link_list_other < 0) + throw runtime_error("Bad value of sz_link_list_other"); + if (selectedNeighbors[idx] == cur_c) + throw runtime_error("Trying to connect an element to itself"); + if (level > element_levels_[selectedNeighbors[idx]]) + throw runtime_error("Trying to make a link on a non-existent level"); + + tableint *data = (tableint *) (ll_other + 1); + if (sz_link_list_other < Mcurmax) { + data[sz_link_list_other] = cur_c; + *ll_other = sz_link_list_other + 1; +// cout<<"BB"; + } else { + // finding the "weakest" element to replace it with the new one + dist_t d_max = fstdistfunc_(getDataByInternalId(cur_c), getDataByInternalId(selectedNeighbors[idx]), + dist_func_param_); + // Heuristic: + std::priority_queue, vector>, CompareByFirst> candidates; + candidates.emplace(d_max, cur_c); + + for (int j = 0; j < sz_link_list_other; j++) { + candidates.emplace( + fstdistfunc_(getDataByInternalId(data[j]), getDataByInternalId(selectedNeighbors[idx]), + dist_func_param_), data[j]); + } + + getNeighborsByHeuristic2(candidates, Mcurmax); + + int indx = 0; + while (candidates.size() > 0) { + data[indx] = candidates.top().second; + candidates.pop(); + indx++; + } + *ll_other = indx; + // Nearest K: + /*int indx = -1; + for (int j = 0; j < sz_link_list_other; j++) { + dist_t d = fstdistfunc_(getDataByInternalId(data[j]), getDataByInternalId(rez[idx]), dist_func_param_); + if (d > d_max) { + indx = j; + d_max = d; + } + } + if (indx >= 0) { + data[indx] = cur_c; + } */ + } + + } + } + + mutex global; + size_t ef_; + + void setEf(size_t ef) { + ef_ = ef; + } + + + std::priority_queue> searchKnnInternal(void *query_data, int k) { + tableint currObj = enterpoint_node_; + dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); + dist_calc_counter_++; + for (int level = maxlevel_; level > 0; level--) { + bool changed = true; + while (changed) { + changed = false; + int *data; + data = (int *) (linkLists_[currObj] + (level - 1) * size_links_per_element_); + int size = *data; + tableint *datal = (tableint *) (data + 1); + for (int i = 0; i < size; i++) { + tableint cand = datal[i]; + if (cand < 0 || cand > max_elements_) + throw runtime_error("cand error"); + dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); + dist_calc_counter_++; + if (d < curdist) { + curdist = d; + currObj = cand; + changed = true; + } + } + } + } + + //std::priority_queue< std::pair< dist_t, tableint >> top_candidates = searchBaseLayer(currObj, query_data, 0); + std::priority_queue> top_candidates = searchBaseLayerST(currObj, query_data, + ef_); + while (top_candidates.size() > k) { + top_candidates.pop(); + } + return top_candidates; + }; + + void saveIndex(const string &location) { + + cout << "Saving index to " << location.c_str() << "\n"; + std::ofstream output(location, std::ios::binary); + streampos position; + + writeBinaryPOD(output, offsetLevel0_); + writeBinaryPOD(output, max_elements_); + writeBinaryPOD(output, cur_element_count); + writeBinaryPOD(output, size_data_per_element_); + writeBinaryPOD(output, label_offset_); + writeBinaryPOD(output, offsetData_); + writeBinaryPOD(output, maxlevel_); + writeBinaryPOD(output, enterpoint_node_); + writeBinaryPOD(output, maxM_); + + writeBinaryPOD(output, maxM0_); + writeBinaryPOD(output, M_); + writeBinaryPOD(output, mult_); + writeBinaryPOD(output, ef_construction_); + + output.write(data_level0_memory_, max_elements_ * size_data_per_element_); + + for (size_t i = 0; i < max_elements_; i++) { + unsigned int linkListSize = element_levels_[i] > 0 ? size_links_per_element_ * element_levels_[i] : 0; + writeBinaryPOD(output, linkListSize); + if (linkListSize) + output.write(linkLists_[i], linkListSize); + } + output.close(); + } + + void loadIndex(const string &location, SpaceInterface *s) { + + + //cout << "Loading index from " << location; + std::ifstream input(location, std::ios::binary); + streampos position; + + readBinaryPOD(input, offsetLevel0_); + readBinaryPOD(input, max_elements_); + readBinaryPOD(input, cur_element_count); + readBinaryPOD(input, size_data_per_element_); + readBinaryPOD(input, label_offset_); + readBinaryPOD(input, offsetData_); + readBinaryPOD(input, maxlevel_); + readBinaryPOD(input, enterpoint_node_); + + readBinaryPOD(input, maxM_); + readBinaryPOD(input, maxM0_); + readBinaryPOD(input, M_); + readBinaryPOD(input, mult_); + readBinaryPOD(input, ef_construction_); + cout << ef_construction_ << "\n"; + + + data_size_ = s->get_data_size(); + fstdistfunc_ = s->get_dist_func(); + dist_func_param_ = s->get_dist_func_param(); + + data_level0_memory_ = (char *) malloc(max_elements_ * size_data_per_element_); + input.read(data_level0_memory_, max_elements_ * size_data_per_element_); + + + size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint); + + visited_list_pool_ = new VisitedListPool(1, max_elements_); + + + linkLists_ = (char **) malloc(sizeof(void *) * max_elements_); + cout << max_elements_ << "\n"; + element_levels_ = vector(max_elements_); + revSize_ = 1.0 / mult_; + ef_ = 10; + for (size_t i = 0; i < max_elements_; i++) { + unsigned int linkListSize; + readBinaryPOD(input, linkListSize); + if (linkListSize == 0) { + element_levels_[i] = 0; + + linkLists_[i] = nullptr; + } else { + element_levels_[i] = linkListSize / size_links_per_element_; + linkLists_[i] = (char *) malloc(linkListSize); + input.read(linkLists_[i], linkListSize); + } + } + + + input.close(); + size_t predicted_size_per_element = size_data_per_element_ + sizeof(void *) + 8 + 8 + 2 * 8; + cout << "Loaded index, predicted size=" << max_elements_ * (predicted_size_per_element) / (1000 * 1000) + << "\n"; + return; + } + + tableint addPoint(void *data_point, labeltype label, int level = -1) { + + tableint cur_c = 0; + { + unique_lock lock(cur_element_count_guard_); + if (cur_element_count >= max_elements_) { + cout << "The number of elements exceeds the specified limit\n"; + throw runtime_error("The number of elements exceeds the specified limit"); + }; + cur_c = cur_element_count; + cur_element_count++; + } + unique_lock lock_el(link_list_locks_[cur_c]); + int curlevel = getRandomLevel(mult_); + if (level > 0) + curlevel = level; + + element_levels_[cur_c] = curlevel; + + + unique_lock templock(global); + int maxlevelcopy = maxlevel_; + if (curlevel <= maxlevelcopy) + templock.unlock(); + tableint currObj = enterpoint_node_; + + + memset(data_level0_memory_ + cur_c * size_data_per_element_ + offsetLevel0_, 0, size_data_per_element_); + + // Initialisation of the data and label + memcpy(getExternalLabeLp(cur_c), &label, sizeof(labeltype)); + memcpy(getDataByInternalId(cur_c), data_point, data_size_); + + + if (curlevel) { + linkLists_[cur_c] = (char *) malloc(size_links_per_element_ * curlevel); + memset(linkLists_[cur_c], 0, size_links_per_element_ * curlevel); + } + if (currObj != -1) { + + + if (curlevel < maxlevelcopy) { + + dist_t curdist = fstdistfunc_(data_point, getDataByInternalId(currObj), dist_func_param_); + for (int level = maxlevelcopy; level > curlevel; level--) { + + + bool changed = true; + while (changed) { + changed = false; + int *data; + unique_lock lock(link_list_locks_[currObj]); + data = (int *) (linkLists_[currObj] + (level - 1) * size_links_per_element_); + int size = *data; + tableint *datal = (tableint *) (data + 1); + for (int i = 0; i < size; i++) { + tableint cand = datal[i]; + if (cand < 0 || cand > max_elements_) + throw runtime_error("cand error"); + dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), dist_func_param_); + if (d < curdist) { + curdist = d; + currObj = cand; + changed = true; + } + } + } + } + } + + for (int level = min(curlevel, maxlevelcopy); level >= 0; level--) { + if (level > maxlevelcopy || level < 0) + throw runtime_error("Level error"); + + std::priority_queue, vector>, CompareByFirst> top_candidates = searchBaseLayer( + currObj, data_point, level); + mutuallyConnectNewElement(data_point, cur_c, top_candidates, level); + } + + + } else { + // Do nothing for the first element + enterpoint_node_ = 0; + maxlevel_ = curlevel; + + } + + //Releasing lock for the maximum level + if (curlevel > maxlevelcopy) { + enterpoint_node_ = cur_c; + maxlevel_ = curlevel; + } + return cur_c; + }; + + std::priority_queue> searchKnn(void *query_data, int k) { + tableint currObj = enterpoint_node_; + dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); + dist_calc_counter_++; + for (int level = maxlevel_; level > 0; level--) { + bool changed = true; + while (changed) { + changed = false; + int *data; + data = (int *) (linkLists_[currObj] + (level - 1) * size_links_per_element_); + int size = *data; + tableint *datal = (tableint *) (data + 1); + for (int i = 0; i < size; i++) { + tableint cand = datal[i]; + if (cand < 0 || cand > max_elements_) + throw runtime_error("cand error"); + dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); + dist_calc_counter_++; + if (d < curdist) { + curdist = d; + currObj = cand; + changed = true; + } + } + } + } + + + std::priority_queue, vector>, CompareByFirst> top_candidates = searchBaseLayerST( + currObj, query_data, ef_); + std::priority_queue> results; + while (top_candidates.size() > k) { + top_candidates.pop(); + } + while (top_candidates.size() > 0) { + std::pair rez = top_candidates.top(); + results.push(std::pair(rez.first, getExternalLabel(rez.second))); + top_candidates.pop(); + } + return results; + }; + + + }; + +} diff --git a/hnswlib/hnswlib.h b/hnswlib/hnswlib.h new file mode 100644 index 00000000..23430145 --- /dev/null +++ b/hnswlib/hnswlib.h @@ -0,0 +1,48 @@ +#pragma once +#ifdef _MSC_VER +#include +#include + +#define __builtin_popcount(t) __popcnt(t) + +#endif + + +#include + +#if defined(__GNUC__) +#define PORTABLE_ALIGN32 __attribute__((aligned(32))) +#else +#define PORTABLE_ALIGN32 __declspec(align(32)) +#endif + +namespace hnswlib { + typedef size_t labeltype; + + template + using DISTFUNC = MTYPE(*)(const void *, const void *, const void *); + + + template + class AlgorithmInterface { + public: + //virtual void addPoint(void *, labeltype) = 0; + virtual std::priority_queue> searchKnn(void *, int) = 0; + }; + + template + class SpaceInterface { + public: + //virtual void search(void *); + virtual size_t get_data_size() = 0; + + virtual DISTFUNC get_dist_func() = 0; + + virtual void *get_dist_func_param() = 0; + + }; +} + +#include "L2space.h" +#include "brutoforce.h" +#include "hnswalg.h" \ No newline at end of file diff --git a/hnswlib/visited_list_pool.h b/hnswlib/visited_list_pool.h new file mode 100644 index 00000000..28a088ab --- /dev/null +++ b/hnswlib/visited_list_pool.h @@ -0,0 +1,79 @@ +#pragma once + +#include +#include + +namespace hnswlib { + typedef unsigned short int vl_type; + + class VisitedList { + public: + vl_type curV; + vl_type *mass; + unsigned int numelements; + + VisitedList(int numelements1) { + curV = -1; + numelements = numelements1; + mass = new vl_type[numelements]; + } + + void reset() { + curV++; + if (curV == 0) { + memset(mass, 0, sizeof(vl_type) * numelements); + curV++; + } + }; + + ~VisitedList() { delete mass; } + }; +/////////////////////////////////////////////////////////// +// +// Class for multi-threaded pool-management of VisitedLists +// +///////////////////////////////////////////////////////// + + class VisitedListPool { + deque pool; + mutex poolguard; + int maxpools; + int numelements; + + public: + VisitedListPool(int initmaxpools, int numelements1) { + numelements = numelements1; + for (int i = 0; i < initmaxpools; i++) + pool.push_front(new VisitedList(numelements)); + } + + VisitedList *getFreeVisitedList() { + VisitedList *rez; + { + unique_lock lock(poolguard); + if (pool.size() > 0) { + rez = pool.front(); + pool.pop_front(); + } else { + rez = new VisitedList(numelements); + } + } + rez->reset(); + return rez; + }; + + void releaseVisitedList(VisitedList *vl) { + unique_lock lock(poolguard); + pool.push_front(vl); + }; + + ~VisitedListPool() { + while (pool.size()) { + VisitedList *rez = pool.front(); + pool.pop_front(); + delete rez; + } + }; + }; +} + diff --git a/main.cpp b/main.cpp index 5e834d21..6c8acc9b 100644 --- a/main.cpp +++ b/main.cpp @@ -1,8 +1,8 @@ -#include + void sift_test1B(); +int main() { + sift_test1B(); -int main() { - sift_test1B(); - return 0; + return 0; }; \ No newline at end of file diff --git a/python_bindings/bindings.cpp b/python_bindings/bindings.cpp new file mode 100644 index 00000000..29bf0d97 --- /dev/null +++ b/python_bindings/bindings.cpp @@ -0,0 +1,194 @@ +#include +#include +#include +#include "../hnswlib/hnswlib.h" +#include + +namespace py = pybind11; + +template +class Index { +public: + Index(const std::string &space_name, const int dim) : + space_name(space_name),dim(dim) { + l2space = new hnswlib::L2Space(dim); + appr_alg = NULL; + ep_added = true; + index_inited = false; + num_threads = omp_get_max_threads(); + } + void init_new_index(const size_t maxElements, const size_t M, const size_t efConstruction){ + cur_l=0; + appr_alg = new hnswlib::HierarchicalNSW(l2space, maxElements, M, efConstruction, false); + index_inited = true; + ep_added = false; + } + void set_ef(size_t ef){ + appr_alg->ef_=ef; + } + + void set_num_threads(int num_threads) { + this->num_threads = num_threads; + omp_set_num_threads(num_threads); + } + void saveIndex(const std::string &path_to_index){ + appr_alg->saveIndex(path_to_index); + } + void loadIndex(const std::string &path_to_index){ + appr_alg = new hnswlib::HierarchicalNSW(l2space, path_to_index); + } + py::object addItems(py::object input, py::object ids_ = py::none()) { + py::array_t < dist_t, py::array::c_style | py::array::forcecast > items(input); + auto buffer = items.request(); + size_t rows = buffer.shape[0], features = buffer.shape[1]; + + std::vector ids; + + if (!ids_.is_none()) { + py::array_t < size_t, py::array::c_style | py::array::forcecast > items(ids_); + auto ids_numpy = items.request(); + std::vector ids1(ids_numpy.shape[0]); + for(size_t i=0;iaddPoint((void *) items.data(0), (size_t) id); + start=1; + ep_added = true; + } + if (num_threads == 1) { + for (size_t row = start; row < rows; row++) { + size_t id = ids.size() ? ids.at(row) : (cur_l++); + data_numpy[row] = appr_alg->addPoint((void *) items.data(row), (size_t) id); + } + } else { +#pragma omp parallel for + for (size_t row = start; row < rows; row++) { + size_t id = ids.size() ? ids.at(row) : (cur_l++); + data_numpy[row] = appr_alg->addPoint((void *) items.data(row), (size_t) id); + } + } + + } + py::capsule free_when_done(data_numpy, [](void *f) { + delete[] f; + }); + + return py::array_t( + {rows}, // shape + {sizeof(hnswlib::tableint)}, // C-style contiguous strides for double + data_numpy, // the data pointer + free_when_done); + + + } + + py::object knnQuery_return_numpy(py::object input, size_t k = 1) { + + py::array_t < dist_t, py::array::c_style | py::array::forcecast > items(input); + auto buffer = items.request(); + hnswlib::labeltype *data_numpy_l; + dist_t *data_numpy_d; + size_t rows, features; + { + //py::gil_scoped_release l; + + if (buffer.ndim != 2) throw std::runtime_error("data must be a 2d array"); + + rows = buffer.shape[0]; + features = buffer.shape[1]; + + + data_numpy_l = new hnswlib::labeltype[rows * k]; + data_numpy_d = new dist_t[rows * k]; + +#pragma omp parallel for + for (size_t row = 0; row < rows; row++) { + std::priority_queue> result = appr_alg->searchKnn( + (void *) items.data(row), k); + if (result.size() != k) + std::runtime_error( + "Cannot return the results in a contigious 2D array. Probably ef or M is to small"); + for (int i = 0; i < k; i++) { + auto &result_tuple = result.top(); + data_numpy_d[row * k + i] = result_tuple.first; + data_numpy_l[row * k + i] = result_tuple.second; + } + } + } + py::capsule free_when_done_l(data_numpy_l, [](void *f) { + delete[] f; + }); + py::capsule free_when_done_d(data_numpy_d, [](void *f) { + delete[] f; + }); + + + return py::make_tuple( + py::array_t( + {rows,k}, // shape + {k*sizeof(hnswlib::labeltype),sizeof(hnswlib::labeltype)}, // C-style contiguous strides for double + data_numpy_l, // the data pointer + free_when_done_l), + py::array_t( + {rows,k}, // shape + {k*sizeof(dist_t),sizeof(dist_t)}, // C-style contiguous strides for double + data_numpy_d, // the data pointer + free_when_done_d)); + + } + + std::string space_name; + int dim; + + + bool index_inited; + bool ep_added; + int num_threads; + hnswlib::labeltype cur_l; + hnswlib::HierarchicalNSW *appr_alg; + hnswlib::L2Space *l2space; + ~Index(){ + delete l2space; + if (appr_alg) + delete appr_alg; + } +}; + +PYBIND11_PLUGIN(hnswlib) { + py::module m("hnswlib"); + + py::class_>(m, "Index") + .def(py::init(),py::arg("space"), py::arg("dim")) + .def("init_index", &Index::init_new_index,py::arg("max_elements"), py::arg("M")=16, py::arg("ef_construction")=200) + .def("knn_query", &Index::knnQuery_return_numpy,py::arg("data"), py::arg("k")=1) + .def("add_items", &Index::addItems,py::arg("data"), py::arg("ids") = py::none()) + .def("set_ef", &Index::set_ef,py::arg("ef")) + .def("set_num_threads", &Index::set_num_threads, py::arg("num_threads")) + .def("save_index", &Index::saveIndex,py::arg("path_to_index")) + .def("load_index", &Index::loadIndex,py::arg("path_to_index")) + .def("__repr__", + [](const Index &a) { + return ""; + } + ); + return m.ptr(); +} \ No newline at end of file diff --git a/python_bindings/setup.py b/python_bindings/setup.py new file mode 100644 index 00000000..c2f51572 --- /dev/null +++ b/python_bindings/setup.py @@ -0,0 +1,117 @@ +import os +from setuptools import setup, Extension +from setuptools.command.build_ext import build_ext +import sys +import setuptools + +__version__ = '0.2' + + +source_files = ['bindings.cpp'] + +libraries = [] +extra_objects = [] + + +ext_modules = [ + Extension( + 'hnswlib', + source_files, + # include_dirs=[os.path.join(libdir, "include")], + libraries=libraries, + language='c++', + extra_objects=extra_objects, + ), +] + + +# As of Python 3.6, CCompiler has a `has_flag` method. +# cf http://bugs.python.org/issue26689 +def has_flag(compiler, flagname): + """Return a boolean indicating whether a flag name is supported on + the specified compiler. + """ + import tempfile + with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f: + f.write('int main (int argc, char **argv) { return 0; }') + try: + compiler.compile([f.name], extra_postargs=[flagname]) + except setuptools.distutils.errors.CompileError: + return False + return True + + +def cpp_flag(compiler): + """Return the -std=c++[11/14] compiler flag. + The c++14 is prefered over c++11 (when it is available). + """ + if has_flag(compiler, '-std=c++14'): + return '-std=c++14' + elif has_flag(compiler, '-std=c++11'): + return '-std=c++11' + else: + raise RuntimeError('Unsupported compiler -- at least C++11 support ' + 'is needed!') + + +class BuildExt(build_ext): + """A custom build extension for adding compiler-specific options.""" + c_opts = { + 'msvc': ['/EHsc', '/openmp', '/O2'], + 'unix': ['-O3', '-march=native'], # , '-w' + } + link_opts = { + 'unix': [], + 'msvc': [], + } + + if sys.platform == 'darwin': + c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] + link_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] + else: + c_opts['unix'].append("-fopenmp") + link_opts['unix'].extend(['-fopenmp', '-pthread']) + + def build_extensions(self): + ct = self.compiler.compiler_type + opts = self.c_opts.get(ct, []) + if ct == 'unix': + opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version()) + opts.append(cpp_flag(self.compiler)) + if has_flag(self.compiler, '-fvisibility=hidden'): + opts.append('-fvisibility=hidden') + elif ct == 'msvc': + opts.append('/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()) + + # extend include dirs here (don't assume numpy/pybind11 are installed when first run, since + # pip could have installed them as part of executing this script + import pybind11 + import numpy as np + for ext in self.extensions: + ext.extra_compile_args.extend(opts) + ext.extra_link_args.extend(self.link_opts.get(ct, [])) + ext.include_dirs.extend([ + # Path to pybind11 headers + pybind11.get_include(), + pybind11.get_include(True), + + # Path to numpy headers + np.get_include() + ]) + + build_ext.build_extensions(self) + + +setup( + name='hnswlib', + version=__version__, + description='hnswlib', + author='Yury Malkov and others', + url='https://github.com/yurymalkov/hnsw', + long_description="""hnsw""", + ext_modules=ext_modules, + install_requires=['pybind11>=2.0', 'numpy'], + cmdclass={'build_ext': BuildExt}, + test_suite="tests", + zip_safe=False, +) diff --git a/sift_1b.cpp b/sift_1b.cpp index 5b3615f7..2b747fd8 100644 --- a/sift_1b.cpp +++ b/sift_1b.cpp @@ -1,29 +1,30 @@ #include #include -#include -#include #include #include -#include "hnswlib.h" +#include "hnswlib/hnswlib.h" #include + using namespace std; using namespace hnswlib; class StopW { - std::chrono::steady_clock::time_point time_begin; + std::chrono::steady_clock::time_point time_begin; public: - StopW() { - time_begin = std::chrono::steady_clock::now(); - } - float getElapsedTimeMicro() { - std::chrono::steady_clock::time_point time_end = std::chrono::steady_clock::now(); - return (std::chrono::duration_cast(time_end - time_begin).count()); - } - void reset() { - time_begin = std::chrono::steady_clock::now(); - } + StopW() { + time_begin = std::chrono::steady_clock::now(); + } + + float getElapsedTimeMicro() { + std::chrono::steady_clock::time_point time_end = std::chrono::steady_clock::now(); + return (std::chrono::duration_cast(time_end - time_begin).count()); + } + + void reset() { + time_begin = std::chrono::steady_clock::now(); + } }; @@ -41,6 +42,7 @@ class StopW { #include #elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) + #include #include @@ -52,7 +54,6 @@ class StopW { #include #elif defined(__linux__) || defined(__linux) || defined(linux) || defined(__gnu_linux__) -#include #endif @@ -61,178 +62,169 @@ class StopW { #endif - - - /** * Returns the peak (maximum so far) resident set size (physical * memory use) measured in bytes, or zero if the value cannot be * determined on this OS. */ -size_t getPeakRSS() -{ +static size_t getPeakRSS() { #if defined(_WIN32) - /* Windows -------------------------------------------------- */ - PROCESS_MEMORY_COUNTERS info; - GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - return (size_t)info.PeakWorkingSetSize; + /* Windows -------------------------------------------------- */ + PROCESS_MEMORY_COUNTERS info; + GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); + return (size_t)info.PeakWorkingSetSize; #elif (defined(_AIX) || defined(__TOS__AIX__)) || (defined(__sun__) || defined(__sun) || defined(sun) && (defined(__SVR4) || defined(__svr4__))) - /* AIX and Solaris ------------------------------------------ */ - struct psinfo psinfo; - int fd = -1; - if ((fd = open("/proc/self/psinfo", O_RDONLY)) == -1) - return (size_t)0L; /* Can't open? */ - if (read(fd, &psinfo, sizeof(psinfo)) != sizeof(psinfo)) - { - close(fd); - return (size_t)0L; /* Can't read? */ - } - close(fd); - return (size_t)(psinfo.pr_rssize * 1024L); + /* AIX and Solaris ------------------------------------------ */ + struct psinfo psinfo; + int fd = -1; + if ((fd = open("/proc/self/psinfo", O_RDONLY)) == -1) + return (size_t)0L; /* Can't open? */ + if (read(fd, &psinfo, sizeof(psinfo)) != sizeof(psinfo)) + { + close(fd); + return (size_t)0L; /* Can't read? */ + } + close(fd); + return (size_t)(psinfo.pr_rssize * 1024L); #elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) - /* BSD, Linux, and OSX -------------------------------------- */ - struct rusage rusage; - getrusage(RUSAGE_SELF, &rusage); + /* BSD, Linux, and OSX -------------------------------------- */ + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); #if defined(__APPLE__) && defined(__MACH__) - return (size_t)rusage.ru_maxrss; + return (size_t)rusage.ru_maxrss; #else - return (size_t)(rusage.ru_maxrss * 1024L); + return (size_t) (rusage.ru_maxrss * 1024L); #endif #else - /* Unknown OS ----------------------------------------------- */ - return (size_t)0L; /* Unsupported. */ + /* Unknown OS ----------------------------------------------- */ + return (size_t)0L; /* Unsupported. */ #endif } - - - /** * Returns the current resident set size (physical memory use) measured * in bytes, or zero if the value cannot be determined on this OS. */ -size_t getCurrentRSS() -{ +static size_t getCurrentRSS() { #if defined(_WIN32) - /* Windows -------------------------------------------------- */ - PROCESS_MEMORY_COUNTERS info; - GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - return (size_t)info.WorkingSetSize; + /* Windows -------------------------------------------------- */ + PROCESS_MEMORY_COUNTERS info; + GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); + return (size_t)info.WorkingSetSize; #elif defined(__APPLE__) && defined(__MACH__) - /* OSX ------------------------------------------------------ */ - struct mach_task_basic_info info; - mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; - if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, - (task_info_t)&info, &infoCount) != KERN_SUCCESS) - return (size_t)0L; /* Can't access? */ - return (size_t)info.resident_size; + /* OSX ------------------------------------------------------ */ + struct mach_task_basic_info info; + mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, + (task_info_t)&info, &infoCount) != KERN_SUCCESS) + return (size_t)0L; /* Can't access? */ + return (size_t)info.resident_size; #elif defined(__linux__) || defined(__linux) || defined(linux) || defined(__gnu_linux__) - /* Linux ---------------------------------------------------- */ - long rss = 0L; - FILE* fp = NULL; - if ((fp = fopen("/proc/self/statm", "r")) == NULL) - return (size_t)0L; /* Can't open? */ - if (fscanf(fp, "%*s%ld", &rss) != 1) - { - fclose(fp); - return (size_t)0L; /* Can't read? */ - } - fclose(fp); - return (size_t)rss * (size_t)sysconf(_SC_PAGESIZE); + /* Linux ---------------------------------------------------- */ + long rss = 0L; + FILE *fp = NULL; + if ((fp = fopen("/proc/self/statm", "r")) == NULL) + return (size_t) 0L; /* Can't open? */ + if (fscanf(fp, "%*s%ld", &rss) != 1) { + fclose(fp); + return (size_t) 0L; /* Can't read? */ + } + fclose(fp); + return (size_t) rss * (size_t) sysconf(_SC_PAGESIZE); #else - /* AIX, BSD, Solaris, and Unknown OS ------------------------ */ - return (size_t)0L; /* Unsupported. */ + /* AIX, BSD, Solaris, and Unknown OS ------------------------ */ + return (size_t)0L; /* Unsupported. */ #endif } +static void +get_gt(unsigned int *massQA, unsigned char *massQ, unsigned char *mass, size_t vecsize, size_t qsize, L2SpaceI &l2space, + size_t vecdim, vector>> &answers, size_t k) { + (vector>>(qsize)).swap(answers); + DISTFUNC fstdistfunc_ = l2space.get_dist_func(); + cout << qsize << "\n"; + for (int i = 0; i < qsize; i++) { + for (int j = 0; j < k; j++) { + answers[i].emplace(0.0f, massQA[1000 * i + j]); + } + } +} +static float +test_approx(unsigned char *massQ, size_t vecsize, size_t qsize, HierarchicalNSW &appr_alg, size_t vecdim, + vector>> &answers, size_t k) { + size_t correct = 0; + size_t total = 0; + //uncomment to test in parallel mode: + //#pragma omp parallel for + for (int i = 0; i < qsize; i++) { + std::priority_queue> result = appr_alg.searchKnn(massQ + vecdim * i, k); + std::priority_queue> gt(answers[i]); + unordered_set g; + total += gt.size(); + while (gt.size()) { -static void get_gt(unsigned int *massQA, unsigned char *massQ, unsigned char *mass, size_t vecsize, size_t qsize, L2SpaceI &l2space, size_t vecdim, vector>> &answers, size_t k) { - - (vector>>(qsize)).swap(answers); - DISTFUNC fstdistfunc_ = l2space.get_dist_func(); - cout << qsize << "\n"; - for (int i = 0; i < qsize; i++) { - for (int j = 0; j &appr_alg, size_t vecdim, vector>> &answers, size_t k) { - size_t correct = 0; - size_t total = 0; - //uncomment to test in parallel mode: - //#pragma omp parallel for - for (int i = 0; i < qsize; i++) { - - std::priority_queue< std::pair< int, labeltype >> result = appr_alg.searchKnn(massQ + vecdim*i, k); - std::priority_queue< std::pair< int, labeltype >> gt(answers[i]); - unordered_set g; - total += gt.size(); - - while (gt.size()) { - - - g.insert(gt.top().second); - gt.pop(); - } - - while (result.size()) { - if (g.find(result.top().second) != g.end()) { - - correct++; - } - else { - } - result.pop(); - } - - } - return 1.0f*correct / total; + g.insert(gt.top().second); + gt.pop(); + } + + while (result.size()) { + if (g.find(result.top().second) != g.end()) { + + correct++; + } else { + } + result.pop(); + } + + } + return 1.0f * correct / total; } -static void test_vs_recall(unsigned char *massQ, size_t vecsize, size_t qsize, HierarchicalNSW &appr_alg, size_t vecdim, vector>> &answers, size_t k) { - vector efs;// = { 10,10,10,10,10 }; + +static void +test_vs_recall(unsigned char *massQ, size_t vecsize, size_t qsize, HierarchicalNSW &appr_alg, size_t vecdim, + vector>> &answers, size_t k) { + vector efs;// = { 10,10,10,10,10 }; for (int i = k; i < 30; i++) { - efs.push_back(i); - } - for (int i = 30; i < 100; i+=10) { - efs.push_back(i); - } - for (int i = 100; i < 500; i += 40) { - efs.push_back(i); - } - for (size_t ef : efs) - { - appr_alg.setEf(ef); - StopW stopw = StopW(); - appr_alg.dist_calc = 0; - float recall = test_approx(massQ, vecsize, qsize, appr_alg, vecdim, answers, k); - float time_us_per_query = stopw.getElapsedTimeMicro() / qsize; - float avr_dist_count = appr_alg.dist_calc*1.f / qsize; - cout << ef << "\t" << recall << "\t" << time_us_per_query << " us\t" << avr_dist_count << " dcs\n"; - if (recall > 1.0) { - cout << recall << "\t" << time_us_per_query << " us\t" << avr_dist_count << " dcs\n"; - break; - } - } + efs.push_back(i); + } + for (int i = 30; i < 100; i += 10) { + efs.push_back(i); + } + for (int i = 100; i < 500; i += 40) { + efs.push_back(i); + } + for (size_t ef : efs) { + appr_alg.setEf(ef); + StopW stopw = StopW(); + appr_alg.dist_calc_counter_ = 0; + float recall = test_approx(massQ, vecsize, qsize, appr_alg, vecdim, answers, k); + float time_us_per_query = stopw.getElapsedTimeMicro() / qsize; + float avr_dist_count = appr_alg.dist_calc_counter_ * 1.f / qsize; + cout << ef << "\t" << recall << "\t" << time_us_per_query << " us\t" << avr_dist_count << " dcs\n"; + if (recall > 1.0) { + cout << recall << "\t" << time_us_per_query << " us\t" << avr_dist_count << " dcs\n"; + break; + } + } } -inline bool exists_test(const std::string& name) { - ifstream f(name.c_str()); - return f.good(); +inline bool exists_test(const std::string &name) { + ifstream f(name.c_str()); + return f.good(); } @@ -244,133 +236,127 @@ void sift_test1B() { int M = 16; - - size_t vecsize = subset_size_milllions * 1000000; - - size_t qsize = 10000; - size_t vecdim = 128; - char path_index[1024]; - char path_gt[1024]; - char *path_q = "bigann/bigann_query.bvecs"; - char *path_data = "bigann/bigann_base.bvecs"; - sprintf(path_index, "sift1b_%dm_ef_%d_M_%d.bin", subset_size_milllions, efConstruction, M); - - sprintf(path_gt,"bigann/gnd/idx_%dM.ivecs", subset_size_milllions); - - - - unsigned char *massb = new unsigned char[vecdim]; - - cout << "Loading GT:\n"; - ifstream inputGT(path_gt, ios::binary); - unsigned int *massQA = new unsigned int[qsize * 1000]; - for (int i = 0; i < qsize; i++) { - int t; - inputGT.read((char *)&t, 4); - inputGT.read((char *)(massQA + 1000 * i), t * 4); - if (t != 1000) { - cout << "err"; - return; - } - } - - cout << "Loading queries:\n"; - unsigned char *massQ = new unsigned char[qsize * vecdim]; - ifstream inputQ(path_q, ios::binary); - - for (int i = 0; i < qsize; i++) { - int in = 0; - inputQ.read((char *)&in, 4); - if (in != 128) - { - cout << "file error"; - exit(1); - } - inputQ.read((char *)massb, in); - for (int j = 0; j < vecdim; j++) { - massQ[i*vecdim + j] = massb[j]; - } - - } - inputQ.close(); - - - - unsigned char *mass = new unsigned char[vecdim]; - ifstream input(path_data, ios::binary); - int in = 0; - L2SpaceI l2space(vecdim); - - HierarchicalNSW *appr_alg; - if (exists_test(path_index)) { - cout << "Loading index from "<< path_index <<":\n"; - appr_alg=new HierarchicalNSW(&l2space, path_index, false); - cout << "Actual memory usage: " << getCurrentRSS() / 1000000 << " Mb \n"; -} - else { - cout << "Building index:\n"; - appr_alg = new HierarchicalNSW(&l2space, vecsize, M, efConstruction); - - - input.read((char *)&in, 4); - if (in != 128) - { - cout << "file error"; - exit(1); - } - input.read((char *)massb, in); - - for (int j = 0; j < vecdim; j++) { - mass[j] = massb[j] * (1.0f); - } - - appr_alg->addPoint((void *)(massb), (size_t)0); - int j1 = 0; - StopW stopw = StopW(); - StopW stopw_full = StopW(); - size_t report_every = 100000; + size_t vecsize = subset_size_milllions * 1000000; + + size_t qsize = 10000; + size_t vecdim = 128; + char path_index[1024]; + char path_gt[1024]; + char *path_q = "bigann/bigann_query.bvecs"; + char *path_data = "bigann/bigann_base.bvecs"; + sprintf(path_index, "sift1b_%dm_ef_%d_M_%d.bin", subset_size_milllions, efConstruction, M); + + sprintf(path_gt, "/home/yurymal/frameworks/nns/hnsw/bigann/gnd/idx_%dM.ivecs", subset_size_milllions); + + + unsigned char *massb = new unsigned char[vecdim]; + + cout << "Loading GT:\n"; + ifstream inputGT(path_gt, ios::binary); + unsigned int *massQA = new unsigned int[qsize * 1000]; + for (int i = 0; i < qsize; i++) { + int t; + inputGT.read((char *) &t, 4); + inputGT.read((char *) (massQA + 1000 * i), t * 4); + if (t != 1000) { + cout << "err"; + return; + } + } + + cout << "Loading queries:\n"; + unsigned char *massQ = new unsigned char[qsize * vecdim]; + ifstream inputQ(path_q, ios::binary); + + for (int i = 0; i < qsize; i++) { + int in = 0; + inputQ.read((char *) &in, 4); + if (in != 128) { + cout << "file error"; + exit(1); + } + inputQ.read((char *) massb, in); + for (int j = 0; j < vecdim; j++) { + massQ[i * vecdim + j] = massb[j]; + } + + } + inputQ.close(); + + + unsigned char *mass = new unsigned char[vecdim]; + ifstream input(path_data, ios::binary); + int in = 0; + L2SpaceI l2space(vecdim); + + HierarchicalNSW *appr_alg; + if (exists_test(path_index)) { + cout << "Loading index from " << path_index << ":\n"; + appr_alg = new HierarchicalNSW(&l2space, path_index, false); + cout << "Actual memory usage: " << getCurrentRSS() / 1000000 << " Mb \n"; + } else { + cout << "Building index:\n"; + appr_alg = new HierarchicalNSW(&l2space, vecsize, M, efConstruction); + + + input.read((char *) &in, 4); + if (in != 128) { + cout << "file error"; + exit(1); + } + input.read((char *) massb, in); + + for (int j = 0; j < vecdim; j++) { + mass[j] = massb[j] * (1.0f); + } + + appr_alg->addPoint((void *) (massb), (size_t) 0); + int j1 = 0; + StopW stopw = StopW(); + StopW stopw_full = StopW(); + size_t report_every = 100000; #pragma omp parallel for - for (int i = 1; i < vecsize; i++) { - unsigned char mass[128]; + for (int i = 1; i < vecsize; i++) { + unsigned char mass[128]; #pragma omp critical - { - - input.read((char *)&in, 4); - if (in != 128) - { - cout << "file error"; - exit(1); - } - input.read((char *)massb, in); - for (int j = 0; j < vecdim; j++) { - mass[j] = massb[j]; - } - j1++; - if (j1 % report_every == 0) { - cout << j1 / (0.01*vecsize) << " %, " << report_every / (1000.0*1e-6*stopw.getElapsedTimeMicro()) << " kips " << " Mem: " << getCurrentRSS() / 1000000 << " Mb \n"; - stopw.reset(); - } - } - appr_alg->addPoint((void *)(mass), (size_t)j1); - - - - } - input.close(); - cout << "Build time:" << 1e-6*stopw_full.getElapsedTimeMicro() << " seconds\n"; - appr_alg->SaveIndex(path_index); - } - - - vector>> answers; - size_t k = 1; - cout << "Parsing gt:\n"; - get_gt(massQA, massQ, mass, vecsize, qsize, l2space, vecdim, answers, k); - cout << "Loaded gt\n"; - for (int i = 0; i < 1; i++) - test_vs_recall(massQ, vecsize, qsize, *appr_alg, vecdim, answers, k); - cout << "Actual memory usage: " << getCurrentRSS() / 1000000 << " Mb \n"; - return; + { + + input.read((char *) &in, 4); + if (in != 128) { + cout << "file error"; + exit(1); + } + input.read((char *) massb, in); + for (int j = 0; j < vecdim; j++) { + mass[j] = massb[j]; + } + j1++; + if (j1 % report_every == 0) { + cout << j1 / (0.01 * vecsize) << " %, " + << report_every / (1000.0 * 1e-6 * stopw.getElapsedTimeMicro()) << " kips " << " Mem: " + << getCurrentRSS() / 1000000 << " Mb \n"; + stopw.reset(); + } + } + appr_alg->addPoint((void *) (mass), (size_t) j1); + + + } + input.close(); + cout << "Build time:" << 1e-6 * stopw_full.getElapsedTimeMicro() << " seconds\n"; + appr_alg->saveIndex(path_index); + } + + + vector>> answers; + size_t k = 1; + cout << "Parsing gt:\n"; + get_gt(massQA, massQ, mass, vecsize, qsize, l2space, vecdim, answers, k); + cout << "Loaded gt\n"; + for (int i = 0; i < 1; i++) + test_vs_recall(massQ, vecsize, qsize, *appr_alg, vecdim, answers, k); + cout << "Actual memory usage: " << getCurrentRSS() / 1000000 << " Mb \n"; + return; } diff --git a/sift_test.cpp b/sift_test.cpp index 20dd67a7..72d5e073 100644 --- a/sift_test.cpp +++ b/sift_test.cpp @@ -1,15 +1,15 @@ #include #include -#include -#include #include #include -#include "hnswlib.h" +#include "hnswlib/hnswlib.h" #include + using namespace std; using namespace hnswlib; + /* template void writeBinaryPOD(ostream& out, const T& podRef) { @@ -26,49 +26,59 @@ class StopW { StopW() { time_begin = std::chrono::steady_clock::now(); } + float getElapsedTimeMicro() { std::chrono::steady_clock::time_point time_end = std::chrono::steady_clock::now(); return (std::chrono::duration_cast(time_end - time_begin).count()); } + void reset() { time_begin = std::chrono::steady_clock::now(); } }; -void get_gt(float *mass, float *massQ, size_t vecsize, size_t qsize, L2Space &l2space, size_t vecdim, vector>> &answers, size_t k) { - BruteforceSearch bs(&l2space, vecsize); + +void get_gt(float *mass, float *massQ, size_t vecsize, size_t qsize, L2Space &l2space, size_t vecdim, + vector>> &answers, size_t k) { + BruteforceSearch bs(&l2space, vecsize); for (int i = 0; i < vecsize; i++) { - bs.addPoint((void *)(mass + vecdim*i), (size_t)i); + bs.addPoint((void *) (mass + vecdim * i), (size_t) i); } - (vector>>(qsize)).swap(answers); + (vector>>(qsize)).swap(answers); //answers.swap(vector>>(qsize)); for (int i = 0; i < qsize; i++) { - std::priority_queue< std::pair< float, labeltype >> gt = bs.searchKnn(massQ + vecdim*i, 10); + std::priority_queue> gt = bs.searchKnn(massQ + vecdim * i, 10); answers[i] = gt; } } -void get_gt(unsigned int *massQA, float *massQ, float *mass, size_t vecsize, size_t qsize, L2Space &l2space, size_t vecdim, vector>> &answers, size_t k) { - + +void +get_gt(unsigned int *massQA, float *massQ, float *mass, size_t vecsize, size_t qsize, L2Space &l2space, size_t vecdim, + vector>> &answers, size_t k) { + //answers.swap(vector>>(qsize)); - (vector>>(qsize)).swap(answers); + (vector>>(qsize)).swap(answers); DISTFUNC fstdistfunc_ = l2space.get_dist_func(); - cout << qsize<<"\n"; + cout << qsize << "\n"; for (int i = 0; i < qsize; i++) { - for (int j = 0; j &appr_alg, size_t vecdim, vector>> &answers, size_t k) { + +float test_approx(float *massQ, size_t vecsize, size_t qsize, HierarchicalNSW &appr_alg, size_t vecdim, + vector>> &answers, size_t k) { size_t correct = 0; size_t total = 0; //#pragma omp parallel for for (int i = 0; i < qsize; i++) { - - std::priority_queue< std::pair< float, labeltype >> result = appr_alg.searchKnn(massQ + vecdim*i, 10); - std::priority_queue< std::pair< float, labeltype >> gt(answers[i]); - unordered_set g; + + std::priority_queue> result = appr_alg.searchKnn(massQ + vecdim * i, 10); + std::priority_queue> gt(answers[i]); + unordered_set g; total += gt.size(); while (gt.size()) { g.insert(gt.top().second); @@ -80,9 +90,11 @@ float test_approx(float *massQ, size_t vecsize, size_t qsize, HierarchicalNSW &appr_alg, size_t vecdim, vector>> &answers,size_t k) { + +void test_vs_recall(float *massQ, size_t vecsize, size_t qsize, HierarchicalNSW &appr_alg, size_t vecdim, + vector>> &answers, size_t k) { //vector efs = { 1,2,3,4,6,8,12,16,24,32,64,128,256,320 };// = ; { 23 }; vector efs; for (int i = 10; i < 30; i++) { @@ -94,17 +106,16 @@ void test_vs_recall(float *massQ, size_t vecsize, size_t qsize, HierarchicalNSW< /*for (int i = 300; i <600; i += 20) { efs.push_back(i); }*/ - for (size_t ef : efs) - { + for (size_t ef : efs) { appr_alg.setEf(ef); StopW stopw = StopW(); - appr_alg.dist_calc = 0; - float recall = test_approx(massQ, vecsize, qsize, appr_alg, vecdim, answers,k); - float time_us_per_query = stopw.getElapsedTimeMicro() / qsize; - float avr_dist_count = appr_alg.dist_calc*1.f / qsize; - cout << ef<<"\t"< 1.0) { - cout << recall << "\t" << time_us_per_query<<" us\t"<< avr_dist_count <<" dcs\n"; + cout << recall << "\t" << time_us_per_query << " us\t" << avr_dist_count << " dcs\n"; break; } } @@ -147,21 +158,21 @@ void sift_test() { float *mass = new float[vecsize * vecdim]; ifstream input("../../sift100k.bin", ios::binary); //ifstream input("../../1M_d=4.bin", ios::binary); - input.read((char *)mass, vecsize * vecdim * sizeof(float)); + input.read((char *) mass, vecsize * vecdim * sizeof(float)); input.close(); float *massQ = new float[qsize * vecdim]; //ifstream inputQ("../siftQ100k.bin", ios::binary); ifstream inputQ("../../siftQ100k.bin", ios::binary); //ifstream inputQ("../../1M_d=4q.bin", ios::binary); - inputQ.read((char *)massQ, qsize * vecdim * sizeof(float)); + inputQ.read((char *) massQ, qsize * vecdim * sizeof(float)); inputQ.close(); unsigned int *massQA = new unsigned int[qsize * 100]; //ifstream inputQA("../knnQA100k.bin", ios::binary); ifstream inputQA("../../knnQA100k.bin", ios::binary); //ifstream inputQA("../../1M_d=4qa.bin", ios::binary); - inputQA.read((char *)massQA, qsize * 100 * sizeof(int)); + inputQA.read((char *) massQA, qsize * 100 * sizeof(int)); inputQA.close(); int maxn = 16; @@ -175,58 +186,58 @@ void sift_test() { //for(int tr=1;tr<9;tr++) //#define LOAD_I #ifdef LOAD_I - + HierarchicalNSW appr_alg(&l2space, "hnswlib_sift",false); //HierarchicalNSW appr_alg(&l2space, "D:/stuff/hnsw_lib/nmslib/similarity_search/release/temp",true); //HierarchicalNSW appr_alg(&l2space, "/mnt/d/stuff/hnsw_lib/nmslib/similarity_search/release/temp", true); - //appr_alg_saved.SaveIndex("d:\\hnsw-index.bin"); - //appr_alg_saved.LoadIndex("d:\\hnsw-index2.bin", &l2space); + //appr_alg_saved.saveIndex("d:\\hnsw-index.bin"); + //appr_alg_saved.loadIndex("d:\\hnsw-index2.bin", &l2space); #else //return; //for (int u = 0; u < 10; u++) { - /* PROCESS_MEMORY_COUNTERS pmc; - - GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)); - SIZE_T virtualMemUsedByMe = pmc.WorkingSetSize; - - cout << virtualMemUsedByMe/1000/1000 << "\n";*/ - //HierarchicalNSW appr_alg(&l2space, vecsize, 6, 40); - HierarchicalNSW appr_alg(&l2space, vecsize, 16, 200); - - cout << "Building index\n"; - StopW stopwb = StopW(); - for (int i = 0; i < 1; i++) { - appr_alg.addPoint((void *)(mass + vecdim*i), (size_t)i); - } + /* PROCESS_MEMORY_COUNTERS pmc; + + GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)); + SIZE_T virtualMemUsedByMe = pmc.WorkingSetSize; + + cout << virtualMemUsedByMe/1000/1000 << "\n";*/ + //HierarchicalNSW appr_alg(&l2space, vecsize, 6, 40); + HierarchicalNSW appr_alg(&l2space, vecsize, 16, 200); + + cout << "Building index\n"; + StopW stopwb = StopW(); + for (int i = 0; i < 1; i++) { + appr_alg.addPoint((void *) (mass + vecdim * i), (size_t) i); + } #pragma omp parallel for - for (int i = 1; i < vecsize; i++) { - appr_alg.addPoint((void *)(mass + vecdim*i), (size_t)i); - } - /*GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)); - virtualMemUsedByMe = pmc.WorkingSetSize; - cout << virtualMemUsedByMe / 1000 / 1000 << "\n";*/ - cout << "Index built, time=" << stopwb.getElapsedTimeMicro()*1e-6 << "\n"; - //appr_alg.SaveIndex("hnswlib_sift"); + for (int i = 1; i < vecsize; i++) { + appr_alg.addPoint((void *) (mass + vecdim * i), (size_t) i); + } + /*GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)); + virtualMemUsedByMe = pmc.WorkingSetSize; + cout << virtualMemUsedByMe / 1000 / 1000 << "\n";*/ + cout << "Index built, time=" << stopwb.getElapsedTimeMicro() * 1e-6 << "\n"; + //appr_alg.saveIndex("hnswlib_sift"); - //appr_alg.SaveIndex("d:\\hnsw-index2.bin"); + //appr_alg.saveIndex("d:\\hnsw-index2.bin"); #endif //get_knn_quality(massA, vecsize, maxn, appr_alg); //return; - vector>> answers; - size_t k = 10; - cout << "Loading gt\n"; - //get_gt(mass, massQ, vecsize, qsize, l2space, vecdim, answers,k); - get_gt(massQA, massQ, mass, vecsize, qsize, l2space, vecdim, answers, k); - cout << "Loaded gt\n"; - for (int i = 0; i < 1; i++) - test_vs_recall(massQ, vecsize, qsize, appr_alg, vecdim, answers, k); - //cout << "opt:\n"; - //appr_alg.opt = true; - + vector>> answers; + size_t k = 10; + cout << "Loading gt\n"; + //get_gt(mass, massQ, vecsize, qsize, l2space, vecdim, answers,k); + get_gt(massQA, massQ, mass, vecsize, qsize, l2space, vecdim, answers, k); + cout << "Loaded gt\n"; + for (int i = 0; i < 1; i++) + test_vs_recall(massQ, vecsize, qsize, appr_alg, vecdim, answers, k); + //cout << "opt:\n"; + //appr_alg.opt = true; + return; //test_approx(mass, massQ, vecsize, qsize, appr_alg, vecdim, answers); // //return; diff --git a/visited_list_pool.h b/visited_list_pool.h deleted file mode 100644 index 1c08cd86..00000000 --- a/visited_list_pool.h +++ /dev/null @@ -1,79 +0,0 @@ -#pragma once -#include -#include - -namespace hnswlib{ -typedef unsigned short int vl_type; -class VisitedList { -public: - vl_type curV; - vl_type *mass; - unsigned int numelements; - - VisitedList(int numelements1) - { - curV = -1; - numelements = numelements1; - mass = new vl_type[numelements]; - } - void reset() - { - curV++; - if (curV == 0) { - memset(mass, 0, sizeof(vl_type) * numelements); - curV++; - } - }; - ~VisitedList() { delete mass; } -}; -/////////////////////////////////////////////////////////// -// -// Class for multi-threaded pool-management of VisitedLists -// -///////////////////////////////////////////////////////// - -class VisitedListPool { - deque pool; - mutex poolguard; - int maxpools; - int numelements; - -public: - VisitedListPool(int initmaxpools, int numelements1) - { - numelements = numelements1; - for (int i = 0; i < initmaxpools; i++) - pool.push_front(new VisitedList(numelements)); - } - VisitedList *getFreeVisitedList() - { - VisitedList *rez; - { - unique_lock lock(poolguard); - if (pool.size() > 0) { - rez = pool.front(); - pool.pop_front(); - } - else { - rez = new VisitedList(numelements); - } - } - rez->reset(); - return rez; - }; - void releaseVisitedList(VisitedList *vl) - { - unique_lock lock(poolguard); - pool.push_front(vl); - }; - ~VisitedListPool() - { - while (pool.size()) { - VisitedList *rez = pool.front(); - pool.pop_front(); - delete rez; - } - }; -}; -} -