Back to home page

EIC code displayed by LXR

 
 

    


Warning, /acts/thirdparty/Annoy/0001-Modify-annoy.patch is written in an unsupported language. File is not indexed.

0001 From 16f4711d53903e2541dd518122d52e11e8569908 Mon Sep 17 00:00:00 2001
0002 From: Jeremy Couthures <jeremy.couthures@lapp.in2p3.fr>
0003 Date: Wed, 10 Jul 2024 10:31:31 +0200
0004 Subject: [PATCH] Add AngularEuclidean metric, get_seed function and
0005  optimizations
0006 
0007 ---
0008  src/annoylib.h | 118 ++++++++++++++++++++++++++++++++++++++-----------
0009  1 file changed, 93 insertions(+), 25 deletions(-)
0010 
0011 diff --git a/src/annoylib.h b/src/annoylib.h
0012 index 492d9ca..5c83601 100644
0013 --- a/src/annoylib.h
0014 +++ b/src/annoylib.h
0015 @@ -12,6 +12,10 @@
0016  // License for the specific language governing permissions and limitations under
0017  // the License.
0018  
0019 +/*
0020 + * Note: This file has been modified to meet the needs of the ACTS projects and
0021 + * differs from the original provided by Spotify AB.
0022 + */
0023  
0024  #ifndef ANNOY_ANNOYLIB_H
0025  #define ANNOY_ANNOYLIB_H
0026 @@ -128,7 +132,11 @@ inline void set_error_from_errno(char **error, const char* msg) {
0027    annoylib_showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
0028    if (error) {
0029      *error = (char *)malloc(256);  // TODO: win doesn't support snprintf
0030 -    snprintf(*error, 255, "%s: %s (%d)", msg, strerror(errno), errno);
0031 +#if defined(_MSC_VER) || defined(__MINGW32__)
0032 +    sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno);
0033 +#else
0034 +    snprintf(*error, 256, "%s: %s (%d)", msg, strerror(errno), errno);
0035 +#endif
0036    }
0037  }
0038  
0039 @@ -201,6 +209,35 @@ inline T euclidean_distance(const T* x, const T* y, int f) {
0040    return d;
0041  }
0042  
0043 +template <typename T>
0044 +inline T angular_euclidean_distance(const T *x, const T *y, int f) {
0045 +  /// First dimension is an angle, so we need to be careful about periodicity.
0046 +  /// Here we assume that the angle is in radians, and that the difference is
0047 +  /// less than 2pi. Other dimensions are regular Euclidean.
0048 +
0049 +  /// Angular part
0050 +  T tmp = std::fabs(*x - *y);
0051 +
0052 +  /// If the difference is larger than pi, then we can get a smaller distance by
0053 +  /// going the other way around the circle.
0054 +  if (tmp > M_PI) {
0055 +    tmp = 2 * M_PI - tmp;
0056 +  }
0057 +
0058 +  T d = tmp * tmp;
0059 +  ++x;
0060 +  ++y;
0061 +
0062 +  /// Euclidean part
0063 +  for (int i = 1; i < f; ++i) {
0064 +    tmp = *x - *y;
0065 +    d += tmp * tmp;
0066 +    ++x;
0067 +    ++y;
0068 +  }
0069 +  return d;
0070 +}
0071 +
0072  #ifdef ANNOYLIB_USE_AVX
0073  // Horizontal single sum of 256bit vector.
0074  inline float hsum256_ps_avx(__m256 v) {
0075 @@ -412,13 +449,13 @@ inline void two_means(const vector<Node*>& nodes, int f, Random& random, bool co
0076  
0077  struct Base {
0078    template<typename T, typename S, typename Node>
0079 -  static inline void preprocess(void* nodes, size_t _s, const S node_count, const int f) {
0080 +  static inline void preprocess(void* /*unused*/, size_t /*unused*/, const S /*unused*/, const int /*unused*/) {
0081      // Override this in specific metric structs below if you need to do any pre-processing
0082      // on the entire set of nodes passed into this index.
0083    }
0084  
0085    template<typename Node>
0086 -  static inline void zero_value(Node* dest) {
0087 +  static inline void zero_value(Node* /*unused*/) {
0088      // Initialize any fields that require sane defaults within this node.
0089    }
0090  
0091 @@ -547,7 +584,7 @@ struct DotProduct : Angular {
0092    }
0093  
0094    template<typename S, typename T>
0095 -  static inline void init_node(Node<S, T>* n, int f) {
0096 +  static inline void init_node(Node<S, T>* /*unused*/, int /*unused*/) {
0097    }
0098  
0099    template<typename T, typename Node>
0100 @@ -671,17 +708,17 @@ struct Hamming : Base {
0101      return dist;
0102    }
0103    template<typename S, typename T>
0104 -  static inline bool margin(const Node<S, T>* n, const T* y, int f) {
0105 +  static inline bool margin(const Node<S, T>* n, const T* y, int /*unused*/) {
0106      static const size_t n_bits = sizeof(T) * 8;
0107      T chunk = n->v[0] / n_bits;
0108      return (y[chunk] & (static_cast<T>(1) << (n_bits - 1 - (n->v[0] % n_bits)))) != 0;
0109    }
0110    template<typename S, typename T, typename Random>
0111 -  static inline bool side(const Node<S, T>* n, const T* y, int f, Random& random) {
0112 +  static inline bool side(const Node<S, T>* n, const T* y, int f, Random& /*unused*/) {
0113      return margin(n, y, f);
0114    }
0115    template<typename S, typename T, typename Random>
0116 -  static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
0117 +  static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t /*unused*/, Random& random, Node<S, T>* n) {
0118      size_t cur_size = 0;
0119      size_t i = 0;
0120      int dim = f * 8 * sizeof(T);
0121 @@ -720,7 +757,7 @@ struct Hamming : Base {
0122      return distance;
0123    }
0124    template<typename S, typename T>
0125 -  static inline void init_node(Node<S, T>* n, int f) {
0126 +  static inline void init_node(Node<S, T>* /*unused*/, int /*unused*/) {
0127    }
0128    static const char* name() {
0129      return "hamming";
0130 @@ -784,7 +821,7 @@ struct Euclidean : Minkowski {
0131      return sqrt(std::max(distance, T(0)));
0132    }
0133    template<typename S, typename T>
0134 -  static inline void init_node(Node<S, T>* n, int f) {
0135 +  static inline void init_node(Node<S, T>* /*unused*/, int /*unused*/) {
0136    }
0137    static const char* name() {
0138      return "euclidean";
0139 @@ -792,6 +829,35 @@ struct Euclidean : Minkowski {
0140  
0141  };
0142  
0143 +struct AngularEuclidean : Minkowski {
0144 +  template <typename S, typename T>
0145 +  static inline T distance(const Node<S, T> *x, const Node<S, T> *y, int f) {
0146 +    return angular_euclidean_distance(x->v, y->v, f);
0147 +  }
0148 +  template <typename S, typename T, typename Random>
0149 +  static inline void create_split(const vector<Node<S, T> *> &nodes, int f,
0150 +                                  size_t s, Random &random,
0151 +                                  Node<S, T> *n) {
0152 +    Node<S, T> *p = (Node<S, T> *)alloca(s);
0153 +    Node<S, T> *q = (Node<S, T> *)alloca(s);
0154 +    two_means<T, Random, AngularEuclidean, Node<S, T>>(nodes, f, random, false,
0155 +                                                       p, q);
0156 +
0157 +    for (int z = 0; z < f; z++)
0158 +      n->v[z] = p->v[z] - q->v[z];
0159 +    Base::normalize<T, Node<S, T>>(n, f);
0160 +    n->a = 0.0;
0161 +    for (int z = 0; z < f; z++)
0162 +      n->a += -n->v[z] * (p->v[z] + q->v[z]) / 2;
0163 +  }
0164 +  template <typename T> static inline T normalized_distance(T distance) {
0165 +    return sqrt(std::max(distance, T(0)));
0166 +  }
0167 +  template <typename S, typename T>
0168 +  static inline void init_node(Node<S, T> * /*unused*/, int /*unused*/) {}
0169 +  static const char *name() { return "angular_euclidean"; }
0170 +};
0171 +
0172  struct Manhattan : Minkowski {
0173    template<typename S, typename T>
0174    static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
0175 @@ -815,7 +881,7 @@ struct Manhattan : Minkowski {
0176      return std::max(distance, T(0));
0177    }
0178    template<typename S, typename T>
0179 -  static inline void init_node(Node<S, T>* n, int f) {
0180 +  static inline void init_node(Node<S, T>* /*unused*/, int /*unused*/) {
0181    }
0182    static const char* name() {
0183      return "manhattan";
0184 @@ -841,6 +907,7 @@ class AnnoyIndexInterface {
0185    virtual void verbose(bool v) = 0;
0186    virtual void get_item(S item, T* v) const = 0;
0187    virtual void set_seed(R q) = 0;
0188 +  virtual R get_seed() const = 0;
0189    virtual bool on_disk_build(const char* filename, char** error=NULL) = 0;
0190  };
0191  
0192 @@ -869,14 +936,14 @@ public:
0193  #endif
0194  
0195  protected:
0196 -  const int _f;
0197 -  size_t _s;
0198 +  const unsigned int _f;
0199 +  size_t _s; // Size of each node
0200    S _n_items;
0201    void* _nodes; // Could either be mmapped, or point to a memory buffer that we reallocate
0202    S _n_nodes;
0203    S _nodes_size;
0204    vector<S> _roots;
0205 -  S _K;
0206 +  S _K; // Max number of descendants to fit into node
0207    R _seed;
0208    bool _loaded;
0209    bool _verbose;
0210 @@ -885,7 +952,7 @@ protected:
0211    bool _built;
0212  public:
0213  
0214 -   AnnoyIndex(int f) : _f(f), _seed(Random::default_seed) {
0215 +   AnnoyIndex(unsigned int f) : _f(f), _seed(Random::default_seed) {
0216      _s = offsetof(Node, v) + _f * sizeof(T); // Size of each node
0217      _verbose = false;
0218      _built = false;
0219 @@ -896,7 +963,7 @@ public:
0220      unload();
0221    }
0222  
0223 -  int get_f() const {
0224 +  unsigned int get_f() const {
0225      return _f;
0226    }
0227  
0228 @@ -911,6 +978,9 @@ public:
0229        return false;
0230      }
0231      _allocate_size(item + 1);
0232 +    if (item >= _n_items)
0233 +      _n_items = item + 1;
0234 +
0235      Node* n = _get(item);
0236  
0237      D::zero_value(n);
0238 @@ -919,14 +989,11 @@ public:
0239      n->children[1] = 0;
0240      n->n_descendants = 1;
0241  
0242 -    for (int z = 0; z < _f; z++)
0243 +    for (unsigned int z = 0; z < _f; z++)
0244        n->v[z] = w[z];
0245  
0246      D::init_node(n, _f);
0247  
0248 -    if (item >= _n_items)
0249 -      _n_items = item + 1;
0250 -
0251      return true;
0252    }
0253      
0254 @@ -976,8 +1043,7 @@ public:
0255      // This way we can load them faster without reading the whole file
0256      _allocate_size(_n_nodes + (S)_roots.size());
0257      for (size_t i = 0; i < _roots.size(); i++)
0258 -      memcpy(_get(_n_nodes + (S)i), _get(_roots[i]), _s);
0259 -    _n_nodes += _roots.size();
0260 +      memcpy(_get(_n_nodes++), _get(_roots[i]), _s);
0261  
0262      if (_verbose) annoylib_showUpdate("has %d nodes\n", _n_nodes);
0263      
0264 @@ -1119,10 +1185,10 @@ public:
0265  
0266      // Find the roots by scanning the end of the file and taking the nodes with most descendants
0267      _roots.clear();
0268 -    S m = -1;
0269 +    S m = (S)-1;
0270      for (S i = _n_nodes - 1; i >= 0; i--) {
0271        S k = _get(i)->n_descendants;
0272 -      if (m == -1 || k == m) {
0273 +      if (m == (S)-1 || k == m) {
0274          _roots.push_back(i);
0275          m = k;
0276        } else {
0277 @@ -1175,6 +1241,8 @@ public:
0278      _seed = seed;
0279    }
0280  
0281 +  R get_seed() const { return _seed; }
0282 +
0283    void thread_build(int q, int thread_idx, ThreadedBuildPolicy& threaded_build_policy) {
0284      // Each thread needs its own seed, otherwise each thread would be building the same tree(s)
0285      Random _random(_seed + thread_idx);
0286 @@ -1332,7 +1400,7 @@ protected:
0287        children_indices[1].clear();
0288  
0289        // Set the vector to 0.0
0290 -      for (int z = 0; z < _f; z++)
0291 +      for (unsigned int z = 0; z < _f; z++)
0292          m->v[z] = 0;
0293  
0294        for (size_t i = 0; i < indices.size(); i++) {
0295 @@ -1425,7 +1493,7 @@ protected:
0296  class AnnoyIndexSingleThreadedBuildPolicy {
0297  public:
0298    template<typename S, typename T, typename D, typename Random>
0299 -  static void build(AnnoyIndex<S, T, D, Random, AnnoyIndexSingleThreadedBuildPolicy>* annoy, int q, int n_threads) {
0300 +  static void build(AnnoyIndex<S, T, D, Random, AnnoyIndexSingleThreadedBuildPolicy>* annoy, int q, int /*unused*/) {
0301      AnnoyIndexSingleThreadedBuildPolicy threaded_build_policy;
0302      annoy->thread_build(q, 0, threaded_build_policy);
0303    }
0304 -- 
0305 2.34.1
0306