File indexing completed on 2025-10-25 08:48:05
0001 
0002 
0003 
0004 
0005 
0006 
0007 
0008 
0009 
0010 
0011 
0012 
0013 
0014 
0015 
0016 
0017 
0018 
0019 
0020 
0021 
0022 
0023 
0024 
0025 
0026 
0027 
0028 
0029 
0030 #ifndef _TEST_H
0031 #define _TEST_H
0032 
0033 
0034 
0035 
0036 
0037 
0038 
0039 
0040 
0041 
0042 #ifdef __cplusplus
0043 extern "C" {
0044 #endif
0045 
0046 #include <stdio.h>
0047 #include <stdint.h>
0048 
/* When building with MSVC, map the `inline` keyword onto the compiler's `__inline`. */
#ifdef _MSC_VER
#define inline __inline
#endif
0052 
0053 
/*
 * Platform helpers for aligned declarations and aligned heap memory.
 *
 * DECLARE_ALIGNED(decl, alignval)
 *     Emit the declaration `decl` with alignment `alignval`: __attribute__((aligned)) on
 *     unix/Apple and MinGW, __declspec(align) on the remaining (non-unix, non-MinGW) branch.
 * aligned_free(x)
 *     free(x) on unix/Apple, _aligned_free(x) otherwise.
 * posix_memalign(p, algn, len)
 *     Defined only on the non-unix branches, on top of _aligned_malloc(): the returned pointer
 *     is stored through `p`, and the expression is non-zero when that pointer is NULL and zero
 *     otherwise.
 * __forceinline
 *     Defined as `static inline` on unix/Apple only.
 *
 * NOTE(review): free() is declared in <stdlib.h>, which is not among the includes visible in
 * this header - confirm that every includer provides it.
 */
#if defined __unix__ || defined __APPLE__
#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
#define __forceinline                   static inline
#define aligned_free(x)                 free(x)
#else
#ifdef __MINGW32__
#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
#define posix_memalign(p, algn, len)                                                               \
        (NULL == (*((char **) (p)) = (void *) _aligned_malloc(len, algn)))
#define aligned_free(x) _aligned_free(x)
#else
#define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
#define posix_memalign(p, algn, len)                                                               \
        (NULL == (*((char **) (p)) = (void *) _aligned_malloc(len, algn)))
#define aligned_free(x) _aligned_free(x)
#endif
#endif
0071 
/*
 * DEBUG_PRINT(x) - debug-only printf.
 *
 * With DEBUG defined this expands to `printf x`, so `x` must be a complete parenthesized
 * argument list, e.g. DEBUG_PRINT(("value = %d\n", v));
 * Without DEBUG it expands to an empty do/while(0) statement and `x` is discarded.
 */
#ifdef DEBUG
#define DEBUG_PRINT(x) printf x
#else
#define DEBUG_PRINT(x)                                                                             \
        do {                                                                                       \
        } while (0)
#endif
0079 
0080 
0081 
/*
 * Benchmark mode selection. BENCHMARK_TYPE defaults to BENCHMARK_MIN_TIME unless it was defined
 * before this point.
 *
 * PERFORMANCE_TEST compares BENCHMARK_TYPE against BENCHMARK_MIN_TIME: only in that mode does it
 * run a second timed pass when the first pass ended short of the requested run time.
 */
#define BENCHMARK_MIN_TIME    0
#define BENCHMARK_APPROX_TIME 1
#ifndef BENCHMARK_TYPE
#define BENCHMARK_TYPE BENCHMARK_MIN_TIME
#endif
0087 
/*
 * Timer backend selection.
 *
 * With USE_RDTSC defined, <inttypes.h> and <x86intrin.h> are included and USE_CYCLES is defined;
 * otherwise <time.h> is included and USE_SECONDS is defined.
 */
#ifdef USE_RDTSC




#include <inttypes.h>
#include <x86intrin.h>
#define USE_CYCLES
#else
#include <time.h>
#define USE_SECONDS
#endif
0100 
/*
 * Constants for the USE_RDTSC build.
 *
 * BENCHMARK_TIME defaults to 6 unless already defined.
 * UNIT_SCALE is 10^9: get_time_elapsed() divides the accumulated tick count by it.
 * CALIBRATE_TIME is half of UNIT_SCALE: the elapsed-tick threshold tested by CALIBRATE.
 */
#ifdef USE_RDTSC
#ifndef BENCHMARK_TIME
#define BENCHMARK_TIME 6
#endif
#define GHZ            1000000000
#define UNIT_SCALE     (GHZ)
#define CALIBRATE_TIME (UNIT_SCALE / 2)
/*
 * Return the current counter value read with __rdtscp().
 * The value __rdtscp() writes through its pointer argument is not used.
 */
static inline long long
get_time(void)
{
        unsigned int aux_unused;
        const unsigned long long ticks = __rdtscp(&aux_unused);

        return (long long) ticks;
}
0114 
/* Resolution reported for the USE_RDTSC timer: always 1. */
static inline long long
get_res(void)
{
        const long long one_tick = 1;

        return one_tick;
}
0120 #else
/*
 * Constants for the non-RDTSC build.
 *
 * BENCHMARK_TIME defaults to 3 unless already defined.
 * On MSVC, UNIT_SCALE expands to a get_res() call, so it is evaluated at every use;
 * CALIBRATE_TIME is a quarter of that value.
 */
#ifndef BENCHMARK_TIME
#define BENCHMARK_TIME 3
#endif
#ifdef _MSC_VER
#define UNIT_SCALE     get_res()
#define CALIBRATE_TIME (UNIT_SCALE / 4)
0127 static inline long long
0128 get_time(void)
0129 {
0130         long long ret = 0;
0131         QueryPerformanceCounter(&ret);
0132         return ret;
0133 }
0134 
0135 static inline long long
0136 get_res(void)
0137 {
0138         long long ret = 0;
0139         QueryPerformanceFrequency(&ret);
0140         return ret;
0141 }
0142 #else
/*
 * Constants for the clock_gettime() timer.
 *
 * NANO_SCALE is 10^9, the factor get_time()/get_res() apply to tv_sec before adding tv_nsec.
 * UNIT_SCALE equals NANO_SCALE; CALIBRATE_TIME is a quarter of it.
 * CLOCK_ID is CLOCK_MONOTONIC_PRECISE on FreeBSD and CLOCK_MONOTONIC elsewhere.
 */
#define NANO_SCALE     1000000000
#define UNIT_SCALE     NANO_SCALE
#define CALIBRATE_TIME (UNIT_SCALE / 4)
#ifdef __FreeBSD__
#define CLOCK_ID CLOCK_MONOTONIC_PRECISE
#else
#define CLOCK_ID CLOCK_MONOTONIC
#endif
0151 
0152 static inline long long
0153 get_time(void)
0154 {
0155         struct timespec time;
0156         long long nano_total;
0157         clock_gettime(CLOCK_ID, &time);
0158         nano_total = time.tv_sec;
0159         nano_total *= NANO_SCALE;
0160         nano_total += time.tv_nsec;
0161         return nano_total;
0162 }
0163 
0164 static inline long long
0165 get_res(void)
0166 {
0167         struct timespec time;
0168         long long nano_total;
0169         clock_getres(CLOCK_ID, &time);
0170         nano_total = time.tv_sec;
0171         nano_total *= NANO_SCALE;
0172         nano_total += time.tv_nsec;
0173         return nano_total;
0174 }
0175 #endif
0176 #endif
/*
 * Timing state shared by the perf_* helpers and the CALIBRATE / PERFORMANCE_TEST / BENCHMARK
 * macros. Time values are in the units returned by get_time().
 */
struct perf {
        long long start;      /* get_time() value stored by perf_continue(); perf_pause() moves it to `stop` */
        long long stop;       /* get_time() value stored by the latest perf_pause() */
        long long run_total;  /* sum of (stop - start) over all perf_pause() calls since perf_init() */
        long long iterations; /* iteration count written by the benchmark macros; perf_init() leaves it alone */
};
0183 
0184 static inline void
0185 perf_init(struct perf *p)
0186 {
0187         p->start = 0;
0188         p->stop = 0;
0189         p->run_total = 0;
0190 }
0191 
0192 static inline void
0193 perf_continue(struct perf *p)
0194 {
0195         p->start = get_time();
0196 }
0197 
0198 static inline void
0199 perf_pause(struct perf *p)
0200 {
0201         p->stop = get_time();
0202         p->run_total = p->run_total + p->stop - p->start;
0203         p->start = p->stop;
0204 }
0205 
/* Start a fresh measurement: clear the timing state of `p`, then begin a timed interval. */
static inline void
perf_start(struct perf *p)
{
        perf_init(p);     /* zero start, stop and run_total */
        perf_continue(p); /* stamp the start time */
}
0212 
/* Finish a measurement. Currently the same operation as perf_pause(). */
static inline void
perf_stop(struct perf *p)
{
        perf_pause(p); /* fold the last interval into run_total */
}
0218 
0219 static inline double
0220 get_time_elapsed(struct perf *p)
0221 {
0222         return 1.0 * p->run_total / UNIT_SCALE;
0223 }
0224 
0225 static inline long long
0226 get_base_elapsed(struct perf *p)
0227 {
0228         return p->run_total;
0229 }
0230 
/*
 * Estimate an iteration count from a previous measurement.
 *
 * Computes ceil((total * runs) / elapsed), where `elapsed` is the run time accumulated in `p`.
 * When that run time is not positive, get_res() is used as the divisor instead.
 *
 * p     - measurement whose accumulated run time is the divisor
 * runs  - iteration count multiplied into `total`
 * total - amount of time (in get_time() units) being scaled
 */
static inline unsigned long long
estimate_perf_iterations(struct perf *p, unsigned long long runs, unsigned long long total)
{
        const long long elapsed = get_base_elapsed(p);
        const unsigned long long scaled = total * runs;
        const unsigned long long divisor =
                (elapsed > 0) ? (unsigned long long) elapsed : (unsigned long long) get_res();

        /* Round the quotient up */
        return (scaled + divisor - 1) / divisor;
}
0240 
/*
 * CALIBRATE(PERF, FUNC_CALL) - find an iteration count whose timed run reaches CALIBRATE_TIME.
 *
 * FUNC_CALL is timed once. Then, while the run time recorded in PERF is below CALIBRATE_TIME,
 * a new iteration count is estimated (aiming at 2 * CALIBRATE_TIME) and FUNC_CALL is timed
 * again for that many iterations. The last count used is stored in (PERF)->iterations.
 *
 * PERF is a `struct perf *`. FUNC_CALL is pasted as a statement and executed repeatedly.
 * The locals _i and _iter live in the macro's own brace block.
 */
#define CALIBRATE(PERF, FUNC_CALL)                                                                 \
        {                                                                                          \
                unsigned long long _i, _iter = 1;                                                  \
                perf_start(PERF);                                                                  \
                FUNC_CALL;                                                                         \
                perf_pause(PERF);                                                                  \
                                                                                                   \
                while (get_base_elapsed(PERF) < CALIBRATE_TIME) {                                  \
                        _iter = estimate_perf_iterations(PERF, _iter, 2 * CALIBRATE_TIME);         \
                        perf_start(PERF);                                                          \
                        for (_i = 0; _i < _iter; _i++) {                                           \
                                FUNC_CALL;                                                         \
                        }                                                                          \
                        perf_stop(PERF);                                                           \
                }                                                                                  \
                (PERF)->iterations = _iter;                                                        \
        }
0258 
/*
 * PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL) - time FUNC_CALL for about RUN_TIME * UNIT_SCALE
 * timer units.
 *
 * On entry PERF must already hold an iteration count and its run time (for example as left by
 * CALIBRATE); they are used to estimate how many iterations fill the requested time.
 * (PERF)->iterations is then reset and FUNC_CALL is timed for that many iterations.
 *
 * If the first pass ended short of the requested time and BENCHMARK_TYPE is BENCHMARK_MIN_TIME,
 * a second pass is sized for the missing time plus UNIT_SCALE / 16. It is started with
 * perf_continue(), so its time is added to the first pass rather than replacing it.
 *
 * On exit (PERF)->iterations is the total number of FUNC_CALL executions that were timed.
 * FUNC_CALL is pasted as a statement; _i, _iter and _run_total are local to the macro's block.
 */
#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL)                                                \
        {                                                                                          \
                unsigned long long _i, _iter = (PERF)->iterations;                                 \
                unsigned long long _run_total = RUN_TIME;                                          \
                _run_total *= UNIT_SCALE;                                                          \
                _iter = estimate_perf_iterations(PERF, _iter, _run_total);                         \
                (PERF)->iterations = 0;                                                            \
                perf_start(PERF);                                                                  \
                for (_i = 0; _i < _iter; _i++) {                                                   \
                        FUNC_CALL;                                                                 \
                }                                                                                  \
                perf_pause(PERF);                                                                  \
                (PERF)->iterations += _iter;                                                       \
                                                                                                   \
                if (get_base_elapsed(PERF) < _run_total && BENCHMARK_TYPE == BENCHMARK_MIN_TIME) { \
                        _iter = estimate_perf_iterations(PERF, _iter,                              \
                                                         _run_total - get_base_elapsed(PERF) +     \
                                                                 (UNIT_SCALE / 16));               \
                        perf_continue(PERF);                                                       \
                        for (_i = 0; _i < _iter; _i++) {                                           \
                                FUNC_CALL;                                                         \
                        }                                                                          \
                        perf_pause(PERF);                                                          \
                        (PERF)->iterations += _iter;                                               \
                }                                                                                  \
        }
0285 
/*
 * BENCHMARK(PERF, RUN_TIME, FUNC_CALL) - entry point for timing FUNC_CALL.
 *
 * RUN_TIME > 0:  run CALIBRATE followed by PERFORMANCE_TEST.
 * otherwise:     time a single execution of FUNC_CALL, with (PERF)->iterations set to 1.
 *
 * Results are left in PERF (run time and iteration count).
 */
#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL)                                                       \
        {                                                                                          \
                if ((RUN_TIME) > 0) {                                                              \
                        CALIBRATE(PERF, FUNC_CALL);                                                \
                        PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL);                               \
                                                                                                   \
                } else {                                                                           \
                        (PERF)->iterations = 1;                                                    \
                        perf_start(PERF);                                                          \
                        FUNC_CALL;                                                                 \
                        perf_stop(PERF);                                                           \
                }                                                                                  \
        }
0299 
0300 #ifdef USE_CYCLES
0301 static inline void
0302 perf_print(struct perf p, long long unit_count)
0303 {
0304         long long total_units = p.iterations * unit_count;
0305 
0306         printf("runtime = %10lld ticks", get_base_elapsed(&p));
0307         if (total_units != 0) {
0308                 printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte", total_units / (1000000),
0309                        get_time_elapsed(&p), get_base_elapsed(&p) / (double) total_units);
0310         }
0311         printf("\n");
0312 }
0313 #else
0314 static inline void
0315 perf_print(struct perf p, double unit_count)
0316 {
0317         long long total_units = p.iterations * unit_count;
0318         long long usecs = (long long) (get_time_elapsed(&p) * 1000000);
0319 
0320         printf("runtime = %10lld usecs", usecs);
0321         if (total_units != 0) {
0322                 printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s", total_units / (1000000),
0323                        get_time_elapsed(&p),
0324                        ((double) total_units) / (1000000 * get_time_elapsed(&p)));
0325         }
0326         printf("\n");
0327 }
0328 #endif
0329 
/*
 * Return the size in bytes of the file behind `fp`, leaving the stream position unchanged.
 *
 * The size is the offset of end-of-file, read with ftello() (or _ftelli64() on Windows).
 * fpos_t is an opaque type, so it is used only to save and restore the position and its bytes
 * are never reinterpreted as an integer.
 *
 * Returns 0 when the current position cannot be saved, the seek to the end fails, or the end
 * offset cannot be read.
 */
static inline uint64_t
get_filesize(FILE *fp)
{
        uint64_t file_size = 0;
        int64_t end_offset = -1;
        fpos_t pos_curr;

        /* Save the current position; without it the stream could not be put back */
        if (fgetpos(fp, &pos_curr) != 0)
                return 0;

#if defined(_WIN32) || defined(_WIN64)
        if (_fseeki64(fp, 0, SEEK_END) == 0)
                end_offset = (int64_t) _ftelli64(fp);
#else
        if (fseeko(fp, 0, SEEK_END) == 0)
                end_offset = (int64_t) ftello(fp);
#endif
        if (end_offset > 0)
                file_size = (uint64_t) end_offset;

        /* Restore the caller's position; nothing more can be done here if this fails */
        (void) fsetpos(fp, &pos_curr);

        return file_size;
}
0348 
0349 #ifdef __cplusplus
0350 }
0351 #endif
0352 
0353 #endif