File indexing completed on 2025-09-17 09:02:13
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030 #ifndef _TEST_H
0031 #define _TEST_H
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042 #ifdef __cplusplus
0043 extern "C" {
0044 #endif
0045
0046 #include <stdio.h>
0047 #include <stdint.h>
0048
/* Under MSVC, route the `inline` keyword to the compiler-specific `__inline`. */
#ifdef _MSC_VER
#define inline __inline
#endif

/*
 * OS-independent alignment attribute, aligned allocation and free.
 *
 *   DECLARE_ALIGNED(decl, alignval)  wrap a declaration so it is aligned to
 *                                    `alignval` bytes.
 *   posix_memalign(p, algn, len)     defined here only for non-Unix targets,
 *                                    on top of _aligned_malloc(); stores the
 *                                    new pointer in *p and evaluates to 0 on
 *                                    success, 1 when the allocation failed.
 *   aligned_free(x)                  free(x) on Unix/macOS, _aligned_free(x)
 *                                    elsewhere.
 *   __forceinline                    on Unix/macOS expands to `static inline`.
 */
#if defined __unix__ || defined __APPLE__
#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
#define __forceinline                   static inline
#define aligned_free(x)                 free(x)
#else
#ifdef __MINGW32__
#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
#else
#define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
#endif
/*
 * The result is stored through a `void **` (the parameter type of the real
 * posix_memalign) so no void* -> char* conversion is needed; that conversion
 * is rejected when this header is compiled as C++. Arguments are
 * parenthesized so expressions can be passed safely.
 */
#define posix_memalign(p, algn, len)                                                               \
        (NULL == (*((void **) (p)) = _aligned_malloc((len), (algn))))
#define aligned_free(x) _aligned_free(x)
#endif
0071
/*
 * DEBUG_PRINT(x): `x` is a complete, parenthesized printf argument list, e.g.
 *     DEBUG_PRINT(("value = %d\n", v));
 * With DEBUG defined it expands to `printf x`; otherwise it expands to an
 * empty statement and the arguments are discarded.
 */
#ifndef DEBUG
#define DEBUG_PRINT(x) do { } while (0)
#else
#define DEBUG_PRINT(x) printf x
#endif
0079
0080
0081
/*
 * Benchmark timing policy, selected through BENCHMARK_TYPE:
 *   BENCHMARK_MIN_TIME     PERFORMANCE_TEST runs a second batch of iterations
 *                          when the first batch finished before the requested
 *                          run time.
 *   BENCHMARK_APPROX_TIME  only the first, estimated batch is run.
 * BENCHMARK_TYPE may be predefined by the build; it defaults to
 * BENCHMARK_MIN_TIME.
 */
#define BENCHMARK_MIN_TIME    0
#define BENCHMARK_APPROX_TIME 1
#if !defined(BENCHMARK_TYPE)
#define BENCHMARK_TYPE BENCHMARK_MIN_TIME
#endif
0087
/*
 * Select the time source for the benchmark helpers.
 * Without USE_RDTSC the <time.h> clock is used and USE_SECONDS is defined;
 * with USE_RDTSC the x86 intrinsics header is pulled in and USE_CYCLES is
 * defined. Exactly one of USE_SECONDS / USE_CYCLES ends up defined.
 */
#ifndef USE_RDTSC
#include <time.h>
#define USE_SECONDS
#else
#include <inttypes.h>
#include <x86intrin.h>
#define USE_CYCLES
#endif
0100
0101 #ifdef USE_RDTSC
/* Default benchmark run time for timestamp-counter builds (overridable). */
#if !defined(BENCHMARK_TIME)
#define BENCHMARK_TIME 6
#endif
/*
 * Timestamps on this path are raw counter ticks. UNIT_SCALE is 1e9 ticks, so
 * get_time_elapsed() reports elapsed time in units of 1e9 ticks.
 */
#define GHZ            1000000000
#define UNIT_SCALE     (GHZ)
#define CALIBRATE_TIME (UNIT_SCALE / 2)

/**
 * Read the current timestamp counter with __rdtscp().
 *
 * @return counter value converted to long long; the auxiliary value that
 *         __rdtscp() writes back is discarded.
 */
static inline long long
get_time(void)
{
        unsigned int aux;
        const unsigned long long ticks = __rdtscp(&aux);

        return ticks;
}

/**
 * Resolution of get_time() in its own units.
 *
 * @return always 1 (one tick).
 */
static inline long long
get_res(void)
{
        return 1;
}
0120 #else
/* Default benchmark run time for clock-based builds (overridable by the build). */
#if !defined(BENCHMARK_TIME)
#define BENCHMARK_TIME 3
#endif
0124 #ifdef _MSC_VER
/*
 * MSVC path: timestamps are performance-counter ticks, so the number of
 * ticks per unit of elapsed time is whatever get_res() reports at run time.
 */
#define UNIT_SCALE     get_res()
#define CALIBRATE_TIME (UNIT_SCALE / 4)

/**
 * Read the current value of the performance counter.
 *
 * @return value written by QueryPerformanceCounter(); 0 if the call left the
 *         variable untouched.
 */
static inline long long
get_time(void)
{
        long long ticks = 0;

        QueryPerformanceCounter(&ticks);
        return ticks;
}

/**
 * Read the performance counter frequency, used as UNIT_SCALE on this path.
 *
 * @return value written by QueryPerformanceFrequency(); 0 if the call left
 *         the variable untouched.
 */
static inline long long
get_res(void)
{
        long long frequency = 0;

        QueryPerformanceFrequency(&frequency);
        return frequency;
}
0142 #else
/*
 * POSIX clock path: timestamps are nanoseconds read from CLOCK_ID, so one
 * unit of elapsed time (UNIT_SCALE) is NANO_SCALE nanoseconds.
 */
#define NANO_SCALE     1000000000
#define UNIT_SCALE     NANO_SCALE
#define CALIBRATE_TIME (UNIT_SCALE / 4)
#ifdef __FreeBSD__
#define CLOCK_ID CLOCK_MONOTONIC_PRECISE
#else
#define CLOCK_ID CLOCK_MONOTONIC
#endif

/**
 * Read CLOCK_ID and flatten the timespec into nanoseconds.
 *
 * @return current clock value in nanoseconds, or 0 if clock_gettime() fails
 *         (previously an indeterminate timespec was read in that case).
 */
static inline long long
get_time(void)
{
        struct timespec now = { 0 };

        if (clock_gettime(CLOCK_ID, &now) != 0)
                return 0;

        return (long long) now.tv_sec * NANO_SCALE + now.tv_nsec;
}

/**
 * Query the resolution of CLOCK_ID in nanoseconds.
 *
 * @return resolution in nanoseconds, never less than 1.
 *         estimate_perf_iterations() divides by this value, so a failed or
 *         zero-valued clock_getres() result is reported as 1 instead of 0.
 */
static inline long long
get_res(void)
{
        struct timespec res = { 0 };
        long long nano_total = 0;

        if (clock_getres(CLOCK_ID, &res) == 0)
                nano_total = (long long) res.tv_sec * NANO_SCALE + res.tv_nsec;

        return nano_total > 0 ? nano_total : 1;
}
0175 #endif
0176 #endif
/**
 * State of one benchmark measurement. All time fields are in get_time()
 * units.
 */
struct perf {
        long long start;      /**< timestamp taken when timing was last (re)started */
        long long stop;       /**< timestamp taken at the most recent pause */
        long long run_total;  /**< accumulated time between starts and pauses */
        long long iterations; /**< iteration count recorded by the benchmark macros */
};
0183
0184 static inline void
0185 perf_init(struct perf *p)
0186 {
0187 p->start = 0;
0188 p->stop = 0;
0189 p->run_total = 0;
0190 }
0191
0192 static inline void
0193 perf_continue(struct perf *p)
0194 {
0195 p->start = get_time();
0196 }
0197
0198 static inline void
0199 perf_pause(struct perf *p)
0200 {
0201 p->stop = get_time();
0202 p->run_total = p->run_total + p->stop - p->start;
0203 p->start = p->stop;
0204 }
0205
0206 static inline void
0207 perf_start(struct perf *p)
0208 {
0209 perf_init(p);
0210 perf_continue(p);
0211 }
0212
/**
 * End a measurement. Identical to perf_pause(): the elapsed time is folded
 * into run_total.
 *
 * @param p measurement to stop
 */
static inline void
perf_stop(struct perf *p)
{
        perf_pause(p);
}
0218
0219 static inline double
0220 get_time_elapsed(struct perf *p)
0221 {
0222 return 1.0 * p->run_total / UNIT_SCALE;
0223 }
0224
0225 static inline long long
0226 get_base_elapsed(struct perf *p)
0227 {
0228 return p->run_total;
0229 }
0230
/**
 * Estimate how many iterations are needed to fill a time budget.
 *
 * Computes ceil(total * runs / elapsed), where `elapsed` is the time recorded
 * in `p`. When no positive time has been recorded, the timer resolution from
 * get_res() is used as the divisor instead.
 *
 * @param p     measurement holding the time the previous `runs` iterations took
 * @param runs  number of iterations that produced that time
 * @param total target duration in get_time() units
 * @return estimated iteration count, rounded up
 */
static inline unsigned long long
estimate_perf_iterations(struct perf *p, unsigned long long runs, unsigned long long total)
{
        unsigned long long divisor;

        if (get_base_elapsed(p) > 0)
                divisor = (unsigned long long) get_base_elapsed(p);
        else
                divisor = (unsigned long long) get_res();

        total *= runs;
        return (total + divisor - 1) / divisor;
}
0240
/*
 * CALIBRATE(PERF, FUNC_CALL)
 *
 * Find an iteration count whose timed run lasts at least CALIBRATE_TIME.
 * FUNC_CALL is timed once, then repeatedly re-timed with an iteration count
 * estimated to take 2 * CALIBRATE_TIME until one run reaches CALIBRATE_TIME.
 * On exit (PERF)->iterations holds that count and PERF holds the timing of
 * the last run. PERF and FUNC_CALL are expanded several times.
 */
#define CALIBRATE(PERF, FUNC_CALL)                                                                 \
        {                                                                                          \
                unsigned long long _i, _iter = 1;                                                  \
                /* One timed call seeds the first estimate. */                                     \
                perf_start(PERF);                                                                  \
                FUNC_CALL;                                                                         \
                perf_pause(PERF);                                                                  \
                                                                                                   \
                while (get_base_elapsed(PERF) < CALIBRATE_TIME) {                                  \
                        _iter = estimate_perf_iterations((PERF), _iter, 2 * CALIBRATE_TIME);       \
                        perf_start(PERF);                                                          \
                        for (_i = 0; _i < _iter; ++_i) {                                           \
                                FUNC_CALL;                                                         \
                        }                                                                          \
                        perf_stop(PERF);                                                           \
                }                                                                                  \
                (PERF)->iterations = _iter;                                                        \
        }
0258
/*
 * PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL)
 *
 * Run FUNC_CALL for roughly RUN_TIME * UNIT_SCALE time units. The iteration
 * count is estimated from the timing and (PERF)->iterations already stored in
 * PERF (as left by CALIBRATE). If the first batch ends short of the target
 * and BENCHMARK_TYPE is BENCHMARK_MIN_TIME, a second batch sized for the
 * remaining time plus UNIT_SCALE / 16 is run and added to the same
 * measurement. On exit (PERF)->iterations is the total number of calls timed.
 */
#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL)                                                \
        {                                                                                          \
                unsigned long long _i, _iter = (PERF)->iterations;                                 \
                unsigned long long _run_total = RUN_TIME;                                          \
                _run_total *= UNIT_SCALE;                                                          \
                _iter = estimate_perf_iterations((PERF), _iter, _run_total);                       \
                (PERF)->iterations = 0;                                                            \
                /* First batch: sized from the calibration data. */                                \
                perf_start(PERF);                                                                  \
                for (_i = 0; _i < _iter; ++_i) {                                                   \
                        FUNC_CALL;                                                                 \
                }                                                                                  \
                perf_pause(PERF);                                                                  \
                (PERF)->iterations += _iter;                                                       \
                                                                                                   \
                /* Optional top-up batch when the first one came in short. */                      \
                if (BENCHMARK_TYPE == BENCHMARK_MIN_TIME && get_base_elapsed(PERF) < _run_total) { \
                        _iter = estimate_perf_iterations(                                          \
                                (PERF), _iter,                                                     \
                                _run_total - get_base_elapsed(PERF) + (UNIT_SCALE / 16));          \
                        perf_continue(PERF);                                                       \
                        for (_i = 0; _i < _iter; ++_i) {                                           \
                                FUNC_CALL;                                                         \
                        }                                                                          \
                        perf_pause(PERF);                                                          \
                        (PERF)->iterations += _iter;                                               \
                }                                                                                  \
        }
0285
/*
 * BENCHMARK(PERF, RUN_TIME, FUNC_CALL)
 *
 * Top-level entry point. With RUN_TIME > 0 the call is calibrated and then
 * measured with PERFORMANCE_TEST. Otherwise FUNC_CALL is timed exactly once
 * and (PERF)->iterations is set to 1; this relies on perf_start() leaving
 * `iterations` untouched. RUN_TIME is expanded more than once.
 */
#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL)                                                       \
        {                                                                                          \
                if ((RUN_TIME) <= 0) {                                                             \
                        (PERF)->iterations = 1;                                                    \
                        perf_start(PERF);                                                          \
                        FUNC_CALL;                                                                 \
                        perf_stop(PERF);                                                           \
                } else {                                                                           \
                        CALIBRATE(PERF, FUNC_CALL);                                                \
                        PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL);                               \
                }                                                                                  \
        }
0299
0300 #ifdef USE_CYCLES
0301 static inline void
0302 perf_print(struct perf p, long long unit_count)
0303 {
0304 long long total_units = p.iterations * unit_count;
0305
0306 printf("runtime = %10lld ticks", get_base_elapsed(&p));
0307 if (total_units != 0) {
0308 printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte", total_units / (1000000),
0309 get_time_elapsed(&p), get_base_elapsed(&p) / (double) total_units);
0310 }
0311 printf("\n");
0312 }
0313 #else
0314 static inline void
0315 perf_print(struct perf p, double unit_count)
0316 {
0317 long long total_units = p.iterations * unit_count;
0318 long long usecs = (long long) (get_time_elapsed(&p) * 1000000);
0319
0320 printf("runtime = %10lld usecs", usecs);
0321 if (total_units != 0) {
0322 printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s", total_units / (1000000),
0323 get_time_elapsed(&p),
0324 ((double) total_units) / (1000000 * get_time_elapsed(&p)));
0325 }
0326 printf("\n");
0327 }
0328 #endif
0329
/**
 * Return the size of an open file in bytes without disturbing its position.
 *
 * The current position is saved with fgetpos(), the stream is moved to its
 * end, the end offset is read with the 64-bit tell function for the platform
 * (ftello / _ftelli64), and the saved position is restored. The offset is no
 * longer obtained by reinterpreting the opaque fpos_t object as an integer,
 * which is not portable.
 *
 * @param fp open, seekable stream
 * @return file size in bytes, or 0 if the position could not be saved, the
 *         seek failed, or the end offset could not be read
 */
static inline uint64_t
get_filesize(FILE *fp)
{
        uint64_t file_size = 0;
        long long end_offset = -1;
        fpos_t pos_curr;

        if (fgetpos(fp, &pos_curr) != 0)
                return 0;

#if defined(_WIN32) || defined(_WIN64)
        if (_fseeki64(fp, 0, SEEK_END) == 0)
                end_offset = (long long) _ftelli64(fp);
#else
        if (fseeko(fp, 0, SEEK_END) == 0)
                end_offset = (long long) ftello(fp);
#endif
        if (end_offset > 0)
                file_size = (uint64_t) end_offset;

        /* Restore the caller's position whether or not the size was obtained. */
        fsetpos(fp, &pos_curr);

        return file_size;
}
0348
0349 #ifdef __cplusplus
0350 }
0351 #endif
0352
0353 #endif