File indexing completed on 2025-01-18 10:01:25
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030 #ifndef _TEST_H
0031 #define _TEST_H
0032
0033 #ifdef __cplusplus
0034 extern "C" {
0035 #endif
0036
0037 #include <stdio.h>
0038 #include <stdint.h>
0039
0040 #ifdef _MSC_VER
0041 # define inline __inline
0042 #endif
0043
0044
0045
/*
 * Benchmark policies selectable through BENCHMARK_TYPE:
 *   BENCHMARK_MIN_TIME    - PERFORMANCE_TEST runs a second, top-up batch
 *                           when the first batch ends short of the
 *                           requested run time.
 *   BENCHMARK_APPROX_TIME - only the first, estimated batch is run.
 * BENCHMARK_MIN_TIME is used unless the build already defined
 * BENCHMARK_TYPE.
 */
#define BENCHMARK_MIN_TIME    0
#define BENCHMARK_APPROX_TIME 1
#if !defined(BENCHMARK_TYPE)
# define BENCHMARK_TYPE BENCHMARK_MIN_TIME
#endif
0051
0052 #ifdef USE_RDTSC
0053
0054
0055
0056
0057 # include <inttypes.h>
0058 # include <x86intrin.h>
0059 # define USE_CYCLES
0060 #else
0061 # include <time.h>
0062 #define USE_SECONDS
0063 #endif
0064
0065 #ifdef USE_RDTSC
/*
 * Default BENCHMARK_TIME for the cycle-counter backend, used only when the
 * build did not define one.
 * NOTE(review): presumably the RUN_TIME value callers pass to BENCHMARK --
 * confirm against the callers.
 */
#if !defined(BENCHMARK_TIME)
# define BENCHMARK_TIME 6
#endif
/*
 * get_time_elapsed() divides the tick count by UNIT_SCALE, so elapsed time
 * is expressed in units of 10^9 ticks. CALLIBRATE keeps enlarging its batch
 * until one batch lasts CALLIBRATE_TIME ticks (half of UNIT_SCALE).
 */
# define GHZ 1000000000
# define UNIT_SCALE (GHZ)
# define CALLIBRATE_TIME ((UNIT_SCALE) / 2)
/**
 * Read the current tick count through the __rdtscp() intrinsic.
 *
 * @return the value returned by __rdtscp(), converted to long long.
 */
static inline long long get_time(void)
{
    unsigned int aux; /* receives the auxiliary value __rdtscp() stores; not used */

    return __rdtscp(&aux);
}
0076
/**
 * Report the timer resolution in get_time() units.
 *
 * @return always 1 for the cycle-counter backend.
 */
static inline long long get_res(void)
{
    return 1LL;
}
0080 #else
/*
 * Default BENCHMARK_TIME for the clock-based backends, used only when the
 * build did not define one.
 * NOTE(review): presumably the RUN_TIME value callers pass to BENCHMARK --
 * confirm against the callers.
 */
#if !defined(BENCHMARK_TIME)
# define BENCHMARK_TIME 3
#endif
0084 #ifdef _MSC_VER
/*
 * On MSVC the number of get_time() units per UNIT_SCALE is not a constant:
 * it is whatever get_res() reports, queried each time the macro is
 * expanded. CALLIBRATE keeps enlarging its batch until one batch lasts a
 * quarter of that.
 */
#define UNIT_SCALE (get_res())
#define CALLIBRATE_TIME ((UNIT_SCALE) / 4)
0087 static inline long long get_time(void) {
0088 long long ret = 0;
0089 QueryPerformanceCounter(&ret);
0090 return ret;
0091 }
0092
0093 static inline long long get_res(void) {
0094 long long ret = 0;
0095 QueryPerformanceFrequency(&ret);
0096 return ret;
0097 }
0098 #else
/*
 * The clock backend counts nanoseconds: get_time() returns
 * tv_sec * NANO_SCALE + tv_nsec, so UNIT_SCALE units make up one second.
 * CALLIBRATE keeps enlarging its batch until one batch lasts
 * CALLIBRATE_TIME units (a quarter of UNIT_SCALE).
 */
# define NANO_SCALE 1000000000
# define UNIT_SCALE (NANO_SCALE)
# define CALLIBRATE_TIME ((UNIT_SCALE) / 4)
/* Clock passed to clock_gettime() and clock_getres(). */
#if defined(__FreeBSD__)
# define CLOCK_ID CLOCK_MONOTONIC_PRECISE
#else
# define CLOCK_ID CLOCK_MONOTONIC
#endif
0107
0108 static inline long long get_time(void) {
0109 struct timespec time;
0110 long long nano_total;
0111 clock_gettime(CLOCK_ID, &time);
0112 nano_total = time.tv_sec;
0113 nano_total *= NANO_SCALE;
0114 nano_total += time.tv_nsec;
0115 return nano_total;
0116 }
0117
0118 static inline long long get_res(void) {
0119 struct timespec time;
0120 long long nano_total;
0121 clock_getres(CLOCK_ID, &time);
0122 nano_total = time.tv_sec;
0123 nano_total *= NANO_SCALE;
0124 nano_total += time.tv_nsec;
0125 return nano_total;
0126 }
0127 #endif
0128 #endif
/**
 * Timing state for one benchmark measurement.
 * start, stop and run_total hold get_time() values or differences of them.
 */
struct perf {
    long long start;      /* reading the next perf_pause() measures from */
    long long stop;       /* reading taken by the most recent perf_pause() */
    long long run_total;  /* elapsed units accumulated by perf_pause() */
    long long iterations; /* call count stored by the benchmark macros */
};
0135
0136 static inline void perf_init(struct perf *p) {
0137 p->start = 0;
0138 p->stop = 0;
0139 p->run_total = 0;
0140 }
0141
0142 static inline void perf_continue(struct perf *p) {
0143 p->start = get_time();
0144 }
0145
0146 static inline void perf_pause(struct perf *p) {
0147 p->stop = get_time();
0148 p->run_total = p->run_total + p->stop - p->start;
0149 p->start = p->stop;
0150 }
0151
/**
 * Begin a fresh measurement: reset the timing fields through perf_init(),
 * then open the first interval through perf_continue().
 *
 * @param p measurement to restart
 */
static inline void perf_start(struct perf *p)
{
    perf_init(p);
    perf_continue(p);
}
0156
/**
 * End a measurement. This simply forwards to perf_pause().
 *
 * @param p measurement to close
 */
static inline void perf_stop(struct perf *p)
{
    perf_pause(p);
}
0160
0161 static inline double get_time_elapsed(struct perf *p) {
0162 return 1.0 * p->run_total / UNIT_SCALE;
0163 }
0164
0165 static inline long long get_base_elapsed(struct perf *p) {
0166 return p->run_total;
0167 }
0168
/**
 * Estimate how many iterations are needed to run for `total` time units.
 *
 * Computes ceil(total * runs / elapsed), where elapsed is the time
 * accumulated in @p p. When no time has been accumulated (elapsed <= 0),
 * the timer resolution from get_res() is used as the divisor instead.
 *
 * @param p     measurement supplying the elapsed time
 * @param runs  iteration count that elapsed time is scaled against
 * @param total target duration in get_time() units
 * @return the estimated iteration count
 */
static inline unsigned long long estimate_perf_iterations(struct perf *p,
                                                          unsigned long long runs,
                                                          unsigned long long total)
{
    const long long elapsed = get_base_elapsed(p);
    const unsigned long long divisor = (elapsed > 0)
        ? (unsigned long long)elapsed
        : (unsigned long long)get_res();
    const unsigned long long work = total * runs;

    /* Adding divisor - 1 before dividing rounds the quotient up. */
    return (work + divisor - 1) / divisor;
}
0178
/*
 * CALLIBRATE(PERF, FUNC_CALL)
 *
 * Searches for an iteration count whose batch of FUNC_CALL runs for at
 * least CALLIBRATE_TIME, and stores that count in (PERF)->iterations.
 * On exit PERF holds the timing of the last batch that was run.
 *
 * PERF and FUNC_CALL are expanded several times. The expansion is a brace
 * block that declares the locals _i and _iter.
 */
#define CALLIBRATE(PERF, FUNC_CALL) { \
    unsigned long long _i; \
    unsigned long long _iter = 1; \
    /* First probe: time a single call. */ \
    perf_start(PERF); \
    FUNC_CALL; \
    perf_pause(PERF); \
    /* Re-estimate and re-time until one batch lasts long enough. */ \
    while (get_base_elapsed(PERF) < CALLIBRATE_TIME) { \
        /* Aim for twice the threshold, based on the previous batch. */ \
        _iter = estimate_perf_iterations(PERF, _iter, \
                                         2 * CALLIBRATE_TIME); \
        perf_start(PERF); \
        for (_i = 0; _i < _iter; _i++) { \
            FUNC_CALL; \
        } \
        perf_stop(PERF); \
    } \
    (PERF)->iterations = _iter; \
}
0196
/*
 * PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL)
 *
 * Times FUNC_CALL for roughly RUN_TIME * UNIT_SCALE get_time() units.
 * The batch size is estimated from the iteration count and elapsed time
 * already stored in PERF, so PERF must hold a previous measurement (BENCHMARK
 * runs CALLIBRATE first). On exit (PERF)->iterations is the number of calls
 * made and PERF's accumulated time covers all of them.
 *
 * PERF and FUNC_CALL are expanded several times. The expansion is a brace
 * block that declares the locals _i, _iter and _run_total.
 */
#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL) { \
    unsigned long long _i; \
    unsigned long long _iter = (PERF)->iterations; \
    unsigned long long _run_total = RUN_TIME; \
    _run_total *= UNIT_SCALE; \
    /* Size the main batch from the measurement already in PERF. */ \
    _iter = estimate_perf_iterations(PERF, _iter, _run_total); \
    (PERF)->iterations = 0; \
    perf_start(PERF); \
    for (_i = 0; _i < _iter; _i++) { \
        FUNC_CALL; \
    } \
    perf_pause(PERF); \
    (PERF)->iterations += _iter; \
    /* Top-up batch: only in BENCHMARK_MIN_TIME mode, and only when the */ \
    /* main batch ended early. It targets the remaining time plus a */ \
    /* margin of UNIT_SCALE / 16. */ \
    if (get_base_elapsed(PERF) < _run_total && \
        BENCHMARK_TYPE == BENCHMARK_MIN_TIME) { \
        _iter = estimate_perf_iterations(PERF, _iter, \
                    _run_total - get_base_elapsed(PERF) + \
                    (UNIT_SCALE / 16)); \
        /* perf_continue() keeps the time accumulated so far. */ \
        perf_continue(PERF); \
        for (_i = 0; _i < _iter; _i++) { \
            FUNC_CALL; \
        } \
        perf_pause(PERF); \
        (PERF)->iterations += _iter; \
    } \
}
0223
/*
 * BENCHMARK(PERF, RUN_TIME, FUNC_CALL)
 *
 * With RUN_TIME > 0: calibrates a batch size with CALLIBRATE, then runs the
 * timed measurement with PERFORMANCE_TEST.
 * Otherwise: times exactly one FUNC_CALL and records iterations = 1.
 *
 * PERF, RUN_TIME and FUNC_CALL are expanded more than once. The expansion
 * is a brace block, not a do/while statement.
 */
#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL) { \
    if ((RUN_TIME) > 0) { \
        CALLIBRATE(PERF, FUNC_CALL); \
        PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL); \
    } else { \
        /* iterations is set first; perf_start() does not reset it. */ \
        (PERF)->iterations = 1; \
        perf_start(PERF); \
        FUNC_CALL; \
        perf_stop(PERF); \
    } \
}
0236
0237 #ifdef USE_CYCLES
0238 static inline void perf_print(struct perf p, long long unit_count) {
0239 long long total_units = p.iterations * unit_count;
0240
0241 printf("runtime = %10lld ticks", get_base_elapsed(&p));
0242 if (total_units != 0) {
0243 printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte",
0244 total_units / (1000000), get_time_elapsed(&p),
0245 get_base_elapsed(&p) / (double)total_units);
0246 }
0247 printf("\n");
0248 }
0249 #else
0250 static inline void perf_print(struct perf p, double unit_count) {
0251 long long total_units = p.iterations * unit_count;
0252 long long usecs = (long long)(get_time_elapsed(&p) * 1000000);
0253
0254 printf("runtime = %10lld usecs", usecs);
0255 if (total_units != 0) {
0256 printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s",
0257 total_units / (1000000), get_time_elapsed(&p),
0258 ((double)total_units) / (1000000 * get_time_elapsed(&p)));
0259 }
0260 printf("\n");
0261 }
0262 #endif
0263
/**
 * Return the size in bytes of the file behind a stream.
 *
 * The stream is moved to its end, the offset there is read with
 * ftello() (_ftelli64() on Windows), and the caller's position is then
 * restored with fsetpos(). The offset is read through the tell function
 * because fpos_t is an opaque type: reinterpreting it as a uint64_t
 * depends on the C library's internal layout and breaks strict aliasing.
 *
 * @param fp open, seekable stream
 * @return the file size in bytes, or 0 if the current position could not
 *         be saved, the seek failed, or the end offset could not be read
 */
static inline uint64_t get_filesize(FILE *fp)
{
    uint64_t file_size = 0;
    fpos_t pos_curr;

    if (fgetpos(fp, &pos_curr) != 0) {
        return 0;
    }

#if defined(_WIN32) || defined(_WIN64)
    if (_fseeki64(fp, 0, SEEK_END) == 0) {
        const long long end = _ftelli64(fp);

        if (end >= 0) {
            file_size = (uint64_t)end;
        }
    }
#else
    if (fseeko(fp, 0, SEEK_END) == 0) {
        const long long end = ftello(fp);

        if (end >= 0) {
            file_size = (uint64_t)end;
        }
    }
#endif

    /* Put the caller's read/write position back where it was. */
    fsetpos(fp, &pos_curr);

    return file_size;
}
0280
0281 #ifdef __cplusplus
0282 }
0283 #endif
0284
0285 #endif