Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-17 09:02:13

0001 /**********************************************************************
0002   Copyright(c) 2011-2015 Intel Corporation All rights reserved.
0003 
0004   Redistribution and use in source and binary forms, with or without
0005   modification, are permitted provided that the following conditions
0006   are met:
0007     * Redistributions of source code must retain the above copyright
0008       notice, this list of conditions and the following disclaimer.
0009     * Redistributions in binary form must reproduce the above copyright
0010       notice, this list of conditions and the following disclaimer in
0011       the documentation and/or other materials provided with the
0012       distribution.
0013     * Neither the name of Intel Corporation nor the names of its
0014       contributors may be used to endorse or promote products derived
0015       from this software without specific prior written permission.
0016 
0017   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0018   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0019   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0020   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
0021   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0022   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0023   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
0024   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
0025   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0026   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
0027   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0028 **********************************************************************/
0029 
0030 #ifndef _TEST_H
0031 #define _TEST_H
0032 
0033 /**
0034  *  @file  test.h
0035  *  @brief Test helper include for common perf and test macros
0036  *
0037  *  This is a helper file to enable short and simple tests. Not intended for use
0038  *  in library functions or production. Includes helper routines for alignment,
0039  *  benchmark timing, and filesize.
0040  */
0041 
0042 #ifdef __cplusplus
0043 extern "C" {
0044 #endif
0045 
0046 #include <stdio.h>
0047 #include <stdint.h>
0048 
0049 #ifdef _MSC_VER
0050 #define inline __inline
0051 #endif
0052 
0053 /* Make os-independent alignment attribute, alloc and free. */
0054 #if defined __unix__ || defined __APPLE__
0055 #define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
0056 #define __forceinline                   static inline
0057 #define aligned_free(x)                 free(x)
0058 #else
0059 #ifdef __MINGW32__
0060 #define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
0061 #define posix_memalign(p, algn, len)                                                               \
0062         (NULL == (*((char **) (p)) = (void *) _aligned_malloc(len, algn)))
0063 #define aligned_free(x) _aligned_free(x)
0064 #else
0065 #define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
0066 #define posix_memalign(p, algn, len)                                                               \
0067         (NULL == (*((char **) (p)) = (void *) _aligned_malloc(len, algn)))
0068 #define aligned_free(x) _aligned_free(x)
0069 #endif
0070 #endif
0071 
/*
 * DEBUG_PRINT(x): x is a complete, parenthesized printf argument list, e.g.
 * DEBUG_PRINT(("value %d\n", v)). With DEBUG defined it expands to a printf
 * call; otherwise it expands to an empty statement and x is discarded.
 */
#if defined(DEBUG)
#define DEBUG_PRINT(x) printf x
#else
#define DEBUG_PRINT(x) do { } while (0)
#endif
0079 
0080 /* Decide whether to use benchmark time as an approximation or a minimum. Fewer
0081  * calls to the timer are required for the approximation case.*/
0082 #define BENCHMARK_MIN_TIME    0
0083 #define BENCHMARK_APPROX_TIME 1
0084 #ifndef BENCHMARK_TYPE
0085 #define BENCHMARK_TYPE BENCHMARK_MIN_TIME
0086 #endif
0087 
0088 #ifdef USE_RDTSC
0089 /* The use of rdtsc is nuanced. On many processors it corresponds to a
0090  * standardized clock source. To obtain a meaningful result it may be
0091  * necessary to fix the CPU clock to match the rdtsc tick rate.
0092  */
0093 #include <inttypes.h>
0094 #include <x86intrin.h>
0095 #define USE_CYCLES
0096 #else
0097 #include <time.h>
0098 #define USE_SECONDS
0099 #endif
0100 
0101 #ifdef USE_RDTSC
0102 #ifndef BENCHMARK_TIME
0103 #define BENCHMARK_TIME 6
0104 #endif
0105 #define GHZ            1000000000
0106 #define UNIT_SCALE     (GHZ)
0107 #define CALIBRATE_TIME (UNIT_SCALE / 2)
/**
 * @brief Read the current timestamp with the __rdtscp() intrinsic.
 *
 * @return Counter value returned by __rdtscp(), converted to long long.
 */
static inline long long
get_time(void)
{
        unsigned int aux; /* output argument required by __rdtscp(); value ignored */
        const unsigned long long ticks = __rdtscp(&aux);

        return (long long) ticks;
}
0114 
/**
 * @brief Timer resolution for the USE_RDTSC build.
 *
 * @return Always 1.
 */
static inline long long
get_res(void)
{
        const long long one_tick = 1;

        return one_tick;
}
0120 #else
0121 #ifndef BENCHMARK_TIME
0122 #define BENCHMARK_TIME 3
0123 #endif
0124 #ifdef _MSC_VER
0125 #define UNIT_SCALE     get_res()
0126 #define CALIBRATE_TIME (UNIT_SCALE / 4)
/**
 * @brief Read the Windows high-resolution performance counter.
 *
 * QueryPerformanceCounter() takes a LARGE_INTEGER *, so the value is read
 * into a LARGE_INTEGER and its 64-bit QuadPart member is returned, instead of
 * handing the API a long long * (an incompatible pointer type, which is an
 * error when this header is compiled as C++).
 *
 * @return Counter value in counts; stays 0 if the call does not write a value.
 */
static inline long long
get_time(void)
{
        LARGE_INTEGER counter;

        counter.QuadPart = 0;
        QueryPerformanceCounter(&counter);
        return (long long) counter.QuadPart;
}
0134 
/**
 * @brief Read the frequency of the Windows performance counter.
 *
 * QueryPerformanceFrequency() takes a LARGE_INTEGER *, so the value is read
 * into a LARGE_INTEGER and its 64-bit QuadPart member is returned, instead of
 * handing the API a long long * (an incompatible pointer type, which is an
 * error when this header is compiled as C++).
 *
 * @return Counts per second; stays 0 if the call does not write a value.
 */
static inline long long
get_res(void)
{
        LARGE_INTEGER frequency;

        frequency.QuadPart = 0;
        QueryPerformanceFrequency(&frequency);
        return (long long) frequency.QuadPart;
}
0142 #else
0143 #define NANO_SCALE     1000000000
0144 #define UNIT_SCALE     NANO_SCALE
0145 #define CALIBRATE_TIME (UNIT_SCALE / 4)
0146 #ifdef __FreeBSD__
0147 #define CLOCK_ID CLOCK_MONOTONIC_PRECISE
0148 #else
0149 #define CLOCK_ID CLOCK_MONOTONIC
0150 #endif
0151 
0152 static inline long long
0153 get_time(void)
0154 {
0155         struct timespec time;
0156         long long nano_total;
0157         clock_gettime(CLOCK_ID, &time);
0158         nano_total = time.tv_sec;
0159         nano_total *= NANO_SCALE;
0160         nano_total += time.tv_nsec;
0161         return nano_total;
0162 }
0163 
0164 static inline long long
0165 get_res(void)
0166 {
0167         struct timespec time;
0168         long long nano_total;
0169         clock_getres(CLOCK_ID, &time);
0170         nano_total = time.tv_sec;
0171         nano_total *= NANO_SCALE;
0172         nano_total += time.tv_nsec;
0173         return nano_total;
0174 }
0175 #endif
0176 #endif
/**
 * @brief State of one timing measurement.
 *
 * All time fields hold values in the units returned by get_time().
 */
struct perf {
        long long start;      /**< Timestamp taken by perf_continue() / perf_pause() */
        long long stop;       /**< Timestamp taken by the most recent perf_pause() */
        long long run_total;  /**< Sum of the (stop - start) intervals measured so far */
        long long iterations; /**< Iteration count, maintained by the benchmark macros */
};
0183 
0184 static inline void
0185 perf_init(struct perf *p)
0186 {
0187         p->start = 0;
0188         p->stop = 0;
0189         p->run_total = 0;
0190 }
0191 
0192 static inline void
0193 perf_continue(struct perf *p)
0194 {
0195         p->start = get_time();
0196 }
0197 
0198 static inline void
0199 perf_pause(struct perf *p)
0200 {
0201         p->stop = get_time();
0202         p->run_total = p->run_total + p->stop - p->start;
0203         p->start = p->stop;
0204 }
0205 
/**
 * @brief Start a fresh measurement: reset @p p, then take the start timestamp.
 *
 * @param p Measurement state to reset and start.
 */
static inline void
perf_start(struct perf *p)
{
        perf_init(p);     /* drop any previously accumulated time */
        perf_continue(p); /* the clock runs from here */
}
0212 
/**
 * @brief Stop the measurement; currently identical to perf_pause().
 *
 * @param p Measurement state to update.
 */
static inline void
perf_stop(struct perf *p)
{
        perf_pause(p);
}
0218 
0219 static inline double
0220 get_time_elapsed(struct perf *p)
0221 {
0222         return 1.0 * p->run_total / UNIT_SCALE;
0223 }
0224 
0225 static inline long long
0226 get_base_elapsed(struct perf *p)
0227 {
0228         return p->run_total;
0229 }
0230 
/**
 * @brief Estimate how many iterations are needed to fill a time budget.
 *
 * Computes ceil(total * runs / elapsed), where elapsed is the run time
 * accumulated in @p p. When that run time is not positive, the timer
 * resolution from get_res() is used as the divisor instead.
 *
 * @param p     Measurement holding the time taken by @p runs iterations.
 * @param runs  Number of iterations that produced the measured time.
 * @param total Time budget, in the same units as the measured time.
 * @return Estimated iteration count, rounded up.
 */
static inline unsigned long long
estimate_perf_iterations(struct perf *p, unsigned long long runs, unsigned long long total)
{
        const unsigned long long budget = total * runs;
        long long divisor = get_base_elapsed(p);

        if (divisor <= 0)
                divisor = get_res();

        /* Adding divisor - 1 before dividing rounds the quotient up */
        return (budget + divisor - 1) / divisor;
}
0240 
/*
 * CALIBRATE(PERF, FUNC_CALL)
 *
 * Finds an iteration count for which FUNC_CALL runs for at least
 * CALIBRATE_TIME. FUNC_CALL is timed once; while the measured time is below
 * CALIBRATE_TIME, the count is re-estimated for a target of
 * 2 * CALIBRATE_TIME and the batch is timed again. The final count is stored
 * in (PERF)->iterations, and PERF keeps the timing of the last batch.
 *
 * PERF is a struct perf *. FUNC_CALL is expanded, and so evaluated, many
 * times. The expansion is a brace-enclosed block.
 */
#define CALIBRATE(PERF, FUNC_CALL)                                                         \
        {                                                                                  \
                unsigned long long _iter = 1;                                              \
                unsigned long long _i;                                                     \
                                                                                           \
                /* First estimate: time a single call */                                   \
                perf_start(PERF);                                                          \
                FUNC_CALL;                                                                 \
                perf_pause(PERF);                                                          \
                                                                                           \
                /* Grow the batch until it is long enough to measure */                    \
                while (get_base_elapsed(PERF) < CALIBRATE_TIME) {                          \
                        _iter = estimate_perf_iterations(PERF, _iter, 2 * CALIBRATE_TIME); \
                        perf_start(PERF);                                                  \
                        for (_i = 0; _i < _iter; ++_i) {                                   \
                                FUNC_CALL;                                                 \
                        }                                                                  \
                        perf_stop(PERF);                                                   \
                }                                                                          \
                (PERF)->iterations = _iter;                                                \
        }
0258 
/*
 * PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL)
 *
 * Runs FUNC_CALL repeatedly for about RUN_TIME * UNIT_SCALE timer units. On
 * entry PERF must already hold an iteration count and the time those
 * iterations took (CALIBRATE leaves PERF in that state); both are used to
 * estimate the batch size. (PERF)->iterations is then reset and ends up as
 * the number of calls actually timed, with the elapsed time accumulated in
 * PERF.
 *
 * When the first batch finishes early and BENCHMARK_TYPE is
 * BENCHMARK_MIN_TIME, a second batch sized for the remaining time plus
 * UNIT_SCALE / 16 is run and added to the same measurement.
 *
 * PERF is a struct perf *. FUNC_CALL is expanded, and so evaluated, many
 * times. The expansion is a brace-enclosed block.
 */
#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL)                                                \
        {                                                                                          \
                unsigned long long _iter = (PERF)->iterations;                                     \
                unsigned long long _i;                                                             \
                unsigned long long _run_total = RUN_TIME;                                          \
                                                                                                   \
                /* Convert the requested run time into timer units */                              \
                _run_total *= UNIT_SCALE;                                                          \
                _iter = estimate_perf_iterations(PERF, _iter, _run_total);                         \
                (PERF)->iterations = 0;                                                            \
                                                                                                   \
                perf_start(PERF);                                                                  \
                for (_i = 0; _i < _iter; ++_i) {                                                   \
                        FUNC_CALL;                                                                 \
                }                                                                                  \
                perf_pause(PERF);                                                                  \
                (PERF)->iterations += _iter;                                                       \
                                                                                                   \
                /* Top up when the first batch came in under the requested time */                 \
                if (get_base_elapsed(PERF) < _run_total && BENCHMARK_TYPE == BENCHMARK_MIN_TIME) { \
                        _iter = estimate_perf_iterations(                                          \
                                PERF, _iter,                                                       \
                                _run_total - get_base_elapsed(PERF) + (UNIT_SCALE / 16));          \
                        perf_continue(PERF);                                                       \
                        for (_i = 0; _i < _iter; ++_i) {                                           \
                                FUNC_CALL;                                                         \
                        }                                                                          \
                        perf_pause(PERF);                                                          \
                        (PERF)->iterations += _iter;                                               \
                }                                                                                  \
        }
0285 
/*
 * BENCHMARK(PERF, RUN_TIME, FUNC_CALL)
 *
 * Entry point for timing FUNC_CALL. With RUN_TIME > 0 it calibrates an
 * iteration count and then runs the timed test for about RUN_TIME. Otherwise
 * FUNC_CALL is timed exactly once and (PERF)->iterations is set to 1; that
 * assignment comes before perf_start(), which does not touch iterations.
 *
 * PERF is a struct perf *. RUN_TIME and FUNC_CALL are each expanded more than
 * once. The expansion is a brace-enclosed block.
 */
#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL)                         \
        {                                                            \
                if ((RUN_TIME) > 0) {                                \
                        CALIBRATE(PERF, FUNC_CALL);                  \
                        PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL); \
                } else {                                             \
                        /* Single-shot mode */                       \
                        (PERF)->iterations = 1;                      \
                        perf_start(PERF);                            \
                        FUNC_CALL;                                   \
                        perf_stop(PERF);                             \
                }                                                    \
        }
0299 
0300 #ifdef USE_CYCLES
0301 static inline void
0302 perf_print(struct perf p, long long unit_count)
0303 {
0304         long long total_units = p.iterations * unit_count;
0305 
0306         printf("runtime = %10lld ticks", get_base_elapsed(&p));
0307         if (total_units != 0) {
0308                 printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte", total_units / (1000000),
0309                        get_time_elapsed(&p), get_base_elapsed(&p) / (double) total_units);
0310         }
0311         printf("\n");
0312 }
0313 #else
0314 static inline void
0315 perf_print(struct perf p, double unit_count)
0316 {
0317         long long total_units = p.iterations * unit_count;
0318         long long usecs = (long long) (get_time_elapsed(&p) * 1000000);
0319 
0320         printf("runtime = %10lld usecs", usecs);
0321         if (total_units != 0) {
0322                 printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s", total_units / (1000000),
0323                        get_time_elapsed(&p),
0324                        ((double) total_units) / (1000000 * get_time_elapsed(&p)));
0325         }
0326         printf("\n");
0327 }
0328 #endif
0329 
/**
 * @brief Return the size in bytes of an open file, keeping its position.
 *
 * Saves the current position, seeks to the end, reads the end offset with
 * ftello() (_ftelli64() on Windows) and restores the saved position. The
 * offset comes from the tell call rather than from reinterpreting an fpos_t
 * as an integer: fpos_t is an opaque type whose layout is not specified.
 *
 * @param fp Open, seekable stream.
 * @return File size in bytes, or 0 if any positioning call fails.
 */
static inline uint64_t
get_filesize(FILE *fp)
{
        fpos_t pos_curr;
        long long end_offset = -1;

        if (fgetpos(fp, &pos_curr) != 0) /* Save current position */
                return 0;

#if defined(_WIN32) || defined(_WIN64)
        if (_fseeki64(fp, 0, SEEK_END) == 0)
                end_offset = (long long) _ftelli64(fp);
#else
        if (fseeko(fp, 0, SEEK_END) == 0)
                end_offset = (long long) ftello(fp);
#endif

        /* Restore position even when the size query itself failed */
        if (fsetpos(fp, &pos_curr) != 0 || end_offset < 0)
                return 0;

        return (uint64_t) end_offset;
}
0348 
0349 #ifdef __cplusplus
0350 }
0351 #endif
0352 
0353 #endif // _TEST_H