Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:01:25

0001 /**********************************************************************
0002   Copyright(c) 2011-2015 Intel Corporation All rights reserved.
0003 
0004   Redistribution and use in source and binary forms, with or without
0005   modification, are permitted provided that the following conditions
0006   are met:
0007     * Redistributions of source code must retain the above copyright
0008       notice, this list of conditions and the following disclaimer.
0009     * Redistributions in binary form must reproduce the above copyright
0010       notice, this list of conditions and the following disclaimer in
0011       the documentation and/or other materials provided with the
0012       distribution.
0013     * Neither the name of Intel Corporation nor the names of its
0014       contributors may be used to endorse or promote products derived
0015       from this software without specific prior written permission.
0016 
0017   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0018   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0019   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0020   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
0021   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0022   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0023   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
0024   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
0025   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0026   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
0027   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0028 **********************************************************************/
0029 
0030 #ifndef _TEST_H
0031 #define _TEST_H
0032 
0033 #ifdef __cplusplus
0034 extern "C" {
0035 #endif
0036 
0037 #include <stdio.h>
0038 #include <stdint.h>
0039 
0040 #ifdef _MSC_VER
0041 # define inline __inline
0042 #endif
0043 
0044 /* Decide whether to use benchmark time as an approximation or a minimum. Fewer
0045  * calls to the timer are required for the approximation case. */
0046 #define BENCHMARK_MIN_TIME 0
0047 #define BENCHMARK_APPROX_TIME 1
0048 #ifndef BENCHMARK_TYPE
0049 #define BENCHMARK_TYPE BENCHMARK_MIN_TIME
0050 #endif
0051 
0052 #ifdef USE_RDTSC
0053 /* The use of rdtsc is nuanced. On many processors it corresponds to a
0054  * standardized clock source. To obtain a meaningful result it may be
0055  * necessary to fix the CPU clock to match the rdtsc tick rate.
0056  */
0057 # include <inttypes.h>
0058 # include <x86intrin.h>
0059 # define USE_CYCLES
0060 #else
0061 # include <time.h>
0062 #define USE_SECONDS
0063 #endif
0064 
0065 #ifdef USE_RDTSC
0066 #ifndef BENCHMARK_TIME
0067 # define BENCHMARK_TIME 6
0068 #endif
0069 # define GHZ 1000000000
0070 # define UNIT_SCALE (GHZ)
0071 # define CALLIBRATE_TIME (UNIT_SCALE / 2)
/* Read the processor time-stamp counter through __rdtscp() and return the
 * value as a signed tick count. The auxiliary word that __rdtscp() writes
 * is not used. */
static inline long long get_time(void)
{
    unsigned int aux;

    return __rdtscp(&aux);
}
0076 
/* Timer resolution in base units; this build variant always reports 1. */
static inline long long get_res(void)
{
    const long long one_tick = 1;

    return one_tick;
}
0080 #else
0081 #ifndef BENCHMARK_TIME
0082 # define BENCHMARK_TIME 3
0083 #endif
0084 #ifdef _MSC_VER
0085 #define UNIT_SCALE get_res()
0086 #define CALLIBRATE_TIME (UNIT_SCALE / 4)
/* Return the current reading obtained from QueryPerformanceCounter(), or 0
 * if the call leaves the output untouched.
 * NOTE(review): a long long * is handed to the API; presumably the Win32
 * prototype expects LARGE_INTEGER * -- confirm this builds cleanly, in
 * particular when the header is consumed from C++. */
static inline long long get_time(void)
{
    long long counter = 0;

    QueryPerformanceCounter(&counter);
    return counter;
}
0092 
/* Return the value reported by QueryPerformanceFrequency(), or 0 if the call
 * leaves the output untouched. UNIT_SCALE expands to this function in this
 * build variant.
 * NOTE(review): a long long * is handed to the API; presumably the Win32
 * prototype expects LARGE_INTEGER * -- confirm. */
static inline long long get_res(void)
{
    long long frequency = 0;

    QueryPerformanceFrequency(&frequency);
    return frequency;
}
0098 #else
0099 # define NANO_SCALE 1000000000
0100 # define UNIT_SCALE NANO_SCALE
0101 # define CALLIBRATE_TIME (UNIT_SCALE / 4)
0102 #ifdef __FreeBSD__
0103 # define CLOCK_ID CLOCK_MONOTONIC_PRECISE
0104 #else
0105 # define CLOCK_ID CLOCK_MONOTONIC
0106 #endif
0107 
0108 static inline long long get_time(void) {
0109     struct timespec time;
0110     long long nano_total;
0111      clock_gettime(CLOCK_ID, &time);
0112      nano_total = time.tv_sec;
0113      nano_total *= NANO_SCALE;
0114      nano_total += time.tv_nsec;
0115      return nano_total;
0116 }
0117 
0118 static inline long long get_res(void) {
0119     struct timespec time;
0120     long long nano_total;
0121     clock_getres(CLOCK_ID, &time);
0122     nano_total = time.tv_sec;
0123     nano_total *= NANO_SCALE;
0124     nano_total += time.tv_nsec;
0125     return nano_total;
0126 }
0127 #endif
0128 #endif
/* Bookkeeping for one timed measurement. All time fields are expressed in
 * the base units returned by get_time(). */
struct perf {
    long long start;        /* timer reading at the latest perf_continue()/perf_pause() */
    long long stop;         /* timer reading at the latest perf_pause() */
    long long run_total;    /* accumulated (stop - start) over all timed spans */
    long long iterations;   /* call count recorded by the benchmark macros */
};
0135 
0136 static inline void perf_init(struct perf *p) {
0137     p->start = 0;
0138     p->stop = 0;
0139     p->run_total = 0;
0140 }
0141 
0142 static inline void perf_continue(struct perf *p) {
0143     p->start = get_time();
0144 }
0145 
0146 static inline void perf_pause(struct perf *p) {
0147     p->stop = get_time();
0148     p->run_total = p->run_total + p->stop - p->start;
0149     p->start = p->stop;
0150 }
0151 
/* Start a fresh measurement: reset the timing fields of *p via perf_init()
 * and then open a timed span via perf_continue(). */
static inline void perf_start(struct perf *p)
{
    perf_init(p);       /* discard any previously accumulated time */
    perf_continue(p);   /* stamp the new start time */
}
0156 
/* End a measurement. This is a thin alias for perf_pause(): the elapsed
 * span is folded into p->run_total. */
static inline void perf_stop(struct perf *p)
{
    perf_pause(p);
}
0160 
0161 static inline double get_time_elapsed(struct perf *p) {
0162     return 1.0 * p->run_total / UNIT_SCALE;
0163 }
0164 
0165 static inline long long get_base_elapsed(struct perf *p) {
0166     return p->run_total;
0167 }
0168 
/* Estimate how many calls are needed to fill `total` base units of time,
 * given that `runs` calls took the time currently accumulated in *p.
 *
 * The result is ceil(total * runs / elapsed). When no time has been
 * accumulated (elapsed <= 0) the timer resolution from get_res() is used as
 * the divisor instead. The arithmetic is carried out in unsigned long long;
 * the product total * runs is not checked for overflow. */
static inline unsigned long long estimate_perf_iterations(struct perf *p,
                                                          unsigned long long runs,
                                                          unsigned long long total)
{
    const unsigned long long scaled = total * runs;
    long long divisor = get_base_elapsed(p);

    if (divisor <= 0) {
        /* Nothing measurable yet: fall back to one timer tick. */
        divisor = get_res();
    }
    return (scaled + divisor - 1) / divisor;
}
0178 
/* CALLIBRATE(PERF, FUNC_CALL)
 *
 * Find an iteration count for FUNC_CALL whose timed batch lasts at least
 * CALLIBRATE_TIME base units. One call is timed first; then, while the last
 * timed batch is shorter than CALLIBRATE_TIME, a new count is estimated for
 * a target of 2 * CALLIBRATE_TIME and that many calls are timed from
 * scratch. The final count is stored in (PERF)->iterations and the time of
 * the last batch remains in PERF. FUNC_CALL is expanded textually and is
 * executed many times. */
#define CALLIBRATE(PERF, FUNC_CALL) {                                   \
    unsigned long long _i, _iter = 1;                                   \
                                                                        \
    /* Initial sample: a single call. */                                \
    perf_start(PERF);                                                   \
    FUNC_CALL;                                                          \
    perf_pause(PERF);                                                   \
                                                                        \
    while (get_base_elapsed(PERF) < CALLIBRATE_TIME) {                  \
        _iter = estimate_perf_iterations(PERF, _iter,                   \
                                         2 * CALLIBRATE_TIME);          \
        perf_start(PERF);                                               \
        for (_i = 0; _i < _iter; _i++) {                                \
            FUNC_CALL;                                                  \
        }                                                               \
        perf_stop(PERF);                                                \
    }                                                                   \
    (PERF)->iterations = _iter;                                         \
}
0196 
/* PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL)
 *
 * Time FUNC_CALL for roughly RUN_TIME * UNIT_SCALE base units. The iteration
 * count and elapsed time already held in PERF (for example from CALLIBRATE)
 * are used to estimate how many calls fill that target. After the first
 * batch, if BENCHMARK_TYPE is BENCHMARK_MIN_TIME and the target has not been
 * reached, a second batch sized for the shortfall plus UNIT_SCALE / 16 is
 * run and added on. On exit PERF holds the accumulated time and the total
 * number of calls made. FUNC_CALL is expanded textually. */
#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL) {                   \
    unsigned long long _i, _iter = (PERF)->iterations;                  \
    unsigned long long _run_total = RUN_TIME;                           \
                                                                        \
    _run_total *= UNIT_SCALE;                                           \
    _iter = estimate_perf_iterations(PERF, _iter, _run_total);          \
                                                                        \
    /* First batch: restart the clock and the iteration tally. */       \
    (PERF)->iterations = 0;                                             \
    perf_start(PERF);                                                   \
    for (_i = 0; _i < _iter; _i++) {                                    \
        FUNC_CALL;                                                      \
    }                                                                   \
    perf_pause(PERF);                                                   \
    (PERF)->iterations += _iter;                                        \
                                                                        \
    /* Top-up batch when a minimum run time is required. */             \
    if (BENCHMARK_TYPE == BENCHMARK_MIN_TIME &&                         \
        get_base_elapsed(PERF) < _run_total) {                          \
        _iter = estimate_perf_iterations(PERF, _iter,                   \
                    _run_total - get_base_elapsed(PERF) +               \
                    (UNIT_SCALE / 16));                                 \
        perf_continue(PERF);                                            \
        for (_i = 0; _i < _iter; _i++) {                                \
            FUNC_CALL;                                                  \
        }                                                               \
        perf_pause(PERF);                                               \
        (PERF)->iterations += _iter;                                    \
    }                                                                   \
}
0223 
/* BENCHMARK(PERF, RUN_TIME, FUNC_CALL)
 *
 * Top-level entry point. With a positive RUN_TIME the call is calibrated
 * (CALLIBRATE) and then measured (PERFORMANCE_TEST). Otherwise FUNC_CALL is
 * timed exactly once and (PERF)->iterations is set to 1; the assignment
 * comes before perf_start(), which does not touch iterations. */
#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL) {                          \
    if ((RUN_TIME) > 0) {                                               \
        CALLIBRATE(PERF, FUNC_CALL);                                    \
        PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL);                    \
    } else {                                                            \
        (PERF)->iterations = 1;                                         \
        perf_start(PERF);                                               \
        FUNC_CALL;                                                      \
        perf_stop(PERF);                                                \
    }                                                                   \
}
0236 
0237 #ifdef USE_CYCLES
0238 static inline void perf_print(struct perf p, long long unit_count) {
0239     long long total_units = p.iterations * unit_count;
0240 
0241     printf("runtime = %10lld ticks", get_base_elapsed(&p));
0242     if (total_units != 0) {
0243         printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte",
0244                total_units / (1000000), get_time_elapsed(&p),
0245                get_base_elapsed(&p) / (double)total_units);
0246     }
0247     printf("\n");
0248 }
0249 #else
0250 static inline void perf_print(struct perf p, double unit_count) {
0251     long long total_units = p.iterations * unit_count;
0252     long long usecs = (long long)(get_time_elapsed(&p) * 1000000);
0253 
0254     printf("runtime = %10lld usecs", usecs);
0255     if (total_units != 0) {
0256         printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s",
0257                total_units / (1000000), get_time_elapsed(&p),
0258                ((double)total_units) / (1000000 * get_time_elapsed(&p)));
0259     }
0260     printf("\n");
0261 }
0262 #endif
0263 
/* Return the size in bytes of the file behind fp without disturbing the
 * stream's current position.
 *
 * The current position is saved with fgetpos(), the stream is moved to its
 * end, the end offset is read with the platform's 64-bit tell function, and
 * the saved position is restored with fsetpos().
 *
 * The offset is obtained through ftello()/_ftelli64() rather than by
 * reinterpreting an fpos_t as an integer: fpos_t is an opaque type and its
 * layout is not something portable code may rely on.
 *
 * Returns 0 if the position cannot be saved, the seek fails, or the end
 * offset cannot be determined. */
static inline uint64_t get_filesize(FILE *fp)
{
    uint64_t file_size = 0;
    long long end_offset = -1;
    fpos_t saved_pos;

    /* Without a saved position the stream could not be restored. */
    if (fgetpos(fp, &saved_pos) != 0) {
        return 0;
    }

#if defined(_WIN32) || defined(_WIN64)
    if (_fseeki64(fp, 0, SEEK_END) == 0) {
        end_offset = (long long)_ftelli64(fp);
    }
#else
    if (fseeko(fp, 0, SEEK_END) == 0) {
        end_offset = (long long)ftello(fp);
    }
#endif
    if (end_offset > 0) {
        file_size = (uint64_t)end_offset;
    }

    fsetpos(fp, &saved_pos);    /* Restore position */

    return file_size;
}
0280 
0281 #ifdef __cplusplus
0282 }
0283 #endif
0284 
0285 #endif // _TEST_H