File indexing completed on 2026-04-09 07:49:24
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 #include <math.h>
0014 #include <stdint.h>
0015
0016 #define PORTABLE (1)
0017
0018 float njuffa_logf (float a);
0019 #if !PORTABLE
0020 #include "immintrin.h"
0021 float sse_recipf (float a);
0022 float sse_rsqrtf (float a);
0023 #endif
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033 float njuffa_erfcinvf (float a)
0034 {
0035 float r;
0036
0037 if ((a >= 2.1875e-3f) && (a <= 1.998125f)) {
0038 float p, t;
0039 t = fmaf (-a, a, a + a);
0040 t = njuffa_logf (t);
0041 p = 5.43877832e-9f;
0042 p = fmaf (p, t, 1.43286059e-7f);
0043 p = fmaf (p, t, 1.22775396e-6f);
0044 p = fmaf (p, t, 1.12962631e-7f);
0045 p = fmaf (p, t, -5.61531961e-5f);
0046 p = fmaf (p, t, -1.47697705e-4f);
0047 p = fmaf (p, t, 2.31468701e-3f);
0048 p = fmaf (p, t, 1.15392562e-2f);
0049 p = fmaf (p, t, -2.32015476e-1f);
0050 t = fmaf (p, t, 8.86226892e-1f);
0051 r = fmaf (t, -a, t);
0052 } else {
0053 float p, q, s, t;
0054 t = (a >= 1.0f) ? (2.0f - a) : a;
0055 t = 0.0f - njuffa_logf (t);
0056 #if PORTABLE
0057 s = sqrtf (1.0f / t);
0058 #else
0059 s = sse_rsqrtf (t);
0060 #endif
0061 p = 2.23100796e+1f;
0062 p = fmaf (p, s, -5.23008537e+1f);
0063 p = fmaf (p, s, 5.44409714e+1f);
0064 p = fmaf (p, s, -3.35030403e+1f);
0065 p = fmaf (p, s, 1.38580027e+1f);
0066 p = fmaf (p, s, -4.37277269e+0f);
0067 p = fmaf (p, s, 1.53075826e+0f);
0068 p = fmaf (p, s, 2.97993328e-2f);
0069 p = fmaf (p, s, -3.71997419e-4f);
0070 p = fmaf (p, s, s);
0071 #if PORTABLE
0072 r = 1.0f / p;
0073 #else
0074 r = sse_recipf (p);
0075 if (t == INFINITY) r = t;
0076 #endif
0077 if (a >= 1.0f) r = 0.0f - r;
0078 }
0079 return r;
0080 }
0081
0082
0083
0084
0085 float njuffa_normcdfinvf (float a)
0086 {
0087 return fmaf (-1.41421356f, njuffa_erfcinvf (a + a), 0.0f);
0088 }
0089
0090
0091 float njuffa_logf (float a)
0092 {
0093 float m, r, s, t, i, f;
0094 int32_t e;
0095 const float cutoff_f = 0.666666667f;
0096 if ((a > 0.0f) && (a <= 0x1.fffffep+127f)) {
0097 m = frexpf (a, &e);
0098 if (m < cutoff_f) {
0099 m = m + m;
0100 e = e - 1;
0101 }
0102 i = (float)e;
0103 f = m - 1.0f;
0104 s = f * f;
0105
0106 r = -0x1.0ae000p-3f;
0107 t = 0x1.208000p-3f;
0108 r = fmaf (r, s, -0x1.f1988ap-4f);
0109 t = fmaf (t, s, 0x1.1e5740p-3f);
0110 r = fmaf (r, s, -0x1.55b36ep-3f);
0111 t = fmaf (t, s, 0x1.99d8b2p-3f);
0112 r = fmaf (r, s, -0x1.fffe02p-3f);
0113 r = fmaf (t, f, r);
0114 r = fmaf (r, f, 0x1.5554fap-2f);
0115 r = fmaf (r, f, -0x1.000000p-1f);
0116 r = fmaf (r, s, f);
0117 r = fmaf (i, 0x1.62e430p-01f, r);
0118 } else {
0119 r = a + a;
0120 if (a < 0.0f) r = 0.0f/0.0f;
0121 if (a == 0.0f) r = -INFINITY;
0122 }
0123 return r;
0124 }
0125
0126 #if !PORTABLE
0127 float sse_recipf (float a)
0128 {
0129 __m128 t;
0130 float e, r;
0131 t = _mm_set_ss (a);
0132 t = _mm_rcp_ss (t);
0133 _mm_store_ss (&r, t);
0134 e = fmaf (0.0f - a, r, 1.0f);
0135 e = fmaf (e, e, e);
0136 r = fmaf (e, r, r);
0137 return r;
0138 }
0139
0140
0141 float sse_rsqrtf (float a)
0142 {
0143 __m128 t;
0144 float e, r;
0145 t = _mm_set_ss (a);
0146 t = _mm_rsqrt_ss (t);
0147 _mm_store_ss (&r, t);
0148 e = fmaf (0.0f - a, r * r, 1.0f);
0149 r = fmaf (fmaf (0.375f, e, 0.5f), e * r, r);
0150 return r;
0151 }
0152 #endif
0153