File indexing completed on 2025-12-16 10:14:57
0001 from peachpy import *
0002 from peachpy.x86_64 import *
0003
0004
0005 def fp16_alt_xmm_to_fp32_xmm(xmm_half):
# Emit (via PeachPy instruction calls) an AVX sequence that widens the four
# 16-bit values in the low half of `xmm_half` to 32-bit floats. Lanes with a
# non-zero exponent field are converted with integer shifts and an exponent
# re-bias; zero/denormal lanes are converted with a magic-bias subtraction.
#
# xmm_half: XMM register operand whose low four 16-bit words are read as
#     half-precision bit patterns (presumably the ARM "alternative" format,
#     going by the `fp16_alt` name -- confirm against callers).
# Returns: the newly allocated XMMRegister that receives the four fp32 results.
# NOTE: an exponent field of 31 is re-biased like any other non-zero exponent;
#     there is no Inf/NaN special case in this sequence.
#
# Zero register: supplies the low 16 bits of every widened lane below.
0006 xmm_zero = XMMRegister()
0007 VPXOR(xmm_zero, xmm_zero, xmm_zero)
0008
# Interleave zero (low word) with each half (high word): every 32-bit lane
# becomes half << 16, which puts the fp16 sign bit at bit 31.
0009 xmm_word = XMMRegister()
0010 VPUNPCKLWD(xmm_word, xmm_zero, xmm_half)
0011
# 16-bit self-add == half << 1 with the sign bit shifted out; consumed by the
# zero/denormal path further down.
0012 xmm_shl1_half = XMMRegister()
0013 VPADDW(xmm_shl1_half, xmm_half, xmm_half)
0014
# 32-bit self-add == word << 1: drops the sign bit from each widened lane.
0015 xmm_shl1_nonsign = XMMRegister()
0016 VPADDD(xmm_shl1_nonsign, xmm_word, xmm_word)
0017
# -0.0 has only bit 31 set, so the AND below isolates each lane's sign bit.
0018 sign_mask = Constant.float32x4(-0.0)
0019
0020 xmm_sign = XMMRegister()
0021 VANDPS(xmm_sign, xmm_word, sign_mask)
0022
# (word << 1) >> 4 equals the sign-cleared word >> 3 (hence "shr3"): the 5-bit
# exponent lands in bits 23-27 and the 10-bit mantissa in bits 13-22, i.e. at
# the fp32 field positions.
0023 xmm_shr3_nonsign = XMMRegister()
0024 VPSRLD(xmm_shr3_nonsign, xmm_shl1_nonsign, 4)
0025
# 0x38000000 == 112 << 23 == (127 - 15) << 23: moves the exponent from the
# fp16 bias to the fp32 bias. Only meaningful when the exponent field is
# non-zero; other lanes are replaced by the blend below.
0026 exp_offset = Constant.uint32x4(0x38000000)
0027
0028 xmm_norm_nonsign = XMMRegister()
0029 VPADDD(xmm_norm_nonsign, xmm_shr3_nonsign, exp_offset)
0030
# Zero/denormal path: put 0x3E80 in the high word above the doubled half.
# 0x3E800000 is 0.25f, whose mantissa counts in units of 2**-25, so each lane
# reads as the float 0.25 + h * 2**-24 (h = sign-less half bits).
0031 magic_mask = Constant.uint16x8(0x3E80)
0032 xmm_denorm_nonsign = XMMRegister()
0033 VPUNPCKLWD(xmm_denorm_nonsign, xmm_shl1_half, magic_mask)
0034
# Subtracting 0.25 leaves h * 2**-24, the value of an fp16 denormal with
# mantissa h (and 0.0 for h == 0). Lanes with a non-zero exponent hold a
# meaningless value here and are discarded by the blend.
0035 magic_bias = Constant.float32x4(0.25)
0036 VSUBPS(xmm_denorm_nonsign, xmm_denorm_nonsign, magic_bias)
0037
# 0x00800000 == 1 << 23: the smallest shifted value whose exponent field is
# non-zero.
0038 xmm_denorm_cutoff = XMMRegister()
0039 VMOVDQA(xmm_denorm_cutoff, Constant.uint32x4(0x00800000))
0040
# Signed 32-bit compare: all-ones in lanes where cutoff > value, i.e. where
# the exponent field is zero (zero or denormal input).
0041 xmm_denorm_mask = XMMRegister()
0042 VPCMPGTD(xmm_denorm_mask, xmm_denorm_cutoff, xmm_shr3_nonsign)
0043
# Per lane: take the denormal-path result where the mask is set, the
# re-biased result elsewhere.
0044 xmm_nonsign = XMMRegister()
0045 VBLENDVPS(xmm_nonsign, xmm_norm_nonsign, xmm_denorm_nonsign, xmm_denorm_mask)
0046
# Re-attach the sign bit isolated earlier.
0047 xmm_float = XMMRegister()
0048 VORPS(xmm_float, xmm_nonsign, xmm_sign)
0049
0050 return xmm_float