File indexing completed on 2025-12-16 10:14:57
0001 from peachpy import *
0002 from peachpy.x86_64 import *
0003
0004
def fp16_alt_xmm_to_fp32_ymm(xmm_half):
    """Emit AVX2 instructions that widen eight packed halves to eight fp32.

    The input is treated as eight 16-bit half-precision values.  Only two
    cases are distinguished: an all-zero exponent field (zero/denormal),
    handled with a magic-number subtraction, and everything else, handled by
    re-biasing the exponent.  There is no Inf/NaN special case, so the
    largest half exponent is converted like any other normal number
    (presumably this is what "alt" in the name refers to, i.e. the ARM
    alternative half format -- confirm against callers).

    The PeachPy instruction calls are made for their effect of being added
    to the code currently being generated; nothing is computed in Python.

    :param xmm_half: XMM register holding the eight 16-bit inputs; its
        ``as_ymm`` view is read by the first instruction.
    :return: a freshly allocated ``YMMRegister`` that receives the eight
        converted fp32 values.
    """
    # Duplicate each 64-bit half of the input inside its own 128-bit lane:
    # lane 0 = (q0, q0), lane 1 = (q1, q1).  The unpack instructions below
    # work per lane, so this puts inputs 0-3 and 4-7 where they can reach.
    halves_dup = YMMRegister()
    VPERMQ(halves_dup, xmm_half.as_ymm, 0b01010000)

    # All-zero register.  The VEX-encoded 128-bit XOR also clears the upper
    # 128 bits of the YMM register.
    zeros = YMMRegister()
    VPXOR(zeros.as_xmm, zeros.as_xmm, zeros.as_xmm)

    # Interleave zero (low word) with each half (high word):
    # every 32-bit element becomes half << 16.
    half_hi16 = YMMRegister()
    VPUNPCKLWD(half_hi16, zeros, halves_dup)

    # half + half in 16-bit elements == half << 1, which drops the sign bit.
    # Used only by the denormal path further down.
    halves_x2 = YMMRegister()
    VPADDW(halves_x2, halves_dup, halves_dup)

    # Same doubling in 32-bit elements: the sign bit is shifted out of bit 31.
    magnitude_shl1 = YMMRegister()
    VPADDD(magnitude_shl1, half_hi16, half_hi16)

    # -0.0 has only bit 31 set, so it serves as the fp32 sign-bit mask.
    sign_bit_const = Constant.float32x8(-0.0)

    # The half's sign already sits at bit 31 of half_hi16; isolate it.
    sign_bits = YMMRegister()
    VANDPS(sign_bits, half_hi16, sign_bit_const)

    # (<< 1 then >> 4) is a net logical shift right by 3 with the sign
    # removed: the 5-bit half exponent lands in bits 27..23 and the 10-bit
    # mantissa in bits 22..13, i.e. aligned with the fp32 field layout.
    magnitude_shr3 = YMMRegister()
    VPSRLD(magnitude_shr3, magnitude_shl1, 4)

    # (127 - 15) << 23: difference between the fp32 and half exponent biases.
    exponent_rebias = Constant.uint32x8(0x38000000)

    # Normal-number result (without sign): re-bias the exponent field.
    normal_bits = YMMRegister()
    VPADDD(normal_bits, magnitude_shr3, exponent_rebias)

    # Denormal path: build the fp32 bit pattern 0x3E800000 | (half << 1).
    # 0x3E800000 is 0.25, so the doubled half occupies the low mantissa bits
    # of a float slightly above 0.25 ...
    magic_hi16 = Constant.uint16x16(0x3E80)
    denormal_bits = YMMRegister()
    VPUNPCKLWD(denormal_bits, halves_x2, magic_hi16)

    # ... and subtracting 0.25 leaves (half << 1) * 2**-25, which for a
    # zero-exponent input equals mantissa * 2**-24, the denormal's value.
    magic_float = Constant.float32x8(0.25)
    VSUBPS(denormal_bits, denormal_bits, magic_float)

    # 0x00800000 is the smallest value with a non-zero exponent field.
    smallest_normal = YMMRegister()
    smallest_normal_const = Constant.uint32x8(0x00800000)
    VMOVDQA(smallest_normal, smallest_normal_const)

    # All-ones where the shifted magnitude is below the cutoff, i.e. where
    # the half's exponent field is zero (zero or denormal input).  The top
    # bits of magnitude_shr3 are zero, so the signed compare is safe.
    is_denormal = YMMRegister()
    VPCMPGTD(is_denormal, smallest_normal, magnitude_shr3)

    # Per element: take the denormal result where the mask is set,
    # otherwise the re-biased normal result.
    magnitude = YMMRegister()
    VBLENDVPS(magnitude, normal_bits, denormal_bits, is_denormal)

    # Re-attach the sign bit.
    result = YMMRegister()
    VORPS(result, magnitude, sign_bits)

    return result