Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 10:14:57

0001 from peachpy import *
0002 from peachpy.x86_64 import *
0003 
0004 
def fp16_alt_xmm_to_fp32_ymm(xmm_half):
    """Emit the instruction sequence that widens eight 16-bit floats to 32-bit.

    The eight 16-bit elements of ``xmm_half`` are converted lane by lane and
    the eight 32-bit results are produced in a freshly allocated YMM register.
    Every element is converted along two paths -- one valid when the 5-bit
    exponent field is non-zero, one valid when it is zero (zero/subnormal
    inputs) -- and the right one is chosen per element with a blend.

    Inputs whose exponent field is all ones receive no Inf/NaN treatment; they
    go through the same exponent rebias as any other normalized value.
    NOTE(review): the ``alt`` in the name presumably refers to ARM's
    alternative half-precision format, which matches that behaviour -- confirm.

    :param xmm_half: register object exposing ``as_ymm`` whose 128 bits hold
        the eight 16-bit inputs.
    :return: the ``YMMRegister`` that receives the eight converted values.
    """
    # Qword selector 0b01_01_00_00 yields [q0, q0, q1, q1]: the low 128-bit
    # lane gets inputs 0..3 and the high lane gets inputs 4..7, which is where
    # the per-lane "unpack low" instructions below look for their data.
    halves = YMMRegister()
    VPERMQ(halves, xmm_half.as_ymm, 0b01010000)

    # XOR on the XMM view; a VEX-encoded 128-bit operation also clears the
    # upper half of the full YMM register.
    zeros = YMMRegister()
    VPXOR(zeros.as_xmm, zeros.as_xmm, zeros.as_xmm)

    # Interleave a zero low word with each input: every dword is (half << 16).
    words = YMMRegister()
    VPUNPCKLWD(words, zeros, halves)

    # Adding a value to itself shifts it left by one, pushing the sign bit out.
    # 16-bit form (used by the zero-exponent path) ...
    doubled_halves = YMMRegister()
    VPADDW(doubled_halves, halves, halves)

    # ... and 32-bit form (used by the non-zero-exponent path).
    doubled_words = YMMRegister()
    VPADDD(doubled_words, words, words)

    # -0.0 has only bit 31 set, so it serves as the sign-bit mask.
    sign_bit = Constant.float32x8(-0.0)

    sign = YMMRegister()
    VANDPS(sign, words, sign_bit)

    # Left by 1 then right by 4: relative to (half << 16) the exponent and
    # mantissa bits end up 3 positions lower, i.e. the 5-bit exponent sits at
    # bits 27..23 and the 10-bit mantissa at bits 22..13; bits 31..28 are zero.
    magnitude_bits = YMMRegister()
    VPSRLD(magnitude_bits, doubled_words, 4)

    # 0x38000000 == (127 - 15) << 23: the gap between the 32-bit and 16-bit
    # exponent biases, positioned at the 32-bit exponent field.
    exponent_rebias = Constant.uint32x8(0x38000000)

    normal_result = YMMRegister()
    VPADDD(normal_result, magnitude_bits, exponent_rebias)

    # Zero-exponent path: pair (half << 1) as the low word with 0x3E80 as the
    # high word. 0x3E800000 is the bit pattern of 0.25f, so for a zero exponent
    # field the dword reads as 0.25 + mantissa * 2**-24.
    quarter_high_word = Constant.uint16x16(0x3E80)
    subnormal_result = YMMRegister()
    VPUNPCKLWD(subnormal_result, doubled_halves, quarter_high_word)

    # Removing the 0.25 bias leaves mantissa * 2**-24.
    quarter = Constant.float32x8(0.25)
    VSUBPS(subnormal_result, subnormal_result, quarter)

    # 0x00800000 is the smallest pattern with a non-zero exponent field.
    min_normal = YMMRegister()
    min_normal_bits = Constant.uint32x8(0x00800000)
    VMOVDQA(min_normal, min_normal_bits)

    # All-ones where min_normal > magnitude_bits, i.e. where the exponent field
    # is zero. The compare is signed, which is safe because the top four bits
    # of magnitude_bits are always clear.
    is_subnormal = YMMRegister()
    VPCMPGTD(is_subnormal, min_normal, magnitude_bits)

    # Per element: zero-exponent result where the mask is set, rebias result
    # everywhere else.
    magnitude = YMMRegister()
    VBLENDVPS(magnitude, normal_result, subnormal_result, is_subnormal)

    # Put the sign bit back on top of the magnitude.
    result = YMMRegister()
    VORPS(result, magnitude, sign)

    return result