| ;; |
| ;; Vector Primitevs |
| ;; |
| |
| ;; TODO(4, rossberg): avoid hardcoding v128 |
| |
| |
| ;; Lanes |
| |
| def $lanes_(shape, vec_(V128)) : lane_($lanetype(shape))* hint(inverse $inv_lanes_) |
| |
| def $inv_lanes_(shape, lane_($lanetype(shape))*) : vec_(V128) |
| hint(show $lanes_(%)^(-1)#((%))) |
| |
| ;; TODO(3, rossberg): implement numerics internally |
| def $lanes_ hint(builtin) |
| def $inv_lanes_ hint(builtin) |
| |
| def $zeroop(shape_1, shape_2, vcvtop__(shape_1, shape_2)) : zero? |
| def $zeroop(Jnn_1 X M_1, Jnn_2 X M_2, EXTEND half sx) = eps |
| def $zeroop(Jnn_1 X M_1, Fnn_2 X M_2, CONVERT half? sx) = eps |
| def $zeroop(Fnn_1 X M_1, Jnn_2 X M_2, TRUNC_SAT sx zero?) = zero? |
| def $zeroop(Fnn_1 X M_1, Jnn_2 X M_2, RELAXED_TRUNC sx zero?) = zero? |
| def $zeroop(Fnn_1 X M_1, Fnn_2 X M_2, DEMOTE zero) = zero |
| def $zeroop(Fnn_1 X M_1, Fnn_2 X M_2, PROMOTE LOW) = eps |
| |
| def $halfop(shape_1, shape_2, vcvtop__(shape_1, shape_2)) : half? |
| def $halfop(Jnn_1 X M_1, Jnn_2 X M_2, EXTEND half sx) = half |
| def $halfop(Jnn_1 X M_1, Fnn_2 X M_2, CONVERT half? sx) = half? |
| def $halfop(Fnn_1 X M_1, Jnn_2 X M_2, TRUNC_SAT sx zero?) = eps |
| def $halfop(Fnn_1 X M_1, Jnn_2 X M_2, RELAXED_TRUNC sx zero?) = eps |
| def $halfop(Fnn_1 X M_1, Fnn_2 X M_2, DEMOTE zero) = eps |
| def $halfop(Fnn_1 X M_1, Fnn_2 X M_2, PROMOTE LOW) = LOW |
| |
| def $half(half, nat, nat) : nat |
| def $half(LOW, i, j) = i |
| def $half(HIGH, i, j) = j |
| |
| |
| def $iswizzle_lane_(N, iN(N)*, iN(N)) : iN(N) |
| def $irelaxed_swizzle_lane_(N, iN(N)*, iN(N)) : iN(N) |
| |
| def $iswizzle_lane_(N, c*, i) = c*[i] -- if i < |c*| |
| def $iswizzle_lane_(N, c*, i) = 0 -- otherwise |
| |
| def $irelaxed_swizzle_lane_(N, c*, i) = c*[i] -- if i < |c*| |
| def $irelaxed_swizzle_lane_(N, c*, i) = 0 -- if $signed_(N, i) < 0 |
| def $irelaxed_swizzle_lane_(N, c*, i) = $relaxed2($R_swizzle, iN(N), 0, c*[i \ |c*|]) |
| -- otherwise |
| |
| |
| ;; Lanewise operations |
| |
| def $ivunop_(shape, def $f_(N, iN(N)) : iN(N), vec_(V128)) : vec_(V128)* |
| def $fvunop_(shape, def $f_(N, fN(N)) : fN(N)*, vec_(V128)) : vec_(V128)* |
| |
| def $ivbinop_(shape, def $f_(N, iN(N), iN(N)) : iN(N), vec_(V128), vec_(V128)) : vec_(V128)* |
| def $ivbinopsx_(shape, def $f_(N, sx, iN(N), iN(N)) : iN(N), sx, vec_(V128), vec_(V128)) : vec_(V128)* |
| def $ivbinopsxnd_(shape, def $f_(N, sx, iN(N), iN(N)) : iN(N)*, sx, vec_(V128), vec_(V128)) : vec_(V128)* |
| def $fvbinop_(shape, def $f_(N, fN(N), fN(N)) : fN(N)*, vec_(V128), vec_(V128)) : vec_(V128)* |
| |
| def $ivternopnd_(shape, def $f_(N, iN(N), iN(N), iN(N)) : iN(N)*, vec_(V128), vec_(V128), vec_(V128)) : vec_(V128)* |
| def $fvternop_(shape, def $f_(N, fN(N), fN(N), fN(N)) : fN(N)*, vec_(V128), vec_(V128), vec_(V128)) : vec_(V128)* |
| |
| def $ivrelop_(shape, def $f_(N, iN(N), iN(N)) : u32, vec_(V128), vec_(V128)) : vec_(V128) |
| def $ivrelopsx_(shape, def $f_(N, sx, iN(N), iN(N)) : u32, sx, vec_(V128), vec_(V128)) : vec_(V128) |
| def $fvrelop_(shape, def $f_(N, fN(N), fN(N)) : u32, vec_(V128), vec_(V128)) : vec_(V128) |
| |
| def $ivshiftop_(shape, def $f_(N, iN(N), u32) : iN(N), vec_(V128), u32) : vec_(V128) |
| def $ivshiftopsx_(shape, def $f_(N, sx, iN(N), u32) : iN(N), sx, vec_(V128), u32) : vec_(V128) |
| |
| def $ivbitmaskop_(shape, vec_(V128)) : u32 |
| def $ivswizzlop_(shape, def $f_(N, iN(N)*, iN(N)) : iN(N), vec_(V128), vec_(V128)) : vec_(V128) |
| def $ivshufflop_(shape, laneidx*, vec_(V128), vec_(V128)) : vec_(V128) |
| |
| |
| def $ivunop_(Jnn X M, def $f_, v_1) = $inv_lanes_(Jnn X M, c*) |
| -- if c_1* = $lanes_(Jnn X M, v_1) |
| -- if c* = $f_($lsizenn(Jnn), c_1)* |
| |
| def $fvunop_(Fnn X M, def $f_, v_1) = $inv_lanes_(Fnn X M, c*)* |
| -- if c_1* = $lanes_(Fnn X M, v_1) |
| -- if c** = $setproduct_(lane_(Fnn), $f_($sizenn(Fnn), c_1)*) |
| |
| |
| def $ivbinop_(Jnn X M, def $f_, v_1, v_2) = $inv_lanes_(Jnn X M, c*) |
| -- if c_1* = $lanes_(Jnn X M, v_1) |
| -- if c_2* = $lanes_(Jnn X M, v_2) |
| -- if c* = $f_($lsizenn(Jnn), c_1, c_2)* |
| |
| def $ivbinopsx_(Jnn X M, def $f_, sx, v_1, v_2) = $inv_lanes_(Jnn X M, c*) |
| -- if c_1* = $lanes_(Jnn X M, v_1) |
| -- if c_2* = $lanes_(Jnn X M, v_2) |
| -- if c* = $f_($lsizenn(Jnn), sx, c_1, c_2)* |
| |
| def $ivbinopsxnd_(Jnn X M, def $f_, sx, v_1, v_2) = $inv_lanes_(Jnn X M, c*)* |
| -- if c_1* = $lanes_(Jnn X M, v_1) |
| -- if c_2* = $lanes_(Jnn X M, v_2) |
| -- if c** = $setproduct_(lane_(Jnn), $f_($lsizenn(Jnn), sx, c_1, c_2)*) |
| |
| def $fvbinop_(Fnn X M, def $f_, v_1, v_2) = $inv_lanes_(Fnn X M, c*)* |
| -- if c_1* = $lanes_(Fnn X M, v_1) |
| -- if c_2* = $lanes_(Fnn X M, v_2) |
| -- if c** = $setproduct_(lane_(Fnn), $f_($sizenn(Fnn), c_1, c_2)*) |
| |
| |
| def $ivternopnd_(Jnn X M, def $f_, v_1, v_2, v_3) = $inv_lanes_(Jnn X M, c*)* |
| -- if c_1* = $lanes_(Jnn X M, v_1) |
| -- if c_2* = $lanes_(Jnn X M, v_2) |
| -- if c_3* = $lanes_(Jnn X M, v_3) |
| -- if c** = $setproduct_(lane_(Jnn), $f_($lsizenn(Jnn), c_1, c_2, c_3)*) |
| |
| def $fvternop_(Fnn X M, def $f_, v_1, v_2, v_3) = $inv_lanes_(Fnn X M, c*)* |
| -- if c_1* = $lanes_(Fnn X M, v_1) |
| -- if c_2* = $lanes_(Fnn X M, v_2) |
| -- if c_3* = $lanes_(Fnn X M, v_3) |
| -- if c** = $setproduct_(lane_(Fnn), $f_($sizenn(Fnn), c_1, c_2, c_3)*) |
| |
| |
| def $ivrelop_(Jnn X M, def $f_, v_1, v_2) = $inv_lanes_(Jnn X M, c*) |
| -- if c_1* = $lanes_(Jnn X M, v_1) |
| -- if c_2* = $lanes_(Jnn X M, v_2) |
| -- if c* = $extend__(1, $lsizenn(Jnn), S, $f_($lsizenn(Jnn), c_1, c_2))* |
| |
| def $ivrelopsx_(Jnn X M, def $f_, sx, v_1, v_2) = $inv_lanes_(Jnn X M, c*) |
| -- if c_1* = $lanes_(Jnn X M, v_1) |
| -- if c_2* = $lanes_(Jnn X M, v_2) |
| -- if c* = $extend__(1, $lsizenn(Jnn), S, $f_($lsizenn(Jnn), sx, c_1, c_2))* |
| |
| def $fvrelop_(Fnn X M, def $f_, v_1, v_2) = $inv_lanes_(Inn X M, c*) |
| -- if c_1* = $lanes_(Fnn X M, v_1) |
| -- if c_2* = $lanes_(Fnn X M, v_2) |
| -- if c* = $extend__(1, $sizenn(Fnn), S, $f_($sizenn(Fnn), c_1, c_2))* |
| -- if $isize(Inn) = $fsize(Fnn) ;; TODO(3, rossberg): make implicit |
| |
| |
| def $ivshiftop_(Jnn X M, def $f_, v_1, i) = $inv_lanes_(Jnn X M, c*) |
| -- if c_1* = $lanes_(Jnn X M, v_1) |
| -- if c* = $f_($lsizenn(Jnn), c_1, i)* |
| |
| def $ivshiftopsx_(Jnn X M, def $f_, sx, v_1, i) = $inv_lanes_(Jnn X M, c*) |
| -- if c_1* = $lanes_(Jnn X M, v_1) |
| -- if c* = $f_($lsizenn(Jnn), sx, c_1, i)* |
| |
| |
| def $ivbitmaskop_(Jnn X M, v_1) = $irev_(32, c) |
| -- if c_1* = $lanes_(Jnn X M, v_1) |
| -- if $ibits_(32, c) = $ilt_($lsizenn(Jnn), S, c_1, 0)* ++ (0)^(32-M) |
| |
| |
| def $ivswizzlop_(Jnn X M, def $f_, v_1, v_2) = $inv_lanes_(Jnn X M, c*) |
| -- if c_1* = $lanes_(Jnn X M, v_1) |
| -- if c_2* = $lanes_(Jnn X M, v_2) |
| -- if c* = $f_($lsizenn(Jnn), c_1*, c_2)* |
| |
| |
| def $ivshufflop_(Jnn X M, i*, v_1, v_2) = $inv_lanes_(Jnn X M, c*) |
| -- if c_1* = $lanes_(Jnn X M, v_1) |
| -- if c_2* = $lanes_(Jnn X M, v_2) |
| -- if c* = ((c_1* ++ c_2*)[i])* |
| |
| |
| ;; Operator dispatch |
| |
| def $vvunop_(vectype, vvunop, vec_(vectype)) : vec_(vectype)* |
| hint(show %2#$_(%1,%3)) |
| def $vvbinop_(vectype, vvbinop, vec_(vectype), vec_(vectype)) : vec_(vectype)* |
| hint(show %2#$_(%1,%3,%4)) |
| def $vvternop_(vectype, vvternop, vec_(vectype), vec_(vectype), vec_(vectype)) : vec_(vectype)* |
| hint(show %2#$_(%1,%3,%4,%5)) |
| |
| def $vunop_(shape, vunop_(shape), vec_(V128)) : vec_(V128)* |
| hint(show %2#$_(%1,%3)) |
| def $vbinop_(shape, vbinop_(shape), vec_(V128), vec_(V128)) : vec_(V128)* |
| hint(show %2#$_(%1,%3,%4)) |
| def $vternop_(shape, vternop_(shape), vec_(V128), vec_(V128), vec_(V128)) : vec_(V128)* |
| hint(show %2#$_(%1,%3,%4,%5)) |
| def $vrelop_(shape, vrelop_(shape), vec_(V128), vec_(V128)) : vec_(V128) |
| hint(show %2#$_(%1,%3,%4)) |
| |
| def $lcvtop__(shape_1, shape_2, vcvtop__(shape_1, shape_2), lane_($lanetype(shape_1))) : lane_($lanetype(shape_2))* |
| hint(show %3#$__(%1,%2)^(%4)#((%5))) |
| def $vcvtop__(shape_1, shape_2, vcvtop__(shape_1, shape_2), vec_(V128)) : vec_(V128) |
| hint(show %3#$__(%1,%2)^(%4)#((%5))) |
| |
| def $vshiftop_(ishape, vshiftop_(ishape), vec_(V128), u32) : vec_(V128) |
| hint(show %2#$_(%1)#(%3, %4)) |
| def $vbitmaskop_(ishape, vec_(V128)) : u32 |
| hint(show VBITMASK#$_(%1,%2)) |
| def $vswizzlop_(bshape, vswizzlop_(bshape), vec_(V128), vec_(V128)) : vec_(V128) |
| hint(show %2#$_(%1,%3,%4)) |
| def $vshufflop_(bshape, laneidx*, vec_(V128), vec_(V128)) : vec_(V128) |
| hint(show VSHUFFLE#$_(%1,%2,%3,%4)) |
| |
| def $vnarrowop__(shape_1, shape_2, sx, vec_(V128), vec_(V128)) : vec_(V128) |
| hint(show VNARROW#$__(%1,%2)^(%3)#((%4,%5))) |
| |
| def $vextunop__(ishape_1, ishape_2, vextunop__(ishape_1, ishape_2), vec_(V128)) : vec_(V128) |
| hint(show %3#$__(%1,%2,%4)) |
| def $vextbinop__(ishape_1, ishape_2, vextbinop__(ishape_1, ishape_2), vec_(V128), vec_(V128)) : vec_(V128) |
| hint(show %3#$__(%1,%2,%4,%5)) |
| def $vextternop__(ishape_1, ishape_2, vextternop__(ishape_1, ishape_2), vec_(V128), vec_(V128), vec_(V128)) : vec_(V128) |
| hint(show %3#$__(%1,%2,%4,%5,%6)) |
| |
| |
| def $vvunop_(Vnn, NOT, v) = $inot_($vsizenn(Vnn), v) |
| |
| def $vvbinop_(Vnn, AND, v_1, v_2) = $iand_($vsizenn(Vnn), v_1, v_2) |
| def $vvbinop_(Vnn, ANDNOT, v_1, v_2) = $iandnot_($vsizenn(Vnn), v_1, v_2) |
| def $vvbinop_(Vnn, OR, v_1, v_2) = $ior_($vsizenn(Vnn), v_1, v_2) |
| def $vvbinop_(Vnn, XOR, v_1, v_2) = $ixor_($vsizenn(Vnn), v_1, v_2) |
| |
| def $vvternop_(Vnn, BITSELECT, v_1, v_2, v_3) = $ibitselect_($vsizenn(Vnn), v_1, v_2, v_3) |
| |
| def $vunop_(Fnn X M, ABS, v) = $fvunop_(Fnn X M, $fabs_, v) |
| def $vunop_(Fnn X M, NEG, v) = $fvunop_(Fnn X M, $fneg_, v) |
| def $vunop_(Fnn X M, SQRT, v) = $fvunop_(Fnn X M, $fsqrt_, v) |
| def $vunop_(Fnn X M, CEIL, v) = $fvunop_(Fnn X M, $fceil_, v) |
| def $vunop_(Fnn X M, FLOOR, v) = $fvunop_(Fnn X M, $ffloor_, v) |
| def $vunop_(Fnn X M, TRUNC, v) = $fvunop_(Fnn X M, $ftrunc_, v) |
| def $vunop_(Fnn X M, NEAREST, v) = $fvunop_(Fnn X M, $fnearest_, v) |
| |
| def $vunop_(Jnn X M, ABS, v) = $ivunop_(Jnn X M, $iabs_, v) |
| def $vunop_(Jnn X M, NEG, v) = $ivunop_(Jnn X M, $ineg_, v) |
| def $vunop_(Jnn X M, POPCNT, v) = $ivunop_(Jnn X M, $ipopcnt_, v) |
| |
| def $vbinop_(Jnn X M, ADD, v_1, v_2) = $ivbinop_(Jnn X M, $iadd_, v_1, v_2) |
| def $vbinop_(Jnn X M, SUB, v_1, v_2) = $ivbinop_(Jnn X M, $isub_, v_1, v_2) |
| def $vbinop_(Jnn X M, MUL, v_1, v_2) = $ivbinop_(Jnn X M, $imul_, v_1, v_2) |
| def $vbinop_(Jnn X M, ADD_SAT sx, v_1, v_2) = $ivbinopsx_(Jnn X M, $iadd_sat_, sx, v_1, v_2) |
| def $vbinop_(Jnn X M, SUB_SAT sx, v_1, v_2) = $ivbinopsx_(Jnn X M, $isub_sat_, sx, v_1, v_2) |
| def $vbinop_(Jnn X M, MIN sx, v_1, v_2) = $ivbinopsx_(Jnn X M, $imin_, sx, v_1, v_2) |
| def $vbinop_(Jnn X M, MAX sx, v_1, v_2) = $ivbinopsx_(Jnn X M, $imax_, sx, v_1, v_2) |
| def $vbinop_(Jnn X M, AVGR U, v_1, v_2) = $ivbinopsx_(Jnn X M, $iavgr_, U, v_1, v_2) |
| def $vbinop_(Jnn X M, Q15MULR_SAT S, v_1, v_2) = $ivbinopsx_(Jnn X M, $iq15mulr_sat_, S, v_1, v_2) |
| def $vbinop_(Jnn X M, RELAXED_Q15MULR S, v_1, v_2) = $ivbinopsxnd_(Jnn X M, $irelaxed_q15mulr_, S, v_1, v_2) |
| |
| def $vbinop_(Fnn X M, ADD, v_1, v_2) = $fvbinop_(Fnn X M, $fadd_, v_1, v_2) |
| def $vbinop_(Fnn X M, SUB, v_1, v_2) = $fvbinop_(Fnn X M, $fsub_, v_1, v_2) |
| def $vbinop_(Fnn X M, MUL, v_1, v_2) = $fvbinop_(Fnn X M, $fmul_, v_1, v_2) |
| def $vbinop_(Fnn X M, DIV, v_1, v_2) = $fvbinop_(Fnn X M, $fdiv_, v_1, v_2) |
| def $vbinop_(Fnn X M, MIN, v_1, v_2) = $fvbinop_(Fnn X M, $fmin_, v_1, v_2) |
| def $vbinop_(Fnn X M, MAX, v_1, v_2) = $fvbinop_(Fnn X M, $fmax_, v_1, v_2) |
| def $vbinop_(Fnn X M, PMIN, v_1, v_2) = $fvbinop_(Fnn X M, $fpmin_, v_1, v_2) |
| def $vbinop_(Fnn X M, PMAX, v_1, v_2) = $fvbinop_(Fnn X M, $fpmax_, v_1, v_2) |
| def $vbinop_(Fnn X M, RELAXED_MIN, v_1, v_2) = $fvbinop_(Fnn X M, $frelaxed_min_, v_1, v_2) |
| def $vbinop_(Fnn X M, RELAXED_MAX, v_1, v_2) = $fvbinop_(Fnn X M, $frelaxed_max_, v_1, v_2) |
| |
| def $vternop_(Jnn X M, RELAXED_LANESELECT, v_1, v_2, v_3) = $ivternopnd_(Jnn X M, $irelaxed_laneselect_, v_1, v_2, v_3) |
| |
| def $vternop_(Fnn X M, RELAXED_MADD, v_1, v_2, v_3) = $fvternop_(Fnn X M, $frelaxed_madd_, v_1, v_2, v_3) |
| def $vternop_(Fnn X M, RELAXED_NMADD, v_1, v_2, v_3) = $fvternop_(Fnn X M, $frelaxed_nmadd_, v_1, v_2, v_3) |
| |
| def $vrelop_(Jnn X M, EQ, v_1, v_2) = $ivrelop_(Jnn X M, $ieq_, v_1, v_2) |
| def $vrelop_(Jnn X M, NE, v_1, v_2) = $ivrelop_(Jnn X M, $ine_, v_1, v_2) |
| def $vrelop_(Jnn X M, LT sx, v_1, v_2) = $ivrelopsx_(Jnn X M, $ilt_, sx, v_1, v_2) |
| def $vrelop_(Jnn X M, GT sx, v_1, v_2) = $ivrelopsx_(Jnn X M, $igt_, sx, v_1, v_2) |
| def $vrelop_(Jnn X M, LE sx, v_1, v_2) = $ivrelopsx_(Jnn X M, $ile_, sx, v_1, v_2) |
| def $vrelop_(Jnn X M, GE sx, v_1, v_2) = $ivrelopsx_(Jnn X M, $ige_, sx, v_1, v_2) |
| |
| def $vrelop_(Fnn X M, EQ, v_1, v_2) = $fvrelop_(Fnn X M, $feq_, v_1, v_2) |
| def $vrelop_(Fnn X M, NE, v_1, v_2) = $fvrelop_(Fnn X M, $fne_, v_1, v_2) |
| def $vrelop_(Fnn X M, LT, v_1, v_2) = $fvrelop_(Fnn X M, $flt_, v_1, v_2) |
| def $vrelop_(Fnn X M, GT, v_1, v_2) = $fvrelop_(Fnn X M, $fgt_, v_1, v_2) |
| def $vrelop_(Fnn X M, LE, v_1, v_2) = $fvrelop_(Fnn X M, $fle_, v_1, v_2) |
| def $vrelop_(Fnn X M, GE, v_1, v_2) = $fvrelop_(Fnn X M, $fge_, v_1, v_2) |
| |
| def $vshiftop_(Jnn X M, SHL, v, i) = $ivshiftop_(Jnn X M, $ishl_, v, i) |
| def $vshiftop_(Jnn X M, SHR sx, v, i) = $ivshiftopsx_(Jnn X M, $ishr_, sx, v, i) |
| |
| def $vbitmaskop_(Jnn X M, v) = $ivbitmaskop_(Jnn X M, v) |
| |
| def $vswizzlop_(I8 X M, SWIZZLE, v_1, v_2) = $ivswizzlop_(I8 X M, $iswizzle_lane_, v_1, v_2) |
| def $vswizzlop_(I8 X M, RELAXED_SWIZZLE, v_1, v_2) = $ivswizzlop_(I8 X M, $irelaxed_swizzle_lane_, v_1, v_2) |
| |
| def $vshufflop_(I8 X M, i*, v_1, v_2) = $ivshufflop_(I8 X M, i*, v_1, v_2) |
| |
| def $lcvtop__(Jnn_1 X M_1, Jnn_2 X M_2, EXTEND half sx , c_1) = c |
| -- if c = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx, c_1) |
| def $lcvtop__(Jnn_1 X M_1, Fnn_2 X M_2, CONVERT half? sx, c_1) = c |
| -- if c = $convert__($lsizenn1(Jnn_1), $lsizenn2(Fnn_2), sx, c_1) |
| def $lcvtop__(Fnn_1 X M_1, Inn_2 X M_2, TRUNC_SAT sx zero?, c_1) = c? |
| -- if c? = $trunc_sat__($lsizenn1(Fnn_1), $lsizenn2(Inn_2), sx, c_1) |
| def $lcvtop__(Fnn_1 X M_1, Inn_2 X M_2, RELAXED_TRUNC sx zero?, c_1) = c? |
| -- if c? = $relaxed_trunc__($lsizenn1(Fnn_1), $lsizenn2(Inn_2), sx, c_1) |
| def $lcvtop__(Fnn_1 X M_1, Fnn_2 X M_2, DEMOTE ZERO, c_1) = c* |
| -- if c* = $demote__($lsizenn1(Fnn_1), $lsizenn2(Fnn_2), c_1) |
| def $lcvtop__(Fnn_1 X M_1, Fnn_2 X M_2, PROMOTE LOW, c_1) = c* |
| -- if c* = $promote__($lsizenn1(Fnn_1), $lsizenn2(Fnn_2), c_1) |
| |
| def $vcvtop__(Lnn_1 X M, Lnn_2 X M, vcvtop, v_1) = v |
| -- if $halfop(Lnn_1 X M, Lnn_2 X M, vcvtop) = eps /\ $zeroop(Lnn_1 X M, Lnn_2 X M, vcvtop) = eps |
| -- if c_1* = $lanes_(Lnn_1 X M, v_1) |
| -- if c** = $setproduct_(lane_(Lnn_2), $lcvtop__(Lnn_1 X M, Lnn_2 X M, vcvtop, c_1)*) |
| -- if v <- $inv_lanes_(Lnn_2 X M, c*)* |
| def $vcvtop__(Lnn_1 X M_1, Lnn_2 X M_2, vcvtop, v_1) = v |
| -- if $halfop(Lnn_1 X M_1, Lnn_2 X M_2, vcvtop) = half |
| -- if c_1* = $lanes_(Lnn_1 X M_1, v_1)[$half(half, 0, M_2) : M_2] |
| -- if c** = $setproduct_(lane_(Lnn_2), $lcvtop__(Lnn_1 X M_1, Lnn_2 X M_2, vcvtop, c_1)*) |
| -- if v <- $inv_lanes_(Lnn_2 X M_2, c*)* |
| def $vcvtop__(Lnn_1 X M_1, Lnn_2 X M_2, vcvtop, v_1) = v |
| -- if $zeroop(Lnn_1 X M_1, Lnn_2 X M_2, vcvtop) = ZERO |
| -- if c_1* = $lanes_(Lnn_1 X M_1, v_1) |
| -- if c** = $setproduct_(lane_(Lnn_2), ($lcvtop__(Lnn_1 X M_1, Lnn_2 X M_2, vcvtop, c_1)* ++ [$zero(Lnn_2)]^M_1)) |
| -- if v <- $inv_lanes_(Lnn_2 X M_2, c*)* |
| |
| def $vnarrowop__(Jnn_1 X M_1, Jnn_2 X M_2, sx, v_1, v_2) = v |
| -- if c_1* = $lanes_(Jnn_1 X M_1, v_1) |
| -- if c_2* = $lanes_(Jnn_1 X M_1, v_2) |
| -- if c'_1* = $narrow__($lsize(Jnn_1), $lsize(Jnn_2), sx, c_1)* |
| -- if c'_2* = $narrow__($lsize(Jnn_1), $lsize(Jnn_2), sx, c_2)* |
| -- if v = $inv_lanes_(Jnn_2 X M_2, c'_1* ++ c'_2*) |
| |
| (; |
| ;; TODO(2, rossberg): this is obsolete, clean up |
| def $vextunop__(Jnn_1 X M_1, Jnn_2 X M_2, EXTADD_PAIRWISE sx, c_1) = c |
| -- if ci* = $lanes_(Jnn_1 X M_1, c_1) |
| -- if $concat_(iN($lsizenn2(Jnn_2)), (cj_1 cj_2)*) = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx, ci)* |
| -- if c = $inv_lanes_(Jnn_2 X M_2, $iadd_($lsizenn2(Jnn_2), cj_1, cj_2)*) |
| |
| def $vextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, EXTMUL half sx, c_1, c_2) = c |
| -- if ci_1* = $lanes_(Jnn_1 X M_1, c_1)[$half(half, 0, M_2) : M_2] |
| -- if ci_2* = $lanes_(Jnn_1 X M_1, c_2)[$half(half, 0, M_2) : M_2] |
| -- if c = $inv_lanes_(Jnn_2 X M_2, $imul_($lsizenn2(Jnn_2), $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx, ci_1), $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx, ci_2))*) |
| def $vextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, DOT S, c_1, c_2) = c |
| -- if ci_1* = $lanes_(Jnn_1 X M_1, c_1) |
| -- if ci_2* = $lanes_(Jnn_1 X M_1, c_2) |
| -- if ci'_1* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), S, ci_1)* |
| -- if ci'_2* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), S, ci_2)* |
| -- if $concat_(iN($lsizenn2(Jnn_2)), (cj_1 cj_2)*) = $imul_($lsizenn2(Jnn_2), ci'_1, ci'_2)* |
| -- if c = $inv_lanes_(Jnn_2 X M_2, $iadd_($lsizenn2(Jnn_2), cj_1, cj_2)*) |
| def $vextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, RELAXED_DOT S, c_1, c_2) = c |
| -- if ci_1* = $lanes_(Jnn_1 X M_1, c_1) |
| -- if ci_2* = $lanes_(Jnn_1 X M_1, c_2) |
| -- if ci'_1* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), S, ci_1)* |
| -- if ci'_2* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), $relaxed2($R_idot, sx, S, U), ci_2)* |
| -- if $concat_(iN($lsizenn2(Jnn_2)), (cj_1 cj_2)*) = $imul_($lsizenn2(Jnn_2), ci'_1, ci'_2)* |
| -- if c = $inv_lanes_(Jnn_2 X M_2, $iadd_sat_($lsizenn2(Jnn_2), S, cj_1, cj_2)*) |
| ;) |
| |
| ;; TODO(2, rossberg): move to lanewise section |
| def $ivextunop__(shape_1, shape_2, def $f_(N, iN(N)*) : iN(N)*, sx, vec_(V128)) : vec_(V128) |
| def $ivextunop__(Jnn_1 X M_1, Jnn_2 X M_2, def $f_, sx, v_1) = $inv_lanes_(Jnn_2 X M_2, c*) |
| -- if c_1* = $lanes_(Jnn_1 X M_1, v_1) |
| -- if c'_1* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx, c_1)* |
| -- if c* = $f_($lsizenn2(Jnn_2), c'_1*) |
| |
| def $ivextbinop__(shape_1, shape_2, def $f_(N, iN(N)*, iN(N)*) : iN(N)*, sx, sx, laneidx, laneidx, vec_(V128), vec_(V128)) : vec_(V128) |
| def $ivextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, def $f_, sx_1, sx_2, i, k, v_1, v_2) = $inv_lanes_(Jnn_2 X M_2, c*) |
| -- if c_1* = $lanes_(Jnn_1 X M_1, v_1)[i : k] |
| -- if c_2* = $lanes_(Jnn_1 X M_1, v_2)[i : k] |
| -- if c'_1* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx_1, c_1)* |
| -- if c'_2* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx_2, c_2)* |
| -- if c* = $f_($lsizenn2(Jnn_2), c'_1*, c'_2*) |
| |
| def $ivadd_pairwise_(N, iN(N)*) : iN(N)* hint(show $ivadd__pairwise_(%,%)) |
| def $ivadd_pairwise_(N, i*) = $iadd_(N, j_1, j_2)* |
| -- if $concat_(N, (j_1 j_2)*) = i* |
| |
| def $ivmul_(N, iN(N)*, iN(N)*) : iN(N)* |
| def $ivmul_(N, i_1*, i_2*) = $imul_(N, i_1, i_2)* |
| |
| def $ivdot_(N, iN(N)*, iN(N)*) : iN(N)* |
| def $ivdot_(N, i_1*, i_2*) = $iadd_(N, j_1, j_2)* |
| -- if $concat_(iN(N), (j_1 j_2)*) = $imul_(N, i_1, i_2)* |
| |
| def $ivdot_sat_(N, iN(N)*, iN(N)*) : iN(N)* hint(show $ivdot__sat_(%,%,%)) |
| def $ivdot_sat_(N, i_1*, i_2*) = $iadd_sat_(N, S, j_1, j_2)* |
| -- if $concat_(iN(N), (j_1 j_2)*) = $imul_(N, i_1, i_2)* |
| |
| def $vextunop__(Jnn_1 X M_1, Jnn_2 X M_2, EXTADD_PAIRWISE sx, v_1) = |
| $ivextunop__(Jnn_1 X M_1, Jnn_2 X M_2, $ivadd_pairwise_, sx, v_1) |
| |
| def $vextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, EXTMUL half sx, v_1, v_2) = |
| $ivextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, $ivmul_, sx, sx, $half(half, 0, M_2), M_2, v_1, v_2) |
| def $vextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, DOT S, v_1, v_2) = |
| $ivextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, $ivdot_, S, S, 0, M_1, v_1, v_2) |
| def $vextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, RELAXED_DOT S, v_1, v_2) = |
| $ivextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, $ivdot_sat_, S, $relaxed2($R_idot, sx, S, U), 0, M_1, v_1, v_2) |
| |
| (; |
| ;; TODO(2, rossberg): this is obsolete, clean up |
| def $vextternop__(Jnn_1 X M_1, Jnn_2 X M_2, RELAXED_DOT_ADD S, c_1, c_2, c_3) = c |
| -- if $jsizenn(Jnn) = $(2*$lsizenn1(Jnn_1)) |
| -- if ci_1* = $lanes_(Jnn_1 X M_1, c_1) |
| -- if ci_2* = $lanes_(Jnn_1 X M_1, c_2) |
| -- if ci_3* = $lanes_(Jnn_2 X M_2, c_3) |
| -- if ci'_1* = $extend__($lsizenn1(Jnn_1), $lsizenn(Jnn), S, ci_1)* |
| -- if ci'_2* = $extend__($lsizenn1(Jnn_1), $lsizenn(Jnn), $relaxed2($R_idot, sx, S, U), ci_2)* |
| -- if $concat_(iN($lsizenn(Jnn)), (cj_1 cj_2)*) = $imul_($lsizenn(Jnn), ci'_1, ci'_2)* |
| -- if $concat_(iN($lsizenn(Jnn)), (ck_1 ck_2)*) = $iadd_sat_($lsizenn(Jnn), S, cj_1, cj_2)* |
| -- if ck'_1* = $extend__($lsizenn(Jnn), $lsizenn2(Jnn_2), S, ck_1)* |
| -- if ck'_2* = $extend__($lsizenn(Jnn), $lsizenn2(Jnn_2), S, ck_2)* |
| -- if ck* = $iadd_($lsizenn2(Jnn_2), ck'_1, ck'_2)* |
| -- if c = $inv_lanes_(Jnn_2 X M_2, $iadd_($lsizenn2(Jnn_2), ck, ci_3)*) |
| ;) |
| |
| def $vextternop__(Jnn_1 X M_1, Jnn_2 X M_2, RELAXED_DOT_ADD S, c_1, c_2, c_3) = c |
| -- if $jsizenn(Jnn) = $(2*$lsizenn1(Jnn_1)) |
| -- if M = $(2*M_2) |
| -- if c' = $vextbinop__(Jnn_1 X M_1, Jnn X M, RELAXED_DOT S, c_1, c_2) |
| -- if c'' = $vextunop__(Jnn X M, Jnn_2 X M_2, EXTADD_PAIRWISE S, c') |
| -- if c <- $vbinop_(Jnn_2 X M_2, ADD, c'', c_3) |