blob: c88b9dadfb4977113a7c931cf6cd5ff5b93b5b4c [file] [log] [blame] [edit]
;;
;; Vector Primitevs
;;
;; TODO(4, rossberg): avoid hardcoding v128
;; Lanes
def $lanes_(shape, vec_(V128)) : lane_($lanetype(shape))* hint(inverse $inv_lanes_)
def $inv_lanes_(shape, lane_($lanetype(shape))*) : vec_(V128)
hint(show $lanes_(%)^(-1)#((%)))
;; TODO(3, rossberg): implement numerics internally
def $lanes_ hint(builtin)
def $inv_lanes_ hint(builtin)
def $zeroop(shape_1, shape_2, vcvtop__(shape_1, shape_2)) : zero?
def $zeroop(Jnn_1 X M_1, Jnn_2 X M_2, EXTEND half sx) = eps
def $zeroop(Jnn_1 X M_1, Fnn_2 X M_2, CONVERT half? sx) = eps
def $zeroop(Fnn_1 X M_1, Jnn_2 X M_2, TRUNC_SAT sx zero?) = zero?
def $zeroop(Fnn_1 X M_1, Jnn_2 X M_2, RELAXED_TRUNC sx zero?) = zero?
def $zeroop(Fnn_1 X M_1, Fnn_2 X M_2, DEMOTE zero) = zero
def $zeroop(Fnn_1 X M_1, Fnn_2 X M_2, PROMOTE LOW) = eps
def $halfop(shape_1, shape_2, vcvtop__(shape_1, shape_2)) : half?
def $halfop(Jnn_1 X M_1, Jnn_2 X M_2, EXTEND half sx) = half
def $halfop(Jnn_1 X M_1, Fnn_2 X M_2, CONVERT half? sx) = half?
def $halfop(Fnn_1 X M_1, Jnn_2 X M_2, TRUNC_SAT sx zero?) = eps
def $halfop(Fnn_1 X M_1, Jnn_2 X M_2, RELAXED_TRUNC sx zero?) = eps
def $halfop(Fnn_1 X M_1, Fnn_2 X M_2, DEMOTE zero) = eps
def $halfop(Fnn_1 X M_1, Fnn_2 X M_2, PROMOTE LOW) = LOW
def $half(half, nat, nat) : nat
def $half(LOW, i, j) = i
def $half(HIGH, i, j) = j
def $iswizzle_lane_(N, iN(N)*, iN(N)) : iN(N)
def $irelaxed_swizzle_lane_(N, iN(N)*, iN(N)) : iN(N)
def $iswizzle_lane_(N, c*, i) = c*[i] -- if i < |c*|
def $iswizzle_lane_(N, c*, i) = 0 -- otherwise
def $irelaxed_swizzle_lane_(N, c*, i) = c*[i] -- if i < |c*|
def $irelaxed_swizzle_lane_(N, c*, i) = 0 -- if $signed_(N, i) < 0
def $irelaxed_swizzle_lane_(N, c*, i) = $relaxed2($R_swizzle, iN(N), 0, c*[i \ |c*|])
-- otherwise
;; Lanewise operations
def $ivunop_(shape, def $f_(N, iN(N)) : iN(N), vec_(V128)) : vec_(V128)*
def $fvunop_(shape, def $f_(N, fN(N)) : fN(N)*, vec_(V128)) : vec_(V128)*
def $ivbinop_(shape, def $f_(N, iN(N), iN(N)) : iN(N), vec_(V128), vec_(V128)) : vec_(V128)*
def $ivbinopsx_(shape, def $f_(N, sx, iN(N), iN(N)) : iN(N), sx, vec_(V128), vec_(V128)) : vec_(V128)*
def $ivbinopsxnd_(shape, def $f_(N, sx, iN(N), iN(N)) : iN(N)*, sx, vec_(V128), vec_(V128)) : vec_(V128)*
def $fvbinop_(shape, def $f_(N, fN(N), fN(N)) : fN(N)*, vec_(V128), vec_(V128)) : vec_(V128)*
def $ivternopnd_(shape, def $f_(N, iN(N), iN(N), iN(N)) : iN(N)*, vec_(V128), vec_(V128), vec_(V128)) : vec_(V128)*
def $fvternop_(shape, def $f_(N, fN(N), fN(N), fN(N)) : fN(N)*, vec_(V128), vec_(V128), vec_(V128)) : vec_(V128)*
def $ivrelop_(shape, def $f_(N, iN(N), iN(N)) : u32, vec_(V128), vec_(V128)) : vec_(V128)
def $ivrelopsx_(shape, def $f_(N, sx, iN(N), iN(N)) : u32, sx, vec_(V128), vec_(V128)) : vec_(V128)
def $fvrelop_(shape, def $f_(N, fN(N), fN(N)) : u32, vec_(V128), vec_(V128)) : vec_(V128)
def $ivshiftop_(shape, def $f_(N, iN(N), u32) : iN(N), vec_(V128), u32) : vec_(V128)
def $ivshiftopsx_(shape, def $f_(N, sx, iN(N), u32) : iN(N), sx, vec_(V128), u32) : vec_(V128)
def $ivbitmaskop_(shape, vec_(V128)) : u32
def $ivswizzlop_(shape, def $f_(N, iN(N)*, iN(N)) : iN(N), vec_(V128), vec_(V128)) : vec_(V128)
def $ivshufflop_(shape, laneidx*, vec_(V128), vec_(V128)) : vec_(V128)
def $ivunop_(Jnn X M, def $f_, v_1) = $inv_lanes_(Jnn X M, c*)
-- if c_1* = $lanes_(Jnn X M, v_1)
-- if c* = $f_($lsizenn(Jnn), c_1)*
def $fvunop_(Fnn X M, def $f_, v_1) = $inv_lanes_(Fnn X M, c*)*
-- if c_1* = $lanes_(Fnn X M, v_1)
-- if c** = $setproduct_(lane_(Fnn), $f_($sizenn(Fnn), c_1)*)
def $ivbinop_(Jnn X M, def $f_, v_1, v_2) = $inv_lanes_(Jnn X M, c*)
-- if c_1* = $lanes_(Jnn X M, v_1)
-- if c_2* = $lanes_(Jnn X M, v_2)
-- if c* = $f_($lsizenn(Jnn), c_1, c_2)*
def $ivbinopsx_(Jnn X M, def $f_, sx, v_1, v_2) = $inv_lanes_(Jnn X M, c*)
-- if c_1* = $lanes_(Jnn X M, v_1)
-- if c_2* = $lanes_(Jnn X M, v_2)
-- if c* = $f_($lsizenn(Jnn), sx, c_1, c_2)*
def $ivbinopsxnd_(Jnn X M, def $f_, sx, v_1, v_2) = $inv_lanes_(Jnn X M, c*)*
-- if c_1* = $lanes_(Jnn X M, v_1)
-- if c_2* = $lanes_(Jnn X M, v_2)
-- if c** = $setproduct_(lane_(Jnn), $f_($lsizenn(Jnn), sx, c_1, c_2)*)
def $fvbinop_(Fnn X M, def $f_, v_1, v_2) = $inv_lanes_(Fnn X M, c*)*
-- if c_1* = $lanes_(Fnn X M, v_1)
-- if c_2* = $lanes_(Fnn X M, v_2)
-- if c** = $setproduct_(lane_(Fnn), $f_($sizenn(Fnn), c_1, c_2)*)
def $ivternopnd_(Jnn X M, def $f_, v_1, v_2, v_3) = $inv_lanes_(Jnn X M, c*)*
-- if c_1* = $lanes_(Jnn X M, v_1)
-- if c_2* = $lanes_(Jnn X M, v_2)
-- if c_3* = $lanes_(Jnn X M, v_3)
-- if c** = $setproduct_(lane_(Jnn), $f_($lsizenn(Jnn), c_1, c_2, c_3)*)
def $fvternop_(Fnn X M, def $f_, v_1, v_2, v_3) = $inv_lanes_(Fnn X M, c*)*
-- if c_1* = $lanes_(Fnn X M, v_1)
-- if c_2* = $lanes_(Fnn X M, v_2)
-- if c_3* = $lanes_(Fnn X M, v_3)
-- if c** = $setproduct_(lane_(Fnn), $f_($sizenn(Fnn), c_1, c_2, c_3)*)
def $ivrelop_(Jnn X M, def $f_, v_1, v_2) = $inv_lanes_(Jnn X M, c*)
-- if c_1* = $lanes_(Jnn X M, v_1)
-- if c_2* = $lanes_(Jnn X M, v_2)
-- if c* = $extend__(1, $lsizenn(Jnn), S, $f_($lsizenn(Jnn), c_1, c_2))*
def $ivrelopsx_(Jnn X M, def $f_, sx, v_1, v_2) = $inv_lanes_(Jnn X M, c*)
-- if c_1* = $lanes_(Jnn X M, v_1)
-- if c_2* = $lanes_(Jnn X M, v_2)
-- if c* = $extend__(1, $lsizenn(Jnn), S, $f_($lsizenn(Jnn), sx, c_1, c_2))*
def $fvrelop_(Fnn X M, def $f_, v_1, v_2) = $inv_lanes_(Inn X M, c*)
-- if c_1* = $lanes_(Fnn X M, v_1)
-- if c_2* = $lanes_(Fnn X M, v_2)
-- if c* = $extend__(1, $sizenn(Fnn), S, $f_($sizenn(Fnn), c_1, c_2))*
-- if $isize(Inn) = $fsize(Fnn) ;; TODO(3, rossberg): make implicit
def $ivshiftop_(Jnn X M, def $f_, v_1, i) = $inv_lanes_(Jnn X M, c*)
-- if c_1* = $lanes_(Jnn X M, v_1)
-- if c* = $f_($lsizenn(Jnn), c_1, i)*
def $ivshiftopsx_(Jnn X M, def $f_, sx, v_1, i) = $inv_lanes_(Jnn X M, c*)
-- if c_1* = $lanes_(Jnn X M, v_1)
-- if c* = $f_($lsizenn(Jnn), sx, c_1, i)*
def $ivbitmaskop_(Jnn X M, v_1) = $irev_(32, c)
-- if c_1* = $lanes_(Jnn X M, v_1)
-- if $ibits_(32, c) = $ilt_($lsizenn(Jnn), S, c_1, 0)* ++ (0)^(32-M)
def $ivswizzlop_(Jnn X M, def $f_, v_1, v_2) = $inv_lanes_(Jnn X M, c*)
-- if c_1* = $lanes_(Jnn X M, v_1)
-- if c_2* = $lanes_(Jnn X M, v_2)
-- if c* = $f_($lsizenn(Jnn), c_1*, c_2)*
def $ivshufflop_(Jnn X M, i*, v_1, v_2) = $inv_lanes_(Jnn X M, c*)
-- if c_1* = $lanes_(Jnn X M, v_1)
-- if c_2* = $lanes_(Jnn X M, v_2)
-- if c* = ((c_1* ++ c_2*)[i])*
;; Operator dispatch
def $vvunop_(vectype, vvunop, vec_(vectype)) : vec_(vectype)*
hint(show %2#$_(%1,%3))
def $vvbinop_(vectype, vvbinop, vec_(vectype), vec_(vectype)) : vec_(vectype)*
hint(show %2#$_(%1,%3,%4))
def $vvternop_(vectype, vvternop, vec_(vectype), vec_(vectype), vec_(vectype)) : vec_(vectype)*
hint(show %2#$_(%1,%3,%4,%5))
def $vunop_(shape, vunop_(shape), vec_(V128)) : vec_(V128)*
hint(show %2#$_(%1,%3))
def $vbinop_(shape, vbinop_(shape), vec_(V128), vec_(V128)) : vec_(V128)*
hint(show %2#$_(%1,%3,%4))
def $vternop_(shape, vternop_(shape), vec_(V128), vec_(V128), vec_(V128)) : vec_(V128)*
hint(show %2#$_(%1,%3,%4,%5))
def $vrelop_(shape, vrelop_(shape), vec_(V128), vec_(V128)) : vec_(V128)
hint(show %2#$_(%1,%3,%4))
def $lcvtop__(shape_1, shape_2, vcvtop__(shape_1, shape_2), lane_($lanetype(shape_1))) : lane_($lanetype(shape_2))*
hint(show %3#$__(%1,%2)^(%4)#((%5)))
def $vcvtop__(shape_1, shape_2, vcvtop__(shape_1, shape_2), vec_(V128)) : vec_(V128)
hint(show %3#$__(%1,%2)^(%4)#((%5)))
def $vshiftop_(ishape, vshiftop_(ishape), vec_(V128), u32) : vec_(V128)
hint(show %2#$_(%1)#(%3, %4))
def $vbitmaskop_(ishape, vec_(V128)) : u32
hint(show VBITMASK#$_(%1,%2))
def $vswizzlop_(bshape, vswizzlop_(bshape), vec_(V128), vec_(V128)) : vec_(V128)
hint(show %2#$_(%1,%3,%4))
def $vshufflop_(bshape, laneidx*, vec_(V128), vec_(V128)) : vec_(V128)
hint(show VSHUFFLE#$_(%1,%2,%3,%4))
def $vnarrowop__(shape_1, shape_2, sx, vec_(V128), vec_(V128)) : vec_(V128)
hint(show VNARROW#$__(%1,%2)^(%3)#((%4,%5)))
def $vextunop__(ishape_1, ishape_2, vextunop__(ishape_1, ishape_2), vec_(V128)) : vec_(V128)
hint(show %3#$__(%1,%2,%4))
def $vextbinop__(ishape_1, ishape_2, vextbinop__(ishape_1, ishape_2), vec_(V128), vec_(V128)) : vec_(V128)
hint(show %3#$__(%1,%2,%4,%5))
def $vextternop__(ishape_1, ishape_2, vextternop__(ishape_1, ishape_2), vec_(V128), vec_(V128), vec_(V128)) : vec_(V128)
hint(show %3#$__(%1,%2,%4,%5,%6))
def $vvunop_(Vnn, NOT, v) = $inot_($vsizenn(Vnn), v)
def $vvbinop_(Vnn, AND, v_1, v_2) = $iand_($vsizenn(Vnn), v_1, v_2)
def $vvbinop_(Vnn, ANDNOT, v_1, v_2) = $iandnot_($vsizenn(Vnn), v_1, v_2)
def $vvbinop_(Vnn, OR, v_1, v_2) = $ior_($vsizenn(Vnn), v_1, v_2)
def $vvbinop_(Vnn, XOR, v_1, v_2) = $ixor_($vsizenn(Vnn), v_1, v_2)
def $vvternop_(Vnn, BITSELECT, v_1, v_2, v_3) = $ibitselect_($vsizenn(Vnn), v_1, v_2, v_3)
def $vunop_(Fnn X M, ABS, v) = $fvunop_(Fnn X M, $fabs_, v)
def $vunop_(Fnn X M, NEG, v) = $fvunop_(Fnn X M, $fneg_, v)
def $vunop_(Fnn X M, SQRT, v) = $fvunop_(Fnn X M, $fsqrt_, v)
def $vunop_(Fnn X M, CEIL, v) = $fvunop_(Fnn X M, $fceil_, v)
def $vunop_(Fnn X M, FLOOR, v) = $fvunop_(Fnn X M, $ffloor_, v)
def $vunop_(Fnn X M, TRUNC, v) = $fvunop_(Fnn X M, $ftrunc_, v)
def $vunop_(Fnn X M, NEAREST, v) = $fvunop_(Fnn X M, $fnearest_, v)
def $vunop_(Jnn X M, ABS, v) = $ivunop_(Jnn X M, $iabs_, v)
def $vunop_(Jnn X M, NEG, v) = $ivunop_(Jnn X M, $ineg_, v)
def $vunop_(Jnn X M, POPCNT, v) = $ivunop_(Jnn X M, $ipopcnt_, v)
def $vbinop_(Jnn X M, ADD, v_1, v_2) = $ivbinop_(Jnn X M, $iadd_, v_1, v_2)
def $vbinop_(Jnn X M, SUB, v_1, v_2) = $ivbinop_(Jnn X M, $isub_, v_1, v_2)
def $vbinop_(Jnn X M, MUL, v_1, v_2) = $ivbinop_(Jnn X M, $imul_, v_1, v_2)
def $vbinop_(Jnn X M, ADD_SAT sx, v_1, v_2) = $ivbinopsx_(Jnn X M, $iadd_sat_, sx, v_1, v_2)
def $vbinop_(Jnn X M, SUB_SAT sx, v_1, v_2) = $ivbinopsx_(Jnn X M, $isub_sat_, sx, v_1, v_2)
def $vbinop_(Jnn X M, MIN sx, v_1, v_2) = $ivbinopsx_(Jnn X M, $imin_, sx, v_1, v_2)
def $vbinop_(Jnn X M, MAX sx, v_1, v_2) = $ivbinopsx_(Jnn X M, $imax_, sx, v_1, v_2)
def $vbinop_(Jnn X M, AVGR U, v_1, v_2) = $ivbinopsx_(Jnn X M, $iavgr_, U, v_1, v_2)
def $vbinop_(Jnn X M, Q15MULR_SAT S, v_1, v_2) = $ivbinopsx_(Jnn X M, $iq15mulr_sat_, S, v_1, v_2)
def $vbinop_(Jnn X M, RELAXED_Q15MULR S, v_1, v_2) = $ivbinopsxnd_(Jnn X M, $irelaxed_q15mulr_, S, v_1, v_2)
def $vbinop_(Fnn X M, ADD, v_1, v_2) = $fvbinop_(Fnn X M, $fadd_, v_1, v_2)
def $vbinop_(Fnn X M, SUB, v_1, v_2) = $fvbinop_(Fnn X M, $fsub_, v_1, v_2)
def $vbinop_(Fnn X M, MUL, v_1, v_2) = $fvbinop_(Fnn X M, $fmul_, v_1, v_2)
def $vbinop_(Fnn X M, DIV, v_1, v_2) = $fvbinop_(Fnn X M, $fdiv_, v_1, v_2)
def $vbinop_(Fnn X M, MIN, v_1, v_2) = $fvbinop_(Fnn X M, $fmin_, v_1, v_2)
def $vbinop_(Fnn X M, MAX, v_1, v_2) = $fvbinop_(Fnn X M, $fmax_, v_1, v_2)
def $vbinop_(Fnn X M, PMIN, v_1, v_2) = $fvbinop_(Fnn X M, $fpmin_, v_1, v_2)
def $vbinop_(Fnn X M, PMAX, v_1, v_2) = $fvbinop_(Fnn X M, $fpmax_, v_1, v_2)
def $vbinop_(Fnn X M, RELAXED_MIN, v_1, v_2) = $fvbinop_(Fnn X M, $frelaxed_min_, v_1, v_2)
def $vbinop_(Fnn X M, RELAXED_MAX, v_1, v_2) = $fvbinop_(Fnn X M, $frelaxed_max_, v_1, v_2)
def $vternop_(Jnn X M, RELAXED_LANESELECT, v_1, v_2, v_3) = $ivternopnd_(Jnn X M, $irelaxed_laneselect_, v_1, v_2, v_3)
def $vternop_(Fnn X M, RELAXED_MADD, v_1, v_2, v_3) = $fvternop_(Fnn X M, $frelaxed_madd_, v_1, v_2, v_3)
def $vternop_(Fnn X M, RELAXED_NMADD, v_1, v_2, v_3) = $fvternop_(Fnn X M, $frelaxed_nmadd_, v_1, v_2, v_3)
def $vrelop_(Jnn X M, EQ, v_1, v_2) = $ivrelop_(Jnn X M, $ieq_, v_1, v_2)
def $vrelop_(Jnn X M, NE, v_1, v_2) = $ivrelop_(Jnn X M, $ine_, v_1, v_2)
def $vrelop_(Jnn X M, LT sx, v_1, v_2) = $ivrelopsx_(Jnn X M, $ilt_, sx, v_1, v_2)
def $vrelop_(Jnn X M, GT sx, v_1, v_2) = $ivrelopsx_(Jnn X M, $igt_, sx, v_1, v_2)
def $vrelop_(Jnn X M, LE sx, v_1, v_2) = $ivrelopsx_(Jnn X M, $ile_, sx, v_1, v_2)
def $vrelop_(Jnn X M, GE sx, v_1, v_2) = $ivrelopsx_(Jnn X M, $ige_, sx, v_1, v_2)
def $vrelop_(Fnn X M, EQ, v_1, v_2) = $fvrelop_(Fnn X M, $feq_, v_1, v_2)
def $vrelop_(Fnn X M, NE, v_1, v_2) = $fvrelop_(Fnn X M, $fne_, v_1, v_2)
def $vrelop_(Fnn X M, LT, v_1, v_2) = $fvrelop_(Fnn X M, $flt_, v_1, v_2)
def $vrelop_(Fnn X M, GT, v_1, v_2) = $fvrelop_(Fnn X M, $fgt_, v_1, v_2)
def $vrelop_(Fnn X M, LE, v_1, v_2) = $fvrelop_(Fnn X M, $fle_, v_1, v_2)
def $vrelop_(Fnn X M, GE, v_1, v_2) = $fvrelop_(Fnn X M, $fge_, v_1, v_2)
def $vshiftop_(Jnn X M, SHL, v, i) = $ivshiftop_(Jnn X M, $ishl_, v, i)
def $vshiftop_(Jnn X M, SHR sx, v, i) = $ivshiftopsx_(Jnn X M, $ishr_, sx, v, i)
def $vbitmaskop_(Jnn X M, v) = $ivbitmaskop_(Jnn X M, v)
def $vswizzlop_(I8 X M, SWIZZLE, v_1, v_2) = $ivswizzlop_(I8 X M, $iswizzle_lane_, v_1, v_2)
def $vswizzlop_(I8 X M, RELAXED_SWIZZLE, v_1, v_2) = $ivswizzlop_(I8 X M, $irelaxed_swizzle_lane_, v_1, v_2)
def $vshufflop_(I8 X M, i*, v_1, v_2) = $ivshufflop_(I8 X M, i*, v_1, v_2)
def $lcvtop__(Jnn_1 X M_1, Jnn_2 X M_2, EXTEND half sx , c_1) = c
-- if c = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx, c_1)
def $lcvtop__(Jnn_1 X M_1, Fnn_2 X M_2, CONVERT half? sx, c_1) = c
-- if c = $convert__($lsizenn1(Jnn_1), $lsizenn2(Fnn_2), sx, c_1)
def $lcvtop__(Fnn_1 X M_1, Inn_2 X M_2, TRUNC_SAT sx zero?, c_1) = c?
-- if c? = $trunc_sat__($lsizenn1(Fnn_1), $lsizenn2(Inn_2), sx, c_1)
def $lcvtop__(Fnn_1 X M_1, Inn_2 X M_2, RELAXED_TRUNC sx zero?, c_1) = c?
-- if c? = $relaxed_trunc__($lsizenn1(Fnn_1), $lsizenn2(Inn_2), sx, c_1)
def $lcvtop__(Fnn_1 X M_1, Fnn_2 X M_2, DEMOTE ZERO, c_1) = c*
-- if c* = $demote__($lsizenn1(Fnn_1), $lsizenn2(Fnn_2), c_1)
def $lcvtop__(Fnn_1 X M_1, Fnn_2 X M_2, PROMOTE LOW, c_1) = c*
-- if c* = $promote__($lsizenn1(Fnn_1), $lsizenn2(Fnn_2), c_1)
def $vcvtop__(Lnn_1 X M, Lnn_2 X M, vcvtop, v_1) = v
-- if $halfop(Lnn_1 X M, Lnn_2 X M, vcvtop) = eps /\ $zeroop(Lnn_1 X M, Lnn_2 X M, vcvtop) = eps
-- if c_1* = $lanes_(Lnn_1 X M, v_1)
-- if c** = $setproduct_(lane_(Lnn_2), $lcvtop__(Lnn_1 X M, Lnn_2 X M, vcvtop, c_1)*)
-- if v <- $inv_lanes_(Lnn_2 X M, c*)*
def $vcvtop__(Lnn_1 X M_1, Lnn_2 X M_2, vcvtop, v_1) = v
-- if $halfop(Lnn_1 X M_1, Lnn_2 X M_2, vcvtop) = half
-- if c_1* = $lanes_(Lnn_1 X M_1, v_1)[$half(half, 0, M_2) : M_2]
-- if c** = $setproduct_(lane_(Lnn_2), $lcvtop__(Lnn_1 X M_1, Lnn_2 X M_2, vcvtop, c_1)*)
-- if v <- $inv_lanes_(Lnn_2 X M_2, c*)*
def $vcvtop__(Lnn_1 X M_1, Lnn_2 X M_2, vcvtop, v_1) = v
-- if $zeroop(Lnn_1 X M_1, Lnn_2 X M_2, vcvtop) = ZERO
-- if c_1* = $lanes_(Lnn_1 X M_1, v_1)
-- if c** = $setproduct_(lane_(Lnn_2), ($lcvtop__(Lnn_1 X M_1, Lnn_2 X M_2, vcvtop, c_1)* ++ [$zero(Lnn_2)]^M_1))
-- if v <- $inv_lanes_(Lnn_2 X M_2, c*)*
def $vnarrowop__(Jnn_1 X M_1, Jnn_2 X M_2, sx, v_1, v_2) = v
-- if c_1* = $lanes_(Jnn_1 X M_1, v_1)
-- if c_2* = $lanes_(Jnn_1 X M_1, v_2)
-- if c'_1* = $narrow__($lsize(Jnn_1), $lsize(Jnn_2), sx, c_1)*
-- if c'_2* = $narrow__($lsize(Jnn_1), $lsize(Jnn_2), sx, c_2)*
-- if v = $inv_lanes_(Jnn_2 X M_2, c'_1* ++ c'_2*)
(;
;; TODO(2, rossberg): this is obsolete, clean up
def $vextunop__(Jnn_1 X M_1, Jnn_2 X M_2, EXTADD_PAIRWISE sx, c_1) = c
-- if ci* = $lanes_(Jnn_1 X M_1, c_1)
-- if $concat_(iN($lsizenn2(Jnn_2)), (cj_1 cj_2)*) = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx, ci)*
-- if c = $inv_lanes_(Jnn_2 X M_2, $iadd_($lsizenn2(Jnn_2), cj_1, cj_2)*)
def $vextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, EXTMUL half sx, c_1, c_2) = c
-- if ci_1* = $lanes_(Jnn_1 X M_1, c_1)[$half(half, 0, M_2) : M_2]
-- if ci_2* = $lanes_(Jnn_1 X M_1, c_2)[$half(half, 0, M_2) : M_2]
-- if c = $inv_lanes_(Jnn_2 X M_2, $imul_($lsizenn2(Jnn_2), $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx, ci_1), $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx, ci_2))*)
def $vextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, DOT S, c_1, c_2) = c
-- if ci_1* = $lanes_(Jnn_1 X M_1, c_1)
-- if ci_2* = $lanes_(Jnn_1 X M_1, c_2)
-- if ci'_1* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), S, ci_1)*
-- if ci'_2* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), S, ci_2)*
-- if $concat_(iN($lsizenn2(Jnn_2)), (cj_1 cj_2)*) = $imul_($lsizenn2(Jnn_2), ci'_1, ci'_2)*
-- if c = $inv_lanes_(Jnn_2 X M_2, $iadd_($lsizenn2(Jnn_2), cj_1, cj_2)*)
def $vextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, RELAXED_DOT S, c_1, c_2) = c
-- if ci_1* = $lanes_(Jnn_1 X M_1, c_1)
-- if ci_2* = $lanes_(Jnn_1 X M_1, c_2)
-- if ci'_1* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), S, ci_1)*
-- if ci'_2* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), $relaxed2($R_idot, sx, S, U), ci_2)*
-- if $concat_(iN($lsizenn2(Jnn_2)), (cj_1 cj_2)*) = $imul_($lsizenn2(Jnn_2), ci'_1, ci'_2)*
-- if c = $inv_lanes_(Jnn_2 X M_2, $iadd_sat_($lsizenn2(Jnn_2), S, cj_1, cj_2)*)
;)
;; TODO(2, rossberg): move to lanewise section
def $ivextunop__(shape_1, shape_2, def $f_(N, iN(N)*) : iN(N)*, sx, vec_(V128)) : vec_(V128)
def $ivextunop__(Jnn_1 X M_1, Jnn_2 X M_2, def $f_, sx, v_1) = $inv_lanes_(Jnn_2 X M_2, c*)
-- if c_1* = $lanes_(Jnn_1 X M_1, v_1)
-- if c'_1* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx, c_1)*
-- if c* = $f_($lsizenn2(Jnn_2), c'_1*)
def $ivextbinop__(shape_1, shape_2, def $f_(N, iN(N)*, iN(N)*) : iN(N)*, sx, sx, laneidx, laneidx, vec_(V128), vec_(V128)) : vec_(V128)
def $ivextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, def $f_, sx_1, sx_2, i, k, v_1, v_2) = $inv_lanes_(Jnn_2 X M_2, c*)
-- if c_1* = $lanes_(Jnn_1 X M_1, v_1)[i : k]
-- if c_2* = $lanes_(Jnn_1 X M_1, v_2)[i : k]
-- if c'_1* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx_1, c_1)*
-- if c'_2* = $extend__($lsizenn1(Jnn_1), $lsizenn2(Jnn_2), sx_2, c_2)*
-- if c* = $f_($lsizenn2(Jnn_2), c'_1*, c'_2*)
def $ivadd_pairwise_(N, iN(N)*) : iN(N)* hint(show $ivadd__pairwise_(%,%))
def $ivadd_pairwise_(N, i*) = $iadd_(N, j_1, j_2)*
-- if $concat_(N, (j_1 j_2)*) = i*
def $ivmul_(N, iN(N)*, iN(N)*) : iN(N)*
def $ivmul_(N, i_1*, i_2*) = $imul_(N, i_1, i_2)*
def $ivdot_(N, iN(N)*, iN(N)*) : iN(N)*
def $ivdot_(N, i_1*, i_2*) = $iadd_(N, j_1, j_2)*
-- if $concat_(iN(N), (j_1 j_2)*) = $imul_(N, i_1, i_2)*
def $ivdot_sat_(N, iN(N)*, iN(N)*) : iN(N)* hint(show $ivdot__sat_(%,%,%))
def $ivdot_sat_(N, i_1*, i_2*) = $iadd_sat_(N, S, j_1, j_2)*
-- if $concat_(iN(N), (j_1 j_2)*) = $imul_(N, i_1, i_2)*
def $vextunop__(Jnn_1 X M_1, Jnn_2 X M_2, EXTADD_PAIRWISE sx, v_1) =
$ivextunop__(Jnn_1 X M_1, Jnn_2 X M_2, $ivadd_pairwise_, sx, v_1)
def $vextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, EXTMUL half sx, v_1, v_2) =
$ivextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, $ivmul_, sx, sx, $half(half, 0, M_2), M_2, v_1, v_2)
def $vextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, DOT S, v_1, v_2) =
$ivextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, $ivdot_, S, S, 0, M_1, v_1, v_2)
def $vextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, RELAXED_DOT S, v_1, v_2) =
$ivextbinop__(Jnn_1 X M_1, Jnn_2 X M_2, $ivdot_sat_, S, $relaxed2($R_idot, sx, S, U), 0, M_1, v_1, v_2)
(;
;; TODO(2, rossberg): this is obsolete, clean up
def $vextternop__(Jnn_1 X M_1, Jnn_2 X M_2, RELAXED_DOT_ADD S, c_1, c_2, c_3) = c
-- if $jsizenn(Jnn) = $(2*$lsizenn1(Jnn_1))
-- if ci_1* = $lanes_(Jnn_1 X M_1, c_1)
-- if ci_2* = $lanes_(Jnn_1 X M_1, c_2)
-- if ci_3* = $lanes_(Jnn_2 X M_2, c_3)
-- if ci'_1* = $extend__($lsizenn1(Jnn_1), $lsizenn(Jnn), S, ci_1)*
-- if ci'_2* = $extend__($lsizenn1(Jnn_1), $lsizenn(Jnn), $relaxed2($R_idot, sx, S, U), ci_2)*
-- if $concat_(iN($lsizenn(Jnn)), (cj_1 cj_2)*) = $imul_($lsizenn(Jnn), ci'_1, ci'_2)*
-- if $concat_(iN($lsizenn(Jnn)), (ck_1 ck_2)*) = $iadd_sat_($lsizenn(Jnn), S, cj_1, cj_2)*
-- if ck'_1* = $extend__($lsizenn(Jnn), $lsizenn2(Jnn_2), S, ck_1)*
-- if ck'_2* = $extend__($lsizenn(Jnn), $lsizenn2(Jnn_2), S, ck_2)*
-- if ck* = $iadd_($lsizenn2(Jnn_2), ck'_1, ck'_2)*
-- if c = $inv_lanes_(Jnn_2 X M_2, $iadd_($lsizenn2(Jnn_2), ck, ci_3)*)
;)
def $vextternop__(Jnn_1 X M_1, Jnn_2 X M_2, RELAXED_DOT_ADD S, c_1, c_2, c_3) = c
-- if $jsizenn(Jnn) = $(2*$lsizenn1(Jnn_1))
-- if M = $(2*M_2)
-- if c' = $vextbinop__(Jnn_1 X M_1, Jnn X M, RELAXED_DOT S, c_1, c_2)
-- if c'' = $vextunop__(Jnn X M, Jnn_2 X M_2, EXTADD_PAIRWISE S, c')
-- if c <- $vbinop_(Jnn_2 X M_2, ADD, c'', c_3)