C++ - SSE constraint in inline assembly not working
I am getting problems while compiling the code snippet below.
#include <iostream> #include <cstdint> using namespace std; union mxcsr { uint32_t v; struct { uint32_t ie : 1; uint32_t de : 1; uint32_t ze : 1; uint32_t oe : 1; uint32_t ue : 1; uint32_t pe : 1; uint32_t daz : 1; uint32_t im : 1; uint32_t dm : 1; uint32_t zm : 1; uint32_t om : 1; uint32_t um : 1; uint32_t pm : 1; uint32_t rn : 1; uint32_t rp : 1; uint32_t fz : 1; uint32_t rs0 : 15; }; }; std::ostream& operator<<(std::ostream& ostr, mxcsr &m){ ostr << std::hex << std::showbase; ostr << "mxcsr=" << m.v; ostr << std::dec << std::noshowbase; ostr << " [ie=" << m.ie << ",de=" << m.de << ",ze=" << m.ze << ",oe=" << m.oe << ",ue=" << m.ue << ",pe=" << m.pe << ",daz=" << m.daz << ",im=" << m.im << ",dm=" << m.dm << ",zm=" << m.zm << ",om=" << m.om << ",um=" << m.um << ",pm=" << m.pm << ",r-=" << m.rn << ",r+=" << m.rp << ",fz=" << m.fz << "] "; return ostr; } typedef union __attribute__((aligned(16))) vec_t { double f64[2]; float f32[4]; uint64_t u64[2]; uint32_t u32[4]; uint16_t u16[8]; uint8_t u8[16]; int64_t i64[2]; int32_t i32[4]; int16_t i16[8]; int8_t i8[16]; } vec_t; float add_vec_32f(float ra, float rb, mxcsr &f){ vec_t va, vb; va.f32[0] = ra; vb.f32[0] = rb; asm("addps %[vb], %[va];" "stmxcsr %[f];" : [va] "+x" (va), [f] "=m" (f) : [vb] "xm" (vb) : ); return va.f32[0]; } int main() { mxcsr val; float b = add_vec_32f(3.4, 5.6, val); std::cout << "b=" << b << " val=" << val << std::endl; return 0; } The compiler complains with the error "impossible constraint in 'asm'". To verify this, I went through the description of the addps instruction. It says:
ADDPS xmm1, xmm2/m128: Add packed single-precision floating-point values from xmm2/m128 to xmm1.
So, the source can be a memory address or an xmm register, and the destination has to be an xmm register. I guess my constraints kind of agree with that. Can someone point me to the possible issue here?
thanks.
I am not sure why using the union itself does not work while using a SIMD vector type does, although you need to add one to the union. I was able to get it to work like so:
// 128-bit GCC vector type. Its only role here is to give the union below a
// member that can be bound to the "x" (SSE register) inline-asm constraint.
typedef double xmmd __attribute__((vector_size(16)));

// 16-byte-aligned, 16-byte union exposing the same storage as a whole vector
// (xmm) or as arrays of floating-point / integer lanes of various widths.
typedef union __attribute__((aligned(16))) vec_t {
    xmmd xmm;  // whole-register view; this is the member passed to the asm
    double f64[2];
    float f32[4];
    uint64_t u64[2];
    uint32_t u32[4];
    uint16_t u16[8];
    uint8_t u8[16];
    int64_t i64[2];
    int32_t i32[4];
    int16_t i16[8];
    int8_t i8[16];
} vec_t;

// Adds ra and rb with the SSE `addps` instruction and captures the MXCSR
// register immediately afterwards.
//
// ra, rb : the two single-precision operands (placed in lane 0).
// f      : out-parameter; receives the MXCSR contents stored by `stmxcsr`.
// Returns lane 0 of the packed sum.
float add_vec_32f(float ra, float rb, mxcsr &f) {
    // addps operates on all four lanes, so the unused lanes are zeroed: leaving
    // them indeterminate could feed NaNs/denormals into the add and raise
    // status flags in MXCSR that have nothing to do with ra + rb.
    vec_t va = {};
    vec_t vb = {};
    va.f32[0] = ra;
    vb.f32[0] = rb;

    // The operands are the vector-typed `xmm` members rather than the unions
    // themselves: GCC rejects the whole union for the "x" constraint
    // ("impossible constraint in 'asm'").
    // `volatile` because the sequence updates and reads MXCSR, which is state
    // the operand lists do not describe.
    asm volatile("addps %[v2], %[v1];"
                 "stmxcsr %[f];"
                 : [v1] "+x"(va.xmm), [f] "=m"(f)
                 : [v2] "xm"(vb.xmm));

    return va.f32[0];
}
Comments
Post a Comment