21 #include <xmmintrin.h>
22 #include <smmintrin.h>
24 #define V3COMP(p) (p).x,(p).y,(p).z
25 #define V2COMP(p) (p).x,(p).y
26 #define pBool(var) (var)? ("true"):("false")
28 #define GVECTOR_SSE_OPTIMIZATIONS
39 #define EPSILON (1.0E-10)
41 #ifdef GVECTOR_SSE_OPTIMIZATIONS
42 template <
typename num>
43 inline num sse_sqrt(num xin)
45 __m128d x = _mm_set_sd(
double(xin));
46 __m128d y = _mm_set_sd(
double(xin));
49 _mm_store_sd(&retVal,x);
53 template <>
inline float sse_sqrt<float>(
float xin)
55 __m128 x = _mm_set_ss((
float) xin);
58 _mm_store_ss(&retVal,x);
62 template <>
inline double sse_sqrt<double>(
double xin)
64 __m128d x = _mm_set_sd(xin);
65 __m128d y = _mm_set_sd(xin);
68 _mm_store_sd(&retVal,x);
72 template <
typename num>
73 inline num sse_dot_product(
const num* fv1,
const num* fv2,
const int mask)
75 __m128 v1 = _mm_set_ps(
float(fv1[3]),
float(fv1[2]),
float(fv1[1]),
float(fv1[0]));
76 __m128 v2 = _mm_set_ps(
float(fv2[3]),
float(fv2[2]),
float(fv2[1]),
float(fv2[0]));
77 __m128 dp = _mm_dp_ps(v1,v2,mask);
79 _mm_store_ss(&res,dp);
84 inline float sse_dot_product(
const float* fv1,
const float* fv2,
const int mask)
86 __m128 v1 = _mm_load_ps(fv1);
87 __m128 v2 = _mm_load_ps(fv2);
88 __m128 dp = _mm_dp_ps(v1,v2,mask);
90 _mm_store_ss(&res,dp);
95 inline double sse_dot_product(
const double* fv1,
const double* fv2,
const int mask)
97 __m128d v1 = _mm_load_pd(fv1);
98 __m128d v2 = _mm_load_pd(fv2);
99 __m128d dp1 = _mm_dp_pd(v1,v2,0x31);
100 v1 = _mm_load_pd(&fv1[2]);
101 v2 = _mm_load_pd(&fv2[2]);
102 __m128d dp2 = _mm_dp_pd(v1,v2,mask);
103 __m128d dp = _mm_add_sd(dp1,dp2);
105 _mm_store_sd(&res,dp);
111 template <
class num>
inline num mysqrt(num x)
113 #ifdef GVECTOR_SSE_OPTIMIZATIONS
114 return sse_sqrt<num>(x);
145 }__attribute__ ((aligned (16)));
147 void setxyzRotations(num rx, num ry, num rz);
148 void xyzRotationAndTransformation(num rx, num ry, num rz,
vector3d<num> t);
162 m11 = cz*cy; m12 = -sz; m13 = cz*sy; m14 = 0.0;
163 m21 = cx*sz*cy+sx*sy; m22 = cx*cz; m23 = cx*sz*sy-sx*cy; m24 = 0.0;
164 m31 = sx*sz*cy-cx*sy; m32 = sx*cz; m33 = cx*cy; m34 = 0.0;
165 m41 = 0.0; m42 = 0.0; m43 = 0.0; m44 = 1.0;
168 template <
class num>
void matrix3d<num>::xyzRotationAndTransformation(num rx, num ry, num rz,
vector3d< num > t)
177 m11 = cz*cy; m12 = -sz; m13 = cz*sy; m14 = t.x;
178 m21 = cx*sz*cy+sx*sy; m22 = cx*cz; m23 = cx*sz*sy-sx*cy; m24 = t.y;
179 m31 = sx*sz*cy-cx*sy; m32 = sx*cz; m33 = cx*cy; m34 = t.z;
180 m41 = 0.0; m42 = 0.0; m43 = 0.0; m44 = 1.0;
183 template <
class num> matrix3d<num> matrix3d<num>::operator*(
const matrix3d< num >& M2)
const
185 #define m1(i,j) m ## i ## j
186 #define m2(i,j) M2.m ## i ## j
187 #define m3(i,j) M3.m ## i ## j
191 m3(1,1) = m1(1,1)*m2(1,1) + m1(1,2)*m2(2,1) + m1(1,3)*m2(3,1) + m1(1,4)*m2(4,1);
192 m3(1,2) = m1(1,1)*m2(1,2) + m1(1,2)*m2(2,2) + m1(1,3)*m2(3,2) + m1(1,4)*m2(4,2);
193 m3(1,3) = m1(1,1)*m2(1,3) + m1(1,2)*m2(2,3) + m1(1,3)*m2(3,3) + m1(1,4)*m2(4,3);
194 m3(1,4) = m1(1,1)*m2(1,4) + m1(1,2)*m2(2,4) + m1(1,3)*m2(3,4) + m1(1,4)*m2(4,4);
196 m3(2,1) = m1(2,1)*m2(1,1) + m1(2,2)*m2(2,1) + m1(2,3)*m2(3,1) + m1(2,4)*m2(4,1);
197 m3(2,2) = m1(2,1)*m2(1,2) + m1(2,2)*m2(2,2) + m1(2,3)*m2(3,2) + m1(2,4)*m2(4,2);
198 m3(2,3) = m1(2,1)*m2(1,3) + m1(2,2)*m2(2,3) + m1(2,3)*m2(3,3) + m1(2,4)*m2(4,3);
199 m3(2,4) = m1(2,1)*m2(1,4) + m1(2,2)*m2(2,4) + m1(2,3)*m2(3,4) + m1(2,4)*m2(4,4);
201 m3(3,1) = m1(3,1)*m2(1,1) + m1(3,2)*m2(2,1) + m1(3,3)*m2(3,1) + m1(3,4)*m2(4,1);
202 m3(3,2) = m1(3,1)*m2(1,2) + m1(3,2)*m2(2,2) + m1(3,3)*m2(3,2) + m1(3,4)*m2(4,2);
203 m3(3,3) = m1(3,1)*m2(1,3) + m1(3,2)*m2(2,3) + m1(3,3)*m2(3,3) + m1(3,4)*m2(4,3);
204 m3(3,4) = m1(3,1)*m2(1,4) + m1(3,2)*m2(2,4) + m1(3,3)*m2(3,4) + m1(3,4)*m2(4,4);
206 m3(4,1) = m1(4,1)*m2(1,1) + m1(4,2)*m2(2,1) + m1(4,3)*m2(3,1) + m1(4,4)*m2(4,1);
207 m3(4,2) = m1(4,1)*m2(1,2) + m1(4,2)*m2(2,2) + m1(4,3)*m2(3,2) + m1(4,4)*m2(4,2);
208 m3(4,3) = m1(4,1)*m2(1,3) + m1(4,2)*m2(2,3) + m1(4,3)*m2(3,3) + m1(4,4)*m2(4,3);
209 m3(4,4) = m1(4,1)*m2(1,4) + m1(4,2)*m2(2,4) + m1(4,3)*m2(3,4) + m1(4,4)*m2(4,4);
214 template <
class num> matrix3d<num> matrix3d<num>::transpose()
const
216 matrix3d<num> M2 = *
this;
238 }__attribute__ ((aligned (16)));
244 {x=nx; y=ny; z=nz; w=1.0;}
245 vector3d(num nx,num ny,num nz,num nw)
246 {x=nx; y=ny; z=nz; w=nw;}
247 void set(num nx,num ny,num nz)
248 {x=nx; y=ny; z=nz; w=1.0;}
249 void set(num nx,num ny,num nz,num nw)
250 {x=nx; y=ny; z=nz; w=nw;}
254 {x=p.x; y=p.y; z=p.z; w = p.w;}
259 {set(p);
return(*
this);}
263 {
return(coefficients[idx]);}
265 {
return(coefficients[idx]);}
267 num length() const MustUseResult;
268 num sqlength() const MustUseResult;
269 vector3d<num> norm() const MustUseResult;
270 vector3d<num> norm(const num len) const MustUseResult;
272 bool nonzero() const MustUseResult
273 {
return(x!=0 || y!=0 || z!=0);}
297 bool operator< (const vector3d<num> p)
const;
299 bool operator<=(const vector3d<num> p)
const;
311 bool finite() const MustUseResult
312 {
return(::finite(x) && ::finite(y) && ::finite(z));}
318 #ifdef GVECTOR_SSE_OPTIMIZATIONS
328 return(mysqrt<num>(x*x + y*y + z*z));
334 #ifdef GVECTOR_SSE_OPTIMIZATIONS
335 __m128 p = _mm_set_ps((
float)x,(
float)y,(
float)z,0.0f);
337 __attribute__((aligned (16))) float x2[4];
339 return(x2[3]+x2[2]+x2[1]);
341 return(x*x + y*y + z*z);
347 #ifdef GVECTOR_SSE_OPTIMIZATIONS
349 num lInv = 1.0/(this->length());
360 l = mysqrt<num>(x*x + y*y + z*z);
371 #ifdef GVECTOR_SSE_OPTIMIZATIONS
374 __m128 f = _mm_set1_ps(((
float)len) / (this->length()));
375 __m128 pm = _mm_set_ps((
float)x,(
float)y,(
float)z,0.0f);
376 pm = _mm_mul_ps(pm,f);
377 __attribute__((aligned (16))) float x2[4];
388 f = len / mysqrt<num>(x*x + y*y + z*z);
399 #ifdef GVECTOR_SSE_OPTIMIZATIONS
400 __m128 f = _mm_set1_ps(1.0f / (this->length()));
401 __m128 pm = _mm_set_ps((
float)x,(
float)y,(
float)z,0.0f);
402 pm = _mm_mul_ps(pm,f);
403 __attribute__((aligned (16))) float x2[4];
412 l = mysqrt<num>(x*x + y*y + z*z);
421 return(x*p.x + y*p.y + z*p.z);
426 return(a.x*b.x + a.y*b.y + a.z*b.z);
431 return(fabs(a.x*b.x) + fabs(a.y*b.y) + fabs(a.z*b.z));
451 r.x = a.y*b.z - a.z*b.y;
452 r.y = a.z*b.x - a.x*b.z;
453 r.z = a.x*b.y - a.y*b.x;
458 #define VECTOR3D_EQUAL_BINARY_OPERATOR(opr) \
459 template <class num> \
460 vector3d<num> &vector3d<num>::operator opr (const vector3d<num> p) \
468 VECTOR3D_EQUAL_BINARY_OPERATOR(+=)
469 VECTOR3D_EQUAL_BINARY_OPERATOR(-=)
470 VECTOR3D_EQUAL_BINARY_OPERATOR(*=)
471 VECTOR3D_EQUAL_BINARY_OPERATOR(/=)
473 #define VECTOR3D_BINARY_OPERATOR(opr) \
474 template <class num> \
475 vector3d<num> vector3d<num>::operator opr (const vector3d<num> p) const \
484 VECTOR3D_BINARY_OPERATOR(+)
485 VECTOR3D_BINARY_OPERATOR(-)
486 VECTOR3D_BINARY_OPERATOR(*)
487 VECTOR3D_BINARY_OPERATOR(/)
489 #define VECTOR3D_SCALAR_OPERATOR(opr) \
490 template <class num> \
491 vector3d<num> vector3d<num>::operator opr (const num f) const \
500 VECTOR3D_SCALAR_OPERATOR(*)
501 VECTOR3D_SCALAR_OPERATOR(/)
503 #define VECTOR3D_EQUAL_SCALAR_OPERATOR(opr) \
504 template <class num> \
505 vector3d<num> &vector3d<num>::operator opr (num f) \
513 VECTOR3D_EQUAL_SCALAR_OPERATOR(*=)
514 VECTOR3D_EQUAL_SCALAR_OPERATOR(/=)
516 #define VECTOR3D_LOGIC_OPERATOR(opr,combine) \
517 template <class num> \
518 bool vector3d<num>::operator opr (const vector3d<num> p) const \
520 return((x opr p.x) combine \
521 (y opr p.y) combine \
525 VECTOR3D_LOGIC_OPERATOR(==,&&)
526 VECTOR3D_LOGIC_OPERATOR(!=,||)
528 VECTOR3D_LOGIC_OPERATOR(< ,&&)
529 VECTOR3D_LOGIC_OPERATOR(> ,&&)
530 VECTOR3D_LOGIC_OPERATOR(<=,&&)
531 VECTOR3D_LOGIC_OPERATOR(>=,&&)
568 v.x = ::min(a.x,b.x);
569 v.y = ::min(a.y,b.y);
570 v.z = ::min(a.z,b.z);
579 v.x = ::max(a.x,b.x);
580 v.y = ::max(a.y,b.y);
581 v.z = ::max(a.z,b.z);
588 v.x = ::bound(v.x,low,high);
589 v.y = ::bound(v.y,low,high);
590 v.z = ::bound(v.z,low,high);
605 q.x = (t * r.x * r.x + c ) * x
606 + (t * r.x * r.y - s * r.z) * y
607 + (t * r.x * r.z + s * r.y) * z;
609 q.y = (t * r.y * r.x + s * r.z) * x
610 + (t * r.y * r.y + c ) * y
611 + (t * r.y * r.z - s * r.x) * z;
613 q.z = (t * r.z * r.x - s * r.y) * x
614 + (t * r.z * r.y + s * r.x) * y
615 + (t * r.z * r.z + c ) * z;
623 q.x = M.m11*x + M.m12*y + M.m13*z + M.m14*w;
624 q.y = M.m21*x + M.m22*y + M.m23*z + M.m24*w;
625 q.z = M.m31*x + M.m32*y + M.m33*z + M.m34*w;
680 return(acos(std::max(-1.0,std::min(1.0,dot(a,b)/(a.length()*b.length())))));
685 return(cross(a,b).norm());
714 return(mysqrt<num>(dx*dx + dy*dy + dz*dz));
731 return(dx*dx + dy*dy + dz*dz);
746 t = ((p.x - x0.x) + (p.y - x0.y) + (p.z - x0.z)) / (x1.x + x1.y + x1.z);
747 x = x0 + (x1 - x0) * t;
749 return(distance(x,p));
774 template <
class vec>
void set(vec p)
782 {
set(p);
return(*
this);}
786 {
set(p.x,p.y);
return(*
this);}
790 {
return(((num*)
this)[idx]);}
792 {
return(((num*)
this)[idx]);}
795 num
length() const MustUseResult;
800 {
return(atan2(y,x));}
803 {x=cos(angle); y=sin(angle);}
812 vector2d<num>
bound(const num max_length) const MustUseResult;
815 {
return(x!=0 || y!=0);}
818 template <
class vec> num
dot(
const vec p)
const MustUseResult;
860 bool operator< (const vector2d<num> p)
const;
862 bool operator<=(const vector2d<num> p)
const;
884 return(mysqrt<num>(x*x + y*y));
897 l = mysqrt<num>(x*x + y*y);
909 f = len / mysqrt<num>(x*x + y*y);
920 l = mysqrt<num>(x*x + y*y);
932 if(lsq < sq(max_length)){
935 f = max_length / mysqrt<num>(lsq);
944 return(x*p.x + y*p.y);
950 return(x*p.y - y*p.x);
955 return(a.x*b.x + a.y*b.y);
963 l = mysqrt<num>(a.x*a.x + a.y*a.y) * mysqrt<num>(b.x*b.x + b.y*b.y);
965 return((a.x*b.x + a.y*b.y) / l);
971 return(x*p.y - p.x*y);
1006 template <
class num>
1010 q.x = x*p.x + y*p.y;
1011 q.y = x*p.y - y*p.x;
1017 template <
class num>
1021 q.x = x*p.x - y*p.y;
1022 q.y = y*p.x + x*p.y;
1026 #define VECTOR2D_EQUAL_BINARY_OPERATOR(opr) \
1027 template <class num> \
1028 vector2d<num> &vector2d<num>::operator opr (const vector2d<num> p) \
1035 VECTOR2D_EQUAL_BINARY_OPERATOR(+=)
1036 VECTOR2D_EQUAL_BINARY_OPERATOR(-=)
1037 VECTOR2D_EQUAL_BINARY_OPERATOR(*=)
1038 VECTOR2D_EQUAL_BINARY_OPERATOR(/=)
1040 #define VECTOR2D_BINARY_OPERATOR(opr) \
1041 template <class num> \
1042 vector2d<num> vector2d<num>::operator opr (const vector2d<num> p) const \
1050 VECTOR2D_BINARY_OPERATOR(+)
1051 VECTOR2D_BINARY_OPERATOR(-)
1052 VECTOR2D_BINARY_OPERATOR(*)
1053 VECTOR2D_BINARY_OPERATOR(/)
1055 #define VECTOR2D_SCALAR_OPERATOR(opr) \
1056 template <class num> \
1057 vector2d<num> vector2d<num>::operator opr (const num f) const \
1065 VECTOR2D_SCALAR_OPERATOR(*)
1066 VECTOR2D_SCALAR_OPERATOR(/)
1068 #define VECTOR2D_EQUAL_SCALAR_OPERATOR(opr) \
1069 template <class num> \
1070 vector2d<num> &vector2d<num>::operator opr (num f) \
1077 VECTOR2D_EQUAL_SCALAR_OPERATOR(*=)
1078 VECTOR2D_EQUAL_SCALAR_OPERATOR(/=)
1080 #define VECTOR2D_LOGIC_OPERATOR(opr,combine) \
1081 template <class num> \
1082 bool vector2d<num>::operator opr (const vector2d<num> p) const \
1084 return((x opr p.x) combine \
1088 VECTOR2D_LOGIC_OPERATOR(==,&&)
1089 VECTOR2D_LOGIC_OPERATOR(!=,||)
1091 VECTOR2D_LOGIC_OPERATOR(< ,&&)
1092 VECTOR2D_LOGIC_OPERATOR(> ,&&)
1093 VECTOR2D_LOGIC_OPERATOR(<=,&&)
1094 VECTOR2D_LOGIC_OPERATOR(>=,&&)
1097 template <class num>
1106 template <
class num1,
class num2>
1117 template <
class num>
1120 if(p.x < x) x = p.x;
1121 if(p.y < y) y = p.y;
1124 template <
class num>
1127 if(p.x > x) x = p.x;
1128 if(p.y > y) y = p.y;
1143 v.x = ::min(a.x,b.x);
1144 v.y = ::min(a.y,b.y);
1153 v.x = ::max(a.x,b.x);
1154 v.y = ::max(a.y,b.y);
1161 v.x = ::bound(v.x,low,high);
1162 v.y = ::bound(v.y,low,high);
1174 return(mysqrt<num>(dx*dx + dy*dy));
1190 return(dx*dx + dy*dy);
1195 return(sqdist(a,b));
vector2d< num > bound(const num max_length) const MustUseResult
bound vector to a maximum length
bool finite() const MustUseResult
return true if both elements are finite, otherwise return false
void set(vec p)
set the components of the vector
num & operator[](int idx)
element accessor
void set(num nx, num ny)
set the components of the vector
vector2d< num > & operator-=(const vector2d< num > p)
subtract a vector from the current element values
vector2d< num > operator*(const vector2d< num > p) const
return elementwise product of this vector and p
num angle() const MustUseResult
calculate the clockwise angle from <1,0>
void setAll(num nx)
set the components of the vector to the same value
vector2d< num > project_in(const vector2d< num > p) const MustUseResult
bool nonzero() const MustUseResult
return if vector has any length at all
void take_max(const vector2d< num > p)
set the vector to the maximum of its components and p's components
void take_min(const vector2d< num > p)
set the vector to the minimum of its components and p's components
vector2d< num > operator+(const vector2d< num > p) const
return vector sum of this vector and p
num perpdot(const vector2d< num > p) const MustUseResult
return dot product of vector's perp with p, equivalent to (this->perp()).dot(p)
vector2d< num > operator/(const vector2d< num > p) const
return elementwise division of this vector by p
vector2d< num > & operator=(vector2d< num > p)
copy constructor
void normalize()
normalize to unit length in place
num sqlength() const MustUseResult
calculate squared Euclidean length (faster than length())
vector2d< num > operator-() const
negate vector (reflect through origin) <x,y> -> <-x,-y>
vector2d< num > & operator/=(const vector2d< num > p)
divide (elementwise) a vector with the current element values
vector2d< num > project_out(const vector2d< num > p) const MustUseResult
num & operator[](int idx)
element accessor
num cross(const vector2d< num > p) const MustUseResult
return z component of 3D cross product on 2D vectors. right handed.
vector2d< num > norm() const MustUseResult
return a unit length vector in the same direction
vector2d< num > rotate(const double a) const MustUseResult
return vector rotated by angle a
vector2d< num > & operator*=(const vector2d< num > p)
multiply (elementwise) a vector with the current element values
vector2d< num > & operator=(vec p)
copy constructor from compatible classes
vector2d< num > perp() const MustUseResult
return the perpendicular of a vector (i.e. rotated 90 deg counterclockwise)
vector2d< num > & operator+=(const vector2d< num > p)
add a vector to the current element values
void heading(num angle)
make a unit vector at given angle
num length() const MustUseResult
calculate Euclidean length
num dot(const vec p) const MustUseResult
return dot product of vector with p
void zero()
zero all components of the vector