00001 /****************************************************************************** 00002 * $Id: gdalsse_priv.h 34921 2016-08-04 22:26:31Z rouault $ 00003 * 00004 * Project: GDAL 00005 * Purpose: SSE2 helper 00006 * Author: Even Rouault <even dot rouault at spatialys dot com> 00007 * 00008 ****************************************************************************** 00009 * Copyright (c) 2014, Even Rouault <even dot rouault at spatialys dot com> 00010 * 00011 * Permission is hereby granted, free of charge, to any person obtaining a 00012 * copy of this software and associated documentation files (the "Software"), 00013 * to deal in the Software without restriction, including without limitation 00014 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 00015 * and/or sell copies of the Software, and to permit persons to whom the 00016 * Software is furnished to do so, subject to the following conditions: 00017 * 00018 * The above copyright notice and this permission notice shall be included 00019 * in all copies or substantial portions of the Software. 00020 * 00021 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 00022 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00023 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 00024 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00025 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 00026 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 00027 * DEALINGS IN THE SOFTWARE. 
****************************************************************************/

#ifndef GDALSSE_PRIV_H_INCLUDED
#define GDALSSE_PRIV_H_INCLUDED

#ifndef DOXYGEN_SKIP

#include "cpl_port.h"

/* We restrict to 64bit processors because they are guaranteed to have SSE2 */
/* Could possibly be used too on 32bit, but we would need to check at runtime */
#if (defined(__x86_64) || defined(_M_X64)) && !defined(USE_SSE2_EMULATION)

/* Requires SSE2 */
#include <emmintrin.h>
#include <string.h>

#ifdef __SSE4_1__
#include <smmintrin.h>
#endif

/* Thin wrapper around one 128-bit XMM register viewed as 2 packed doubles.  */
/* Every operation maps directly onto an SSE2 intrinsic; SSE4.1 widening     */
/* loads (_mm_cvtepu8_epi32 / _mm_cvtepi16_epi32 / _mm_cvtepu16_epi32) are   */
/* used when available, with SSE2 unpack fallbacks otherwise.                */
class XMMReg2Double
{
  public:
    __m128d xmm;

    /* Deliberately leaves xmm uninitialized, like a raw register. */
    /* coverity[uninit_member] */
    XMMReg2Double() {}

    /* Loads val into the low lane and zeroes the high lane (_mm_load_sd). */
    XMMReg2Double(double val) { xmm = _mm_load_sd (&val); }
    XMMReg2Double(const XMMReg2Double& other) : xmm(other.xmm) {}

    /* Returns a register with both lanes set to 0.0. */
    static inline XMMReg2Double Zero()
    {
        XMMReg2Double reg;
        reg.Zeroize();
        return reg;
    }

    /* Broadcasts *ptr into both lanes. */
    static inline XMMReg2Double Load1ValHighAndLow(const double* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad1ValHighAndLow(ptr);
        return reg;
    }

    /* Loads 2 doubles from a possibly unaligned address. */
    static inline XMMReg2Double Load2Val(const double* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Loads 2 floats and widens them to doubles. */
    static inline XMMReg2Double Load2Val(const float* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Loads 2 doubles from a 16-byte aligned address. */
    static inline XMMReg2Double Load2ValAligned(const double* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2ValAligned(ptr);
        return reg;
    }

    /* Loads 2 unsigned bytes and converts them to doubles. */
    static inline XMMReg2Double Load2Val(const unsigned char* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Loads 2 signed 16-bit ints and converts them to doubles. */
    static inline XMMReg2Double Load2Val(const short* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Loads 2 unsigned 16-bit ints and converts them to doubles. */
    static inline XMMReg2Double Load2Val(const unsigned short* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Per-lane comparisons: each lane of the result is all-ones when the */
    /* predicate holds for that lane, all-zeros otherwise (SSE2 masks).   */
    static inline XMMReg2Double Equals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;
        reg.xmm = _mm_cmpeq_pd(expr1.xmm, expr2.xmm);
        return reg;
    }

    static inline XMMReg2Double NotEquals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;
        reg.xmm = _mm_cmpneq_pd(expr1.xmm, expr2.xmm);
        return reg;
    }

    static inline XMMReg2Double Greater(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;
        reg.xmm = _mm_cmpgt_pd(expr1.xmm, expr2.xmm);
        return reg;
    }

    /* Bitwise AND of the two registers (typically used on comparison masks). */
    static inline XMMReg2Double And(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;
        reg.xmm = _mm_and_pd(expr1.xmm, expr2.xmm);
        return reg;
    }

    /* Per-lane select: (cond & true_expr) | (~cond & false_expr).        */
    /* cond is expected to be an all-ones/all-zeros mask per lane, as     */
    /* produced by Equals/NotEquals/Greater.                              */
    static inline XMMReg2Double Ternary(const XMMReg2Double& cond, const XMMReg2Double& true_expr, const XMMReg2Double& false_expr)
    {
        XMMReg2Double reg;
        reg.xmm = _mm_or_pd(_mm_and_pd (cond.xmm, true_expr.xmm), _mm_andnot_pd(cond.xmm, false_expr.xmm));
        return reg;
    }

    /* Per-lane minimum. */
    static inline XMMReg2Double Min(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;
        reg.xmm = _mm_min_pd(expr1.xmm, expr2.xmm);
        return reg;
    }

    /* Broadcast *ptr to both lanes. */
    inline void nsLoad1ValHighAndLow(const double* ptr)
    {
        xmm = _mm_load1_pd(ptr);
    }

    /* Unaligned 2-double load. */
    inline void nsLoad2Val(const double* ptr)
    {
        xmm = _mm_loadu_pd(ptr);
    }

    /* Aligned 2-double load (pval must be 16-byte aligned). */
    inline void nsLoad2ValAligned(const double* pval)
    {
        xmm = _mm_load_pd(pval);
    }

    /* Loads 2 floats one at a time (so no alignment requirement), packs  */
    /* them into the low 2 float lanes, then widens to 2 doubles.         */
    inline void nsLoad2Val(const float* pval)
    {
        __m128 temp1 = _mm_load_ss(pval);
        __m128 temp2 = _mm_load_ss(pval + 1);
        temp1 = _mm_shuffle_ps(temp1, temp2, _MM_SHUFFLE(1,0,1,0));
        temp1 = _mm_shuffle_ps(temp1, temp1, _MM_SHUFFLE(3,3,2,0));
        xmm = _mm_cvtps_pd(temp1);
    }

    /* Loads 2 unsigned bytes, zero-extends them to 32-bit ints, and */
    /* converts to doubles.                                          */
    inline void nsLoad2Val(const unsigned char* ptr)
    {
#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
        /* memcpy avoids a potentially misaligned 16-bit read. */
        unsigned short s;
        memcpy(&s, ptr, 2);
        __m128i xmm_i = _mm_cvtsi32_si128(s);
#else
        /* Type-punned unaligned read; only used where that is allowed. */
        __m128i xmm_i = _mm_cvtsi32_si128(*(unsigned short*)(ptr));
#endif
#ifdef __SSE4_1__
        xmm_i = _mm_cvtepu8_epi32(xmm_i);
#else
        /* Zero-extend bytes to 32-bit by interleaving with zeros twice. */
        xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
        xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
#endif
        xmm = _mm_cvtepi32_pd(xmm_i);
    }

    /* Loads 2 signed shorts, sign-extends them to 32-bit ints, and */
    /* converts to doubles.                                         */
    inline void nsLoad2Val(const short* ptr)
    {
        int i;
        memcpy(&i, ptr, 4);
        __m128i xmm_i = _mm_cvtsi32_si128(i);
#ifdef __SSE4_1__
        xmm_i = _mm_cvtepi16_epi32(xmm_i);
#else
        /* Sign extension by duplicating then arithmetic right shift. */
        xmm_i = _mm_unpacklo_epi16(xmm_i,xmm_i); /* 0|0|0|0|0|0|b|a --> 0|0|0|0|b|b|a|a */
        xmm_i = _mm_srai_epi32(xmm_i, 16); /* 0|0|0|0|b|b|a|a --> 0|0|0|0|sign(b)|b|sign(a)|a */
#endif
        xmm = _mm_cvtepi32_pd(xmm_i);
    }

    /* Loads 2 unsigned shorts, zero-extends them to 32-bit ints, and */
    /* converts to doubles.                                           */
    inline void nsLoad2Val(const unsigned short* ptr)
    {
        int i;
        memcpy(&i, ptr, 4);
        __m128i xmm_i = _mm_cvtsi32_si128(i);
#ifdef __SSE4_1__
        xmm_i = _mm_cvtepu16_epi32(xmm_i);
#else
        xmm_i = _mm_unpacklo_epi16(xmm_i,_mm_setzero_si128()); /* 0|0|0|0|0|0|b|a --> 0|0|0|0|0|b|0|a */
#endif
        xmm = _mm_cvtepi32_pd(xmm_i);
    }

    /* Loads 4 unsigned bytes and converts them to 4 doubles, split over */
    /* two registers (low = elements 0..1, high = elements 2..3).        */
    static inline void Load4Val(const unsigned char* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
        int i;
        memcpy(&i, ptr, 4);
        __m128i xmm_i = _mm_cvtsi32_si128(i);
#else
        __m128i xmm_i = _mm_cvtsi32_si128(*(int*)(ptr));
#endif
#ifdef __SSE4_1__
        xmm_i = _mm_cvtepu8_epi32(xmm_i);
#else
        xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
        xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
#endif
        low.xmm = _mm_cvtepi32_pd(xmm_i);
        /* Move int lanes 2..3 down before converting for the high half. */
        high.xmm = _mm_cvtepi32_pd(_mm_shuffle_epi32(xmm_i,_MM_SHUFFLE(3,2,3,2)));
    }

    static inline void Load4Val(const short* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        low.nsLoad2Val(ptr);
        high.nsLoad2Val(ptr+2);
    }

    static inline void Load4Val(const unsigned short* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        low.nsLoad2Val(ptr);
        high.nsLoad2Val(ptr+2);
    }

    static inline void Load4Val(const double* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        low.nsLoad2Val(ptr);
        high.nsLoad2Val(ptr+2);
    }

    /* Loads 4 floats at once and widens pairwise to 2x2 doubles. */
    static inline void Load4Val(const float* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        __m128 temp1 = _mm_loadu_ps(ptr);
        __m128 temp2 = _mm_shuffle_ps(temp1, temp1, _MM_SHUFFLE(3,2,3,2));
        low.xmm = _mm_cvtps_pd(temp1);
        high.xmm = _mm_cvtps_pd(temp2);
    }

    /* Sets both lanes to 0.0. */
    inline void Zeroize()
    {
        xmm = _mm_setzero_pd();
    }

    inline XMMReg2Double& operator= (const XMMReg2Double& other)
    {
        xmm = other.xmm;
        return *this;
    }

    inline XMMReg2Double& operator+= (const XMMReg2Double& other)
    {
        xmm = _mm_add_pd(xmm, other.xmm);
        return *this;
    }

    inline XMMReg2Double& operator*= (const XMMReg2Double& other)
    {
        xmm = _mm_mul_pd(xmm, other.xmm);
        return *this;
    }

    inline XMMReg2Double operator+ (const XMMReg2Double& other) const
    {
        XMMReg2Double ret;
        ret.xmm = _mm_add_pd(xmm, other.xmm);
        return ret;
    }

    inline XMMReg2Double operator- (const XMMReg2Double& other) const
    {
        XMMReg2Double ret;
        ret.xmm = _mm_sub_pd(xmm, other.xmm);
        return ret;
    }

    inline XMMReg2Double operator* (const XMMReg2Double& other) const
    {
        XMMReg2Double ret;
        ret.xmm = _mm_mul_pd(xmm, other.xmm);
        return ret;
    }

    inline XMMReg2Double operator/ (const XMMReg2Double& other) const
    {
        XMMReg2Double ret;
        ret.xmm = _mm_div_pd(xmm, other.xmm);
        return ret;
    }

    /* Horizontal add: both lanes end up holding low + high. */
    inline void AddLowAndHigh()
    {
        __m128d xmm2;
        xmm2 = _mm_shuffle_pd(xmm,xmm,_MM_SHUFFLE2(0,1)); /* transfer high word into low word of xmm2 */
        xmm = _mm_add_pd(xmm, xmm2);
    }

    /* Unaligned 2-double store. */
    inline void Store2Double(double* pval) const
    {
        _mm_storeu_pd(pval, xmm);
    }

    /* Aligned 2-double store (pval must be 16-byte aligned). */
    inline void Store2DoubleAligned(double* pval) const
    {
        _mm_store_pd(pval, xmm);
    }

    /* Converts both doubles to 32-bit ints (with rounding) and stores     */
    /* their low 16 bits. NOTE(review): assumes values fit in GUInt16     */
    /* range -- no saturation is performed here; confirm against callers. */
    void Store2Val(unsigned short* ptr) const
    {
        __m128i tmp = _mm_cvtpd_epi32(xmm); /* Convert the 2 double values to 2 integers */
        ptr[0] = (GUInt16)_mm_extract_epi16(tmp, 0);
        ptr[1] = (GUInt16)_mm_extract_epi16(tmp, 2);
    }

    /* Extracts the low lane. */
    inline operator double () const
    {
        double val;
        _mm_store_sd(&val, xmm);
        return val;
    }
};

#else

#warning "Software emulation of SSE2 !"
00358 00359 class XMMReg2Double 00360 { 00361 public: 00362 double low; 00363 double high; 00364 00365 XMMReg2Double() {} 00366 XMMReg2Double(double val) { low = val; high = 0.0; } 00367 XMMReg2Double(const XMMReg2Double& other) : low(other.low), high(other.high) {} 00368 00369 static inline XMMReg2Double Zero() 00370 { 00371 XMMReg2Double reg; 00372 reg.Zeroize(); 00373 return reg; 00374 } 00375 00376 static inline XMMReg2Double Load1ValHighAndLow(const double* ptr) 00377 { 00378 XMMReg2Double reg; 00379 reg.nsLoad1ValHighAndLow(ptr); 00380 return reg; 00381 } 00382 00383 static inline XMMReg2Double Equals(const XMMReg2Double& expr1, const XMMReg2Double& expr2) 00384 { 00385 XMMReg2Double reg; 00386 00387 if (expr1.low == expr2.low) 00388 memset(&(reg.low), 0xFF, sizeof(double)); 00389 else 00390 reg.low = 0; 00391 00392 if (expr1.high == expr2.high) 00393 memset(&(reg.high), 0xFF, sizeof(double)); 00394 else 00395 reg.high = 0; 00396 00397 return reg; 00398 } 00399 00400 static inline XMMReg2Double NotEquals(const XMMReg2Double& expr1, const XMMReg2Double& expr2) 00401 { 00402 XMMReg2Double reg; 00403 00404 if (expr1.low != expr2.low) 00405 memset(&(reg.low), 0xFF, sizeof(double)); 00406 else 00407 reg.low = 0; 00408 00409 if (expr1.high != expr2.high) 00410 memset(&(reg.high), 0xFF, sizeof(double)); 00411 else 00412 reg.high = 0; 00413 00414 return reg; 00415 } 00416 00417 static inline XMMReg2Double Greater(const XMMReg2Double& expr1, const XMMReg2Double& expr2) 00418 { 00419 XMMReg2Double reg; 00420 00421 if (expr1.low > expr2.low) 00422 memset(&(reg.low), 0xFF, sizeof(double)); 00423 else 00424 reg.low = 0; 00425 00426 if (expr1.high > expr2.high) 00427 memset(&(reg.high), 0xFF, sizeof(double)); 00428 else 00429 reg.high = 0; 00430 00431 return reg; 00432 } 00433 00434 static inline XMMReg2Double And(const XMMReg2Double& expr1, const XMMReg2Double& expr2) 00435 { 00436 XMMReg2Double reg; 00437 int low1[2], high1[2]; 00438 int low2[2], high2[2]; 00439 
memcpy(low1, &expr1.low, sizeof(double)); 00440 memcpy(high1, &expr1.high, sizeof(double)); 00441 memcpy(low2, &expr2.low, sizeof(double)); 00442 memcpy(high2, &expr2.high, sizeof(double)); 00443 low1[0] &= low2[0]; 00444 low1[1] &= low2[1]; 00445 high1[0] &= high2[0]; 00446 high1[1] &= high2[1]; 00447 memcpy(®.low, low1, sizeof(double)); 00448 memcpy(®.high, high1, sizeof(double)); 00449 return reg; 00450 } 00451 00452 static inline XMMReg2Double Ternary(const XMMReg2Double& cond, const XMMReg2Double& true_expr, const XMMReg2Double& false_expr) 00453 { 00454 XMMReg2Double reg; 00455 if( cond.low ) 00456 reg.low = true_expr.low; 00457 else 00458 reg.low = false_expr.low; 00459 if( cond.high ) 00460 reg.high = true_expr.high; 00461 else 00462 reg.high = false_expr.high; 00463 return reg; 00464 } 00465 00466 static inline XMMReg2Double Min(const XMMReg2Double& expr1, const XMMReg2Double& expr2) 00467 { 00468 XMMReg2Double reg; 00469 reg.low = (expr1.low < expr2.low) ? expr1.low : expr2.high; 00470 reg.high = (expr1.high < expr2.high) ? 
expr1.high : expr2.low; 00471 return reg; 00472 } 00473 00474 static inline XMMReg2Double Load2Val(const double* ptr) 00475 { 00476 XMMReg2Double reg; 00477 reg.nsLoad2Val(ptr); 00478 return reg; 00479 } 00480 00481 static inline XMMReg2Double Load2ValAligned(const double* ptr) 00482 { 00483 XMMReg2Double reg; 00484 reg.nsLoad2ValAligned(ptr); 00485 return reg; 00486 } 00487 00488 static inline XMMReg2Double Load2Val(const float* ptr) 00489 { 00490 XMMReg2Double reg; 00491 reg.nsLoad2Val(ptr); 00492 return reg; 00493 } 00494 00495 static inline XMMReg2Double Load2Val(const unsigned char* ptr) 00496 { 00497 XMMReg2Double reg; 00498 reg.nsLoad2Val(ptr); 00499 return reg; 00500 } 00501 00502 static inline XMMReg2Double Load2Val(const short* ptr) 00503 { 00504 XMMReg2Double reg; 00505 reg.nsLoad2Val(ptr); 00506 return reg; 00507 } 00508 00509 static inline XMMReg2Double Load2Val(const unsigned short* ptr) 00510 { 00511 XMMReg2Double reg; 00512 reg.nsLoad2Val(ptr); 00513 return reg; 00514 } 00515 00516 inline void nsLoad1ValHighAndLow(const double* pval) 00517 { 00518 low = pval[0]; 00519 high = pval[0]; 00520 } 00521 00522 inline void nsLoad2Val(const double* pval) 00523 { 00524 low = pval[0]; 00525 high = pval[1]; 00526 } 00527 00528 inline void nsLoad2ValAligned(const double* pval) 00529 { 00530 low = pval[0]; 00531 high = pval[1]; 00532 } 00533 00534 inline void nsLoad2Val(const float* pval) 00535 { 00536 low = pval[0]; 00537 high = pval[1]; 00538 } 00539 00540 inline void nsLoad2Val(const unsigned char* ptr) 00541 { 00542 low = ptr[0]; 00543 high = ptr[1]; 00544 } 00545 00546 inline void nsLoad2Val(const short* ptr) 00547 { 00548 low = ptr[0]; 00549 high = ptr[1]; 00550 } 00551 00552 inline void nsLoad2Val(const unsigned short* ptr) 00553 { 00554 low = ptr[0]; 00555 high = ptr[1]; 00556 } 00557 00558 static inline void Load4Val(const unsigned char* ptr, XMMReg2Double& low, XMMReg2Double& high) 00559 { 00560 low.low = ptr[0]; 00561 low.high = ptr[1]; 00562 high.low 
= ptr[2]; 00563 high.high = ptr[3]; 00564 } 00565 00566 static inline void Load4Val(const short* ptr, XMMReg2Double& low, XMMReg2Double& high) 00567 { 00568 low.nsLoad2Val(ptr); 00569 high.nsLoad2Val(ptr+2); 00570 } 00571 00572 static inline void Load4Val(const unsigned short* ptr, XMMReg2Double& low, XMMReg2Double& high) 00573 { 00574 low.nsLoad2Val(ptr); 00575 high.nsLoad2Val(ptr+2); 00576 } 00577 00578 static inline void Load4Val(const double* ptr, XMMReg2Double& low, XMMReg2Double& high) 00579 { 00580 low.nsLoad2Val(ptr); 00581 high.nsLoad2Val(ptr+2); 00582 } 00583 00584 static inline void Load4Val(const float* ptr, XMMReg2Double& low, XMMReg2Double& high) 00585 { 00586 low.nsLoad2Val(ptr); 00587 high.nsLoad2Val(ptr+2); 00588 } 00589 00590 inline void Zeroize() 00591 { 00592 low = 0.0; 00593 high = 0.0; 00594 } 00595 00596 inline XMMReg2Double& operator= (const XMMReg2Double& other) 00597 { 00598 low = other.low; 00599 high = other.high; 00600 return *this; 00601 } 00602 00603 inline XMMReg2Double& operator+= (const XMMReg2Double& other) 00604 { 00605 low += other.low; 00606 high += other.high; 00607 return *this; 00608 } 00609 00610 inline XMMReg2Double& operator*= (const XMMReg2Double& other) 00611 { 00612 low *= other.low; 00613 high *= other.high; 00614 return *this; 00615 } 00616 00617 inline XMMReg2Double operator+ (const XMMReg2Double& other) const 00618 { 00619 XMMReg2Double ret; 00620 ret.low = low + other.low; 00621 ret.high = high + other.high; 00622 return ret; 00623 } 00624 00625 inline XMMReg2Double operator- (const XMMReg2Double& other) const 00626 { 00627 XMMReg2Double ret; 00628 ret.low = low - other.low; 00629 ret.high = high - other.high; 00630 return ret; 00631 } 00632 00633 inline XMMReg2Double operator* (const XMMReg2Double& other) const 00634 { 00635 XMMReg2Double ret; 00636 ret.low = low * other.low; 00637 ret.high = high * other.high; 00638 return ret; 00639 } 00640 00641 inline XMMReg2Double operator/ (const XMMReg2Double& other) const 
00642 { 00643 XMMReg2Double ret; 00644 ret.low = low / other.low; 00645 ret.high = high / other.high; 00646 return ret; 00647 } 00648 00649 inline void AddLowAndHigh() 00650 { 00651 double add = low + high; 00652 low = add; 00653 high = add; 00654 } 00655 00656 inline void Store2Double(double* pval) const 00657 { 00658 pval[0] = low; 00659 pval[1] = high; 00660 } 00661 00662 inline void Store2DoubleAligned(double* pval) const 00663 { 00664 pval[0] = low; 00665 pval[1] = high; 00666 } 00667 00668 void Store2Val(unsigned short* ptr) const 00669 { 00670 ptr[0] = (GUInt16)low; 00671 ptr[1] = (GUInt16)high; 00672 } 00673 00674 inline operator double () const 00675 { 00676 return low; 00677 } 00678 }; 00679 00680 #endif /* defined(__x86_64) || defined(_M_X64) */ 00681 00682 class XMMReg4Double 00683 { 00684 public: 00685 XMMReg2Double low, high; 00686 00687 XMMReg4Double() {} 00688 XMMReg4Double(const XMMReg4Double& other) : low(other.low), high(other.high) {} 00689 00690 static inline XMMReg4Double Zero() 00691 { 00692 XMMReg4Double reg; 00693 reg.low.Zeroize(); 00694 reg.high.Zeroize(); 00695 return reg; 00696 } 00697 00698 static inline XMMReg4Double Load1ValHighAndLow(const double* ptr) 00699 { 00700 XMMReg4Double reg; 00701 reg.low.nsLoad1ValHighAndLow(ptr); 00702 reg.high = reg.low; 00703 return reg; 00704 } 00705 00706 static inline XMMReg4Double Load4Val(const unsigned char* ptr) 00707 { 00708 XMMReg4Double reg; 00709 XMMReg2Double::Load4Val(ptr, reg.low, reg.high); 00710 return reg; 00711 } 00712 00713 static inline XMMReg4Double Load4Val(const short* ptr) 00714 { 00715 XMMReg4Double reg; 00716 reg.low.nsLoad2Val(ptr); 00717 reg.high.nsLoad2Val(ptr+2); 00718 return reg; 00719 } 00720 00721 static inline XMMReg4Double Load4Val(const unsigned short* ptr) 00722 { 00723 XMMReg4Double reg; 00724 reg.low.nsLoad2Val(ptr); 00725 reg.high.nsLoad2Val(ptr+2); 00726 return reg; 00727 } 00728 00729 static inline XMMReg4Double Load4Val(const double* ptr) 00730 { 00731 
XMMReg4Double reg; 00732 reg.low.nsLoad2Val(ptr); 00733 reg.high.nsLoad2Val(ptr+2); 00734 return reg; 00735 } 00736 00737 static inline XMMReg4Double Load4ValAligned(const double* ptr) 00738 { 00739 XMMReg4Double reg; 00740 reg.low.nsLoad2ValAligned(ptr); 00741 reg.high.nsLoad2ValAligned(ptr+2); 00742 return reg; 00743 } 00744 00745 static inline XMMReg4Double Load4Val(const float* ptr) 00746 { 00747 XMMReg4Double reg; 00748 XMMReg2Double::Load4Val(ptr, reg.low, reg.high); 00749 return reg; 00750 } 00751 00752 static inline XMMReg4Double Equals(const XMMReg4Double& expr1, const XMMReg4Double& expr2) 00753 { 00754 XMMReg4Double reg; 00755 reg.low = XMMReg2Double::Equals(expr1.low, expr2.low); 00756 reg.high = XMMReg2Double::Equals(expr1.high, expr2.high); 00757 return reg; 00758 } 00759 00760 static inline XMMReg4Double NotEquals(const XMMReg4Double& expr1, const XMMReg4Double& expr2) 00761 { 00762 XMMReg4Double reg; 00763 reg.low = XMMReg2Double::NotEquals(expr1.low, expr2.low); 00764 reg.high = XMMReg2Double::NotEquals(expr1.high, expr2.high); 00765 return reg; 00766 } 00767 00768 static inline XMMReg4Double Greater(const XMMReg4Double& expr1, const XMMReg4Double& expr2) 00769 { 00770 XMMReg4Double reg; 00771 reg.low = XMMReg2Double::Greater(expr1.low, expr2.low); 00772 reg.high = XMMReg2Double::Greater(expr1.high, expr2.high); 00773 return reg; 00774 } 00775 00776 static inline XMMReg4Double And(const XMMReg4Double& expr1, const XMMReg4Double& expr2) 00777 { 00778 XMMReg4Double reg; 00779 reg.low = XMMReg2Double::And(expr1.low, expr2.low); 00780 reg.high = XMMReg2Double::And(expr1.high, expr2.high); 00781 return reg; 00782 } 00783 00784 static inline XMMReg4Double Ternary(const XMMReg4Double& cond, const XMMReg4Double& true_expr, const XMMReg4Double& false_expr) 00785 { 00786 XMMReg4Double reg; 00787 reg.low = XMMReg2Double::Ternary(cond.low, true_expr.low, false_expr.low); 00788 reg.high = XMMReg2Double::Ternary(cond.high, true_expr.high, false_expr.high); 00789 
return reg; 00790 } 00791 00792 static inline XMMReg4Double Min(const XMMReg4Double& expr1, const XMMReg4Double& expr2) 00793 { 00794 XMMReg4Double reg; 00795 reg.low = XMMReg2Double::Min(expr1.low, expr2.low); 00796 reg.high = XMMReg2Double::Min(expr1.high, expr2.high); 00797 return reg; 00798 } 00799 00800 inline XMMReg4Double& operator= (const XMMReg4Double& other) 00801 { 00802 low = other.low; 00803 high = other.high; 00804 return *this; 00805 } 00806 00807 inline XMMReg4Double& operator+= (const XMMReg4Double& other) 00808 { 00809 low += other.low; 00810 high += other.high; 00811 return *this; 00812 } 00813 00814 inline XMMReg4Double& operator*= (const XMMReg4Double& other) 00815 { 00816 low *= other.low; 00817 high *= other.high; 00818 return *this; 00819 } 00820 00821 inline XMMReg4Double operator+ (const XMMReg4Double& other) const 00822 { 00823 XMMReg4Double ret; 00824 ret.low = low + other.low; 00825 ret.high = high + other.high; 00826 return ret; 00827 } 00828 00829 inline XMMReg4Double operator- (const XMMReg4Double& other) const 00830 { 00831 XMMReg4Double ret; 00832 ret.low = low - other.low; 00833 ret.high = high - other.high; 00834 return ret; 00835 } 00836 00837 inline XMMReg4Double operator* (const XMMReg4Double& other) const 00838 { 00839 XMMReg4Double ret; 00840 ret.low = low * other.low; 00841 ret.high = high * other.high; 00842 return ret; 00843 } 00844 00845 inline XMMReg4Double operator/ (const XMMReg4Double& other) const 00846 { 00847 XMMReg4Double ret; 00848 ret.low = low / other.low; 00849 ret.high = high / other.high; 00850 return ret; 00851 } 00852 00853 inline void AddLowAndHigh() 00854 { 00855 low = low + high; 00856 low.AddLowAndHigh(); 00857 } 00858 00859 inline XMMReg2Double& GetLow() 00860 { 00861 return low; 00862 } 00863 00864 inline XMMReg2Double& GetHigh() 00865 { 00866 return high; 00867 } 00868 00869 void Store4Val(unsigned short* ptr) const 00870 { 00871 low.Store2Val(ptr); 00872 high.Store2Val(ptr+2); 00873 } 00874 }; 
00875 00876 #endif /* #ifndef DOXYGEN_SKIP */ 00877 00878 #endif /* GDALSSE_PRIV_H_INCLUDED */