GDAL
gdalsse_priv.h
00001 /******************************************************************************
00002  * $Id: gdalsse_priv.h 34921 2016-08-04 22:26:31Z rouault $
00003  *
00004  * Project:  GDAL
00005  * Purpose:  SSE2 helper
00006  * Author:   Even Rouault <even dot rouault at spatialys dot com>
00007  *
00008  ******************************************************************************
00009  * Copyright (c) 2014, Even Rouault <even dot rouault at spatialys dot com>
00010  *
00011  * Permission is hereby granted, free of charge, to any person obtaining a
00012  * copy of this software and associated documentation files (the "Software"),
00013  * to deal in the Software without restriction, including without limitation
00014  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
00015  * and/or sell copies of the Software, and to permit persons to whom the
00016  * Software is furnished to do so, subject to the following conditions:
00017  *
00018  * The above copyright notice and this permission notice shall be included
00019  * in all copies or substantial portions of the Software.
00020  *
00021  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00022  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00023  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
00024  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00025  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
00026  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00027  * DEALINGS IN THE SOFTWARE.
00028  ****************************************************************************/
00029 
00030 #ifndef GDALSSE_PRIV_H_INCLUDED
00031 #define GDALSSE_PRIV_H_INCLUDED
00032 
00033 #ifndef DOXYGEN_SKIP
00034 
00035 #include "cpl_port.h"
00036 
00037 /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
00038 /* Could possibly be used too on 32bit, but we would need to check at runtime */
00039 #if (defined(__x86_64) || defined(_M_X64)) && !defined(USE_SSE2_EMULATION)
00040 
00041 /* Requires SSE2 */
00042 #include <emmintrin.h>
00043 #include <string.h>
00044 
00045 #ifdef __SSE4_1__
00046 #include <smmintrin.h>
00047 #endif
00048 
/**
 * Wrapper around one SSE2 XMM register holding two packed doubles.
 *
 * Every method maps onto one or a few SSE2 (optionally SSE4.1) intrinsics.
 * The comparison factories (Equals/NotEquals/Greater) produce per-lane
 * all-ones / all-zeroes bit masks (the _mm_cmp*_pd convention), intended
 * to be combined with And() or used as the condition of Ternary().
 */
class XMMReg2Double
{
  public:
    __m128d xmm;   /* the two packed double values */

    /* coverity[uninit_member] */
    XMMReg2Double() {}   /* deliberately uninitialized, like a raw register */

    /* Puts val in the low element; _mm_load_sd() zeroes the high element. */
    XMMReg2Double(double  val)  { xmm = _mm_load_sd (&val); }
    XMMReg2Double(const XMMReg2Double& other) : xmm(other.xmm) {}

    /* Both elements set to 0.0. */
    static inline XMMReg2Double Zero()
    {
        XMMReg2Double reg;
        reg.Zeroize();
        return reg;
    }

    /* Broadcasts *ptr into both elements. */
    static inline XMMReg2Double Load1ValHighAndLow(const double* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad1ValHighAndLow(ptr);
        return reg;
    }

    /* Loads 2 doubles from a possibly unaligned address. */
    static inline XMMReg2Double Load2Val(const double* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Loads 2 floats and widens them to doubles. */
    static inline XMMReg2Double Load2Val(const float* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Loads 2 doubles from a 16-byte aligned address. */
    static inline XMMReg2Double Load2ValAligned(const double* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2ValAligned(ptr);
        return reg;
    }

    /* Loads 2 unsigned bytes and converts them to doubles. */
    static inline XMMReg2Double Load2Val(const unsigned char* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Loads 2 signed 16-bit ints and converts them to doubles. */
    static inline XMMReg2Double Load2Val(const short* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Loads 2 unsigned 16-bit ints and converts them to doubles. */
    static inline XMMReg2Double Load2Val(const unsigned short* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Per-lane equality mask: all-ones bits where equal, all-zeroes otherwise. */
    static inline XMMReg2Double Equals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;
        reg.xmm = _mm_cmpeq_pd(expr1.xmm, expr2.xmm);
        return reg;
    }

    /* Per-lane inequality mask. */
    static inline XMMReg2Double NotEquals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;
        reg.xmm = _mm_cmpneq_pd(expr1.xmm, expr2.xmm);
        return reg;
    }

    /* Per-lane "expr1 > expr2" mask. */
    static inline XMMReg2Double Greater(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;
        reg.xmm = _mm_cmpgt_pd(expr1.xmm, expr2.xmm);
        return reg;
    }

    /* Bitwise AND of the two registers (typically combines comparison masks). */
    static inline XMMReg2Double And(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;
        reg.xmm = _mm_and_pd(expr1.xmm, expr2.xmm);
        return reg;
    }

    /* Per-lane bitwise select: (cond & true_expr) | (~cond & false_expr).
       cond is expected to be an all-ones/all-zeroes mask per lane. */
    static inline XMMReg2Double Ternary(const XMMReg2Double& cond, const XMMReg2Double& true_expr, const XMMReg2Double& false_expr)
    {
        XMMReg2Double reg;
        reg.xmm = _mm_or_pd(_mm_and_pd (cond.xmm, true_expr.xmm), _mm_andnot_pd(cond.xmm, false_expr.xmm));
        return reg;
    }

    /* Per-lane minimum (semantics of _mm_min_pd: the second operand is
       returned when the "less than" comparison is false, including NaN). */
    static inline XMMReg2Double Min(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;
        reg.xmm = _mm_min_pd(expr1.xmm, expr2.xmm);
        return reg;
    }

    /* Broadcasts *ptr into both elements. */
    inline void nsLoad1ValHighAndLow(const double* ptr)
    {
        xmm =  _mm_load1_pd(ptr);
    }

    /* Unaligned load of 2 doubles. */
    inline void nsLoad2Val(const double* ptr)
    {
        xmm = _mm_loadu_pd(ptr);
    }

    /* Aligned load of 2 doubles (pval must be 16-byte aligned). */
    inline void nsLoad2ValAligned(const double* pval)
    {
        xmm = _mm_load_pd(pval);
    }

    /* Loads 2 floats, packs them into the low lanes, widens to doubles. */
    inline void nsLoad2Val(const float* pval)
    {
        __m128 temp1 = _mm_load_ss(pval);      /* temp1 = a|0|0|0 */
        __m128 temp2 = _mm_load_ss(pval + 1);  /* temp2 = b|0|0|0 */
        temp1 = _mm_shuffle_ps(temp1, temp2, _MM_SHUFFLE(1,0,1,0)); /* a|0|b|0 */
        temp1 = _mm_shuffle_ps(temp1, temp1, _MM_SHUFFLE(3,3,2,0)); /* a|b|0|0 */
        xmm = _mm_cvtps_pd(temp1);             /* widen the low 2 floats */
    }

    /* Loads 2 unsigned bytes, zero-extends them, converts to doubles. */
    inline void nsLoad2Val(const unsigned char* ptr)
    {
#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
        /* Copy through a local to avoid a potentially unaligned 16-bit read. */
        unsigned short s;
        memcpy(&s, ptr, 2);
        __m128i xmm_i = _mm_cvtsi32_si128(s);
#else
        __m128i xmm_i = _mm_cvtsi32_si128(*(unsigned short*)(ptr));
#endif
#ifdef __SSE4_1__
        xmm_i = _mm_cvtepu8_epi32(xmm_i);   /* zero-extend bytes to 32-bit ints */
#else
        /* Zero-extend bytes -> 16-bit -> 32-bit in two unpack steps. */
        xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
        xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
#endif
        xmm = _mm_cvtepi32_pd(xmm_i);
    }

    /* Loads 2 signed 16-bit ints, sign-extends them, converts to doubles. */
    inline void nsLoad2Val(const short* ptr)
    {
        int i;
        memcpy(&i, ptr, 4);  /* safe unaligned read of the 2 shorts */
        __m128i xmm_i = _mm_cvtsi32_si128(i);
#ifdef __SSE4_1__
        xmm_i = _mm_cvtepi16_epi32(xmm_i);
#else
        xmm_i = _mm_unpacklo_epi16(xmm_i,xmm_i); /* 0|0|0|0|0|0|b|a --> 0|0|0|0|b|b|a|a */
        xmm_i = _mm_srai_epi32(xmm_i, 16);       /* 0|0|0|0|b|b|a|a --> 0|0|0|0|sign(b)|b|sign(a)|a */
#endif
        xmm = _mm_cvtepi32_pd(xmm_i);
    }

    /* Loads 2 unsigned 16-bit ints, zero-extends them, converts to doubles. */
    inline void nsLoad2Val(const unsigned short* ptr)
    {
        int i;
        memcpy(&i, ptr, 4);  /* safe unaligned read of the 2 ushorts */
        __m128i xmm_i = _mm_cvtsi32_si128(i);
#ifdef __SSE4_1__
        xmm_i = _mm_cvtepu16_epi32(xmm_i);
#else
        xmm_i = _mm_unpacklo_epi16(xmm_i,_mm_setzero_si128()); /* 0|0|0|0|0|0|b|a --> 0|0|0|0|0|b|0|a */
#endif
        xmm = _mm_cvtepi32_pd(xmm_i);
    }

    /* Loads 4 unsigned bytes into two registers: low gets elements 0-1,
       high gets elements 2-3, all converted to doubles. */
    static inline void Load4Val(const unsigned char* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
        /* Copy through a local to avoid a potentially unaligned 32-bit read. */
        int i;
        memcpy(&i, ptr, 4);
        __m128i xmm_i = _mm_cvtsi32_si128(i);
#else
        __m128i xmm_i = _mm_cvtsi32_si128(*(int*)(ptr));
#endif
#ifdef __SSE4_1__
        xmm_i = _mm_cvtepu8_epi32(xmm_i);   /* zero-extend 4 bytes to 32-bit ints */
#else
        xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
        xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
#endif
        low.xmm = _mm_cvtepi32_pd(xmm_i);
        /* Move ints 2-3 into positions 0-1 before converting. */
        high.xmm =  _mm_cvtepi32_pd(_mm_shuffle_epi32(xmm_i,_MM_SHUFFLE(3,2,3,2)));
    }

    /* Loads 4 signed 16-bit ints split across two registers. */
    static inline void Load4Val(const short* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        low.nsLoad2Val(ptr);
        high.nsLoad2Val(ptr+2);
    }

    /* Loads 4 unsigned 16-bit ints split across two registers. */
    static inline void Load4Val(const unsigned short* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        low.nsLoad2Val(ptr);
        high.nsLoad2Val(ptr+2);
    }

    /* Loads 4 doubles split across two registers. */
    static inline void Load4Val(const double* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        low.nsLoad2Val(ptr);
        high.nsLoad2Val(ptr+2);
    }

    /* Loads 4 floats split across two registers, widened to doubles. */
    static inline void Load4Val(const float* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        __m128 temp1 = _mm_loadu_ps(ptr);
        __m128 temp2 = _mm_shuffle_ps(temp1, temp1, _MM_SHUFFLE(3,2,3,2)); /* floats 2-3 into low lanes */
        low.xmm = _mm_cvtps_pd(temp1);
        high.xmm = _mm_cvtps_pd(temp2);
    }

    /* Sets both elements to 0.0. */
    inline void Zeroize()
    {
        xmm = _mm_setzero_pd();
    }

    inline XMMReg2Double& operator= (const XMMReg2Double& other)
    {
        xmm = other.xmm;
        return *this;
    }

    inline XMMReg2Double& operator+= (const XMMReg2Double& other)
    {
        xmm = _mm_add_pd(xmm, other.xmm);
        return *this;
    }

    inline XMMReg2Double& operator*= (const XMMReg2Double& other)
    {
        xmm = _mm_mul_pd(xmm, other.xmm);
        return *this;
    }

    inline XMMReg2Double operator+ (const XMMReg2Double& other) const
    {
        XMMReg2Double ret;
        ret.xmm = _mm_add_pd(xmm, other.xmm);
        return ret;
    }

    inline XMMReg2Double operator- (const XMMReg2Double& other) const
    {
        XMMReg2Double ret;
        ret.xmm = _mm_sub_pd(xmm, other.xmm);
        return ret;
    }

    inline XMMReg2Double operator* (const XMMReg2Double& other) const
    {
        XMMReg2Double ret;
        ret.xmm = _mm_mul_pd(xmm, other.xmm);
        return ret;
    }

    inline XMMReg2Double operator/ (const XMMReg2Double& other) const
    {
        XMMReg2Double ret;
        ret.xmm = _mm_div_pd(xmm, other.xmm);
        return ret;
    }

    /* After this call both elements hold low + high. */
    inline void AddLowAndHigh()
    {
        __m128d xmm2;
        xmm2 = _mm_shuffle_pd(xmm,xmm,_MM_SHUFFLE2(0,1)); /* transfer high word into low word of xmm2 */
        xmm = _mm_add_pd(xmm, xmm2);
    }

    /* Stores the 2 doubles to a possibly unaligned address. */
    inline void Store2Double(double* pval) const
    {
        _mm_storeu_pd(pval, xmm);
    }

    /* Stores the 2 doubles to a 16-byte aligned address. */
    inline void Store2DoubleAligned(double* pval) const
    {
        _mm_store_pd(pval, xmm);
    }

    /* Stores the 2 values converted to unsigned short.
       NOTE(review): _mm_cvtpd_epi32 rounds per the current rounding mode
       (round-to-nearest-even by default); values are assumed to fit in
       [0,65535] -- no saturation is performed here. */
    void Store2Val(unsigned short* ptr) const
    {
        __m128i tmp = _mm_cvtpd_epi32(xmm); /* Convert the 2 double values to 2 integers */
        ptr[0] = (GUInt16)_mm_extract_epi16(tmp, 0); /* low 16 bits of int 0 */
        ptr[1] = (GUInt16)_mm_extract_epi16(tmp, 2); /* low 16 bits of int 1 */
    }

    /* Returns the low element. */
    inline operator double () const
    {
        double val;
        _mm_store_sd(&val, xmm);
        return val;
    }
};
00354 
00355 #else
00356 
00357 #warning "Software emulation of SSE2 !"
00358 
/**
 * Software emulation of the SSE2 XMMReg2Double class for processors where
 * SSE2 availability is not guaranteed.  The two packed doubles are stored
 * as plain 'low'/'high' members, and every method mirrors the semantics of
 * the corresponding SSE2 intrinsic, including the all-ones / all-zeroes
 * per-lane bit-mask convention of the comparison operations.
 */
class XMMReg2Double
{
  public:
    double low;
    double high;

    XMMReg2Double() {}   /* deliberately uninitialized, like a raw register */
    /* Mirrors _mm_load_sd(): value in the low element, high zeroed. */
    XMMReg2Double(double  val)  { low = val; high = 0.0; }
    XMMReg2Double(const XMMReg2Double& other) : low(other.low), high(other.high) {}

    /* Both elements set to 0.0. */
    static inline XMMReg2Double Zero()
    {
        XMMReg2Double reg;
        reg.Zeroize();
        return reg;
    }

    /* Broadcasts *ptr into both elements. */
    static inline XMMReg2Double Load1ValHighAndLow(const double* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad1ValHighAndLow(ptr);
        return reg;
    }

    /* Per-lane equality mask: all-ones bit pattern where equal, 0 otherwise
       (mirrors _mm_cmpeq_pd). */
    static inline XMMReg2Double Equals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;

        if (expr1.low == expr2.low)
            memset(&(reg.low), 0xFF, sizeof(double));
        else
            reg.low = 0;

        if (expr1.high == expr2.high)
            memset(&(reg.high), 0xFF, sizeof(double));
        else
            reg.high = 0;

        return reg;
    }

    /* Per-lane inequality mask (mirrors _mm_cmpneq_pd). */
    static inline XMMReg2Double NotEquals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;

        if (expr1.low != expr2.low)
            memset(&(reg.low), 0xFF, sizeof(double));
        else
            reg.low = 0;

        if (expr1.high != expr2.high)
            memset(&(reg.high), 0xFF, sizeof(double));
        else
            reg.high = 0;

        return reg;
    }

    /* Per-lane "expr1 > expr2" mask (mirrors _mm_cmpgt_pd). */
    static inline XMMReg2Double Greater(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;

        if (expr1.low > expr2.low)
            memset(&(reg.low), 0xFF, sizeof(double));
        else
            reg.low = 0;

        if (expr1.high > expr2.high)
            memset(&(reg.high), 0xFF, sizeof(double));
        else
            reg.high = 0;

        return reg;
    }

    /* Bitwise AND of the two registers, done on the raw double bit patterns
       via memcpy to avoid strict-aliasing issues (mirrors _mm_and_pd). */
    static inline XMMReg2Double And(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;
        int low1[2], high1[2];
        int low2[2], high2[2];
        memcpy(low1, &expr1.low, sizeof(double));
        memcpy(high1, &expr1.high, sizeof(double));
        memcpy(low2, &expr2.low, sizeof(double));
        memcpy(high2, &expr2.high, sizeof(double));
        low1[0] &= low2[0];
        low1[1] &= low2[1];
        high1[0] &= high2[0];
        high1[1] &= high2[1];
        memcpy(&reg.low, low1, sizeof(double));
        memcpy(&reg.high, high1, sizeof(double));
        return reg;
    }

    /* Per-lane select: cond lanes are expected to be all-ones (truthy,
       the memset pattern is a NaN) or all-zeroes masks. */
    static inline XMMReg2Double Ternary(const XMMReg2Double& cond, const XMMReg2Double& true_expr, const XMMReg2Double& false_expr)
    {
        XMMReg2Double reg;
        if( cond.low )
            reg.low = true_expr.low;
        else
            reg.low = false_expr.low;
        if( cond.high )
            reg.high = true_expr.high;
        else
            reg.high = false_expr.high;
        return reg;
    }

    /* Per-lane minimum.  Mirrors _mm_min_pd(): when the "less than"
       comparison is false (including when an operand is NaN), the second
       operand is returned.
       Bug fix: the false branch previously selected the OPPOSITE lane of
       expr2 (expr2.high for the low lane and expr2.low for the high lane),
       yielding wrong minima whenever expr2 won the comparison. */
    static inline XMMReg2Double Min(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
    {
        XMMReg2Double reg;
        reg.low = (expr1.low < expr2.low) ? expr1.low : expr2.low;
        reg.high = (expr1.high < expr2.high) ? expr1.high : expr2.high;
        return reg;
    }

    /* Loads 2 doubles. */
    static inline XMMReg2Double Load2Val(const double* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Loads 2 doubles (alignment is irrelevant in the emulation). */
    static inline XMMReg2Double Load2ValAligned(const double* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2ValAligned(ptr);
        return reg;
    }

    /* Loads 2 floats widened to doubles. */
    static inline XMMReg2Double Load2Val(const float* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Loads 2 unsigned bytes converted to doubles. */
    static inline XMMReg2Double Load2Val(const unsigned char* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Loads 2 signed 16-bit ints converted to doubles. */
    static inline XMMReg2Double Load2Val(const short* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    /* Loads 2 unsigned 16-bit ints converted to doubles. */
    static inline XMMReg2Double Load2Val(const unsigned short* ptr)
    {
        XMMReg2Double reg;
        reg.nsLoad2Val(ptr);
        return reg;
    }

    inline void nsLoad1ValHighAndLow(const double* pval)
    {
        low = pval[0];
        high = pval[0];
    }

    inline void nsLoad2Val(const double* pval)
    {
        low = pval[0];
        high = pval[1];
    }

    inline void nsLoad2ValAligned(const double* pval)
    {
        low = pval[0];
        high = pval[1];
    }

    inline void nsLoad2Val(const float* pval)
    {
        low = pval[0];
        high = pval[1];
    }

    inline void nsLoad2Val(const unsigned char* ptr)
    {
        low = ptr[0];
        high = ptr[1];
    }

    inline void nsLoad2Val(const short* ptr)
    {
        low = ptr[0];
        high = ptr[1];
    }

    inline void nsLoad2Val(const unsigned short* ptr)
    {
        low = ptr[0];
        high = ptr[1];
    }

    /* Loads 4 unsigned bytes: 'low' gets elements 0-1, 'high' elements 2-3. */
    static inline void Load4Val(const unsigned char* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        low.low = ptr[0];
        low.high = ptr[1];
        high.low = ptr[2];
        high.high = ptr[3];
    }

    static inline void Load4Val(const short* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        low.nsLoad2Val(ptr);
        high.nsLoad2Val(ptr+2);
    }

    static inline void Load4Val(const unsigned short* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        low.nsLoad2Val(ptr);
        high.nsLoad2Val(ptr+2);
    }

    static inline void Load4Val(const double* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        low.nsLoad2Val(ptr);
        high.nsLoad2Val(ptr+2);
    }

    static inline void Load4Val(const float* ptr, XMMReg2Double& low, XMMReg2Double& high)
    {
        low.nsLoad2Val(ptr);
        high.nsLoad2Val(ptr+2);
    }

    /* Sets both elements to 0.0. */
    inline void Zeroize()
    {
        low = 0.0;
        high = 0.0;
    }

    inline XMMReg2Double& operator= (const XMMReg2Double& other)
    {
        low = other.low;
        high = other.high;
        return *this;
    }

    inline XMMReg2Double& operator+= (const XMMReg2Double& other)
    {
        low += other.low;
        high += other.high;
        return *this;
    }

    inline XMMReg2Double& operator*= (const XMMReg2Double& other)
    {
        low *= other.low;
        high *= other.high;
        return *this;
    }

    inline XMMReg2Double operator+ (const XMMReg2Double& other) const
    {
        XMMReg2Double ret;
        ret.low = low + other.low;
        ret.high = high + other.high;
        return ret;
    }

    inline XMMReg2Double operator- (const XMMReg2Double& other) const
    {
        XMMReg2Double ret;
        ret.low = low - other.low;
        ret.high = high - other.high;
        return ret;
    }

    inline XMMReg2Double operator* (const XMMReg2Double& other) const
    {
        XMMReg2Double ret;
        ret.low = low * other.low;
        ret.high = high * other.high;
        return ret;
    }

    inline XMMReg2Double operator/ (const XMMReg2Double& other) const
    {
        XMMReg2Double ret;
        ret.low = low / other.low;
        ret.high = high / other.high;
        return ret;
    }

    /* After this call both elements hold low + high. */
    inline void AddLowAndHigh()
    {
        double add = low + high;
        low = add;
        high = add;
    }

    inline void Store2Double(double* pval) const
    {
        pval[0] = low;
        pval[1] = high;
    }

    inline void Store2DoubleAligned(double* pval) const
    {
        pval[0] = low;
        pval[1] = high;
    }

    /* Stores the 2 values converted to unsigned short (GUInt16 equivalent).
       NOTE(review): this truncates while the SSE2 branch rounds via
       _mm_cvtpd_epi32 -- results can differ by 1 for fractional inputs;
       values are assumed to fit in [0,65535]. */
    void Store2Val(unsigned short* ptr) const
    {
        ptr[0] = (unsigned short)low;
        ptr[1] = (unsigned short)high;
    }

    /* Returns the low element. */
    inline operator double () const
    {
        return low;
    }
};
00679 
00680 #endif /*  defined(__x86_64) || defined(_M_X64) */
00681 
00682 class XMMReg4Double
00683 {
00684   public:
00685     XMMReg2Double low, high;
00686 
00687     XMMReg4Double() {}
00688     XMMReg4Double(const XMMReg4Double& other) : low(other.low), high(other.high) {}
00689 
00690     static inline XMMReg4Double Zero()
00691     {
00692         XMMReg4Double reg;
00693         reg.low.Zeroize();
00694         reg.high.Zeroize();
00695         return reg;
00696     }
00697 
00698     static inline XMMReg4Double Load1ValHighAndLow(const double* ptr)
00699     {
00700         XMMReg4Double reg;
00701         reg.low.nsLoad1ValHighAndLow(ptr);
00702         reg.high = reg.low;
00703         return reg;
00704     }
00705 
00706     static inline XMMReg4Double Load4Val(const unsigned char* ptr)
00707     {
00708         XMMReg4Double reg;
00709         XMMReg2Double::Load4Val(ptr, reg.low, reg.high);
00710         return reg;
00711     }
00712 
00713     static inline XMMReg4Double Load4Val(const short* ptr)
00714     {
00715         XMMReg4Double reg;
00716         reg.low.nsLoad2Val(ptr);
00717         reg.high.nsLoad2Val(ptr+2);
00718         return reg;
00719     }
00720 
00721     static inline XMMReg4Double Load4Val(const unsigned short* ptr)
00722     {
00723         XMMReg4Double reg;
00724         reg.low.nsLoad2Val(ptr);
00725         reg.high.nsLoad2Val(ptr+2);
00726         return reg;
00727     }
00728 
00729     static inline XMMReg4Double Load4Val(const double* ptr)
00730     {
00731         XMMReg4Double reg;
00732         reg.low.nsLoad2Val(ptr);
00733         reg.high.nsLoad2Val(ptr+2);
00734         return reg;
00735     }
00736 
00737     static inline XMMReg4Double Load4ValAligned(const double* ptr)
00738     {
00739         XMMReg4Double reg;
00740         reg.low.nsLoad2ValAligned(ptr);
00741         reg.high.nsLoad2ValAligned(ptr+2);
00742         return reg;
00743     }
00744 
00745     static inline XMMReg4Double Load4Val(const float* ptr)
00746     {
00747         XMMReg4Double reg;
00748         XMMReg2Double::Load4Val(ptr, reg.low, reg.high);
00749         return reg;
00750     }
00751 
00752     static inline XMMReg4Double Equals(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
00753     {
00754         XMMReg4Double reg;
00755         reg.low = XMMReg2Double::Equals(expr1.low, expr2.low);
00756         reg.high = XMMReg2Double::Equals(expr1.high, expr2.high);
00757         return reg;
00758     }
00759 
00760     static inline XMMReg4Double NotEquals(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
00761     {
00762         XMMReg4Double reg;
00763         reg.low = XMMReg2Double::NotEquals(expr1.low, expr2.low);
00764         reg.high = XMMReg2Double::NotEquals(expr1.high, expr2.high);
00765         return reg;
00766     }
00767 
00768     static inline XMMReg4Double Greater(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
00769     {
00770         XMMReg4Double reg;
00771         reg.low = XMMReg2Double::Greater(expr1.low, expr2.low);
00772         reg.high = XMMReg2Double::Greater(expr1.high, expr2.high);
00773         return reg;
00774     }
00775 
00776     static inline XMMReg4Double And(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
00777     {
00778         XMMReg4Double reg;
00779         reg.low = XMMReg2Double::And(expr1.low, expr2.low);
00780         reg.high = XMMReg2Double::And(expr1.high, expr2.high);
00781         return reg;
00782     }
00783 
00784     static inline XMMReg4Double Ternary(const XMMReg4Double& cond, const XMMReg4Double& true_expr, const XMMReg4Double& false_expr)
00785     {
00786         XMMReg4Double reg;
00787         reg.low = XMMReg2Double::Ternary(cond.low, true_expr.low, false_expr.low);
00788         reg.high = XMMReg2Double::Ternary(cond.high, true_expr.high, false_expr.high);
00789         return reg;
00790     }
00791 
00792     static inline XMMReg4Double Min(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
00793     {
00794         XMMReg4Double reg;
00795         reg.low = XMMReg2Double::Min(expr1.low, expr2.low);
00796         reg.high = XMMReg2Double::Min(expr1.high, expr2.high);
00797         return reg;
00798     }
00799 
00800     inline XMMReg4Double& operator= (const XMMReg4Double& other)
00801     {
00802         low = other.low;
00803         high = other.high;
00804         return *this;
00805     }
00806 
00807     inline XMMReg4Double& operator+= (const XMMReg4Double& other)
00808     {
00809         low += other.low;
00810         high += other.high;
00811         return *this;
00812     }
00813 
00814     inline XMMReg4Double& operator*= (const XMMReg4Double& other)
00815     {
00816         low *= other.low;
00817         high *= other.high;
00818         return *this;
00819     }
00820 
00821     inline XMMReg4Double operator+ (const XMMReg4Double& other) const
00822     {
00823         XMMReg4Double ret;
00824         ret.low = low + other.low;
00825         ret.high = high + other.high;
00826         return ret;
00827     }
00828 
00829     inline XMMReg4Double operator- (const XMMReg4Double& other) const
00830     {
00831         XMMReg4Double ret;
00832         ret.low = low - other.low;
00833         ret.high = high - other.high;
00834         return ret;
00835     }
00836 
00837     inline XMMReg4Double operator* (const XMMReg4Double& other) const
00838     {
00839         XMMReg4Double ret;
00840         ret.low = low * other.low;
00841         ret.high = high * other.high;
00842         return ret;
00843     }
00844 
00845     inline XMMReg4Double operator/ (const XMMReg4Double& other) const
00846     {
00847         XMMReg4Double ret;
00848         ret.low = low / other.low;
00849         ret.high = high / other.high;
00850         return ret;
00851     }
00852 
00853     inline void AddLowAndHigh()
00854     {
00855         low = low + high;
00856         low.AddLowAndHigh();
00857     }
00858 
00859     inline XMMReg2Double& GetLow()
00860     {
00861         return low;
00862     }
00863 
00864     inline XMMReg2Double& GetHigh()
00865     {
00866         return high;
00867     }
00868 
00869     void Store4Val(unsigned short* ptr) const
00870     {
00871         low.Store2Val(ptr);
00872         high.Store2Val(ptr+2);
00873     }
00874 };
00875 
00876 #endif /* #ifndef DOXYGEN_SKIP */
00877 
00878 #endif /* GDALSSE_PRIV_H_INCLUDED */

Generated for GDAL by doxygen 1.7.6.1.