Use compiler intrinsic functions (when available) for byteswapping in RTREE.

FossilOrigin-Name: 82fcd54a5941c20895ffc22d8009c1ebdae44eda
This commit is contained in:
drh 2017-02-01 15:24:32 +00:00
parent 1650fcb1b5
commit 03626e3812
3 changed files with 131 additions and 25 deletions

View File

@ -362,6 +362,65 @@ struct RtreeMatchArg {
# define MIN(x,y) ((x) > (y) ? (y) : (x))
#endif
/*
** GCC_VERSION encodes the GCC release as
** major*1000000 + minor*1000 + patchlevel.  It is 0 when the compiler
** does not define __GNUC__.  A GCC_VERSION that is already defined when
** this point is reached is left untouched.
*/
#if !defined(GCC_VERSION)
# if !defined(__GNUC__)
#  define GCC_VERSION 0
# else
#  define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
# endif
#endif
/*
** CLANG_VERSION encodes the clang release as
** major*1000000 + minor*1000 + patchlevel.  It is 0 when __clang__ is
** not defined, and also when _WIN32 is defined.  A CLANG_VERSION that is
** already defined when this point is reached is left untouched.
*/
#if !defined(CLANG_VERSION)
# if !defined(__clang__) || defined(_WIN32)
#  define CLANG_VERSION 0
# else
#  define CLANG_VERSION \
     (__clang_major__*1000000+__clang_minor__*1000+__clang_patchlevel__)
# endif
#endif
/* The testcase() macro should already be defined in the amalgamation. If
** it is not, make it a no-op.
**
** The guard must test SQLITE_AMALGAMATION (spelled exactly so).  A
** misspelled name is never defined, which would cause testcase() to be
** redefined as a no-op even when building as part of the amalgamation.
*/
#ifndef SQLITE_AMALGAMATION
# define testcase(X)
#endif
/*
** Macros to determine whether the machine is big or little endian,
** and whether or not that determination is run-time or compile-time.
**
** For best performance, an attempt is made to guess at the byte-order
** using C-preprocessor macros. If that is unsuccessful, or if
** SQLITE_RUNTIME_BYTEORDER is defined, then byte-order is determined
** at run-time.
**
** SQLITE_BYTEORDER ends up as exactly one of:
**
**    1234   little-endian, known at compile-time
**    4321   big-endian, known at compile-time
**    0      unknown at compile-time
**
** A SQLITE_BYTEORDER that is already defined when this point is reached
** is left untouched.  The "0" fallback is only used when neither of the
** architecture tests matched, so that it never overrides (or redefines)
** a value chosen by one of those tests.
*/
#ifndef SQLITE_BYTEORDER
# if defined(SQLITE_RUNTIME_BYTEORDER)
#  define SQLITE_BYTEORDER 0    /* Run-time detection was requested */
# elif defined(i386)     || defined(__i386__)   || defined(_M_IX86) ||    \
       defined(__x86_64) || defined(__x86_64__) || defined(_M_X64)  ||    \
       defined(_M_AMD64) || defined(_M_ARM)     || defined(__x86)   ||    \
       defined(__arm__)
#  define SQLITE_BYTEORDER 1234
# elif defined(sparc) || defined(__ppc__)
#  define SQLITE_BYTEORDER 4321
# else
#  define SQLITE_BYTEORDER 0    /* 0 means "unknown at compile-time" */
# endif
#endif
/*
** MSVC_VERSION is the value of _MSC_VER when that macro is defined and
** 0 otherwise.  A MSVC_VERSION that is already defined when this point
** is reached is left untouched.
*/
#if !defined(MSVC_VERSION)
# ifndef _MSC_VER
#  define MSVC_VERSION 0
# else
#  define MSVC_VERSION _MSC_VER
# endif
#endif
/*
** Functions to deserialize a 16 bit integer, 32 bit real number and
** 64 bit integer. The deserialized value is returned.
@ -370,12 +429,16 @@ static int readInt16(u8 *p){
return (p[0]<<8) + p[1];
}
static void readCoord(u8 *p, RtreeCoord *pCoord){
#if defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==1234
memcpy(&pCoord->u, p, 4);
assert( ((((char*)p) - (char*)0)&3)==0 ); /* p is always 4-byte aligned */
#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
pCoord->u = _byteswap_ulong(*(u32*)p);
#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
pCoord->u = __builtin_bswap32(*(u32*)p);
#elif SQLITE_BYTEORDER==1234
pCoord->u = ((pCoord->u>>24)&0xff)|((pCoord->u>>8)&0xff00)|
((pCoord->u&0xff)<<24)|((pCoord->u&0xff00)<<8);
#elif defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==4321
memcpy(&pCoord->u, p, 4);
#elif SQLITE_BYTEORDER==4321
pCoord->u = *(u32*)p;
#else
pCoord->u = (
(((u32)p[0]) << 24) +
@ -386,6 +449,20 @@ static void readCoord(u8 *p, RtreeCoord *pCoord){
#endif
}
static i64 readInt64(u8 *p){
testcase( ((((char*)p) - (char*)0)&7)!=0 ); /* not always 8-byte aligned */
#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
u64 x;
memcpy(&x, p, 8);
return (i64)_byteswap_uint64(x);
#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
u64 x;
memcpy(&x, p, 8);
return (i64)__builtin_bswap64(x);
#elif SQLITE_BYTEORDER==4321
i64 x;
memcpy(&x, p, 8);
return x;
#else
return (
(((i64)p[0]) << 56) +
(((i64)p[1]) << 48) +
@ -396,6 +473,7 @@ static i64 readInt64(u8 *p){
(((i64)p[6]) << 8) +
(((i64)p[7]) << 0)
);
#endif
}
/*
@ -410,16 +488,38 @@ static int writeInt16(u8 *p, int i){
}
/*
** Store the 32-bit value pCoord->u into p[0..3] with the most significant
** byte first, and return the number of bytes written (always 4).
**
** Three strategies are selected at compile-time:
**
**   *  Big-endian build: the in-memory value already has the required
**      byte order, so it is copied as-is.
**   *  Little-endian build with a byte-swap intrinsic (GCC/clang or MSVC):
**      the value is swapped and then copied.
**   *  Anything else: the value is written out one byte at a time.
*/
static int writeCoord(u8 *p, RtreeCoord *pCoord){
  u32 v = pCoord->u;
  assert( ((((char*)p) - (char*)0)&3)==0 );  /* p is always 4-byte aligned */
  assert( sizeof(RtreeCoord)==4 );
  assert( sizeof(u32)==4 );
#if SQLITE_BYTEORDER==4321
  memcpy(p, &v, 4);
#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
  v = __builtin_bswap32(v);
  memcpy(p, &v, 4);
#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
  v = _byteswap_ulong(v);
  memcpy(p, &v, 4);
#else
  p[0] = (v>>24)&0xFF;
  p[1] = (v>>16)&0xFF;
  p[2] = (v>> 8)&0xFF;
  p[3] = (v>> 0)&0xFF;
#endif
  return 4;
}
static int writeInt64(u8 *p, i64 i){
testcase( ((((char*)p) - (char*)0)&7)!=0 ); /* Not always 8-byte aligned */
#if SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
i = (i64)__builtin_bswap64((u64)i);
memcpy(p, &i, 8);
#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
i = (i64)_byteswap_uint64((u64)i);
memcpy(p, &i, 8);
#elif SQLITE_BYTEORDER==4321
memcpy(p, &i, 8);
#else
p[0] = (i>>56)&0xFF;
p[1] = (i>>48)&0xFF;
p[2] = (i>>40)&0xFF;
@ -428,6 +528,7 @@ static int writeInt64(u8 *p, i64 i){
p[5] = (i>>16)&0xFF;
p[6] = (i>> 8)&0xFF;
p[7] = (i>> 0)&0xFF;
#endif
return 8;
}
@ -754,11 +855,6 @@ static void nodeGetCell(
pData += 8;
ii += 2;
}while( ii<pRtree->nDim*2 );
#if 0
for(ii=0; ii<pRtree->nDim*2; ii++){
readCoord(&pData[ii*4], &pCoord[ii]);
}
#endif
}
@ -927,15 +1023,22 @@ static int rtreeEof(sqlite3_vtab_cursor *cur){
** false. a[] is the four bytes of the on-disk record to be decoded.
** Store the results in "r".
**
** There are three versions of this macro, one each for little-endian and
** big-endian processors and a third generic implementation. The endian-
** specific implementations are much faster and are preferred if the
** processor endianness is known at compile-time. The SQLITE_BYTEORDER
** macro is part of sqliteInt.h and hence the endian-specific
** implementation will only be used if this module is compiled as part
** of the amalgamation.
** There are five versions of this macro. The last one is generic. The
** other four are various architecture-specific optimizations.
*/
#if defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==1234
#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
#define RTREE_DECODE_COORD(eInt, a, r) { \
RtreeCoord c; /* Coordinate decoded */ \
c.u = _byteswap_ulong(*(u32*)a); \
r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
#define RTREE_DECODE_COORD(eInt, a, r) { \
RtreeCoord c; /* Coordinate decoded */ \
c.u = __builtin_bswap32(*(u32*)a); \
r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#elif SQLITE_BYTEORDER==1234
#define RTREE_DECODE_COORD(eInt, a, r) { \
RtreeCoord c; /* Coordinate decoded */ \
memcpy(&c.u,a,4); \
@ -943,7 +1046,7 @@ static int rtreeEof(sqlite3_vtab_cursor *cur){
((c.u&0xff)<<24)|((c.u&0xff00)<<8); \
r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#elif defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==4321
#elif SQLITE_BYTEORDER==4321
#define RTREE_DECODE_COORD(eInt, a, r) { \
RtreeCoord c; /* Coordinate decoded */ \
memcpy(&c.u,a,4); \
@ -986,6 +1089,7 @@ static int rtreeCallbackConstraint(
i = 0;
do{
pCellData += 8;
assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */
RTREE_DECODE_COORD(eInt, pCellData, aCoord[i]);
RTREE_DECODE_COORD(eInt, (pCellData+4), aCoord[i+1]);
i+= 2;
@ -1029,6 +1133,7 @@ static void rtreeNonleafConstraint(
assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE
|| p->op==RTREE_GT || p->op==RTREE_EQ );
assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */
switch( p->op ){
case RTREE_LE:
case RTREE_LT:
@ -1069,6 +1174,7 @@ static void rtreeLeafConstraint(
assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE
|| p->op==RTREE_GT || p->op==RTREE_EQ );
pCellData += 8 + p->iCoord*4;
assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */
RTREE_DECODE_COORD(eInt, pCellData, xN);
switch( p->op ){
case RTREE_LE: if( xN <= p->u.rValue ) return; break;

View File

@ -1,5 +1,5 @@
C Fix\sthe\sbuild\sby\smaking\sthe\sOPFLAG_ISNOOP\smacro\savailable\sunconditionally.
D 2017-02-01T15:19:29.202
C Use\scompiler\sintrinsic\sfunctions\s(when\savailable)\sfor\sbyteswapping\sin\sRTREE.
D 2017-02-01T15:24:32.835
F Makefile.in 5f415e7867296d678fed2e6779aea10c1318b4bc
F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
F Makefile.msc b8ca53350ae545e3562403d5da2a69cec79308da
@ -264,7 +264,7 @@ F ext/rbu/sqlite3rbu.c bb0de6cdbdb14a7d55a097238a434b7e99caf318
F ext/rbu/sqlite3rbu.h 6fb6294c34a9ca93b5894a33bca530c6f08decba
F ext/rbu/test_rbu.c 5aa22616afac6f71ebd3d9bc9bf1006cfabcca88
F ext/rtree/README 6315c0d73ebf0ec40dedb5aa0e942bc8b54e3761
F ext/rtree/rtree.c f66b3d232ea98285548107caaac55f110a0b6709
F ext/rtree/rtree.c d8ef14e964a9390197ba7e511aa47b89dc39416c
F ext/rtree/rtree.h 834dbcb82dc85b2481cde6a07cdadfddc99e9b9e
F ext/rtree/rtree1.test 42dadfc7b44a436cd74a1bebc0b9b689e4eaf7ec
F ext/rtree/rtree2.test acbb3a4ce0f4fbc2c304d2b4b784cfa161856bba
@ -1552,7 +1552,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 7f8570208c06c056d426e9299d9930181a0464f8
R bbb1f490f7311ec1c087c3ef4607e5e0
P 510933cb24c5bf883265af3a6075e60a4b5ffa37
R 13778722a0ec2443f74339612adbbcf9
U drh
Z 645035379b970696e5aed344f8a4be19
Z b1e5b219635e474580d8132f194fff51

View File

@ -1 +1 @@
510933cb24c5bf883265af3a6075e60a4b5ffa37
82fcd54a5941c20895ffc22d8009c1ebdae44eda