sqlite/ext/fts5/fts5_varint.c
dan 0ad63e5e0c Fix another problem with corrupt databases in fts5.
FossilOrigin-Name: 9bf811ea89a133d481a86407683a7518e909eccb7aa3aa02f7932bdb33387572
2019-01-11 19:27:55 +00:00

345 lines
6.9 KiB
C

/*
** 2015 May 30
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Routines for varint serialization and deserialization.
*/
#include "fts5Int.h"
/*
** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
** Except, this version does handle the single byte case that the core
** version depends on being handled before its function is called.
*/
int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
u32 a,b;
/* The 1-byte case. Overwhelmingly the most common. */
a = *p;
/* a: p0 (unmasked) */
if (!(a&0x80))
{
/* Values between 0 and 127 */
*v = a;
return 1;
}
/* The 2-byte case */
p++;
b = *p;
/* b: p1 (unmasked) */
if (!(b&0x80))
{
/* Values between 128 and 16383 */
a &= 0x7f;
a = a<<7;
*v = a | b;
return 2;
}
/* The 3-byte case */
p++;
a = a<<14;
a |= *p;
/* a: p0<<14 | p2 (unmasked) */
if (!(a&0x80))
{
/* Values between 16384 and 2097151 */
a &= (0x7f<<14)|(0x7f);
b &= 0x7f;
b = b<<7;
*v = a | b;
return 3;
}
/* A 32-bit varint is used to store size information in btrees.
** Objects are rarely larger than 2MiB limit of a 3-byte varint.
** A 3-byte varint is sufficient, for example, to record the size
** of a 1048569-byte BLOB or string.
**
** We only unroll the first 1-, 2-, and 3- byte cases. The very
** rare larger cases can be handled by the slower 64-bit varint
** routine.
*/
{
u64 v64;
u8 n;
p -= 2;
n = sqlite3Fts5GetVarint(p, &v64);
*v = ((u32)v64) & 0x7FFFFFFF;
assert( n>3 && n<=9 );
return n;
}
}
/*
** Bitmasks used by sqlite3GetVarint(). These precomputed constants
** are defined here rather than simply putting the constant expressions
** inline in order to work around bugs in the RVT compiler.
**
** SLOT_2_0 A mask for (0x7f<<14) | 0x7f
**
** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0
*/
#define SLOT_2_0 0x001fc07f
#define SLOT_4_2_0 0xf01fc07f
/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read. The value is stored in *v.
*/
u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
u32 a,b,s;
a = *p;
/* a: p0 (unmasked) */
if (!(a&0x80))
{
*v = a;
return 1;
}
p++;
b = *p;
/* b: p1 (unmasked) */
if (!(b&0x80))
{
a &= 0x7f;
a = a<<7;
a |= b;
*v = a;
return 2;
}
/* Verify that constants are precomputed correctly */
assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );
p++;
a = a<<14;
a |= *p;
/* a: p0<<14 | p2 (unmasked) */
if (!(a&0x80))
{
a &= SLOT_2_0;
b &= 0x7f;
b = b<<7;
a |= b;
*v = a;
return 3;
}
/* CSE1 from below */
a &= SLOT_2_0;
p++;
b = b<<14;
b |= *p;
/* b: p1<<14 | p3 (unmasked) */
if (!(b&0x80))
{
b &= SLOT_2_0;
/* moved CSE1 up */
/* a &= (0x7f<<14)|(0x7f); */
a = a<<7;
a |= b;
*v = a;
return 4;
}
/* a: p0<<14 | p2 (masked) */
/* b: p1<<14 | p3 (unmasked) */
/* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
/* moved CSE1 up */
/* a &= (0x7f<<14)|(0x7f); */
b &= SLOT_2_0;
s = a;
/* s: p0<<14 | p2 (masked) */
p++;
a = a<<14;
a |= *p;
/* a: p0<<28 | p2<<14 | p4 (unmasked) */
if (!(a&0x80))
{
/* we can skip these cause they were (effectively) done above in calc'ing s */
/* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
/* b &= (0x7f<<14)|(0x7f); */
b = b<<7;
a |= b;
s = s>>18;
*v = ((u64)s)<<32 | a;
return 5;
}
/* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
s = s<<7;
s |= b;
/* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
p++;
b = b<<14;
b |= *p;
/* b: p1<<28 | p3<<14 | p5 (unmasked) */
if (!(b&0x80))
{
/* we can skip this cause it was (effectively) done above in calc'ing s */
/* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
a &= SLOT_2_0;
a = a<<7;
a |= b;
s = s>>18;
*v = ((u64)s)<<32 | a;
return 6;
}
p++;
a = a<<14;
a |= *p;
/* a: p2<<28 | p4<<14 | p6 (unmasked) */
if (!(a&0x80))
{
a &= SLOT_4_2_0;
b &= SLOT_2_0;
b = b<<7;
a |= b;
s = s>>11;
*v = ((u64)s)<<32 | a;
return 7;
}
/* CSE2 from below */
a &= SLOT_2_0;
p++;
b = b<<14;
b |= *p;
/* b: p3<<28 | p5<<14 | p7 (unmasked) */
if (!(b&0x80))
{
b &= SLOT_4_2_0;
/* moved CSE2 up */
/* a &= (0x7f<<14)|(0x7f); */
a = a<<7;
a |= b;
s = s>>4;
*v = ((u64)s)<<32 | a;
return 8;
}
p++;
a = a<<15;
a |= *p;
/* a: p4<<29 | p6<<15 | p8 (unmasked) */
/* moved CSE2 up */
/* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
b &= SLOT_2_0;
b = b<<8;
a |= b;
s = s<<4;
b = p[-4];
b &= 0x7f;
b = b>>3;
s |= b;
*v = ((u64)s)<<32 | a;
return 9;
}
/*
** The variable-length integer encoding is as follows:
**
** KEY:
** A = 0xxxxxxx 7 bits of data and one flag bit
** B = 1xxxxxxx 7 bits of data and one flag bit
** C = xxxxxxxx 8 bits of data
**
** 7 bits - A
** 14 bits - BA
** 21 bits - BBA
** 28 bits - BBBA
** 35 bits - BBBBA
** 42 bits - BBBBBA
** 49 bits - BBBBBBA
** 56 bits - BBBBBBBA
** 64 bits - BBBBBBBBC
*/
#ifdef SQLITE_NOINLINE
# define FTS5_NOINLINE SQLITE_NOINLINE
#else
# define FTS5_NOINLINE
#endif
/*
** Write a 64-bit variable-length integer to memory starting at p[0].
** The length of data write will be between 1 and 9 bytes. The number
** of bytes written is returned.
**
** A variable-length integer consists of the lower 7 bits of each byte
** for all bytes that have the 8th bit set and one byte with the 8th
** bit clear. Except, if we get to the 9th byte, it stores the full
** 8 bits and is the last byte.
*/
static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
int i, j, n;
u8 buf[10];
if( v & (((u64)0xff000000)<<32) ){
p[8] = (u8)v;
v >>= 8;
for(i=7; i>=0; i--){
p[i] = (u8)((v & 0x7f) | 0x80);
v >>= 7;
}
return 9;
}
n = 0;
do{
buf[n++] = (u8)((v & 0x7f) | 0x80);
v >>= 7;
}while( v!=0 );
buf[0] &= 0x7f;
assert( n<=9 );
for(i=0, j=n-1; j>=0; j--, i++){
p[i] = buf[j];
}
return n;
}
int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
if( v<=0x7f ){
p[0] = v&0x7f;
return 1;
}
if( v<=0x3fff ){
p[0] = ((v>>7)&0x7f)|0x80;
p[1] = v&0x7f;
return 2;
}
return fts5PutVarint64(p,v);
}
int sqlite3Fts5GetVarintLen(u32 iVal){
#if 0
if( iVal<(1 << 7 ) ) return 1;
#endif
assert( iVal>=(1 << 7) );
if( iVal<(1 << 14) ) return 2;
if( iVal<(1 << 21) ) return 3;
if( iVal<(1 << 28) ) return 4;
return 5;
}