Documentation of the new pager locking mechanism. (CVS 1570)
FossilOrigin-Name: 13cf1ba8256bf8cee0195dbaeac71a20cd2c2fc2
This commit is contained in:
parent
ce2663ccc8
commit
52619dfb7f
4
main.mk
4
main.mk
@ -419,6 +419,9 @@ index.html: $(TOP)/www/index.tcl last_change
|
||||
lang.html: $(TOP)/www/lang.tcl
|
||||
tclsh $(TOP)/www/lang.tcl >lang.html
|
||||
|
||||
lockingv3.html: $(TOP)/www/lockingv3.tcl
|
||||
tclsh $(TOP)/www/lockingv3.tcl >lockingv3.html
|
||||
|
||||
omitted.html: $(TOP)/www/omitted.tcl
|
||||
tclsh $(TOP)/www/omitted.tcl >omitted.html
|
||||
|
||||
@ -475,6 +478,7 @@ DOC = \
|
||||
formatchng.html \
|
||||
index.html \
|
||||
lang.html \
|
||||
lockingv3.html \
|
||||
mingw.html \
|
||||
nulls.html \
|
||||
omitted.html \
|
||||
|
21
manifest
21
manifest
@ -1,5 +1,5 @@
|
||||
C Have\sthe\svdbe\saggregator\suse\sa\sbtree\stable\sinstead\sof\sa\shash\stable.\s(CVS\s1569)
|
||||
D 2004-06-11T13:19:21
|
||||
C Documentation\sof\sthe\snew\spager\slocking\smechanism.\s(CVS\s1570)
|
||||
D 2004-06-11T17:48:03
|
||||
F Makefile.in ab7b0d5118e2da97bac66be8684a1034e3500f5a
|
||||
F Makefile.linux-gcc a9e5a0d309fa7c38e7c14d3ecf7690879d3a5457
|
||||
F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd
|
||||
@ -17,7 +17,7 @@ F doc/lemon.html f0f682f50210928c07e562621c3b7e8ab912a538
|
||||
F doc/report1.txt a031aaf37b185e4fa540223cb516d3bccec7eeac
|
||||
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
|
||||
F ltmain.sh f6b283068efa69f06eb8aa1fe4bddfdbdeb35826
|
||||
F main.mk 126f1ca2190fa19cc9944f54943cf3431891c330
|
||||
F main.mk e02f35c8354fe9a0ab48fc87b0bcb3fb7a86d713
|
||||
F mkdll.sh 68d34a961a1fdfa15ef27fc4f4740be583112124
|
||||
F publish.sh 2e579b7474d57b6debcead93c73a49eb8cb81718
|
||||
F spec.template a38492f1c1dd349fc24cb0565e08afc53045304b
|
||||
@ -56,7 +56,7 @@ F src/printf.c 63b15f1ea9fe3daa066bb7430fd20d4a2d717dc8
|
||||
F src/random.c eff68e3f257e05e81eae6c4d50a51eb88beb4ff3
|
||||
F src/select.c d29488f86e61e0d45dff318e1f04ba6a7e5782d0
|
||||
F src/shell.c ca519519dcbbc582f6d88f7d0e7583b857fd3469
|
||||
F src/sqlite.h.in 2b6afe1de6935d3dfbd6042f46a62f1b7c3b3992
|
||||
F src/sqlite.h.in 56c53344a6fbd76328d641d9ddab90d4e4ba9129
|
||||
F src/sqliteInt.h 625faf4c9ce2f99b9c85a2bca5c4e73736c30262
|
||||
F src/table.c af14284fa36c8d41f6829e3f2819dce07d3e2de2
|
||||
F src/tclsqlite.c e974c0b2479ed37334aeb268de331e0a1b21b5a8
|
||||
@ -194,7 +194,7 @@ F www/arch2.gif 49c6bb36160f03ca2b89eaa5bfb1f560c7d68ee7
|
||||
F www/audit.tcl 90e09d580f79c7efec0c7d6f447b7ec5c2dce5c0
|
||||
F www/c_interface.tcl 2176519fc2bd2d2cf6fe74fd806fc2d8362de2c8
|
||||
F www/capi3.tcl a940c5ca0b0ebafd5acfe2ceca5a388bd8cfb468
|
||||
F www/capi3ref.tcl 4248a45a0fd21fe0c116326300408d0999a028b5
|
||||
F www/capi3ref.tcl 2cb0861c5219c6e44298c217a6c904464b4403bc
|
||||
F www/changes.tcl cbe942607b2b9e5dc995395f37042dbb5a629c7f
|
||||
F www/common.tcl f786e6be86fb2627ceb30e770e9efa83b9c67a3a
|
||||
F www/conflict.tcl fb8a2ba83746c7fdfd9e52fa7f6aaf5c422b8246
|
||||
@ -203,7 +203,7 @@ F www/copyright-release.pdf cfca3558fc97095e57c6117d08f1f5b80d95125a
|
||||
F www/copyright.tcl 82c9670c7ddb0311912ab7fe24703f33c531066c
|
||||
F www/datatype3.tcl f48b05cafd5e54ae5c05e643169d5217ee51a244
|
||||
F www/datatypes.tcl 566004b81c36877397ddbe6e1907aae6065f6b41
|
||||
F www/docs.tcl 0dcbf954907bd5dbfb7f1e0220f4e50516e07cd3
|
||||
F www/docs.tcl a924a5043973a70c58b65abf1051ed8b1238864d
|
||||
F www/download.tcl 8c84f15695c92cb01486930055fdf5192995f474
|
||||
F www/dynload.tcl 02eb8273aa78cfa9070dd4501dca937fb22b466c
|
||||
F www/faq.tcl 3a1776818d9bd973ab0c3048ec7ad6b1ad091ae5
|
||||
@ -211,6 +211,7 @@ F www/fileformat.tcl f71a06a0d533c7df408539c64113b4adeaf29764
|
||||
F www/formatchng.tcl 7ed8a5c871ab105f01e5defe1822ec39f70675bb
|
||||
F www/index.tcl 64435502af780d7cd813365d443b9b9344662ce4
|
||||
F www/lang.tcl fc528581c4a406cabc138e2f17db5ef6f38615ff
|
||||
F www/lockingv3.tcl 1449cf3ff0249d6ed44e06b05244b423693d6265
|
||||
F www/mingw.tcl d96b451568c5d28545fefe0c80bee3431c73f69c
|
||||
F www/nulls.tcl f31330db8c978e675f5cd263067b32b822effa6f
|
||||
F www/omitted.tcl 7bd62b6f0f53b60c5360895b16b3af8407bbca03
|
||||
@ -222,7 +223,7 @@ F www/support.tcl 1801397edd271cc39a2aadd54e701184b5181248
|
||||
F www/tclsqlite.tcl 19191cf2a1010eaeff74c51d83fd5f5a4d899075
|
||||
F www/vdbe.tcl 59288db1ac5c0616296b26dce071c36cb611dfe9
|
||||
F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4
|
||||
P 66835ee67051027456a536e33b2f88a741654525
|
||||
R 17851c4c7a9d880622be1c17f4d88abf
|
||||
U danielk1977
|
||||
Z 37fd25919977ced5eda4bf17b983a171
|
||||
P 8d56118f64dbaf8c8006266fa7026f900a4a16bd
|
||||
R 452a2adcbc4985a31c30bc500c6b52fd
|
||||
U drh
|
||||
Z 469b4dfcfa6222fd78598800788c6259
|
||||
|
@ -1 +1 @@
|
||||
8d56118f64dbaf8c8006266fa7026f900a4a16bd
|
||||
13cf1ba8256bf8cee0195dbaeac71a20cd2c2fc2
|
@ -12,7 +12,7 @@
|
||||
** This header file defines the interface that the SQLite library
|
||||
** presents to client programs.
|
||||
**
|
||||
** @(#) $Id: sqlite.h.in,v 1.97 2004/06/10 10:50:30 danielk1977 Exp $
|
||||
** @(#) $Id: sqlite.h.in,v 1.98 2004/06/11 17:48:03 drh Exp $
|
||||
*/
|
||||
#ifndef _SQLITE_H_
|
||||
#define _SQLITE_H_
|
||||
@ -963,9 +963,15 @@ void sqlite3_result_text(sqlite3_context*, const char*, int n, int eCopy);
|
||||
void sqlite3_result_text16(sqlite3_context*, const void*, int n, int eCopy);
|
||||
void sqlite3_result_value(sqlite3_context*, sqlite3_value*);
|
||||
|
||||
/*
|
||||
** These are the allowed values for the eTextRep argument to
|
||||
** sqlite3_create_collation and sqlite3_create_function.
|
||||
*/
|
||||
#define SQLITE_UTF8 1
|
||||
#define SQLITE_UTF16LE 2
|
||||
#define SQLITE_UTF16BE 3
|
||||
#define SQLITE_UTF16 2 /* Use native byte order */
|
||||
#define SQLITE_UTF16LE 3
|
||||
#define SQLITE_UTF16BE 4
|
||||
#define SQLITE_ANY 5 /* sqlite3_create_function only */
|
||||
|
||||
/*
|
||||
** These two functions are used to add new collation sequences to the
|
||||
|
143
www/capi3ref.tcl
143
www/capi3ref.tcl
@ -1,4 +1,4 @@
|
||||
set rcsid {$Id: capi3ref.tcl,v 1.1 2004/06/01 01:22:38 drh Exp $}
|
||||
set rcsid {$Id: capi3ref.tcl,v 1.2 2004/06/11 17:48:04 drh Exp $}
|
||||
source common.tcl
|
||||
header {C/C++ Interface For SQLite Version 3}
|
||||
puts {
|
||||
@ -69,13 +69,15 @@ api {} {
|
||||
}
|
||||
|
||||
api {} {
|
||||
int sqlite3_bind_blob(sqlite3_stmt*, int, const void*, int n, int eCopy);
|
||||
int sqlite3_bind_blob(sqlite3_stmt*, int, const void*, int n, void(*)(void*));
|
||||
int sqlite3_bind_double(sqlite3_stmt*, int, double);
|
||||
int sqlite3_bind_int(sqlite3_stmt*, int, int);
|
||||
int sqlite3_bind_int64(sqlite3_stmt*, int, long long int);
|
||||
int sqlite3_bind_null(sqlite3_stmt*, int);
|
||||
int sqlite3_bind_text(sqlite3_stmt*, int, const char*, int n, int eCopy);
|
||||
int sqlite3_bind_text16(sqlite3_stmt*, int, const void*, int n, int eCopy);
|
||||
int sqlite3_bind_text(sqlite3_stmt*, int, const char*, int n, void(*)(void*));
|
||||
int sqlite3_bind_text16(sqlite3_stmt*, int, const void*, int n, void(*)(void*));
|
||||
#define SQLITE_STATIC ((void*)0)
|
||||
#define SQLITE_EPHEMERAL ((void*)8)
|
||||
} {
|
||||
In the SQL strings input to sqlite3_prepare() and sqlite3_prepare16(),
|
||||
one or more literals can be replace by a wildcard "?" or ":N:" where
|
||||
@ -87,10 +89,13 @@ api {} {
|
||||
index of the wildcard. The first "?" has an index of 1. ":N:" wildcards
|
||||
use the index N.
|
||||
|
||||
When the eCopy parameter is true, a copy of the value is made into
|
||||
memory obtained and managed by SQLite. When eCopy is false, SQLite
|
||||
assumes that the value is a constant and just stores a pointer to the
|
||||
value without making a copy.
|
||||
The fifth parameter to sqlite3_bind_blob(), sqlite3_bind_text(), and
|
||||
sqlite3_bind_text16() is a destructor used to dispose of the BLOB or
|
||||
text after SQLite has finished with it. If the fifth argument is the
|
||||
special value SQLITE_STATIC, then the library assumes that the information
|
||||
is in static, unmanaged space and does not need to be freed. If the
|
||||
fifth argument has the value SQLITE_EPHEMERAL, then SQLite makes its
|
||||
on private copy of the data.
|
||||
|
||||
The sqlite3_bind_*() routine must be called after
|
||||
sqlite3_prepare() or sqlite3_reset() and before sqlite3_step().
|
||||
@ -99,16 +104,16 @@ api {} {
|
||||
}
|
||||
|
||||
api {} {
|
||||
void sqlite3_busy_handler(sqlite*, int(*)(void*,const char*,int), void*);
|
||||
void sqlite3_busy_handler(sqlite*, int(*)(void*,int), void*);
|
||||
} {
|
||||
This routine identifies a callback function that is invoked
|
||||
whenever an attempt is made to open a database table that is
|
||||
currently locked by another process or thread. If the busy callback
|
||||
is NULL, then sqlite3_exec() returns SQLITE_BUSY immediately if
|
||||
it finds a locked table. If the busy callback is not NULL, then
|
||||
sqlite3_exec() invokes the callback with three arguments. The
|
||||
second argument is the name of the locked table and the third
|
||||
argument is the number of times the table has been busy. If the
|
||||
sqlite3_exec() invokes the callback with two arguments. The
|
||||
second argument is the number of prior calls to the busy callback
|
||||
for the same lock. If the
|
||||
busy callback returns 0, then sqlite3_exec() immediately returns
|
||||
SQLITE_BUSY. If the callback returns non-zero, then sqlite3_exec()
|
||||
tries to open the table again and the cycle repeats.
|
||||
@ -309,6 +314,90 @@ int sqlite3_complete16(const void *sql);
|
||||
false.
|
||||
} {}
|
||||
|
||||
api {} {
|
||||
int sqlite3_create_collation(
|
||||
sqlite3*,
|
||||
const char *zName,
|
||||
int pref16,
|
||||
void*,
|
||||
int(*xCompare)(void*,int,const void*,int,const void*)
|
||||
);
|
||||
int sqlite3_create_collation16(
|
||||
sqlite3*,
|
||||
const char *zName,
|
||||
int pref16,
|
||||
void*,
|
||||
int(*xCompare)(void*,int,const void*,int,const void*)
|
||||
);
|
||||
#define SQLITE_UTF8 1
|
||||
#define SQLITE_UTF16 2
|
||||
#define SQLITE_UTF16BE 3
|
||||
#define SQLITE_UTF16LE 4
|
||||
} {
|
||||
These two functions are used to add new collation sequences to the
|
||||
sqlite3 handle specified as the first argument.
|
||||
|
||||
The name of the new collation sequence is specified as a UTF-8 string
|
||||
for sqlite3_create_collation() and a UTF-16 string for
|
||||
sqlite3_create_collation16(). In both cases the name is passed as the
|
||||
second function argument.
|
||||
|
||||
The third argument must be one of the constants SQLITE_UTF8,
|
||||
SQLITE_UTF16LE or SQLITE_UTF16BE, indicating that the user-supplied
|
||||
routine expects to be passed pointers to strings encoded using UTF-8,
|
||||
UTF-16 little-endian or UTF-16 big-endian respectively.
|
||||
|
||||
A pointer to the user supplied routine must be passed as the fifth
|
||||
argument. If it is NULL, this is the same as deleting the collation
|
||||
sequence (so that SQLite cannot call it anymore). Each time the user
|
||||
supplied function is invoked, it is passed a copy of the void* passed as
|
||||
the fourth argument to sqlite3_create_collation() or
|
||||
sqlite3_create_collation16() as its first parameter.
|
||||
|
||||
The remaining arguments to the user-supplied routine are two strings,
|
||||
each represented by a [length, data] pair and encoded in the encoding
|
||||
that was passed as the third argument when the collation sequence was
|
||||
registered. The user routine should return negative, zero or positive if
|
||||
the first string is less than, equal to, or greater than the second
|
||||
string. i.e. (STRING1 - STRING2).
|
||||
}
|
||||
|
||||
api {} {
|
||||
int sqlite3_collation_needed(
|
||||
sqlite3*,
|
||||
void*,
|
||||
void(*)(void*,sqlite3*,int eTextRep,const char*)
|
||||
);
|
||||
int sqlite3_collation_needed16(
|
||||
sqlite3*,
|
||||
void*,
|
||||
void(*)(void*,sqlite3*,int eTextRep,const void*)
|
||||
);
|
||||
} {
|
||||
To avoid having to register all collation sequences before a database
|
||||
can be used, a single callback function may be registered with the
|
||||
database handle to be called whenever an undefined collation sequence is
|
||||
required.
|
||||
|
||||
If the function is registered using the sqlite3_collation_needed() API,
|
||||
then it is passed the names of undefined collation sequences as strings
|
||||
encoded in UTF-8. If sqlite3_collation_needed16() is used, the names
|
||||
are passed as UTF-16 in machine native byte order. A call to either
|
||||
function replaces any existing callback.
|
||||
|
||||
When the user-function is invoked, the first argument passed is a copy
|
||||
of the second argument to sqlite3_collation_needed() or
|
||||
sqlite3_collation_needed16(). The second argument is the database
|
||||
handle. The third argument is one of SQLITE_UTF8, SQLITE_UTF16BE or
|
||||
SQLITE_UTF16LE, indicating the most desirable form of the collation
|
||||
sequence function required. The fourth parameter is the name of the
|
||||
required collation sequence.
|
||||
|
||||
The collation sequence is returned to SQLite by a collation-needed
|
||||
callback using the sqlite3_create_collation() or
|
||||
sqlite3_create_collation16() APIs, described above.
|
||||
}
|
||||
|
||||
api {} {
|
||||
int sqlite3_create_function(
|
||||
sqlite3 *,
|
||||
@ -332,10 +421,11 @@ int sqlite3_create_function16(
|
||||
void (*xStep)(sqlite3_context*,int,sqlite3_value**),
|
||||
void (*xFinal)(sqlite3_context*)
|
||||
);
|
||||
#define SQLITE3_UTF8 1
|
||||
#define SQLITE3_UTF16LE 2
|
||||
#define SQLITE3_UTF16BE 3
|
||||
#define SQLITE3_ANY 4
|
||||
#define SQLITE_UTF8 1
|
||||
#define SQLITE_UTF16 2
|
||||
#define SQLITE_UTF16BE 3
|
||||
#define SQLITE_UTF16LE 4
|
||||
#define SQLITE_ANY 5
|
||||
} {
|
||||
These two functions are used to add user functions or aggregates
|
||||
implemented in C to the SQL langauge interpreted by SQLite. The
|
||||
@ -620,13 +710,11 @@ char *sqlite3_vmprintf(const char*, va_list);
|
||||
api {} {
|
||||
int sqlite3_open(
|
||||
const char *filename, /* Database filename (UTF-8) */
|
||||
sqlite3 **ppDb, /* OUT: SQLite db handle */
|
||||
const char **args /* Null terminated array of option strings */
|
||||
sqlite3 **ppDb /* OUT: SQLite db handle */
|
||||
);
|
||||
int sqlite3_open16(
|
||||
const void *filename, /* Database filename (UTF-16) */
|
||||
sqlite3 **ppDb, /* OUT: SQLite db handle */
|
||||
const char **args /* Null terminated array of option strings */
|
||||
sqlite3 **ppDb /* OUT: SQLite db handle */
|
||||
);
|
||||
} {
|
||||
Open the sqlite database file "filename". The "filename" is UTF-8
|
||||
@ -637,8 +725,9 @@ int sqlite3_open16(
|
||||
sqlite3_errmsg() or sqlite3_errmsg16() routines can be used to obtain
|
||||
an English language description of the error.
|
||||
|
||||
If the database file does not exist, then a new database is created.
|
||||
The encoding for the database is UTF-8 if sqlite3_open() is called and
|
||||
If the database file does not exist, then a new database will be created
|
||||
as needed.
|
||||
The encoding for the database will be UTF-8 if sqlite3_open() is called and
|
||||
UTF-16 if sqlite3_open16 is used.
|
||||
|
||||
Whether or not an error occurs when it is opened, resources associated
|
||||
@ -729,15 +818,17 @@ int sqlite3_reset(sqlite3_stmt *pStmt);
|
||||
}
|
||||
|
||||
api {} {
|
||||
void sqlite3_result_blob(sqlite3_context*, const void*, int n, int eCopy);
|
||||
void sqlite3_result_blob(sqlite3_context*, const void*, int n, void(*)(void*));
|
||||
void sqlite3_result_double(sqlite3_context*, double);
|
||||
void sqlite3_result_error(sqlite3_context*, const char*, int);
|
||||
void sqlite3_result_error16(sqlite3_context*, const void*, int);
|
||||
void sqlite3_result_int(sqlite3_context*, int);
|
||||
void sqlite3_result_int64(sqlite3_context*, long long int);
|
||||
void sqlite3_result_null(sqlite3_context*);
|
||||
void sqlite3_result_text(sqlite3_context*, const char*, int n, int eCopy);
|
||||
void sqlite3_result_text16(sqlite3_context*, const void*, int n, int eCopy);
|
||||
void sqlite3_result_text(sqlite3_context*, const char*, int n, void(*)(void*));
|
||||
void sqlite3_result_text16(sqlite3_context*, const void*, int n, void(*)(void*));
|
||||
void sqlite3_result_text16be(sqlite3_context*, const void*, int n, void(*)(void*));
|
||||
void sqlite3_result_text16le(sqlite3_context*, const void*, int n, void(*)(void*));
|
||||
void sqlite3_result_value(sqlite3_context*, sqlite3_value*);
|
||||
} {
|
||||
User-defined functions invoke the following routines in order to
|
||||
@ -864,6 +955,8 @@ int sqlite3_value_int(sqlite3_value*);
|
||||
long long int sqlite3_value_int64(sqlite3_value*);
|
||||
const unsigned char *sqlite3_value_text(sqlite3_value*);
|
||||
const void *sqlite3_value_text16(sqlite3_value*);
|
||||
const void *sqlite3_value_text16be(sqlite3_value*);
|
||||
const void *sqlite3_value_text16le(sqlite3_value*);
|
||||
int sqlite3_value_type(sqlite3_value*);
|
||||
} {
|
||||
This group of routines returns information about parameters to
|
||||
|
@ -1,7 +1,7 @@
|
||||
# This script generates the "docs.html" page that describes various
|
||||
# sources of documentation available for SQLite.
|
||||
#
|
||||
set rcsid {$Id: docs.tcl,v 1.3 2004/06/01 01:22:38 drh Exp $}
|
||||
set rcsid {$Id: docs.tcl,v 1.4 2004/06/11 17:48:04 drh Exp $}
|
||||
source common.tcl
|
||||
header {SQLite Documentation}
|
||||
puts {
|
||||
@ -40,6 +40,11 @@ doc {Tcl API} {tclsqlite.html} {
|
||||
A description of the TCL interface bindings for SQLite.
|
||||
}
|
||||
|
||||
doc {Locking And Concurrency<br>In SQLite Version 3} {lockingv3.html} {
|
||||
A description of how the new locking code in version 3 increases
|
||||
concurrancy and decreases the problem of writer starvation.
|
||||
}
|
||||
|
||||
doc {Version 2 DataTypes } {datatypes.html} {
|
||||
A description of how SQLite version 2 handles SQL datatypes.
|
||||
}
|
||||
|
515
www/lockingv3.tcl
Normal file
515
www/lockingv3.tcl
Normal file
@ -0,0 +1,515 @@
|
||||
#
|
||||
# Run this script to generated a lockingv3.html output file
|
||||
#
|
||||
set rcsid {$Id: }
|
||||
source common.tcl
|
||||
header {File Locking And Concurrency In SQLite Version 3}
|
||||
|
||||
proc HEADING {level title} {
|
||||
global pnum
|
||||
incr pnum($level)
|
||||
foreach i [array names pnum] {
|
||||
if {$i>$level} {set pnum($i) 0}
|
||||
}
|
||||
set h [expr {$level+1}]
|
||||
if {$h>6} {set h 6}
|
||||
set n $pnum(1).$pnum(2)
|
||||
for {set i 3} {$i<=$level} {incr i} {
|
||||
append n .$pnum($i)
|
||||
}
|
||||
puts "<h$h>$n $title</h$h>"
|
||||
}
|
||||
set pnum(1) 0
|
||||
set pnum(2) 0
|
||||
set pnum(3) 0
|
||||
set pnum(4) 0
|
||||
set pnum(5) 0
|
||||
set pnum(6) 0
|
||||
set pnum(7) 0
|
||||
set pnum(8) 0
|
||||
|
||||
HEADING 1 {File Locking And Concurrency In SQLite Version 3}
|
||||
|
||||
puts {
|
||||
<p>Version 3 of SQLite introduces a more sophisticated locking mechanism
|
||||
design to improve concurrency and reduce the writer starvation problem.
|
||||
This document describes the new locking mechanism.
|
||||
The intended audience is programmers who want to understand and/or modify
|
||||
the pager code and reviewers working to verify the design
|
||||
of SQLite version 3.
|
||||
</p>
|
||||
}
|
||||
|
||||
HEADING 1 {Overview}
|
||||
|
||||
puts {
|
||||
<p>
|
||||
Locking and concurrency control are handled by the the
|
||||
<a href="http://www.sqlite.org/cvstrac/getfile/sqlite/src/pager.c">
|
||||
pager module</a>.
|
||||
The pager module is responsible for make SQLite "ACID" (Atomic,
|
||||
Consistent, Isolated, and Durable). The pager module makes sure changes
|
||||
happen all at once, that either all changes occur or none of them do,
|
||||
that two or more threads or processes do not try to access the database
|
||||
in incompatible ways at the same time, and that once changes have been
|
||||
written they persist until explicitly deleted. The pager also provides
|
||||
an memory cache of some of the contents of the disk file.</p>
|
||||
|
||||
<p>The pager is unconcerned
|
||||
with the details of B-Trees, text encodings, indices, and so forth.
|
||||
From the point of view of the pager, the database consists of
|
||||
a single file of uniform-sized blocks. Each block is called a
|
||||
"page" is is usually 1024 bytes in size. The pages are numbered
|
||||
beginning with 1. So the first 1024 bytes of the database are called
|
||||
"page 1" and the second 1024 bytes are call "page 2" and so forth. All
|
||||
other encoding details are handled by higher layers of the library.
|
||||
The pager communicates with the operating system using one of several
|
||||
modules
|
||||
(Examples:
|
||||
<a href="http://www.sqlite.org/cvstrac/getfile/sqlite/src/os_unix.c">
|
||||
os_unix.c</a>,
|
||||
<a href="http://www.sqlite.org/cvstrac/getfile/sqlite/src/os_win.c">
|
||||
os_win.c</a>)
|
||||
that provides a uniform abstraction for operating system services.
|
||||
</p>
|
||||
}
|
||||
|
||||
HEADING 1 {Locking}
|
||||
|
||||
puts {
|
||||
<p>
|
||||
From the point of view of a single thread or process, a database file
|
||||
can be in one of five locking states:
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<table cellpadding="20">
|
||||
<tr><td valign="top">UNLOCKED</td>
|
||||
<td valign="top">
|
||||
No locks are held on the database. The database may be neither read nor
|
||||
written. Any internally cached data is considered suspect and subject to
|
||||
verification against the database file before being used. Other threads
|
||||
and processes can read or write the database as their own locking states
|
||||
permit. This is the default state.
|
||||
</td></tr>
|
||||
|
||||
<tr><td valign="top">SHARED</td>
|
||||
<td valign="top">
|
||||
The database may be read but not written. Any number of threads or
|
||||
processes can hold SHARED locks at the same time, hence there can be
|
||||
many simultaneous readers. But no other thread or process is allowed
|
||||
to write to the database file while one or more SHARED locks are active.
|
||||
</td></tr>
|
||||
|
||||
<tr><td valign="top">RESERVED</td>
|
||||
<td valign="top">
|
||||
A RESERVED lock means that the process is planning on writing to the
|
||||
database file at some point in the future but that it is currently just
|
||||
reading from the file. Only a single RESERVED lock may be active at one
|
||||
time, though multiple SHARED locks can coexist with a single RESERVED lock.
|
||||
RESERVED differs from PENDING in that new SHARED locks can be acquired
|
||||
while there is a RESERVED lock.
|
||||
</td></tr>
|
||||
|
||||
<tr><td valign="top">PENDING</td>
|
||||
<td valign="top">
|
||||
A PENDING lock means that the process holding the lock wants to write
|
||||
to the database as soon as possible and is just waiting on all current
|
||||
SHARED locks to clear so that it can get an EXCLUSIVE lock. No new
|
||||
SHARED locks are permitted against the database if
|
||||
a PENDING lock is active, though existing SHARED locks are allowed to
|
||||
continue.
|
||||
</td></tr>
|
||||
|
||||
<tr><td valign="top">EXCLUSIVE</td>
|
||||
<td valign="top">
|
||||
An EXCLUSIVE lock is needed in order to write to the database file.
|
||||
Only one EXCLUSIVE lock is allowed on the file and no other locks of
|
||||
any kind are allowed to coexist with an EXCLUSIVE lock. In order to
|
||||
maximize concurrency, SQLite works to minimize the amount of time that
|
||||
EXCLUSIVE locks are held.
|
||||
</td></tr>
|
||||
</table>
|
||||
</p>
|
||||
|
||||
<p>
|
||||
The operating system interface layer understands and tracks all five
|
||||
locking states described above. (It has to, since it is responsible
|
||||
for implementing the locks.) But the pager module only tracks four
|
||||
of the five locking states. A PENDING lock is always just a temporary
|
||||
stepping stone on the path to an EXCLUSIVE lock and so the pager module
|
||||
does not track PENDING locks.
|
||||
</p>
|
||||
}
|
||||
|
||||
HEADING 1 {The Rollback Journal}
|
||||
|
||||
puts {
|
||||
<p>Any time a process wants to make a changes to a database file, it
|
||||
first records enough information in the <em>rollback journal</em> to
|
||||
restore the database file back to its initial condition. Thus, before
|
||||
altering any page of the database, the original contents of that page
|
||||
must be written into the journal. The journal also records the initial
|
||||
size of the database so that if the database file grows it can be truncated
|
||||
back to its original size on a rollback.</p>
|
||||
|
||||
<p>The rollback journal is a ordinary disk file that has the same name as
|
||||
the database file with the suffix "<tt>-journal</tt>" added.</p>
|
||||
|
||||
<p>If SQLite is working with multiple databases at the same time
|
||||
(using the ATTACH command) then each database has its own journal.
|
||||
But there is also a separate aggregate journal called the "master journal".
|
||||
The master journal does not contain page data used for rolling back
|
||||
changes. Instead the master journal contains the names of the
|
||||
individual file journals for each of the ATTACHed databases. Each of
|
||||
the individual file journals also contain the name of the master journal.
|
||||
If there are no ATTACHed databases (or if none of the ATTACHed database
|
||||
is participating in the current transaction) no master journal is
|
||||
created and the normal rollback journal contains an empty string
|
||||
in the place normally reserved for recording the name of the master
|
||||
journal.</p>
|
||||
|
||||
<p>A individual file journal is said to be "hot" if it needs to be rolled back
|
||||
in order to restore the integrity of its database.
|
||||
A hot journal is created when a process is in the middle of a database
|
||||
update and a program or operating system crash or power failure prevents
|
||||
the update from completing.
|
||||
Hot journals are an exception condition.
|
||||
Hot journals exist to facility recovery from crashes and power failures.
|
||||
If everything is working correctly
|
||||
(that is, if there are no crashes or power failures)
|
||||
you will never get a hot journal.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
If no master journal is involved, then
|
||||
a journal is hot if it exists and its corresponding database file
|
||||
does not have a RESERVED lock.
|
||||
If a master journal is named in the file journal, then the file journal
|
||||
is hot if its master journal exists and there is no RESERVED
|
||||
lock on the corresponding database file.
|
||||
</p>
|
||||
}
|
||||
|
||||
HEADING 2 {Dealing with hot journals}
|
||||
|
||||
puts {
|
||||
<p>
|
||||
Before reading from a a database file, SQLite always checks to see if that
|
||||
file has a hot journal. If the file does have a hot journal, then the
|
||||
journal is rolled back before the file is read. In this way, we ensure
|
||||
that the database file is in a consistent state before it is read.
|
||||
</p>
|
||||
|
||||
<p>When a process wants to read from a database file, it followed
|
||||
the following sequence of steps:
|
||||
</p>
|
||||
|
||||
<ol>
|
||||
<li>Open the database file and obtain a SHARED lock. If the SHARED lock
|
||||
cannot be obtained, fail immediately and return SQLITE_BUSY.</li>
|
||||
<li>Check to see if the database file has a hot journal. If the file
|
||||
does not have a hot journal, we are done. Return immediately.
|
||||
If there is a hot journal, that journal must be rolled back by
|
||||
the subsequent steps of this algorithm.</li>
|
||||
<li>Acquire a PENDING then an EXCLUSIVE lock on the database file.
|
||||
(Note: do not acquire a RESERVED lock because that would make
|
||||
other processes think the journal was no longer hot.) If we
|
||||
fail to acquire this lock it means another process or thread
|
||||
is already trying to do the rollback. In that case,
|
||||
drop all locks, close the database, and return SQLITE_BUSY. </li>
|
||||
<li>Read the journal file and roll back the changes.</li>
|
||||
<li>Wait for the rolled back changes to be written onto
|
||||
the surface of the disk. This protects the integrity of the database
|
||||
in case another power failure or crash occurs.</li>
|
||||
<li>Delete the journal file.</li>
|
||||
<li>Delete the master journal file if it is safe to do so.
|
||||
This step is optional. It is here only to prevent stale
|
||||
master journals from cluttering up the disk drive.
|
||||
See the discussion below for details.</li>
|
||||
<li>Drop the EXCLUSIVE and PENDING locks but retain the SHARED lock.</li>
|
||||
</ol>
|
||||
|
||||
<p>After the algorithm above completes successfully, it is safe to
|
||||
read from the database file. Once all reading has completed, the
|
||||
SHARED lock is dropped.</p>
|
||||
}
|
||||
|
||||
HEADING 2 {Deleting stale master journals}
|
||||
|
||||
puts {
|
||||
<p>A stale master journal is a master journal that is no longer being
|
||||
used for anything. There is no requirement that stale master journals
|
||||
be deleted. The only reason for doing so is to free up disk space.</p>
|
||||
|
||||
<p>A master journal is stale if no individual file journals are pointing
|
||||
to it. To figure out if a master journal is stale, we first read the
|
||||
master journal to obtain the names of all of its file journals. Then
|
||||
we check each of those file journals. If any of the file journals named
|
||||
in the master journal exists and points back to the master journal, then
|
||||
the master journal is not stale. If all file journals are either missing
|
||||
or refer to other master journals or no master journal at all, then the
|
||||
master journal we are testing is stale and can be safely deleted.</p>
|
||||
}
|
||||
|
||||
HEADING 2 {Writing to a database file}
|
||||
|
||||
puts {
|
||||
<p>To write to a database, a process must first acquire a SHARED lock
|
||||
as described above (possibly rolling back incomplete changes if there
|
||||
is a hot journal).
|
||||
After a SHARED lock is obtained, a RESERVED lock must be acquired.
|
||||
The RESERVED lock signals that the process intentions to write to the
|
||||
database at some point in the future. Only one process at a time
|
||||
can hold a reserved lock. But other processes can continue to read
|
||||
the database while the RESERVED lock is held.
|
||||
</p>
|
||||
|
||||
<p>If the process that wants to write is unable to obtain a RESERVED
|
||||
lock, it must mean that another process already has a RESERVED lock.
|
||||
In that case, the write attempt fails and returns SQLITE_BUSY.</p>
|
||||
|
||||
<p>After obtaining a RESERVED lock, the process that wants to write
|
||||
creates a rollback journal. The header of the journal is initialized
|
||||
with the original size of the database file. Space in the journal header
|
||||
is also reserved for a master journal name, though the master journal
|
||||
name is initially empty.</p>
|
||||
|
||||
<p>Before making changes to any page of the database, the process writes
|
||||
the original value of that page into the rollback journal. Changes
|
||||
to pages are held in memory at first and are not written to the disk.
|
||||
The original database file remains unaltered, which means that other
|
||||
processes can continue to read the database.</p>
|
||||
|
||||
<p>Eventually, the writing process will want to update the database
|
||||
file, either because its memory cache has filled up or because it is
|
||||
ready to commit its changes. Before this happens, the writer must
|
||||
make sure no other process is reading the database and that the rollback
|
||||
journal data is safely on the disk surface so that it can be used to
|
||||
rollback incomplete changes in the event of a power failure.
|
||||
The steps are as follows:</p>
|
||||
|
||||
<ol>
|
||||
<li>Make sure all rollback journal data has actually been written to
|
||||
the surface of the disk (and is not just being held in the operating
|
||||
system's or disk controllers cache) so that if a power failure occurs
|
||||
the data will still be there after power is restored.</li>
|
||||
<li>Obtain a PENDING lock and then an EXCLUSIVE lock on the database file.
|
||||
If other processes are still have SHARED locks, the writer might have
|
||||
to wait until those SHARED locks clear before it is able to obtain
|
||||
an EXCLUSIVE lock.</li>
|
||||
<li>Write all page modifications currently held in memory out to the
|
||||
original database disk file.</li>
|
||||
</ol>
|
||||
|
||||
<p>
|
||||
If the reason for writing to the database file is because the memory
|
||||
cache was full, then the writer will not commit right away. Instead,
|
||||
the writer might continue to make changes to other pages. Before
|
||||
subsequent changes are written to the database file, the rollback
|
||||
journal must be flushed to disk again. Note also that the EXCLUSIVE
|
||||
lock that the writer obtained in order to write to the database initially
|
||||
must be held until all changes are committed. That means that from the
|
||||
time the memory cache first spills to disk up until the transaction
|
||||
commits, no other processes are able to access the database.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
When a writer is ready to commit its changes, it executes the following
|
||||
steps:
|
||||
</p>
|
||||
|
||||
<ol>
|
||||
<li value="4">
|
||||
Obtain an EXCLUSIVE lock on the database file and
|
||||
make sure all memory changes have been written to the database file
|
||||
using the algorithm of steps 1-3 above.</li>
|
||||
<li>Flush all database file changes to the disk. Wait for those changes
|
||||
to actually be written onto the disk surface.</li>
|
||||
<li>Delete the journal file. This is the instant when the changes are
|
||||
committed. Prior to deleting the journal file, if a power failure
|
||||
or crash occurs, the next process to open the database will see that
|
||||
it has a hot journal and will roll the changes back.
|
||||
After the journal is deleted, there will no longer be a hot journal
|
||||
and the changes will persist.
|
||||
</li>
|
||||
<li>Drop the EXCLUSIVE and PENDING locks from the database file.
|
||||
</li>
|
||||
</ol>
|
||||
|
||||
<p>As soon as PENDING lock is released from the database file, other
|
||||
processes can begin reading the database again. In the current implementation,
|
||||
the RESERVED lock is also released, but that is not essential. Future
|
||||
versions of SQLite might provide a "CHECKPOINT" SQL command that will
|
||||
commit all changes made so far within a transaction but retain the
|
||||
RESERVED lock so that additional changes can be made without given
|
||||
any other process an opportunity to write.</p>
|
||||
|
||||
<p>If a transaction involves multiple databases, then a more complex
|
||||
commit sequence is used, as follows:</p>
|
||||
|
||||
<ol>
|
||||
<li value="4">
|
||||
Make sure all individual database files have an EXCLUSIVE lock and a
|
||||
valid journal.
|
||||
<li>Create a master-journal. The name of the master-journal is arbitrary.
|
||||
(The current implementation appends random suffixes to the name of the
|
||||
main database file until it finds a name that does not previously exist.)
|
||||
Fill the master journal with the names of all the individual journals
|
||||
and flush its contents to disk.
|
||||
<li>Write the name of the master journal into
|
||||
all individual journals (in space set aside for that purpose in the
|
||||
headers of the individual journals) and flush the contents of the
|
||||
individual journals to disk and wait for those changes to reach the
|
||||
disk surface.
|
||||
<li>Flush all database file changes to the disk. Wait for those changes
|
||||
to actually be written onto the disk surface.</li>
|
||||
<li>Delete the master journal file. This is the instant when the changes are
|
||||
committed. Prior to deleting the master journal file, if a power failure
|
||||
or crash occurs, the individual file journals will be considered hot
|
||||
and will be rolled back by the next process that
|
||||
attempts to read them. After the master journal has been deleted,
|
||||
the file journals will no longer be considered hot and the changes
|
||||
will persist.
|
||||
</li>
|
||||
<li>Delete all individual journal files.
|
||||
<li>Drop the EXCLUSIVE and PENDING locks from all database files.
|
||||
</li>
|
||||
</ol>
|
||||
}
|
||||
|
||||
HEADING 1 {How To Corrupt Your Database Files}
|
||||
|
||||
puts {
|
||||
<p>The pager module is robust but it is not completely failsafe.
|
||||
It can be subverted. This section attempt to identify and explain
|
||||
the risks.</p>
|
||||
|
||||
<p>
|
||||
Clearly, a hardware or operating system fault that introduces incorrect data
|
||||
into the middle of the database file or journal will cause problems.
|
||||
Likewise,
|
||||
if a rogue process opens a database file or journal and writes malformed
|
||||
data into the middle of it, then the database will become corrupt.
|
||||
There is not much that can be done about these kinds of problems so
|
||||
so they are given no further attention.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
SQLite uses POSIX advisory locks to implement locking on Unix. On
|
||||
windows it uses the LockFile(), LockFileEx(), and UnlockFile() system
|
||||
calls. SQLite assumes that these system calls all work as advertised. If
|
||||
that is not the case, then database corruption can result. One should
|
||||
note that POSIX advisory locking is known to be buggy or even unimplemented
|
||||
on many NFS implementations (including recent versions of Mac OS X)
|
||||
and that there are persistent reports of locking problems
|
||||
for network filesystems under windows. Your best defense is to not
|
||||
use SQLite for files on a network filesystem.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
SQLite uses the fsync() system call to flush data to the disk under Unix and
|
||||
it uses the FlushFileBuffers() to do the same under windows. Once again,
|
||||
SQLite assumes that these operating system services function as advertised.
|
||||
But it has been reported that fsync() and FlushFileBuffers() do not always
|
||||
work correctly, especially with inexpensive IDE disks. Apparently some
|
||||
manufactures of IDE disks have defective controller chips that report
|
||||
that data has reached the disk surface when in fact the data is still
|
||||
in volatile cache memory in the disk drive electronics. There are also
|
||||
reports that windows sometimes chooses to ignore FlushFileBuffers() for
|
||||
unspecified reasons. The author cannot verify any of these reports.
|
||||
But if they are true, it means that database corruption is a possibility
|
||||
following an unexpected power loss. These are hardware and/or operating
|
||||
system bugs that SQLite is unable to defend against.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
If a crash or power failure occurs and results in a hot journal, but that
|
||||
journal is deleted. The next process to open the database will not
|
||||
know that it contains changes that need to be rolled back. The rollback
|
||||
will not occur and the database will be left in an inconsistent state.
|
||||
Rollback journals might be deleted for any number of reasons:
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>An administrator might be cleaning up after an OS crash or power failure,
|
||||
see the journal file, think it is junk, and delete it.</li>
|
||||
<li>Someone (or some process) might rename the database file but fail to
|
||||
also rename its associated journal.</li>
|
||||
<li>If the database file has aliases (hard or soft links) and the file
|
||||
is opened by a different alias than the one used to create the journal,
|
||||
then the journal will not be found. To avoid this problem, you should
|
||||
not create links to SQLite database files.</li>
|
||||
<li>Filesystem corruption following a power failure might cause the
|
||||
journal to be renamed or deleted.</li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
The last (fourth) bullet above merits additional comment. When SQLite creates
|
||||
a journal file on Unix, it opens the directory that contains that file and
|
||||
calls fsync() on the directory, in an effort to push the directory information
|
||||
to disk. But suppose some other process is adding or removing unrelated
|
||||
files to the directory that contains the database and journal at the the
|
||||
moment of a power failure. The supposedly unrelated actions of this other
|
||||
process might in the journal file being dropped from the directory and
|
||||
moved into "lost+found". This is an unlikely scenario, but it could happen.
|
||||
The best defenses are to use a journaling filesystem or to keep the
|
||||
database and journal in a directory by themselves.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
For a commit involving multiple databases and a master journal, if the
|
||||
various databases were on different disk volumes and a power failure occurs
|
||||
during the commit, then when the machine comes back up the disks might
|
||||
be remounted with different names. Or some disks might not be mounted
|
||||
at all. When this happens the individual file journals and the master
|
||||
journal might not be able to find each other. The worst outcome from
|
||||
this scenario is that the commit ceases to be atomic.
|
||||
Some databases might be rolled back and others might not.
|
||||
All databases will continue to be self-consistent.
|
||||
To defend against this problem, keep all databases
|
||||
on the same disk volume and/or remount disks using exactly the same names
|
||||
after a power failure.
|
||||
</p>
|
||||
}
|
||||
|
||||
HEADING 1 {Transaction Control At The SQL Level}
|
||||
|
||||
puts {
|
||||
<p>
|
||||
The changes to locking and concurrency control in SQLite version 3 also
|
||||
introduce some subtle changes in the way transactions work at the SQL
|
||||
language level.
|
||||
By default, SQLite version 3 operates in "autocommit" mode. In autocommit mode,
|
||||
all changes to the database are committed as soon as all operations associated
|
||||
with the current database connection complete.</p>
|
||||
|
||||
<p>The SQL command "BEGIN TRANSACTION" (the TRANSACTION keyword
|
||||
is optional) is used to take SQLite out of autocommit mode.
|
||||
Note that the BEGIN command does not acquire any locks on the database.
|
||||
After a BEGIN command, a SHARED lock will be acquired when the first
|
||||
SELECT statement is executed. A RESERVED lock will be acquired when
|
||||
the first INSERT, UPDATE, or DELETE statement is executed. No EXCLUSIVE
|
||||
locks is acquired until either the memory cache fills up and must
|
||||
be spilled to disk or until the transaction commits. In this way,
|
||||
the system delays blocking read access to the file file until the
|
||||
last possible moment.
|
||||
</p>
|
||||
|
||||
<p>The SQL command "COMMIT" does not actually commit the changes to
|
||||
disk. It just turns autocommit back on. Then, at the conclusion of
|
||||
the command, the regular autocommit logic takes over and causes the
|
||||
actual commit to disk to occur.
|
||||
The SQL command "ROLLBACK" also operates by turning autocommit back on,
|
||||
but it also sets a flag that tells the autocommit logic to rollback rather
|
||||
than commit.</p>
|
||||
|
||||
<p>If the SQL COMMIT command turns autocommit on and the autocommit logic
|
||||
then tries to commit change but fails because some other process is holding
|
||||
a SHARED lock, then autocommit is turned back off automatically. This
|
||||
allows the user to retry the COMMIT at a later time after the SHARED lock
|
||||
has had an opportunity to clear.</p>
|
||||
}
|
||||
|
||||
|
||||
footer $rcsid
|
Loading…
Reference in New Issue
Block a user