?? pager.c
字號:
/*
** 2001 September 15
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of the page cache subsystem or "pager".
**
** The pager is used to access a database disk file. It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file. The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.25 2006/10/12 21:34:21 rmsimpson Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
#include "os.h"
#include "pager.h"
#include <assert.h>
#include <string.h>
/*
** Macros for troubleshooting. Normally turned off
*/
#if 0
#define TRACE1(X) sqlite3DebugPrintf(X)
#define TRACE2(X,Y) sqlite3DebugPrintf(X,Y)
#define TRACE3(X,Y,Z) sqlite3DebugPrintf(X,Y,Z)
#define TRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W)
#define TRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
#else
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#define TRACE4(X,Y,Z,W)
#define TRACE5(X,Y,Z,W,V)
#endif
/*
** The following two macros are used within the TRACEX() macros above
** to print out file-descriptors.
**
** PAGERID() takes a pointer to a Pager struct as it's argument. The
** associated file-descriptor is returned. FILEHANDLEID() takes an OsFile
** struct as it's argument.
*/
#define PAGERID(p) ((int)(p->fd))
#define FILEHANDLEID(fd) ((int)fd)
/*
** The page cache as a whole is always in one of the following
** states:
**
** PAGER_UNLOCK The page cache is not currently reading or
** writing the database file. There is no
** data held in memory. This is the initial
** state.
**
** PAGER_SHARED The page cache is reading the database.
** Writing is not permitted. There can be
** multiple readers accessing the same database
** file at the same time.
**
** PAGER_RESERVED This process has reserved the database for writing
** but has not yet made any changes. Only one process
** at a time can reserve the database. The original
** database file has not been modified so other
** processes may still be reading the on-disk
** database file.
**
** PAGER_EXCLUSIVE The page cache is writing the database.
** Access is exclusive. No other processes or
** threads can be reading or writing while one
** process is writing.
**
** PAGER_SYNCED The pager moves to this state from PAGER_EXCLUSIVE
** after all dirty pages have been written to the
** database file and the file has been synced to
** disk. All that remains to do is to remove the
** journal file and the transaction will be
** committed.
**
** The page cache comes up in PAGER_UNLOCK. The first time a
** sqlite3pager_get() occurs, the state transitions to PAGER_SHARED.
** After all pages have been released using sqlite_page_unref(),
** the state transitions back to PAGER_UNLOCK. The first time
** that sqlite3pager_write() is called, the state transitions to
** PAGER_RESERVED. (Note that sqlite_page_write() can only be
** called on an outstanding page which means that the pager must
** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
** The transition to PAGER_EXCLUSIVE occurs when before any changes
** are made to the database file. After an sqlite3pager_rollback()
** or sqlite_pager_commit(), the state goes back to PAGER_SHARED.
*/
#define PAGER_UNLOCK 0
#define PAGER_SHARED 1 /* same as SHARED_LOCK */
#define PAGER_RESERVED 2 /* same as RESERVED_LOCK */
#define PAGER_EXCLUSIVE 4 /* same as EXCLUSIVE_LOCK */
#define PAGER_SYNCED 5
/*
** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
** then failed attempts to get a reserved lock will invoke the busy callback.
** This is off by default. To see why, consider the following scenario:
**
** Suppose thread A already has a shared lock and wants a reserved lock.
** Thread B already has a reserved lock and wants an exclusive lock. If
** both threads are using their busy callbacks, it might be a long time
** be for one of the threads give up and allows the other to proceed.
** But if the thread trying to get the reserved lock gives up quickly
** (if it never invokes its busy callback) then the contention will be
** resolved quickly.
*/
#ifndef SQLITE_BUSY_RESERVED_LOCK
# define SQLITE_BUSY_RESERVED_LOCK 0
#endif
/*
** This macro rounds values up so that if the value is an address it
** is guaranteed to be an address that is aligned to an 8-byte boundary.
*/
#define FORCE_ALIGNMENT(X) (((X)+7)&~7)
/*
** Each in-memory image of a page begins with the following header.
** This header is only visible to this pager module. The client
** code that calls pager sees only the data that follows the header.
**
** Client code should call sqlite3pager_write() on a page prior to making
** any modifications to that page. The first time sqlite3pager_write()
** is called, the original page contents are written into the rollback
** journal and PgHdr.inJournal and PgHdr.needSync are set. Later, once
** the journal page has made it onto the disk surface, PgHdr.needSync
** is cleared. The modified page cannot be written back into the original
** database file until the journal pages has been synced to disk and the
** PgHdr.needSync has been cleared.
**
** The PgHdr.dirty flag is set when sqlite3pager_write() is called and
** is cleared again when the page content is written back to the original
** database file.
*/
typedef struct PgHdr PgHdr;
struct PgHdr {
Pager *pPager; /* The pager to which this page belongs */
Pgno pgno; /* The page number for this page */
PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
PgHdr *pNextAll; /* A list of all pages */
PgHdr *pNextStmt, *pPrevStmt; /* List of pages in the statement journal */
u8 inJournal; /* TRUE if has been written to journal */
u8 inStmt; /* TRUE if in the statement subjournal */
u8 dirty; /* TRUE if we need to write back changes */
u8 needSync; /* Sync journal before writing this page */
u8 alwaysRollback; /* Disable dont_rollback() for this page */
short int nRef; /* Number of users of this page */
PgHdr *pDirty, *pPrevDirty; /* Dirty pages */
u32 notUsed; /* Buffer space */
#ifdef SQLITE_CHECK_PAGES
u32 pageHash;
#endif
/* pPager->pageSize bytes of page data follow this header */
/* Pager.nExtra bytes of local data follow the page data */
};
/*
** For an in-memory only database, some extra information is recorded about
** each page so that changes can be rolled back. (Journal files are not
** used for in-memory databases.) The following information is added to
** the end of every EXTRA block for in-memory databases.
**
** This information could have been added directly to the PgHdr structure.
** But then it would take up an extra 8 bytes of storage on every PgHdr
** even for disk-based databases. Splitting it out saves 8 bytes. This
** is only a savings of 0.8% but those percentages add up.
*/
typedef struct PgHistory PgHistory;
struct PgHistory {
u8 *pOrig; /* Original page text. Restore to this on a full rollback */
u8 *pStmt; /* Text as it was at the beginning of the current statement */
};
/*
** A macro used for invoking the codec if there is one
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC1(P,D,N,X) if( P->xCodec!=0 ){ P->xCodec(P->pCodecArg,D,N,X); }
# define CODEC2(P,D,N,X) ((char*)(P->xCodec!=0?P->xCodec(P->pCodecArg,D,N,X):D))
#else
# define CODEC1(P,D,N,X) /* NO-OP */
# define CODEC2(P,D,N,X) ((char*)D)
#endif
/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
*/
#define PGHDR_TO_DATA(P) ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D) (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(G,P) ((void*)&((char*)(&(G)[1]))[(P)->pageSize])
#define PGHDR_TO_HIST(P,PGR) \
((PgHistory*)&((char*)(&(P)[1]))[(PGR)->pageSize+(PGR)->nExtra])
/*
** A open page cache is an instance of the following structure.
**
** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, SQLITE_PROTOCOL
** or SQLITE_FULL. Once one of the first three errors occurs, it persists
** and is returned as the result of every major pager API call. The
** SQLITE_FULL return code is slightly different. It persists only until the
** next successful rollback is performed on the pager cache. Also,
** SQLITE_FULL does not affect the sqlite3pager_get() and sqlite3pager_lookup()
** APIs, they may still be used successfully.
*/
struct Pager {
u8 journalOpen; /* True if journal file descriptors is valid */
u8 journalStarted; /* True if header of journal is synced */
u8 useJournal; /* Use a rollback journal on this file */
u8 noReadlock; /* Do not bother to obtain readlocks */
u8 stmtOpen; /* True if the statement subjournal is open */
u8 stmtInUse; /* True we are in a statement subtransaction */
u8 stmtAutoopen; /* Open stmt journal when main journal is opened*/
u8 noSync; /* Do not sync the journal if true */
u8 fullSync; /* Do extra syncs of the journal for robustness */
u8 full_fsync; /* Use F_FULLFSYNC when available */
u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
u8 tempFile; /* zFilename is a temporary file */
u8 readOnly; /* True for a read-only database */
u8 needSync; /* True if an fsync() is needed on the journal */
u8 dirtyCache; /* True if cached pages have changed */
u8 alwaysRollback; /* Disable dont_rollback() for all pages */
u8 memDb; /* True to inhibit all file I/O */
u8 setMaster; /* True if a m-j name has been written to jrnl */
int errCode; /* One of several kinds of errors */
int dbSize; /* Number of pages in the file */
int origDbSize; /* dbSize before the current change */
int stmtSize; /* Size of database (in pages) at stmt_begin() */
int nRec; /* Number of pages written to the journal */
u32 cksumInit; /* Quasi-random value added to every checksum */
int stmtNRec; /* Number of records in stmt subjournal */
int nExtra; /* Add this many bytes to each in-memory page */
int pageSize; /* Number of bytes in a page */
int nPage; /* Total number of in-memory pages */
int nMaxPage; /* High water mark of nPage */
int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
int mxPage; /* Maximum number of pages to hold in cache */
u8 *aInJournal; /* One bit for each page in the database file */
u8 *aInStmt; /* One bit for each page in the database */
char *zFilename; /* Name of the database file */
char *zJournal; /* Name of the journal file */
char *zDirectory; /* Directory hold database and journal files */
OsFile *fd, *jfd; /* File descriptors for database and journal */
OsFile *stfd; /* File descriptor for the statement subjournal*/
BusyHandler *pBusyHandler; /* Pointer to sqlite.busyHandler */
PgHdr *pFirst, *pLast; /* List of free pages */
PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */
PgHdr *pAll; /* List of all pages */
PgHdr *pStmt; /* List of pages in the statement subjournal */
PgHdr *pDirty; /* List of all dirty pages */
i64 journalOff; /* Current byte offset in the journal file */
i64 journalHdr; /* Byte offset to previous journal header */
i64 stmtHdrOff; /* First journal header written this statement */
i64 stmtCksum; /* cksumInit when statement was started */
i64 stmtJSize; /* Size of journal at stmt_begin() */
int sectorSize; /* Assumed sector size during rollback */
#ifdef SQLITE_TEST
int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
int nRead,nWrite; /* Database pages read/written */
#endif
void (*xDestructor)(void*,int); /* Call this routine when freeing pages */
void (*xReiniter)(void*,int); /* Call this routine when reloading pages */
void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
void *pCodecArg; /* First argument to xCodec() */
int nHash; /* Size of the pager hash table */
PgHdr **aHash; /* Hash table to map page number to PgHdr */
#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
Pager *pNext; /* Linked list of pagers in this thread */
#endif
};
/*
** If SQLITE_TEST is defined then increment the variable given in
** the argument
*/
#ifdef SQLITE_TEST
# define TEST_INCR(x) x++
#else
# define TEST_INCR(x)
#endif
/*
** Journal files begin with the following magic string. The data
** was obtained from /dev/random. It is used only as a sanity check.
**
** Since version 2.8.0, the journal format contains additional sanity
** checking information. If the power fails while the journal is begin
** written, semi-random garbage data might appear in the journal
** file after power is restored. If an attempt is then made
** to roll the journal back, the database could be corrupted. The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the new journal format consists
** of a 32-bit checksum on each page of data. The checksum covers both
** the page number and the pPager->pageSize bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header. The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted. If the
** garbage data came from an obsolete journal file, the checksums might
** be correct. But by initializing the checksum to random value which
** is different for every journal, we minimize that risk.
*/
static const unsigned char aJournalMagic[] = {
0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
};
/*
** The size of the header and of each page in the journal is determined
** by the following macros.
*/
#define JOURNAL_PG_SZ(pPager) ((pPager->pageSize) + 8)
/*
** The journal header size for this pager. In the future, this could be
** set to some value read from the disk controller. The important
** characteristic is that it is the same size as a disk sector.
*/
#define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize)
/*
** The macro MEMDB is true if we are dealing with an in-memory database.
** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
** the value of MEMDB will be a constant and the compiler will optimize
** out code that would never execute.
*/
#ifdef SQLITE_OMIT_MEMORYDB
# define MEMDB 0
#else
# define MEMDB pPager->memDb
#endif
/*
** The default size of a disk sector
*/
#define PAGER_SECTOR_SIZE 512
/*
** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
** reserved for working around a windows/posix incompatibility). It is
** used in the journal to signify that the remainder of the journal file
** is devoted to storing a master journal name - there are no more pages to
** roll back. See comments for function writeMasterJournal() for details.
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -