?? pager.c
字號:
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of the page cache subsystem or "pager".
**
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.101.2.1 2005/12/19 17:37:10 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

/*
** Macros for troubleshooting.  Normally turned off.
**
** With the "#if 0" below, the first branch is compiled out and all five
** macros expand to nothing.  When the first branch is enabled, tracing is
** restricted to a single pager (mainPager).  The TRACEn() macros then
** expand to an unbraced "if" statement that refers to a variable named
** pPager at the point of use, so they must only be used as complete
** statements in functions that have such a variable in scope.
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X)  if( mainPager==0 ) mainPager = (X)
#define CLR_PAGER(X)  if( mainPager==(X) ) mainPager = 0
#define TRACE1(X)     if( pPager==mainPager ) fprintf(stderr,X)
#define TRACE2(X,Y)   if( pPager==mainPager ) fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z) if( pPager==mainPager ) fprintf(stderr,X,Y,Z)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif

/*
** The page cache as a whole is always in one of the following
** states:
**
**   SQLITE_UNLOCK       The page cache is not currently reading or
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   SQLITE_READLOCK     The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   SQLITE_WRITELOCK    The page cache is writing the database.
**                       Access is exclusive.
**                       No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
** The page cache comes up in SQLITE_UNLOCK.  The first time a
** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK.
** After all pages have been released using sqlite_page_unref(),
** the state transitions back to SQLITE_UNLOCK.  The first time
** that sqlite_page_write() is called, the state transitions to
** SQLITE_WRITELOCK.  (Note that sqlite_page_write() can only be
** called on an outstanding page which means that the pager must
** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
** The sqlite_page_rollback() and sqlite_page_commit() functions
** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
*/
#define SQLITE_UNLOCK      0   /* Initial state: not reading or writing */
#define SQLITE_READLOCK    1   /* Reading; other readers are permitted */
#define SQLITE_WRITELOCK   2   /* Writing; access is exclusive */


/*
** Each in-memory image of a page begins with the following header.
** This header is only visible to this pager module.  The client
** code that calls pager sees only the data that follows the header.
**
** Client code should call sqlitepager_write() on a page prior to making
** any modifications to that page.  The first time sqlitepager_write()
** is called, the original page contents are written into the rollback
** journal and PgHdr.inJournal and PgHdr.needSync are set.  Later, once
** the journal page has made it onto the disk surface, PgHdr.needSync
** is cleared.
** The modified page cannot be written back into the original
** database file until the journal pages have been synced to disk and the
** PgHdr.needSync has been cleared.
**
** The PgHdr.dirty flag is set when sqlitepager_write() is called and
** is cleared again when the page content is written back to the original
** database file.
*/
typedef struct PgHdr PgHdr;
struct PgHdr {
  Pager *pPager;                 /* The pager to which this page belongs */
  Pgno pgno;                     /* The page number for this page */
  PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
  int nRef;                      /* Number of users of this page */
  PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */
  PgHdr *pNextAll, *pPrevAll;    /* A list of all pages */
  PgHdr *pNextCkpt, *pPrevCkpt;  /* List of pages in the checkpoint journal */
  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inCkpt;                     /* TRUE if written to the checkpoint journal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 needSync;                   /* Sync journal before writing this page */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  PgHdr *pDirty;                 /* Dirty pages sorted by PgHdr.pgno */
  /* SQLITE_PAGE_SIZE bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data */
};

/*
** A macro used for invoking the codec if there is one.
**
** When SQLITE_HAS_CODEC is not defined the macro expands to nothing.
** When it is defined the macro expands to a complete braced "if"
** statement; the P argument is not parenthesized in the expansion, so
** pass a simple pointer expression.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC(P,D,N,X) if( P->xCodec ){ P->xCodec(P->pCodecArg,D,N,X); }
#else
# define CODEC(P,D,N,X)
#endif

/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
**
**   PGHDR_TO_DATA(P)   Address of the first byte after the header P.
**   DATA_TO_PGHDR(D)   Address one PgHdr before the data pointer D.
**   PGHDR_TO_EXTRA(P)  Address SQLITE_PAGE_SIZE bytes past the start of
**                      the data that follows header P.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(P) ((void*)&((char*)(&(P)[1]))[SQLITE_PAGE_SIZE])

/*
** How big to make the hash table used for locating in-memory pages
** by page number.  pager_hash() below masks with N_PG_HASH-1, which
** selects a valid bucket only while this value is a power of two.
*/
#define N_PG_HASH 2048

/*
** Hash a page number: keep the low-order bits so that the result is
** an index in the range 0 to N_PG_HASH-1.
*/
#define pager_hash(PN)  ((PN)&(N_PG_HASH-1))

/*
** An open page cache is an instance of the following structure.
*/
struct Pager {
  char *zFilename;            /* Name of the database file */
  char *zJournal;             /* Name of the journal file */
  char *zDirectory;           /* Directory holding database and journal files */
  OsFile fd, jfd;             /* File descriptors for database and journal */
  OsFile cpfd;                /* File descriptor for the checkpoint journal */
  int dbSize;                 /* Number of pages in the file */
  int origDbSize;             /* dbSize before the current change */
  int ckptSize;               /* Size of database (in pages) at ckpt_begin() */
  off_t ckptJSize;            /* Size of journal at ckpt_begin() */
  int nRec;                   /* Number of pages written to the journal */
  u32 cksumInit;              /* Quasi-random value added to every checksum */
  int ckptNRec;               /* Number of records in the checkpoint journal */
  int nExtra;                 /* Add this many bytes to each in-memory page */
  void (*xDestructor)(void*); /* Call this routine when freeing pages */
  int nPage;                  /* Total number of in-memory pages */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  int nHit, nMiss, nOvfl;     /* Cache hits, misses, and LRU overflows */
  void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  void *pCodecArg;            /* First argument to xCodec() */
  u8 journalOpen;             /* True if journal file descriptor is valid */
  u8 journalStarted;          /* True if header of journal is synced */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 ckptOpen;                /* True if the checkpoint journal is open */
  u8 ckptInUse;               /* True if we are in a checkpoint */
  u8 ckptAutoopen;            /* Open ckpt journal when main journal is opened*/
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 state;                   /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
  u8 errMask;                 /* Bitmask of PAGER_ERR_* error flags */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyFile;               /* True if database file has changed in any way */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInCkpt;                /* One bit for each page in the database */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pCkpt;               /* List of pages in the checkpoint journal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number to PgHdr */
};

/*
** These are bits that can be set in Pager.errMask.  More than one bit
** may be set at the same time.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */

/*
** The journal file contains page records in the following
** format.
**
** Actually, this structure is the complete page record for pager
** formats less than 3.  Beginning with format 3, this record is surrounded
** by two checksums.
*/
typedef struct PageRecord PageRecord;
struct PageRecord {
  Pgno pgno;                      /* The page number */
  char aData[SQLITE_PAGE_SIZE];   /* Original data for page pgno */
};

/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** There are three journal formats (so far).  The 1st journal format writes
** 32-bit integers in the byte-order of the host machine.  New
** formats write integers as big-endian.  All new journals use the
** new format, but we have to be able to read an older journal in order
** to rollback journals created by older versions of the library.
**
** The 3rd journal format (added for 2.8.0) adds additional sanity
** checking information to the journal.  If the power fails while the
** journal is being written, semi-random garbage data might appear in
** the journal file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.
** The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the 3rd journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the SQLITE_PAGE_SIZE bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to a random value which
** is different for every journal, we minimize that risk.
**
** The three magic strings below are identical except for the final
** byte (0xd4, 0xd5, 0xd6), which distinguishes formats 1, 2 and 3.
*/
static const unsigned char aJournalMagic1[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,
};
static const unsigned char aJournalMagic2[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5,
};
static const unsigned char aJournalMagic3[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd6,
};
#define JOURNAL_FORMAT_1 1
#define JOURNAL_FORMAT_2 2
#define JOURNAL_FORMAT_3 3

/*
** The following integer determines what format to use when creating
** new primary journal files.  By default we always use format 3.
** When testing, we can set this value to older journal formats in order to
** make sure that newer versions of the library are able to rollback older
** journal files.
**
** Note that checkpoint journals always use format 2 and omit the header.
**
** Under SQLITE_TEST this is a writable global variable; otherwise it is
** a macro for the constant 3, so it cannot be assigned to.
*/
#ifdef SQLITE_TEST
int journal_format = 3;
#else
# define journal_format 3
#endif

/*
** The size of the header and of each page in the journal varies according
** to which journal format is being used.
The following macros figure out** the sizes based on format numbers.*/#define JOURNAL_HDR_SZ(X) \ (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3)*2*sizeof(u32))#define JOURNAL_PG_SZ(X) \ (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3)*sizeof(u32))/*** Enable reference count tracking here:*/#ifdef SQLITE_TEST int pager_refinfo_enable = 0; static void pager_refinfo(PgHdr *p){ static int cnt = 0; if( !pager_refinfo_enable ) return; printf( "REFCNT: %4d addr=0x%08x nRef=%d\n", p->pgno, (int)PGHDR_TO_DATA(p), p->nRef ); cnt++; /* Something to set a breakpoint on */ }# define REFINFO(X) pager_refinfo(X)#else# define REFINFO(X)#endif/*** Read a 32-bit integer from the given file descriptor. Store the integer** that is read in *pRes. Return SQLITE_OK if everything worked, or an** error code is something goes wrong.**** If the journal format is 2 or 3, read a big-endian integer. If the** journal format is 1, read an integer in the native byte-order of the** host machine.*/static int read32bits(int format, OsFile *fd, u32 *pRes){ u32 res; int rc; rc = sqliteOsRead(fd, &res, sizeof(res)); if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){ unsigned char ac[4]; memcpy(ac, &res, 4); res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3]; } *pRes = res; return rc;}/*** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK** on success or an error code is something goes wrong.**** If the journal format is 2 or 3, write the integer as 4 big-endian** bytes. If the journal format is 1, write the integer in the native** byte order. In normal operation, only formats 2 and 3 are used.** Journal format 1 is only used for testing.*/static int write32bits(OsFile *fd, u32 val){ unsigned char ac[4]; if( journal_format<=1 ){ return sqliteOsWrite(fd, &val, 4); } ac[0] = (val>>24) & 0xff; ac[1] = (val>>16) & 0xff; ac[2] = (val>>8) & 0xff; ac[3] = val & 0xff; return sqliteOsWrite(fd, ac, 4);}/*** Write a 32-bit integer into a page header right before the** page data. 
This will overwrite the PgHdr.pDirty pointer.**** The integer is big-endian for formats 2 and 3 and native byte order** for journal format 1.*/static void store32bits(u32 val, PgHdr *p, int offset){ unsigned char *ac; ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset]; if( journal_format<=1 ){ memcpy(ac, &val, 4); }else{ ac[0] = (val>>24) & 0xff; ac[1] = (val>>16) & 0xff; ac[2] = (val>>8) & 0xff; ac[3] = val & 0xff; }}/*** Convert the bits in the pPager->errMask into an approprate** return code.*/static int pager_errcode(Pager *pPager){ int rc = SQLITE_OK; if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL; if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR; if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL; if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM; if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT; return rc;}/*** Add or remove a page from the list of all pages that are in the** checkpoint journal.**
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -