?? pager.c
字號:
** The Pager keeps a separate list of pages that are currently in** the checkpoint journal. This helps the sqlitepager_ckpt_commit()** routine run MUCH faster for the common case where there are many** pages in memory but only a few are in the checkpoint journal.*/static void page_add_to_ckpt_list(PgHdr *pPg){ Pager *pPager = pPg->pPager; if( pPg->inCkpt ) return; assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 ); pPg->pPrevCkpt = 0; if( pPager->pCkpt ){ pPager->pCkpt->pPrevCkpt = pPg; } pPg->pNextCkpt = pPager->pCkpt; pPager->pCkpt = pPg; pPg->inCkpt = 1;}static void page_remove_from_ckpt_list(PgHdr *pPg){ if( !pPg->inCkpt ) return; if( pPg->pPrevCkpt ){ assert( pPg->pPrevCkpt->pNextCkpt==pPg ); pPg->pPrevCkpt->pNextCkpt = pPg->pNextCkpt; }else{ assert( pPg->pPager->pCkpt==pPg ); pPg->pPager->pCkpt = pPg->pNextCkpt; } if( pPg->pNextCkpt ){ assert( pPg->pNextCkpt->pPrevCkpt==pPg ); pPg->pNextCkpt->pPrevCkpt = pPg->pPrevCkpt; } pPg->pNextCkpt = 0; pPg->pPrevCkpt = 0; pPg->inCkpt = 0;}/*** Find a page in the hash table given its page number. Return** a pointer to the page or NULL if not found.*/static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){ PgHdr *p = pPager->aHash[pager_hash(pgno)]; while( p && p->pgno!=pgno ){ p = p->pNextHash; } return p;}/*** Unlock the database and clear the in-memory cache. This routine** sets the state of the pager back to what it was when it was first** opened. 
Any outstanding pages are invalidated and subsequent attempts** to access those pages will likely result in a coredump.*/static void pager_reset(Pager *pPager){ PgHdr *pPg, *pNext; for(pPg=pPager->pAll; pPg; pPg=pNext){ pNext = pPg->pNextAll; sqliteFree(pPg); } pPager->pFirst = 0; pPager->pFirstSynced = 0; pPager->pLast = 0; pPager->pAll = 0; memset(pPager->aHash, 0, sizeof(pPager->aHash)); pPager->nPage = 0; if( pPager->state>=SQLITE_WRITELOCK ){ sqlitepager_rollback(pPager); } sqliteOsUnlock(&pPager->fd); pPager->state = SQLITE_UNLOCK; pPager->dbSize = -1; pPager->nRef = 0; assert( pPager->journalOpen==0 );}/*** When this routine is called, the pager has the journal file open and** a write lock on the database. This routine releases the database** write lock and acquires a read lock in its place. The journal file** is deleted and closed.**** TODO: Consider keeping the journal file open for temporary databases.** This might give a performance improvement on windows where opening** a file is an expensive operation.*/static int pager_unwritelock(Pager *pPager){ int rc; PgHdr *pPg; if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK; sqlitepager_ckpt_commit(pPager); if( pPager->ckptOpen ){ sqliteOsClose(&pPager->cpfd); pPager->ckptOpen = 0; } if( pPager->journalOpen ){ sqliteOsClose(&pPager->jfd); pPager->journalOpen = 0; sqliteOsDelete(pPager->zJournal); sqliteFree( pPager->aInJournal ); pPager->aInJournal = 0; for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ pPg->inJournal = 0; pPg->dirty = 0; pPg->needSync = 0; } }else{ assert( pPager->dirtyFile==0 || pPager->useJournal==0 ); } rc = sqliteOsReadLock(&pPager->fd); if( rc==SQLITE_OK ){ pPager->state = SQLITE_READLOCK; }else{ /* This can only happen if a process does a BEGIN, then forks and the ** child process does the COMMIT. Because of the semantics of unix ** file locking, the unlock will fail. 
*/ pPager->state = SQLITE_UNLOCK; } return rc;}/*** Compute and return a checksum for the page of data.**** This is not a real checksum. It is really just the sum of the ** random initial value and the page number. We considered do a checksum** of the database, but that was found to be too slow.*/static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){ u32 cksum = pPager->cksumInit + pgno; return cksum;}/*** Read a single page from the journal file opened on file descriptor** jfd. Playback this one page.**** There are three different journal formats. The format parameter determines** which format is used by the journal that is played back.*/static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int format){ int rc; PgHdr *pPg; /* An existing page in the cache */ PageRecord pgRec; u32 cksum; rc = read32bits(format, jfd, &pgRec.pgno); if( rc!=SQLITE_OK ) return rc; rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData)); if( rc!=SQLITE_OK ) return rc; /* Sanity checking on the page. This is more important that I originally ** thought. If a power failure occurs while the journal is being written, ** it could cause invalid data to be written into the journal. We need to ** detect this invalid data (with high probability) and ignore it. */ if( pgRec.pgno==0 ){ return SQLITE_DONE; } if( pgRec.pgno>(unsigned)pPager->dbSize ){ return SQLITE_OK; } if( format>=JOURNAL_FORMAT_3 ){ rc = read32bits(format, jfd, &cksum); if( rc ) return rc; if( pager_cksum(pPager, pgRec.pgno, pgRec.aData)!=cksum ){ return SQLITE_DONE; } } /* Playback the page. Update the in-memory copy of the page ** at the same time, if there is one. 
*/ pPg = pager_lookup(pPager, pgRec.pgno); TRACE2("PLAYBACK %d\n", pgRec.pgno); sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE); rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE); if( pPg ){ /* No page should ever be rolled back that is in use, except for page ** 1 which is held in use in order to keep the lock on the database ** active. */ assert( pPg->nRef==0 || pPg->pgno==1 ); memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE); memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra); pPg->dirty = 0; pPg->needSync = 0; CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3); } return rc;}/*** Playback the journal and thus restore the database file to** the state it was in before we started making changes. **** The journal file format is as follows: **** * 8 byte prefix. One of the aJournalMagic123 vectors defined** above. The format of the journal file is determined by which** of the three prefix vectors is seen.** * 4 byte big-endian integer which is the number of valid page records** in the journal. If this value is 0xffffffff, then compute the** number of page records from the journal size. This field appears** in format 3 only.** * 4 byte big-endian integer which is the initial value for the ** sanity checksum. This field appears in format 3 only.** * 4 byte integer which is the number of pages to truncate the** database to during a rollback.** * Zero or more pages instances, each as follows:** + 4 byte page number.** + SQLITE_PAGE_SIZE bytes of data.** + 4 byte checksum (format 3 only)**** When we speak of the journal header, we mean the first 4 bullets above.** Each entry in the journal is an instance of the 5th bullet. Note that** bullets 2 and 3 only appear in format-3 journals.**** Call the value from the second bullet "nRec". nRec is the number of** valid page entries in the journal. In most cases, you can compute the** value of nRec from the size of the journal file. 
But if a power
** failure occurred while the journal was being written, it could be the
** case that the size of the journal file had already been increased but
** the extra entries had not yet made it safely to disk.  In such a case,
** the value of nRec computed from the file size would be too large.  For
** that reason, we always use the nRec value in the header.
**
** If the nRec value is 0xffffffff it means that nRec should be computed
** from the file size.  This value is used when the user selects the
** no-sync option for the journal.  A power failure could lead to corruption
** in this case.  But for things like temporary table (which will be
** deleted when the power is restored) we don't care.
**
** Journal formats 1 and 2 do not have an nRec value in the header so we
** have to compute nRec from the file size.  This has risks (as described
** above) which is why all persistent tables have been changed to use
** format 3.
**
** If the file opened as the journal file is not a well-formed
** journal file then the database will likely already be
** corrupted, so the PAGER_ERR_CORRUPT bit is set in pPager->errMask
** and SQLITE_CORRUPT is returned.  If it all works, then this routine
** returns SQLITE_OK.
*/
static int pager_playback(Pager *pPager, int useJournalSize){
  off_t szJ;                    /* Size of the journal file in bytes */
  int nRec;                     /* Number of records in the journal */
  int iRec;                     /* Index of the record being played back */
  Pgno mxPg = 0;                /* Size of the original file in pages */
  int format;                   /* Format of the journal file */
  unsigned char aMagic[sizeof(aJournalMagic1)];  /* Prefix read from journal */
  PgHdr *pDirty;                /* Walks the list of all cached pages */
  char zBuf[SQLITE_PAGE_SIZE];  /* Content refetched from the database */
  int rc;                       /* Result code */

  /* Find the size of the journal.  Everything below bails out to
  ** end_playback, which always drops the write lock. */
  assert( pPager->journalOpen );
  sqliteOsSeek(&pPager->jfd, 0);
  rc = sqliteOsFileSize(&pPager->jfd, &szJ);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }

  /* A journal too small to hold even the smallest complete header means
  ** that the process that created it died just as it began writing the
  ** journal.  The database file should still be completely unchanged in
  ** that case, so nothing needs to be rolled back and the journal can
  ** safely be ignored. */
  if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
    goto end_playback;
  }

  /* Identify the journal format from its magic prefix. */
  rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
  if( rc!=SQLITE_OK ){
    rc = SQLITE_PROTOCOL;
    goto end_playback;
  }
  if( memcmp(aMagic, aJournalMagic3, sizeof(aMagic))==0 ){
    format = JOURNAL_FORMAT_3;
  }else if( memcmp(aMagic, aJournalMagic2, sizeof(aMagic))==0 ){
    format = JOURNAL_FORMAT_2;
  }else if( memcmp(aMagic, aJournalMagic1, sizeof(aMagic))==0 ){
    format = JOURNAL_FORMAT_1;
  }else{
    rc = SQLITE_PROTOCOL;
    goto end_playback;
  }

  /* Work out how many page records the journal holds. */
  if( format<JOURNAL_FORMAT_3 ){
    /* Formats 1 and 2 carry no record count, so derive it from the size. */
    nRec = (szJ - JOURNAL_HDR_SZ(2))/JOURNAL_PG_SZ(2);
    assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(2)==szJ );
  }else{
    /* The size test made earlier had to assume the smallest possible
    ** header because the format was not yet known.  A format 3 header is
    ** bigger, so test again and ignore a journal that cannot hold it. */
    if( szJ < sizeof(aMagic) + 3*sizeof(u32) ){
      goto end_playback;
    }
    rc = read32bits(format, &pPager->jfd, (u32*)&nRec);
    if( rc ){
      goto end_playback;
    }
    rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
    if( rc ){
      goto end_playback;
    }
    if( nRec==0xffffffff || useJournalSize ){
      nRec = (szJ - JOURNAL_HDR_SZ(3))/JOURNAL_PG_SZ(3);
    }
  }

  /* Read the original database size from the header and truncate the
  ** database file back to that size. */
  rc = read32bits(format, &pPager->jfd, &mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  pPager->dbSize = mxPg;

  /* Copy original pages out of the journal and back into the database
  ** file.  SQLITE_DONE from the helper stops the playback early without
  ** being treated as an error. */
  for(iRec=0; iRec<nRec; iRec++){
    rc = pager_playback_one_page(pPager, &pPager->jfd, format);
    if( rc==SQLITE_OK ){
      continue;
    }
    if( rc==SQLITE_DONE ){
      rc = SQLITE_OK;
    }
    break;
  }
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }

  /* Pages that have been written to the journal but never synced
  ** were not restored by the loop above.  Restore those pages by
  ** reading them back from the original database. */
  for(pDirty=pPager->pAll; pDirty; pDirty=pDirty->pNextAll){
    if( !pDirty->dirty ){
      continue;
    }
    if( (int)pDirty->pgno > pPager->origDbSize ){
      /* The page lies beyond the original end of file: it becomes zeros. */
      memset(zBuf, 0, SQLITE_PAGE_SIZE);
    }else{
      sqliteOsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pDirty->pgno-1));
      rc = sqliteOsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE);
      TRACE2("REFETCH %d\n", pDirty->pgno);
      CODEC(pPager, zBuf, pDirty->pgno, 2);
      if( rc ){
        goto end_playback;
      }
    }
    /* Overwrite the cached copy when the page is unreferenced or when
    ** its content differs from what was refetched. */
    if( pDirty->nRef==0
     || memcmp(zBuf, PGHDR_TO_DATA(pDirty), SQLITE_PAGE_SIZE)
    ){
      memcpy(PGHDR_TO_DATA(pDirty), zBuf, SQLITE_PAGE_SIZE);
      memset(PGHDR_TO_EXTRA(pDirty), 0, pPager->nExtra);
    }
    pDirty->needSync = 0;
    pDirty->dirty = 0;
  }

end_playback:
  /* The write lock is dropped on every path.  On success the result of
  ** dropping it is returned; on failure the pager is marked corrupt. */
  if( rc==SQLITE_OK ){
    return pager_unwritelock(pPager);
  }
  pager_unwritelock(pPager);
  pPager->errMask |= PAGER_ERR_CORRUPT;
  return SQLITE_CORRUPT;
}

/*
** Playback the checkpoint journal.
**
** This is similar to playing back the transaction journal but with
** a few extra twists.
**
**    (1)  The number of pages in the database file at the start of
**         the checkpoint is stored in pPager->ckptSize, not in the
**         journal file itself.
**
**    (2)  In addition to playing back the checkpoint journal, also
**         playback all pages of the transaction journal beginning
**         at offset pPager->ckptJSize.
*/
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -