?? tcbdb.c
字號:
/************************************************************************************************* * The B+ tree database API of Tokyo Cabinet * Copyright (C) 2006-2009 Mikio Hirabayashi * This file is part of Tokyo Cabinet. * Tokyo Cabinet is free software; you can redistribute it and/or modify it under the terms of * the GNU Lesser General Public License as published by the Free Software Foundation; either * version 2.1 of the License or any later version. Tokyo Cabinet is distributed in the hope * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for more details. * You should have received a copy of the GNU Lesser General Public License along with Tokyo * Cabinet; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, * Boston, MA 02111-1307 USA. *************************************************************************************************/#include "tcutil.h"#include "tchdb.h"#include "tcbdb.h"#include "myconf.h"#define BDBOPAQUESIZ 64 // size of using opaque field#define BDBLEFTOPQSIZ 64 // size of left opaque field#define BDBPAGEBUFSIZ 32768 // size of a buffer to read each page#define BDBNODEIDBASE ((1LL<<48)+1) // base number of node ID#define BDBLEVELMAX 64 // max level of B+ tree#define BDBCACHEOUT 8 // number of pages in a process of cacheout#define BDBDEFLMEMB 128 // default number of members in each leaf#define BDBMINLMEMB 4 // minimum number of members in each leaf#define BDBDEFNMEMB 256 // default number of members in each node#define BDBMINNMEMB 4 // minimum number of members in each node#define BDBDEFBNUM 32749 // default bucket number#define BDBDEFAPOW 8 // default alignment power#define BDBDEFFPOW 10 // default free block pool power#define BDBDEFLCNUM 1024 // default number of leaf cache#define BDBDEFNCNUM 512 // default number of node cache#define BDBDEFLSMAX 16384 // default maximum size of each leaf#define BDBMINLSMAX 512 // minimum maximum size of each leaftypedef struct { // type of structure for a record int ksiz; // size of the key region int vsiz; // size of the value region TCLIST *rest; // list of value objects} BDBREC;typedef struct { // type of structure for a leaf page uint64_t id; // ID number of the leaf TCPTRLIST *recs; // list of records int size; // predicted size of serialized buffer uint64_t prev; // ID number of the previous leaf uint64_t next; // ID number of the next leaf bool dirty; // whether to be written back bool dead; // whether to be removed} BDBLEAF;typedef struct { // type of structure for a page index uint64_t pid; // ID number of the referring page int ksiz; // size of the key region} BDBIDX;typedef struct { // type of structure for a node page uint64_t id; // ID number of the node uint64_t heir; // ID of the child before the first index TCPTRLIST *idxs; // list of indices bool dirty; // whether to be written back bool dead; // whether to be removed} BDBNODE;enum { // enumeration for duplication behavior BDBPDOVER, // overwrite an existing value BDBPDKEEP, // keep the existing value BDBPDCAT, // concatenate values BDBPDDUP, // allow duplication of keys BDBPDDUPB, // allow backward duplication BDBPDADDINT, // add an integer BDBPDADDDBL, // add a real number BDBPDPROC // process by a callback function};typedef struct { // type of structure for a duplication callback TCPDPROC proc; // function pointer void *op; // opaque pointer} BDBPDPROCOP;/* private macros */#define BDBLOCKMETHOD(TC_bdb, TC_wr) \ ((TC_bdb)->mmtx ? tcbdblockmethod((TC_bdb), (TC_wr)) : true)#define BDBUNLOCKMETHOD(TC_bdb) \ ((TC_bdb)->mmtx ? tcbdbunlockmethod(TC_bdb) : true)#define BDBLOCKCACHE(TC_bdb) \ ((TC_bdb)->mmtx ? tcbdblockcache(TC_bdb) : true)#define BDBUNLOCKCACHE(TC_bdb) \ ((TC_bdb)->mmtx ? tcbdbunlockcache(TC_bdb) : true)#define BDBTHREADYIELD(TC_bdb) \ do { if((TC_bdb)->mmtx) sched_yield(); } while(false)/* private function prototypes */static void tcbdbclear(TCBDB *bdb);static void tcbdbdumpmeta(TCBDB *bdb);static void tcbdbloadmeta(TCBDB *bdb);static BDBLEAF *tcbdbleafnew(TCBDB *bdb, uint64_t prev, uint64_t next);static bool tcbdbleafcacheout(TCBDB *bdb, BDBLEAF *leaf);static bool tcbdbleafsave(TCBDB *bdb, BDBLEAF *leaf);static BDBLEAF *tcbdbleafload(TCBDB *bdb, uint64_t id);static bool tcbdbleafcheck(TCBDB *bdb, uint64_t id);static BDBLEAF *tcbdbgethistleaf(TCBDB *bdb, const char *kbuf, int ksiz, uint64_t id);static bool tcbdbleafaddrec(TCBDB *bdb, BDBLEAF *leaf, int dmode, const char *kbuf, int ksiz, const char *vbuf, int vsiz);static BDBLEAF *tcbdbleafdivide(TCBDB *bdb, BDBLEAF *leaf);static bool tcbdbleafkill(TCBDB *bdb, BDBLEAF *leaf);static BDBNODE *tcbdbnodenew(TCBDB *bdb, uint64_t heir);static bool tcbdbnodecacheout(TCBDB *bdb, BDBNODE *node);static bool tcbdbnodesave(TCBDB *bdb, BDBNODE *node);static BDBNODE *tcbdbnodeload(TCBDB *bdb, uint64_t id);static void tcbdbnodeaddidx(TCBDB *bdb, BDBNODE *node, bool order, uint64_t pid, const char *kbuf, int ksiz);static bool tcbdbnodesubidx(TCBDB *bdb, BDBNODE *node, uint64_t pid);static uint64_t tcbdbsearchleaf(TCBDB *bdb, const char *kbuf, int ksiz);static BDBREC *tcbdbsearchrec(TCBDB *bdb, BDBLEAF *leaf, const char *kbuf, int ksiz, int *ip);static void tcbdbremoverec(TCBDB *bdb, BDBLEAF *leaf, BDBREC *rec, int ri);static bool tcbdbcacheadjust(TCBDB *bdb);static void tcbdbcachepurge(TCBDB *bdb);static bool tcbdbopenimpl(TCBDB *bdb, const char *path, int omode);static bool tcbdbcloseimpl(TCBDB *bdb);static bool tcbdbputimpl(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz, int dmode);static bool tcbdboutimpl(TCBDB *bdb, const char *kbuf, int ksiz);static bool tcbdboutlist(TCBDB *bdb, const char *kbuf, int ksiz);static const char *tcbdbgetimpl(TCBDB *bdb, const char *kbuf, int ksiz, int *sp);static int tcbdbgetnum(TCBDB *bdb, const char *kbuf, int ksiz);static TCLIST *tcbdbgetlist(TCBDB *bdb, const char *kbuf, int ksiz);static bool tcbdbrangeimpl(TCBDB *bdb, const char *bkbuf, int bksiz, bool binc, const char *ekbuf, int eksiz, bool einc, int max, TCLIST *keys);static bool tcbdbrangefwm(TCBDB *bdb, const char *pbuf, int psiz, int max, TCLIST *keys);static bool tcbdboptimizeimpl(TCBDB *bdb, int32_t lmemb, int32_t nmemb, int64_t bnum, int8_t apow, int8_t fpow, uint8_t opts);static bool tcbdbvanishimpl(TCBDB *bdb);static bool tcbdblockmethod(TCBDB *bdb, bool wr);static bool tcbdbunlockmethod(TCBDB *bdb);static bool tcbdblockcache(TCBDB *bdb);static bool tcbdbunlockcache(TCBDB *bdb);static bool tcbdbcurfirstimpl(BDBCUR *cur);static bool tcbdbcurlastimpl(BDBCUR *cur);static bool tcbdbcurjumpimpl(BDBCUR *cur, const char *kbuf, int ksiz, bool forward);static bool tcbdbcuradjust(BDBCUR *cur, bool forward);static bool tcbdbcurprevimpl(BDBCUR *cur);static bool tcbdbcurnextimpl(BDBCUR *cur);static bool tcbdbcurputimpl(BDBCUR *cur, const char *vbuf, int vsiz, int mode);static bool tcbdbcuroutimpl(BDBCUR *cur);static bool tcbdbcurrecimpl(BDBCUR *cur, const char **kbp, int *ksp, const char **vbp, int *vsp);static bool tcbdbforeachimpl(TCBDB *bdb, TCITER iter, void *op);/* debugging function prototypes */void tcbdbprintmeta(TCBDB *bdb);void tcbdbprintleaf(TCBDB *bdb, BDBLEAF *leaf);void tcbdbprintnode(TCBDB *bdb, BDBNODE *node);/************************************************************************************************* * API *************************************************************************************************//* Get the message string corresponding to an error code. */const char *tcbdberrmsg(int ecode){ return tcerrmsg(ecode);}/* Create a B+ tree database object. */TCBDB *tcbdbnew(void){ TCBDB *bdb; TCMALLOC(bdb, sizeof(*bdb)); tcbdbclear(bdb); bdb->hdb = tchdbnew(); TCMALLOC(bdb->hist, sizeof(*bdb->hist) * BDBLEVELMAX); tchdbtune(bdb->hdb, BDBDEFBNUM, BDBDEFAPOW, BDBDEFFPOW, 0); tchdbsetxmsiz(bdb->hdb, 0); return bdb;}/* Delete a B+ tree database object. */void tcbdbdel(TCBDB *bdb){ assert(bdb); if(bdb->open) tcbdbclose(bdb); TCFREE(bdb->hist); tchdbdel(bdb->hdb); if(bdb->mmtx){ pthread_mutex_destroy(bdb->cmtx); pthread_rwlock_destroy(bdb->mmtx); TCFREE(bdb->cmtx); TCFREE(bdb->mmtx); } TCFREE(bdb);}/* Get the last happened error code of a B+ tree database object. */int tcbdbecode(TCBDB *bdb){ assert(bdb); return tchdbecode(bdb->hdb);}/* Set mutual exclusion control of a B+ tree database object for threading. */bool tcbdbsetmutex(TCBDB *bdb){ assert(bdb); if(!TCUSEPTHREAD) return true; if(bdb->mmtx || bdb->open){ tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__); return false; } TCMALLOC(bdb->mmtx, sizeof(pthread_rwlock_t)); TCMALLOC(bdb->cmtx, sizeof(pthread_mutex_t)); bool err = false; if(pthread_rwlock_init(bdb->mmtx, NULL) != 0) err = true; if(pthread_mutex_init(bdb->cmtx, NULL) != 0) err = true; if(err){ TCFREE(bdb->cmtx); TCFREE(bdb->mmtx); bdb->cmtx = NULL; bdb->mmtx = NULL; return false; } return tchdbsetmutex(bdb->hdb);}/* Set the custom comparison function of a B+ tree database object. */bool tcbdbsetcmpfunc(TCBDB *bdb, TCCMP cmp, void *cmpop){ assert(bdb && cmp); if(bdb->open){ tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__); return false; } bdb->cmp = cmp; bdb->cmpop = cmpop; return true;}/* Set the tuning parameters of a B+ tree database object. */bool tcbdbtune(TCBDB *bdb, int32_t lmemb, int32_t nmemb, int64_t bnum, int8_t apow, int8_t fpow, uint8_t opts){ assert(bdb); if(bdb->open){ tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__); return false; } bdb->lmemb = (lmemb > 0) ? tclmax(lmemb, BDBMINLMEMB) : BDBDEFLMEMB; bdb->nmemb = (nmemb > 0) ? tclmax(nmemb, BDBMINNMEMB) : BDBDEFNMEMB; bdb->opts = opts; uint8_t hopts = 0; if(opts & BDBTLARGE) hopts |= HDBTLARGE; if(opts & BDBTDEFLATE) hopts |= HDBTDEFLATE; if(opts & BDBTBZIP) hopts |= HDBTBZIP; if(opts & BDBTTCBS) hopts |= HDBTTCBS; if(opts & BDBTEXCODEC) hopts |= HDBTEXCODEC; bnum = (bnum > 0) ? bnum : BDBDEFBNUM; apow = (apow >= 0) ? apow : BDBDEFAPOW; fpow = (fpow >= 0) ? fpow : BDBDEFFPOW; return tchdbtune(bdb->hdb, bnum, apow, fpow, hopts);}/* Set the caching parameters of a B+ tree database object. */bool tcbdbsetcache(TCBDB *bdb, int32_t lcnum, int32_t ncnum){ assert(bdb); if(bdb->open){ tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__); return false; } if(lcnum > 0) bdb->lcnum = tclmax(lcnum, BDBLEVELMAX); if(ncnum > 0) bdb->ncnum = tclmax(ncnum, BDBLEVELMAX); return true;}/* Set the size of the extra mapped memory of a B+ tree database object. */bool tcbdbsetxmsiz(TCBDB *bdb, int64_t xmsiz){ assert(bdb); if(bdb->open){ tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__); return false; } return tchdbsetxmsiz(bdb->hdb, xmsiz);}/* Open a database file and connect a B+ tree database object. */bool tcbdbopen(TCBDB *bdb, const char *path, int omode){ assert(bdb && path); if(!BDBLOCKMETHOD(bdb, true)) return false; if(bdb->open){ tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__); BDBUNLOCKMETHOD(bdb); return false; } bool rv = tcbdbopenimpl(bdb, path, omode); BDBUNLOCKMETHOD(bdb); return rv;}/* Close a B+ tree database object. */bool tcbdbclose(TCBDB *bdb){ assert(bdb); if(!BDBLOCKMETHOD(bdb, true)) return false; if(!bdb->open){ tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__); BDBUNLOCKMETHOD(bdb); return false; } bool rv = tcbdbcloseimpl(bdb); BDBUNLOCKMETHOD(bdb); return rv;}/* Store a record into a B+ tree database object. */bool tcbdbput(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){ assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0); if(!BDBLOCKMETHOD(bdb, true)) return false; if(!bdb->open || !bdb->wmode){ tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__); BDBUNLOCKMETHOD(bdb); return false; } bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDOVER); BDBUNLOCKMETHOD(bdb); return rv;}/* Store a string record into a B+ tree database object. */bool tcbdbput2(TCBDB *bdb, const char *kstr, const char *vstr){ assert(bdb && kstr && vstr); return tcbdbput(bdb, kstr, strlen(kstr), vstr, strlen(vstr));}/* Store a new record into a B+ tree database object. */bool tcbdbputkeep(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){ assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0); if(!BDBLOCKMETHOD(bdb, true)) return false; if(!bdb->open || !bdb->wmode){ tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__); BDBUNLOCKMETHOD(bdb); return false; } bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDKEEP); BDBUNLOCKMETHOD(bdb); return rv;}/* Store a new string record into a B+ tree database object. */bool tcbdbputkeep2(TCBDB *bdb, const char *kstr, const char *vstr){ assert(bdb && kstr && vstr); return tcbdbputkeep(bdb, kstr, strlen(kstr), vstr, strlen(vstr));}/* Concatenate a value at the end of the existing record in a B+ tree database object. */bool tcbdbputcat(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){ assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0); if(!BDBLOCKMETHOD(bdb, true)) return false; if(!bdb->open || !bdb->wmode){ tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__); BDBUNLOCKMETHOD(bdb); return false; } bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDCAT); BDBUNLOCKMETHOD(bdb); return rv;}/* Concatenate a string value at the end of the existing record in a B+ tree database object. */bool tcbdbputcat2(TCBDB *bdb, const char *kstr, const char *vstr){ assert(bdb && kstr && vstr); return tcbdbputcat(bdb, kstr, strlen(kstr), vstr, strlen(vstr));}/* Store a record into a B+ tree database object with allowing duplication of keys. */bool tcbdbputdup(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){ assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0); if(!BDBLOCKMETHOD(bdb, true)) return false; if(!bdb->open || !bdb->wmode){ tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__); BDBUNLOCKMETHOD(bdb); return false; } bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDDUP); BDBUNLOCKMETHOD(bdb); return rv;}/* Store a string record into a B+ tree database object with allowing duplication of keys. */bool tcbdbputdup2(TCBDB *bdb, const char *kstr, const char *vstr){ assert(bdb && kstr && vstr); return tcbdbputdup(bdb, kstr, strlen(kstr), vstr, strlen(vstr));}/* Store records into a B+ tree database object with allowing duplication of keys. */bool tcbdbputdup3(TCBDB *bdb, const void *kbuf, int ksiz, const TCLIST *vals){ assert(bdb && kbuf && ksiz >= 0 && vals); if(!BDBLOCKMETHOD(bdb, true)) return false; if(!bdb->open || !bdb->wmode){ tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__); BDBUNLOCKMETHOD(bdb); return false; } bool err = false; int ln = TCLISTNUM(vals); for(int i = 0; i < ln; i++){ const char *vbuf; int vsiz; TCLISTVAL(vbuf, vals, i, vsiz); if(!tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDDUP)) err = true; } BDBUNLOCKMETHOD(bdb); return !err;}/* Remove a record of a B+ tree database object. */bool tcbdbout(TCBDB *bdb, const void *kbuf, int ksiz){ assert(bdb && kbuf && ksiz >= 0); if(!BDBLOCKMETHOD(bdb, true)) return false; if(!bdb->open || !bdb->wmode){
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -