?? btr0btr.c
字號:
/******************************************************The B-tree(c) 1994-1996 Innobase OyCreated 6/2/1994 Heikki Tuuri*******************************************************/ #include "btr0btr.h"#ifdef UNIV_NONINL#include "btr0btr.ic"#endif#include "fsp0fsp.h"#include "page0page.h"#include "btr0cur.h"#include "btr0sea.h"#include "btr0pcur.h"#include "rem0cmp.h"#include "lock0lock.h"#include "ibuf0ibuf.h"#include "trx0trx.h"/*Latching strategy of the InnoDB B-tree--------------------------------------A tree latch protects all non-leaf nodes of the tree. Each node of a treealso has a latch of its own.A B-tree operation normally first acquires an S-latch on the tree. Itsearches down the tree and releases the tree latch when it has theleaf node latch. To save CPU time we do not acquire any latch onnon-leaf nodes of the tree during a search, those pages are only bufferfixed.If an operation needs to restructure the tree, it acquires an X-latch onthe tree before searching to a leaf node. If it needs, for example, tosplit a leaf,(1) InnoDB decides the split point in the leaf,(2) allocates a new page,(3) inserts the appropriate node pointer to the first non-leaf level,(4) releases the tree X-latch,(5) and then moves records from the leaf to the new allocated page.Node pointers-------------Leaf pages of a B-tree contain the index records stored in thetree. On levels n > 0 we store 'node pointers' to pages on leveln - 1. For each page there is exactly one node pointer stored:thus the our tree is an ordinary B-tree, not a B-link tree.A node pointer contains a prefix P of an index record. The prefixis long enough so that it determines an index record uniquely.The file page number of the child page is added as the lastfield. To the child page we can store node pointers or index recordswhich are >= P in the alphabetical order, but < P1 if there isa next node pointer on the level, and P1 is its prefix.If a node pointer with a prefix P points to a non-leaf child, then the leftmost record in the child must have the sameprefix P. If it points to a leaf node, the child is not requiredto contain any record with a prefix equal to P. The leaf caseis decided this way to allow arbitrary deletions in a leaf nodewithout touching upper levels of the tree.We have predefined a special minimum record which wedefine as the smallest record in any alphabetical order.A minimum record is denoted by setting a bit in the recordheader. A minimum record acts as the prefix of a node pointerwhich points to a leftmost node on any level of the tree.File page allocation--------------------In the root node of a B-tree there are two file segment headers.The leaf pages of a tree are allocated from one file segment, tomake them consecutive on disk if possible. From the other file segmentwe allocate pages for the non-leaf levels of the tree.*//******************************************************************Creates a new index page to the tree (not the root, and also notused in page reorganization). */staticvoidbtr_page_create(/*============*/ page_t* page, /* in: page to be created */ dict_tree_t* tree, /* in: index tree */ mtr_t* mtr); /* in: mtr *//****************************************************************Returns the upper level node pointer to a page. It is assumed thatmtr holds an x-latch on the tree. */staticrec_t*btr_page_get_father_node_ptr(/*=========================*/ /* out: pointer to node pointer record */ dict_tree_t* tree, /* in: index tree */ page_t* page, /* in: page: must contain at least one user record */ mtr_t* mtr); /* in: mtr *//*****************************************************************Empties an index page. */staticvoidbtr_page_empty(/*===========*/ page_t* page, /* in: page to be emptied */ mtr_t* mtr); /* in: mtr *//*****************************************************************Returns TRUE if the insert fits on the appropriate half-pagewith the chosen split_rec. */staticiboolbtr_page_insert_fits(/*=================*/ /* out: TRUE if fits */ btr_cur_t* cursor, /* in: cursor at which insert should be made */ rec_t* split_rec, /* in: suggestion for first record on upper half-page, or NULL if tuple should be first */ const ulint* offsets, /* in: rec_get_offsets( split_rec, cursor->index) */ dtuple_t* tuple, /* in: tuple to insert */ mem_heap_t* heap); /* in: temporary memory heap *//******************************************************************Gets the root node of a tree and x-latches it. */page_t*btr_root_get(/*=========*/ /* out: root page, x-latched */ dict_tree_t* tree, /* in: index tree */ mtr_t* mtr) /* in: mtr */{ ulint space; ulint root_page_no; page_t* root; space = dict_tree_get_space(tree); root_page_no = dict_tree_get_page(tree); root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr); ut_a((ibool)!!page_is_comp(root) == UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp); return(root);}/*****************************************************************Gets pointer to the previous user record in the tree. It is assumed thatthe caller has appropriate latches on the page and its neighbor. */rec_t*btr_get_prev_user_rec(/*==================*/ /* out: previous user record, NULL if there is none */ rec_t* rec, /* in: record on leaf level */ mtr_t* mtr) /* in: mtr holding a latch on the page, and if needed, also to the previous page */{ page_t* page; page_t* prev_page; ulint prev_page_no; ulint space; if (!page_rec_is_infimum(rec)) { rec_t* prev_rec = page_rec_get_prev(rec); if (!page_rec_is_infimum(prev_rec)) { return(prev_rec); } } page = buf_frame_align(rec); prev_page_no = btr_page_get_prev(page, mtr); space = buf_frame_get_space_id(page); if (prev_page_no != FIL_NULL) { prev_page = buf_page_get_with_no_latch(space, prev_page_no, mtr); /* The caller must already have a latch to the brother */ ut_ad((mtr_memo_contains(mtr, buf_block_align(prev_page), MTR_MEMO_PAGE_S_FIX)) || (mtr_memo_contains(mtr, buf_block_align(prev_page), MTR_MEMO_PAGE_X_FIX))); ut_a(page_is_comp(prev_page) == page_is_comp(page)); return(page_rec_get_prev(page_get_supremum_rec(prev_page))); } return(NULL);}/*****************************************************************Gets pointer to the next user record in the tree. It is assumed that thecaller has appropriate latches on the page and its neighbor. */rec_t*btr_get_next_user_rec(/*==================*/ /* out: next user record, NULL if there is none */ rec_t* rec, /* in: record on leaf level */ mtr_t* mtr) /* in: mtr holding a latch on the page, and if needed, also to the next page */{ page_t* page; page_t* next_page; ulint next_page_no; ulint space; if (!page_rec_is_supremum(rec)) { rec_t* next_rec = page_rec_get_next(rec); if (!page_rec_is_supremum(next_rec)) { return(next_rec); } } page = buf_frame_align(rec); next_page_no = btr_page_get_next(page, mtr); space = buf_frame_get_space_id(page); if (next_page_no != FIL_NULL) { next_page = buf_page_get_with_no_latch(space, next_page_no, mtr); /* The caller must already have a latch to the brother */ ut_ad((mtr_memo_contains(mtr, buf_block_align(next_page), MTR_MEMO_PAGE_S_FIX)) || (mtr_memo_contains(mtr, buf_block_align(next_page), MTR_MEMO_PAGE_X_FIX))); ut_a(page_is_comp(next_page) == page_is_comp(page)); return(page_rec_get_next(page_get_infimum_rec(next_page))); } return(NULL);}/******************************************************************Creates a new index page to the tree (not the root, and also not used inpage reorganization). */staticvoidbtr_page_create(/*============*/ page_t* page, /* in: page to be created */ dict_tree_t* tree, /* in: index tree */ mtr_t* mtr) /* in: mtr */{ ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); page_create(page, mtr, UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp); buf_block_align(page)->check_index_page_at_flush = TRUE; btr_page_set_index_id(page, tree->id, mtr);}/******************************************************************Allocates a new file page to be used in an ibuf tree. Takes the page fromthe free list of the tree, which must contain pages! */staticpage_t*btr_page_alloc_for_ibuf(/*====================*/ /* out: new allocated page, x-latched */ dict_tree_t* tree, /* in: index tree */ mtr_t* mtr) /* in: mtr */{ fil_addr_t node_addr; page_t* root; page_t* new_page; root = btr_root_get(tree, mtr); node_addr = flst_get_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr); ut_a(node_addr.page != FIL_NULL); new_page = buf_page_get(dict_tree_get_space(tree), node_addr.page, RW_X_LATCH, mtr);#ifdef UNIV_SYNC_DEBUG buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);#endif /* UNIV_SYNC_DEBUG */ flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); return(new_page);}/******************************************************************Allocates a new file page to be used in an index tree. NOTE: we assumethat the caller has made the reservation for free extents! */page_t*btr_page_alloc(/*===========*/ /* out: new allocated page, x-latched; NULL if out of space */ dict_tree_t* tree, /* in: index tree */ ulint hint_page_no, /* in: hint of a good page */ byte file_direction, /* in: direction where a possible page split is made */ ulint level, /* in: level where the page is placed in the tree */ mtr_t* mtr) /* in: mtr */{ fseg_header_t* seg_header; page_t* root; page_t* new_page; ulint new_page_no; if (tree->type & DICT_IBUF) { return(btr_page_alloc_for_ibuf(tree, mtr)); } root = btr_root_get(tree, mtr); if (level == 0) { seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; } else { seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; } /* Parameter TRUE below states that the caller has made the reservation for free extents, and thus we know that a page can be allocated: */ new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no, file_direction, TRUE, mtr); if (new_page_no == FIL_NULL) { return(NULL); } new_page = buf_page_get(dict_tree_get_space(tree), new_page_no, RW_X_LATCH, mtr);#ifdef UNIV_SYNC_DEBUG buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);#endif /* UNIV_SYNC_DEBUG */ return(new_page);} /******************************************************************Gets the number of pages in a B-tree. */ulintbtr_get_size(/*=========*/ /* out: number of pages */ dict_index_t* index, /* in: index */ ulint flag) /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */{ fseg_header_t* seg_header; page_t* root; ulint n; ulint dummy; mtr_t mtr; mtr_start(&mtr); mtr_s_lock(dict_tree_get_lock(index->tree), &mtr); root = btr_root_get(index->tree, &mtr); if (flag == BTR_N_LEAF_PAGES) { seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; fseg_n_reserved_pages(seg_header, &n, &mtr); } else if (flag == BTR_TOTAL_SIZE) { seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; n = fseg_n_reserved_pages(seg_header, &dummy, &mtr); seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; n += fseg_n_reserved_pages(seg_header, &dummy, &mtr); } else { ut_error; } mtr_commit(&mtr); return(n);} /******************************************************************Frees a page used in an ibuf tree. Puts the page to the free list of theibuf tree. */staticvoidbtr_page_free_for_ibuf(/*===================*/ dict_tree_t* tree, /* in: index tree */ page_t* page, /* in: page to be freed, x-latched */ mtr_t* mtr) /* in: mtr */{ page_t* root; ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); root = btr_root_get(tree, mtr); flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));}/******************************************************************Frees a file page used in an index tree. Can be used also to (BLOB)external storage pages, because the page level 0 can be given as anargument. */voidbtr_page_free_low(/*==============*/ dict_tree_t* tree, /* in: index tree */ page_t* page, /* in: page to be freed, x-latched */ ulint level, /* in: page level */ mtr_t* mtr) /* in: mtr */{ fseg_header_t* seg_header; page_t* root; ulint space; ulint page_no; ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); /* The page gets invalid for optimistic searches: increment the frame modify clock */ buf_frame_modify_clock_inc(page); if (tree->type & DICT_IBUF) { btr_page_free_for_ibuf(tree, page, mtr); return; } root = btr_root_get(tree, mtr); if (level == 0) { seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -