hugetlb.c
/*
 * Generic hugetlb support.
 * (C) William Irwin, April 2004
 */
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/seq_file.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/mmu_notifier.h>
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>
#include <linux/bootmem.h>
#include <linux/sysfs.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/io.h>

#include <linux/hugetlb.h>
#include "internal.h"

const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
unsigned long hugepages_treat_as_movable;

static int max_hstate;
unsigned int default_hstate_idx;
struct hstate hstates[HUGE_MAX_HSTATE];

__initdata LIST_HEAD(huge_boot_pages);

/* for command line parsing */
static struct hstate * __initdata parsed_hstate;
static unsigned long __initdata default_hstate_max_huge_pages;
static unsigned long __initdata default_hstate_size;

#define for_each_hstate(h) \
        for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++)

/*
 * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
 */
static DEFINE_SPINLOCK(hugetlb_lock);

/*
 * Region tracking -- allows tracking of reservations and instantiated pages
 *                    across the pages in a mapping.
 *
 * The region data structures are protected by a combination of the mmap_sem
 * and the hugetlb_instantiation_mutex.  To access or modify a region the
 * caller must either hold the mmap_sem for write, or the mmap_sem for read
 * and the hugetlb_instantiation_mutex:
 *
 *        down_write(&mm->mmap_sem);
 * or
 *        down_read(&mm->mmap_sem);
 *        mutex_lock(&hugetlb_instantiation_mutex);
 */
struct file_region {
        struct list_head link;
        long from;
        long to;
};

static long region_add(struct list_head *head, long f, long t)
{
        struct file_region *rg, *nrg, *trg;

        /* Locate the region we are either in or before. */
        list_for_each_entry(rg, head, link)
                if (f <= rg->to)
                        break;

        /* Round our left edge to the current segment if it encloses us. */
        if (f > rg->from)
                f = rg->from;

        /* Check for and consume any regions we now overlap with. */
        nrg = rg;
        list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
                if (&rg->link == head)
                        break;
                if (rg->from > t)
                        break;

                /* If this area reaches higher then extend our area to
                 * include it completely.  If this is not the first area
                 * which we intend to reuse, free it. */
                if (rg->to > t)
                        t = rg->to;
                if (rg != nrg) {
                        list_del(&rg->link);
                        kfree(rg);
                }
        }
        nrg->from = f;
        nrg->to = t;
        return 0;
}

static long region_chg(struct list_head *head, long f, long t)
{
        struct file_region *rg, *nrg;
        long chg = 0;

        /* Locate the region we are before or in. */
        list_for_each_entry(rg, head, link)
                if (f <= rg->to)
                        break;

        /* If we are below the current region then a new region is required.
         * Subtle, allocate a new region at the position but make it zero
         * size such that we can guarantee to record the reservation. */
        if (&rg->link == head || t < rg->from) {
                nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
                if (!nrg)
                        return -ENOMEM;
                nrg->from = f;
                nrg->to = f;
                INIT_LIST_HEAD(&nrg->link);
                list_add(&nrg->link, rg->link.prev);

                return t - f;
        }

        /* Round our left edge to the current segment if it encloses us. */
        if (f > rg->from)
                f = rg->from;
        chg = t - f;

        /* Check for and consume any regions we now overlap with. */
        list_for_each_entry(rg, rg->link.prev, link) {
                if (&rg->link == head)
                        break;
                if (rg->from > t)
                        return chg;

                /* We overlap with this area; if it extends further than
                 * us then we must extend ourselves.  Account for its
                 * existing reservation. */
                if (rg->to > t) {
                        chg += rg->to - t;
                        t = rg->to;
                }
                chg -= rg->to - rg->from;
        }
        return chg;
}
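/*
 * Illustrative note (an editorial sketch, not part of the original source):
 * region_chg() and region_add() are intended to be used as a two-phase pair.
 * region_chg() only computes how many pages in [f, t) are not yet covered by
 * the map (pre-inserting a zero-size placeholder entry when the range falls
 * before any existing region, so that the later commit has an entry to reuse),
 * and region_add() then commits the range by extending and merging entries.
 *
 * Worked example, assuming the map currently holds the single region [0, 2):
 *
 *        region_chg(head, 1, 5)  returns 3  -- pages 2, 3 and 4 need new
 *                                             reservations; page 1 is already
 *                                             covered by [0, 2)
 *        region_add(head, 1, 5)  merges the existing entry into [0, 5)
 *                                and returns 0
 */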
static long region_truncate(struct list_head *head, long end)
{
        struct file_region *rg, *trg;
        long chg = 0;

        /* Locate the region we are either in or before. */
        list_for_each_entry(rg, head, link)
                if (end <= rg->to)
                        break;
        if (&rg->link == head)
                return 0;

        /* If we are in the middle of a region then adjust it. */
        if (end > rg->from) {
                chg = rg->to - end;
                rg->to = end;
                rg = list_entry(rg->link.next, typeof(*rg), link);
        }

        /* Drop any remaining regions. */
        list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
                if (&rg->link == head)
                        break;
                chg += rg->to - rg->from;
                list_del(&rg->link);
                kfree(rg);
        }
        return chg;
}

static long region_count(struct list_head *head, long f, long t)
{
        struct file_region *rg;
        long chg = 0;

        /* Locate each segment we overlap with, and count that overlap. */
        list_for_each_entry(rg, head, link) {
                int seg_from;
                int seg_to;

                if (rg->to <= f)
                        continue;
                if (rg->from >= t)
                        break;

                seg_from = max(rg->from, f);
                seg_to = min(rg->to, t);

                chg += seg_to - seg_from;
        }

        return chg;
}

/*
 * Convert the address within this vma to the page offset within
 * the mapping, in pagecache page units; huge pages here.
 */
static pgoff_t vma_hugecache_offset(struct hstate *h,
                        struct vm_area_struct *vma, unsigned long address)
{
        return ((address - vma->vm_start) >> huge_page_shift(h)) +
                        (vma->vm_pgoff >> huge_page_order(h));
}

/*
 * Return the size of the pages allocated when backing a VMA. In the majority
 * of cases this will be the same size as used by the page table entries.
 */
unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
{
        struct hstate *hstate;

        if (!is_vm_hugetlb_page(vma))
                return PAGE_SIZE;

        hstate = hstate_vma(vma);

        return 1UL << (hstate->order + PAGE_SHIFT);
}

/*
 * Return the page size being used by the MMU to back a VMA. In the majority
 * of cases, the page size used by the kernel matches the MMU size. On
 * architectures where it differs, an architecture-specific version of this
 * function is required.
 */
#ifndef vma_mmu_pagesize
unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
        return vma_kernel_pagesize(vma);
}
#endif
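/*
 * Illustrative example (an editorial sketch; the concrete values are assumed,
 * not taken from this file): with 4 KB base pages (PAGE_SHIFT == 12) and 2 MB
 * huge pages (order 9, so huge_page_shift() == 21), vma_hugecache_offset()
 * reduces to
 *
 *        ((address - vma->vm_start) >> 21) + (vma->vm_pgoff >> 9)
 *
 * i.e. the offset into the VMA in whole huge pages, plus the VMA's file
 * offset converted from base-page units into huge-page units.  Under the
 * same assumptions, vma_kernel_pagesize() returns 1UL << (9 + 12) == 2 MB.
 */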
/*
 * Flags for MAP_PRIVATE reservations.  These are stored in the bottom
 * bits of the reservation map pointer, which are always clear due to
 * alignment.
 */
#define HPAGE_RESV_OWNER    (1UL << 0)
#define HPAGE_RESV_UNMAPPED (1UL << 1)
#define HPAGE_RESV_MASK (HPAGE_RESV_OWNER | HPAGE_RESV_UNMAPPED)

/*
 * These helpers are used to track how many pages are reserved for
 * faults in a MAP_PRIVATE mapping. Only the process that called mmap()
 * is guaranteed to have their future faults succeed.
 *
 * With the exception of reset_vma_resv_huge_pages() which is called at fork(),
 * the reserve counters are updated with the hugetlb_lock held. It is safe
 * to reset the VMA at fork() time as it is not in use yet and there is no
 * chance of the global counters getting corrupted as a result of the values.
 *
 * The private mapping reservation is represented in a subtly different
 * manner to a shared mapping.  A shared mapping has a region map associated
 * with the underlying file; this region map represents the backing file
 * pages which have ever had a reservation assigned, and it persists even
 * after the page is instantiated.  A private mapping has a region map
 * associated with the original mmap which is attached to all VMAs which
 * reference it; this region map represents those offsets which have consumed
 * a reservation, i.e. where pages have been instantiated.
 */
static unsigned long get_vma_private_data(struct vm_area_struct *vma)
{
        return (unsigned long)vma->vm_private_data;
}

static void set_vma_private_data(struct vm_area_struct *vma,
                                                        unsigned long value)
{
        vma->vm_private_data = (void *)value;
}

struct resv_map {
        struct kref refs;
        struct list_head regions;
};

static struct resv_map *resv_map_alloc(void)
{
        struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
        if (!resv_map)
                return NULL;

        kref_init(&resv_map->refs);
        INIT_LIST_HEAD(&resv_map->regions);

        return resv_map;
}

static void resv_map_release(struct kref *ref)
{
        struct resv_map *resv_map = container_of(ref, struct resv_map, refs);

        /* Clear out any active regions before we release the map. */
        region_truncate(&resv_map->regions, 0);
        kfree(resv_map);
}

static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
{
        VM_BUG_ON(!is_vm_hugetlb_page(vma));
        if (!(vma->vm_flags & VM_SHARED))
                return (struct resv_map *)(get_vma_private_data(vma) &
                                                        ~HPAGE_RESV_MASK);
        return NULL;
}

static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
{
        VM_BUG_ON(!is_vm_hugetlb_page(vma));
        VM_BUG_ON(vma->vm_flags & VM_SHARED);

        set_vma_private_data(vma, (get_vma_private_data(vma) &
                                HPAGE_RESV_MASK) | (unsigned long)map);
}

static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
{
        VM_BUG_ON(!is_vm_hugetlb_page(vma));
        VM_BUG_ON(vma->vm_flags & VM_SHARED);

        set_vma_private_data(vma, get_vma_private_data(vma) | flags);
}

static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
{
        VM_BUG_ON(!is_vm_hugetlb_page(vma));

        return (get_vma_private_data(vma) & flag) != 0;
}
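/*
 * Illustrative sketch (an editorial note, not part of the original source):
 * for a private mapping, vma->vm_private_data carries both the resv_map
 * pointer and the HPAGE_RESV_* flags.  Because a kmalloc()ed resv_map is at
 * least word aligned, the bottom two bits of the pointer are known to be
 * zero, so after, for example,
 *
 *        set_vma_resv_map(vma, map);
 *        set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
 *
 * vm_private_data holds ((unsigned long)map | HPAGE_RESV_OWNER).
 * vma_resv_map() recovers the pointer by masking with ~HPAGE_RESV_MASK,
 * while is_vma_resv_set() tests only the flag bits.
 */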
/* Decrement the reserved pages in the hugepage pool by one */
static void decrement_hugepage_resv_vma(struct hstate *h,
                        struct vm_area_struct *vma)
{
        if (vma->vm_flags & VM_NORESERVE)
                return;

        if (vma->vm_flags & VM_SHARED) {
                /* Shared mappings always use reserves */
                h->resv_huge_pages--;
        } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
                /*
                 * Only the process that called mmap() has reserves for
                 * private mappings.
                 */
                h->resv_huge_pages--;
        }
}

/* Reset counters to 0 and clear all HPAGE_RESV_* flags */
void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
{
        VM_BUG_ON(!is_vm_hugetlb_page(vma));
        if (!(vma->vm_flags & VM_SHARED))
                vma->vm_private_data = (void *)0;
}

/* Returns true if the VMA has associated reserve pages */
static int vma_has_reserves(struct vm_area_struct *vma)
{
        if (vma->vm_flags & VM_SHARED)
                return 1;
        if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
                return 1;
        return 0;
}

static void clear_gigantic_page(struct page *page,
                        unsigned long addr, unsigned long sz)
{
        int i;
        struct page *p = page;

        might_sleep();
        for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) {
                cond_resched();
                clear_user_highpage(p, addr + i * PAGE_SIZE);
        }
}

static void clear_huge_page(struct page *page,
                        unsigned long addr, unsigned long sz)
{
        int i;

        if (unlikely(sz > MAX_ORDER_NR_PAGES)) {
                clear_gigantic_page(page, addr, sz);
                return;
        }

        might_sleep();
        for (i = 0; i < sz/PAGE_SIZE; i++) {
                cond_resched();
                clear_user_highpage(page + i, addr + i * PAGE_SIZE);
        }
}

static void copy_gigantic_page(struct page *dst, struct page *src,
                        unsigned long addr, struct vm_area_struct *vma)
{
        int i;
        struct hstate *h = hstate_vma(vma);
        struct page *dst_base = dst;
        struct page *src_base = src;

        might_sleep();
        for (i = 0; i < pages_per_huge_page(h); ) {
                cond_resched();
                copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);

                i++;
                dst = mem_map_next(dst, dst_base, i);
                src = mem_map_next(src, src_base, i);
        }
}

static void copy_huge_page(struct page *dst, struct page *src,
                        unsigned long addr, struct vm_area_struct *vma)
{
        int i;
        struct hstate *h = hstate_vma(vma);

        if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
                copy_gigantic_page(dst, src, addr, vma);
                return;
        }

        might_sleep();
        for (i = 0; i < pages_per_huge_page(h); i++) {
                cond_resched();
                copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
        }
}

static void enqueue_huge_page(struct hstate *h, struct page *page)
{
        int nid = page_to_nid(page);
        list_add(&page->lru, &h->hugepage_freelists[nid]);
        h->free_huge_pages++;
        h->free_huge_pages_node[nid]++;
}

static struct page *dequeue_huge_page(struct hstate *h)
{
        int nid;
        struct page *page = NULL;

        for (nid = 0; nid < MAX_NUMNODES; ++nid) {
                if (!list_empty(&h->hugepage_freelists[nid])) {
                        page = list_entry(h->hugepage_freelists[nid].next,
                                          struct page, lru);
                        list_del(&page->lru);
                        h->free_huge_pages--;
                        h->free_huge_pages_node[nid]--;
                        break;
                }
        }
        return page;
}
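/*
 * Illustrative note (an editorial sketch, not part of the original source):
 * enqueue_huge_page() and dequeue_huge_page() maintain one free list per
 * NUMA node together with the global free_huge_pages counter and the
 * per-node free_huge_pages_node[] counters.  These lists and counters are
 * exactly what hugetlb_lock is documented above to protect, so callers are
 * expected to hold that spinlock.  As shown, dequeue_huge_page() simply
 * takes the first page from the lowest-numbered node that has a free huge
 * page.
 */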