hugetlb.c
	h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
	h->nr_huge_pages = 0;
	h->free_huge_pages = 0;
	for (i = 0; i < MAX_NUMNODES; ++i)
		INIT_LIST_HEAD(&h->hugepage_freelists[i]);
	h->hugetlb_next_nid = first_node(node_online_map);
	snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
			huge_page_size(h)/1024);

	parsed_hstate = h;
}

static int __init hugetlb_nrpages_setup(char *s)
{
	unsigned long *mhp;
	static unsigned long *last_mhp;

	/*
	 * !max_hstate means we haven't parsed a hugepagesz= parameter yet,
	 * so this hugepages= parameter goes to the "default hstate".
	 */
	if (!max_hstate)
		mhp = &default_hstate_max_huge_pages;
	else
		mhp = &parsed_hstate->max_huge_pages;

	if (mhp == last_mhp) {
		printk(KERN_WARNING "hugepages= specified twice without "
			"interleaving hugepagesz=, ignoring\n");
		return 1;
	}

	if (sscanf(s, "%lu", mhp) <= 0)
		*mhp = 0;

	/*
	 * Global state is always initialized later in hugetlb_init.
	 * But we need to allocate >= MAX_ORDER hstates here early to still
	 * use the bootmem allocator.
	 */
	if (max_hstate && parsed_hstate->order >= MAX_ORDER)
		hugetlb_hstate_alloc_pages(parsed_hstate);

	last_mhp = mhp;

	return 1;
}
__setup("hugepages=", hugetlb_nrpages_setup);

static int __init hugetlb_default_setup(char *s)
{
	default_hstate_size = memparse(s, &s);
	return 1;
}
__setup("default_hugepagesz=", hugetlb_default_setup);

static unsigned int cpuset_mems_nr(unsigned int *array)
{
	int node;
	unsigned int nr = 0;

	for_each_node_mask(node, cpuset_current_mems_allowed)
		nr += array[node];

	return nr;
}

#ifdef CONFIG_SYSCTL
int hugetlb_sysctl_handler(struct ctl_table *table, int write,
			   struct file *file, void __user *buffer,
			   size_t *length, loff_t *ppos)
{
	struct hstate *h = &default_hstate;
	unsigned long tmp;

	if (!write)
		tmp = h->max_huge_pages;

	table->data = &tmp;
	table->maxlen = sizeof(unsigned long);
	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);

	if (write)
		h->max_huge_pages = set_max_huge_pages(h, tmp);

	return 0;
}

int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
			struct file *file, void __user *buffer,
			size_t *length, loff_t *ppos)
{
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (hugepages_treat_as_movable)
		htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
	else
		htlb_alloc_mask = GFP_HIGHUSER;
	return 0;
}

int hugetlb_overcommit_handler(struct ctl_table *table, int write,
			struct file *file, void __user *buffer,
			size_t *length, loff_t *ppos)
{
	struct hstate *h = &default_hstate;
	unsigned long tmp;

	if (!write)
		tmp = h->nr_overcommit_huge_pages;

	table->data = &tmp;
	table->maxlen = sizeof(unsigned long);
	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);

	if (write) {
		spin_lock(&hugetlb_lock);
		h->nr_overcommit_huge_pages = tmp;
		spin_unlock(&hugetlb_lock);
	}

	return 0;
}

#endif /* CONFIG_SYSCTL */

void hugetlb_report_meminfo(struct seq_file *m)
{
	struct hstate *h = &default_hstate;
	seq_printf(m,
			"HugePages_Total:   %5lu\n"
			"HugePages_Free:    %5lu\n"
			"HugePages_Rsvd:    %5lu\n"
			"HugePages_Surp:    %5lu\n"
			"Hugepagesize:   %8lu kB\n",
			h->nr_huge_pages,
			h->free_huge_pages,
			h->resv_huge_pages,
			h->surplus_huge_pages,
			1UL << (huge_page_order(h) + PAGE_SHIFT - 10));
}

int hugetlb_report_node_meminfo(int nid, char *buf)
{
	struct hstate *h = &default_hstate;
	return sprintf(buf,
		"Node %d HugePages_Total: %5u\n"
		"Node %d HugePages_Free:  %5u\n"
		"Node %d HugePages_Surp:  %5u\n",
		nid, h->nr_huge_pages_node[nid],
		nid, h->free_huge_pages_node[nid],
		nid, h->surplus_huge_pages_node[nid]);
}
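/*
 * Illustrative usage of the knobs handled above (the values are examples
 * only): "hugepages=64" on the kernel command line pre-allocates 64 huge
 * pages at boot via hugetlb_nrpages_setup(); "hugepagesz=" (parsed by
 * architecture code) selects which pool a subsequent hugepages= applies
 * to; "default_hugepagesz=" picks the default huge page size via
 * hugetlb_default_setup(). At run time,
 *
 *     echo 64 > /proc/sys/vm/nr_hugepages
 *
 * reaches hugetlb_sysctl_handler() to grow or shrink the pool. The pool
 * counters reported above appear in /proc/meminfo and in the per-node
 * meminfo files.
 */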
/*
 * Return the number of pages of memory we physically have, in PAGE_SIZE
 * units.
 */
unsigned long hugetlb_total_pages(void)
{
	struct hstate *h = &default_hstate;
	return h->nr_huge_pages * pages_per_huge_page(h);
}

static int hugetlb_acct_memory(struct hstate *h, long delta)
{
	int ret = -ENOMEM;

	spin_lock(&hugetlb_lock);
	/*
	 * When cpuset is configured, it breaks the strict hugetlb page
	 * reservation as the accounting is done on a global variable. Such
	 * a reservation is completely rubbish in the presence of cpuset
	 * because the reservation is not checked against page availability
	 * for the current cpuset. The application can still be OOM-killed
	 * by the kernel for lack of free huge pages in the cpuset the task
	 * is in. Attempting to enforce strict accounting with cpuset is
	 * almost impossible (or too ugly) because cpusets are too fluid:
	 * a task or memory node can be dynamically moved between cpusets.
	 *
	 * The change of semantics for shared hugetlb mappings with cpuset
	 * is undesirable. However, in order to preserve some of the
	 * semantics, we fall back to checking against current free page
	 * availability as a best attempt, hopefully minimizing the impact
	 * of the semantics change that cpuset brings.
	 */
	if (delta > 0) {
		if (gather_surplus_pages(h, delta) < 0)
			goto out;

		if (delta > cpuset_mems_nr(h->free_huge_pages_node)) {
			return_unused_surplus_pages(h, delta);
			goto out;
		}
	}

	ret = 0;
	if (delta < 0)
		return_unused_surplus_pages(h, (unsigned long) -delta);

out:
	spin_unlock(&hugetlb_lock);
	return ret;
}

static void hugetlb_vm_op_open(struct vm_area_struct *vma)
{
	struct resv_map *reservations = vma_resv_map(vma);

	/*
	 * This new VMA should share its sibling's reservation map if present.
	 * The VMA will only ever have a valid reservation map pointer where
	 * it is being copied for another still existing VMA. As that VMA
	 * has a reference to the reservation map it cannot disappear until
	 * after this open call completes. It is therefore safe to take a
	 * new reference here without additional locking.
	 */
	if (reservations)
		kref_get(&reservations->refs);
}

static void hugetlb_vm_op_close(struct vm_area_struct *vma)
{
	struct hstate *h = hstate_vma(vma);
	struct resv_map *reservations = vma_resv_map(vma);
	unsigned long reserve;
	unsigned long start;
	unsigned long end;

	if (reservations) {
		start = vma_hugecache_offset(h, vma, vma->vm_start);
		end = vma_hugecache_offset(h, vma, vma->vm_end);

		reserve = (end - start) -
			region_count(&reservations->regions, start, end);

		kref_put(&reservations->refs, resv_map_release);

		if (reserve) {
			hugetlb_acct_memory(h, -reserve);
			hugetlb_put_quota(vma->vm_file->f_mapping, reserve);
		}
	}
}
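/*
 * These handlers back every hugetlbfs mapping. A minimal user-space
 * sketch, assuming hugetlbfs is mounted at /mnt/huge (the path and
 * length are illustrative):
 *
 *     int fd = open("/mnt/huge/f", O_CREAT | O_RDWR, 0600);
 *     char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *                    MAP_SHARED, fd, 0);
 *
 * The mmap() installs hugetlb_vm_ops on the new VMA;
 * hugetlb_vm_op_open() then runs whenever that VMA is duplicated
 * (fork() or a VMA split), and hugetlb_vm_op_close() runs on
 * munmap()/exit, where it gives back any unconsumed reservation.
 */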
/*
 * We cannot handle pagefaults against hugetlb pages at all. They cause
 * handle_mm_fault() to try to instantiate regular-sized pages in the
 * hugepage VMA. do_page_fault() is supposed to trap this, so BUG if we get
 * this far.
 */
static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	BUG();
	return 0;
}

struct vm_operations_struct hugetlb_vm_ops = {
	.fault = hugetlb_vm_op_fault,
	.open = hugetlb_vm_op_open,
	.close = hugetlb_vm_op_close,
};

static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
				int writable)
{
	pte_t entry;

	if (writable) {
		entry =
		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
	} else {
		entry = huge_pte_wrprotect(mk_pte(page, vma->vm_page_prot));
	}
	entry = pte_mkyoung(entry);
	entry = pte_mkhuge(entry);

	return entry;
}

static void set_huge_ptep_writable(struct vm_area_struct *vma,
				   unsigned long address, pte_t *ptep)
{
	pte_t entry;

	entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep)));
	if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) {
		update_mmu_cache(vma, address, entry);
	}
}

int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
			    struct vm_area_struct *vma)
{
	pte_t *src_pte, *dst_pte, entry;
	struct page *ptepage;
	unsigned long addr;
	int cow;
	struct hstate *h = hstate_vma(vma);
	unsigned long sz = huge_page_size(h);

	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
		src_pte = huge_pte_offset(src, addr);
		if (!src_pte)
			continue;
		dst_pte = huge_pte_alloc(dst, addr, sz);
		if (!dst_pte)
			goto nomem;

		/* If the pagetables are shared don't copy or take references */
		if (dst_pte == src_pte)
			continue;

		spin_lock(&dst->page_table_lock);
		spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING);
		if (!huge_pte_none(huge_ptep_get(src_pte))) {
			if (cow)
				huge_ptep_set_wrprotect(src, addr, src_pte);
			entry = huge_ptep_get(src_pte);
			ptepage = pte_page(entry);
			get_page(ptepage);
			set_huge_pte_at(dst, addr, dst_pte, entry);
		}
		spin_unlock(&src->page_table_lock);
		spin_unlock(&dst->page_table_lock);
	}
	return 0;

nomem:
	return -ENOMEM;
}

void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end, struct page *ref_page)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address;
	pte_t *ptep;
	pte_t pte;
	struct page *page;
	struct page *tmp;
	struct hstate *h = hstate_vma(vma);
	unsigned long sz = huge_page_size(h);

	/*
	 * A page gathering list, protected by per file i_mmap_lock. The
	 * lock is used to avoid list corruption from multiple unmapping
	 * of the same page since we are using page->lru.
	 */
	LIST_HEAD(page_list);

	WARN_ON(!is_vm_hugetlb_page(vma));
	BUG_ON(start & ~huge_page_mask(h));
	BUG_ON(end & ~huge_page_mask(h));

	mmu_notifier_invalidate_range_start(mm, start, end);
	spin_lock(&mm->page_table_lock);
	for (address = start; address < end; address += sz) {
		ptep = huge_pte_offset(mm, address);
		if (!ptep)
			continue;

		if (huge_pmd_unshare(mm, &address, ptep))
			continue;

		/*
		 * If a reference page is supplied, it is because a specific
		 * page is being unmapped, not a range. Ensure the page we
		 * are about to unmap is the actual page of interest.
		 */
		if (ref_page) {
			pte = huge_ptep_get(ptep);
			if (huge_pte_none(pte))
				continue;
			page = pte_page(pte);
			if (page != ref_page)
				continue;

			/*
			 * Mark the VMA as having unmapped its page so that
			 * future faults in this VMA will fail rather than
			 * looking like data was lost
			 */
			set_vma_resv_flags(vma, HPAGE_RESV_UNMAPPED);
		}

		pte = huge_ptep_get_and_clear(mm, address, ptep);
		if (huge_pte_none(pte))
			continue;

		page = pte_page(pte);
		if (pte_dirty(pte))
			set_page_dirty(page);
		list_add(&page->lru, &page_list);
	}
	spin_unlock(&mm->page_table_lock);
	flush_tlb_range(vma, start, end);
	mmu_notifier_invalidate_range_end(mm, start, end);
	list_for_each_entry_safe(page, tmp, &page_list, lru) {
		list_del(&page->lru);
		put_page(page);
	}
}

void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
			  unsigned long end, struct page *ref_page)
{
	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
	__unmap_hugepage_range(vma, start, end, ref_page);
	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
}

/*
 * This is called when the original mapper is failing to COW a MAP_PRIVATE
 * mapping it owns the reserve page for. The intention is to unmap the page
 * from other VMAs and let the children be SIGKILLed if they are faulting the
 * same region.
 */
static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
				struct page *page, unsigned long address)
{
	struct hstate *h = hstate_vma(vma);
	struct vm_area_struct *iter_vma;
	struct address_space *mapping;
	struct prio_tree_iter iter;
	pgoff_t pgoff;

	/*
	 * vm_pgoff is in PAGE_SIZE units, hence the different calculation
	 * from page cache lookup which is in HPAGE_SIZE units.
	 */
	address = address & huge_page_mask(h);
	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
		+ (vma->vm_pgoff >> PAGE_SHIFT);
	mapping = (struct address_space *)page_private(page);

	vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		/* Do not unmap the current VMA */
		if (iter_vma == vma)
			continue;

		/*
		 * Unmap the page from other VMAs without their own reserves.
		 * They get marked to be SIGKILLed if they fault in these
		 * areas. This is because a future no-page fault on this VMA
		 * could insert a zeroed page instead of the data existing
		 * from the time of fork. This would look like data corruption.
		 */
		if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
			unmap_hugepage_range(iter_vma,
				address, address + huge_page_size(h),
				page);
	}

	return 1;
}

static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, pte_t *ptep, pte_t pte,
			struct page *pagecache_page)
{
	struct hstate *h = hstate_vma(vma);
	struct page *old_page, *new_page;
	int avoidcopy;
	int outside_reserve = 0;

	old_page = pte_page(pte);

retry_avoidcopy:
	/* If no-one else is actually using this page, avoid the copy
	 * and just make the page writable */