mmap.c
/*
 * mm/mmap.c
 *
 * Written by obz.
 *
 * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 */

#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>

#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifndef arch_rebalance_pgtables
#define arch_rebalance_pgtables(addr, len)		(addr)
#endif

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);

/*
 * WARNING: the debugging will use recursive algorithms so never enable this
 * unless you know what you are doing.
 */
#undef DEBUG_MM_RB

/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 * map_type	prot
 *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
 * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));
}
EXPORT_SYMBOL(vm_get_page_prot);
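vm_get_page_prot() is exported, and the common pattern outside this file is a driver mmap handler that needs a pgprot_t matching the VMA's access flags; the core mmap path also sets vma->vm_page_prot this way before calling the driver's mmap method. The following is a minimal sketch under that assumption: my_driver_mmap() and MY_DEVICE_PHYS_PFN are made-up names, and only vm_get_page_prot() and remap_pfn_range() are real APIs.

/*
 * Hypothetical illustration, not part of mm/mmap.c: a character-device
 * mmap handler that derives page protections from the VMA flags via
 * vm_get_page_prot() before remapping a physical region.
 */
static int my_driver_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/* translate VM_READ/VM_WRITE/VM_EXEC/VM_SHARED into a pgprot_t */
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	/* MY_DEVICE_PHYS_PFN is a made-up base page frame number */
	return remap_pfn_range(vma, vma->vm_start, MY_DEVICE_PHYS_PFN,
			       size, vma->vm_page_prot);
}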
int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
int sysctl_overcommit_ratio = 50;	/* default is 50% */
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);

/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
 *
 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
 * Additional code 2002 Jul 20 by Robert Love.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	unsigned long free, allowed;

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		unsigned long n;

		free = global_page_state(NR_FILE_PAGES);
		free += nr_swap_pages;

		/*
		 * Any slabs which are created with the
		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
		 * which are reclaimable, under pressure.  The dentry
		 * cache and most inode caches should fall into this
		 */
		free += global_page_state(NR_SLAB_RECLAIMABLE);

		/*
		 * Leave the last 3% for root
		 */
		if (!cap_sys_admin)
			free -= free / 32;

		if (free > pages)
			return 0;

		/*
		 * nr_free_pages() is very expensive on large systems,
		 * only call if we're about to fail.
		 */
		n = nr_free_pages();

		/*
		 * Leave reserved pages. The pages are not for anonymous pages.
		 */
		if (n <= totalreserve_pages)
			goto error;
		else
			n -= totalreserve_pages;

		/*
		 * Leave the last 3% for root
		 */
		if (!cap_sys_admin)
			n -= n / 32;
		free += n;

		if (free > pages)
			return 0;

		goto error;
	}

	allowed = (totalram_pages - hugetlb_total_pages())
		* sysctl_overcommit_ratio / 100;
	/*
	 * Leave the last 3% for root
	 */
	if (!cap_sys_admin)
		allowed -= allowed / 32;
	allowed += total_swap_pages;

	/* Don't let a single process grow too big:
	   leave 3% of the size of this process for other processes */
	if (mm)
		allowed -= mm->total_vm / 32;

	/*
	 * cast `allowed' as a signed long because vm_committed_space
	 * sometimes has a negative value
	 */
	if (atomic_long_read(&vm_committed_space) < (long)allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}
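As the comment above says, __vm_enough_memory() is meant to be called from LSM hooks rather than directly. A minimal sketch of such a wrapper follows; my_lsm_vm_enough_memory() is a hypothetical name (the real capability-LSM wrapper lives under security/, not in this file), while capable() and CAP_SYS_ADMIN are real kernel APIs.

/*
 * Hypothetical illustration, not part of mm/mmap.c: an LSM-style
 * wrapper that grants the 3% root reserve only when the caller
 * holds CAP_SYS_ADMIN, then defers to the accounting helper above.
 */
static int my_lsm_vm_enough_memory(struct mm_struct *mm, long pages)
{
	int cap_sys_admin = 0;

	if (capable(CAP_SYS_ADMIN))
		cap_sys_admin = 1;
	return __vm_enough_memory(mm, pages, cap_sys_admin);
}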
/*
 * Requires inode->i_mapping->i_mmap_lock
 */
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
	if (vma->vm_flags & VM_SHARED)
		mapping->i_mmap_writable--;

	flush_dcache_mmap_lock(mapping);
	if (unlikely(vma->vm_flags & VM_NONLINEAR))
		list_del_init(&vma->shared.vm_set.list);
	else
		vma_prio_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}

/*
 * Unlink a file-based vm structure from its prio_tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		spin_lock(&mapping->i_mmap_lock);
		__remove_shared_vm_struct(vma, file, mapping);
		spin_unlock(&mapping->i_mmap_lock);
	}
}

/*
 * Close a vm structure and free it, returning the next.
 */
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file) {
		fput(vma->vm_file);
		if (vma->vm_flags & VM_EXECUTABLE)
			removed_exe_file_vma(vma->vm_mm);
	}
	mpol_put(vma_policy(vma));
	kmem_cache_free(vm_area_cachep, vma);
	return next;
}

SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long rlim, retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;
	unsigned long min_brk;

	down_write(&mm->mmap_sem);

#ifdef CONFIG_COMPAT_BRK
	min_brk = mm->end_code;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against rlimit here. If this check is done later after the test
	 * of oldbrk with newbrk then it can escape the test and let the data
	 * segment grow beyond its set limit in the case where the limit is
	 * not page aligned -Ram Gupta
	 */
	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
			(mm->end_data - mm->start_data) > rlim)
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/* Check against existing mmap mappings. */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
		goto out;
set_brk:
	mm->brk = brk;
out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}
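From userspace the syscall above is normally reached through the libc brk()/sbrk() wrappers. The sketch below (plain POSIX calls, nothing from this file) shows how growing the heap exercises the do_brk() branch and shrinking it exercises the do_munmap() branch.

/*
 * Hypothetical userspace illustration, not part of mm/mmap.c:
 * grow and then shrink the data segment via sbrk()/brk().
 */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	void *old_brk = sbrk(0);		/* current program break */

	if (sbrk(64 * 1024) == (void *)-1) {	/* grow: brk > mm->brk */
		perror("sbrk");
		return 1;
	}
	printf("break moved from %p to %p\n", old_brk, sbrk(0));

	if (brk(old_brk) != 0)			/* shrink: brk <= mm->brk */
		perror("brk");
	return 0;
}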
#ifdef DEBUG_MM_RB
static int browse_rb(struct rb_root *root)
{
	int i = 0, j;
	struct rb_node *nd, *pn = NULL;
	unsigned long prev = 0, pend = 0;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		if (vma->vm_start < prev)
			printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1;
		if (vma->vm_start < pend)
			printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
		if (vma->vm_start > vma->vm_end)
			printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
		i++;
		pn = nd;
		prev = vma->vm_start;
		pend = vma->vm_end;
	}
	j = 0;
	for (nd = pn; nd; nd = rb_prev(nd)) {
		j++;
	}
	if (i != j)
		printk("backwards %d, forwards %d\n", j, i), i = 0;
	return i;
}

void validate_mm(struct mm_struct *mm)
{
	int bug = 0;
	int i = 0;
	struct vm_area_struct *tmp = mm->mmap;
	while (tmp) {
		tmp = tmp->vm_next;
		i++;
	}
	if (i != mm->map_count)
		printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
	i = browse_rb(&mm->mm_rb);
	if (i != mm->map_count)
		printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
	BUG_ON(bug);
}
#else
#define validate_mm(mm) do { } while (0)
#endif

static struct vm_area_struct *
find_vma_prepare(struct mm_struct *mm, unsigned long addr,
		struct vm_area_struct **pprev, struct rb_node ***rb_link,
		struct rb_node ** rb_parent)
{
	struct vm_area_struct * vma;
	struct rb_node ** __rb_link, * __rb_parent, * rb_prev;

	__rb_link = &mm->mm_rb.rb_node;
	rb_prev = __rb_parent = NULL;
	vma = NULL;

	while (*__rb_link) {
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {
			vma = vma_tmp;
			if (vma_tmp->vm_start <= addr)
				break;
			__rb_link = &__rb_parent->rb_left;
		} else {
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	return vma;
}

static inline void
__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev, struct rb_node *rb_parent)
{
	if (prev) {
		vma->vm_next = prev->vm_next;
		prev->vm_next = vma;
	} else {
		mm->mmap = vma;
		if (rb_parent)
			vma->vm_next = rb_entry(rb_parent,
					struct vm_area_struct, vm_rb);
		else
			vma->vm_next = NULL;
	}
}

void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
}

static void __vma_link_file(struct vm_area_struct *vma)
{
	struct file *file;

	file = vma->vm_file;
	if (file) {
		struct address_space *mapping = file->f_mapping;

		if (vma->vm_flags & VM_DENYWRITE)
			atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
		if (vma->vm_flags & VM_SHARED)
			mapping->i_mmap_writable++;

		flush_dcache_mmap_lock(mapping);
		if (unlikely(vma->vm_flags & VM_NONLINEAR))
			vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
		else
			vma_prio_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}
}

static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
	__anon_vma_link(vma);
}

static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
			struct vm_area_struct *prev, struct rb_node **rb_link,
			struct rb_node *rb_parent)
{
	struct address_space *mapping = NULL;

	if (vma->vm_file)
		mapping = vma->vm_file->f_mapping;

	if (mapping) {
		spin_lock(&mapping->i_mmap_lock);
		vma->vm_truncate_count = mapping->truncate_count;
	}
	anon_vma_lock(vma);

	__vma_link(mm, vma, prev, rb_link, rb_parent);
	__vma_link_file(vma);

	anon_vma_unlock(vma);
	if (mapping)
		spin_unlock(&mapping->i_mmap_lock);

	mm->map_count++;
	validate_mm(mm);
}

/*
 * Helper for vma_adjust in the split_vma insert case:
 * insert vm structure into list and rbtree and anon_vma,
 * but it has already been inserted into prio_tree earlier.
 */
static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *__vma, *prev;
	struct rb_node **rb_link, *rb_parent;

	__vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
	BUG_ON(__vma && __vma->vm_start < vma->vm_end);
	__vma_link(mm, vma, prev, rb_link, rb_parent);
	mm->map_count++;
}

static inline void
__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev)
{
	prev->vm_next = vma->vm_next;
	rb_erase(&vma->vm_rb, &mm->mm_rb);
	if (mm->mmap_cache == vma)
		mm->mmap_cache = prev;
}
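__vma_unlink() invalidates mm->mmap_cache because address lookups consult that one-entry cache before walking the red-black tree. Below is a simplified sketch of such a lookup; find_vma_sketch() is a made-up name that only mirrors the tree-walk logic, while the real find_vma() is defined later in mm/mmap.c.

/*
 * Simplified sketch, not the real find_vma(): check the one-entry
 * mmap_cache first, then walk mm->mm_rb for the first VMA whose
 * vm_end lies above addr, caching the result on success.
 */
static struct vm_area_struct *find_vma_sketch(struct mm_struct *mm,
		unsigned long addr)
{
	struct vm_area_struct *vma = mm->mmap_cache;
	struct rb_node *rb_node;

	if (vma && vma->vm_end > addr && vma->vm_start <= addr)
		return vma;			/* cache hit */

	vma = NULL;
	rb_node = mm->mm_rb.rb_node;
	while (rb_node) {
		struct vm_area_struct *tmp;

		tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
		if (tmp->vm_end > addr) {
			vma = tmp;		/* candidate; keep looking left */
			if (tmp->vm_start <= addr)
				break;
			rb_node = rb_node->rb_left;
		} else
			rb_node = rb_node->rb_right;
	}
	if (vma)
		mm->mmap_cache = vma;		/* refresh the cache */
	return vma;
}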