vma.c 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. /*
  2. * Copyright 2007 Andi Kleen, SUSE Labs.
  3. * Subject to the GPL, v.2
  4. *
  5. * This contains most of the x86 vDSO kernel-side code.
  6. */
  7. #include <linux/mm.h>
  8. #include <linux/err.h>
  9. #include <linux/sched.h>
  10. #include <linux/slab.h>
  11. #include <linux/init.h>
  12. #include <linux/random.h>
  13. #include <linux/elf.h>
  14. #include <linux/cpu.h>
  15. #include <asm/pvclock.h>
  16. #include <asm/vgtod.h>
  17. #include <asm/proto.h>
  18. #include <asm/vdso.h>
  19. #include <asm/vvar.h>
  20. #include <asm/page.h>
  21. #include <asm/hpet.h>
  22. #include <asm/desc.h>
  23. #include <asm/cpufeature.h>
  /*
   * Non-zero allows the 64-bit and x32 vDSO images to be mapped into new
   * processes.  The "vdso=" boot parameter overrides the default of 1.
   */
  #ifdef CONFIG_X86_64
  unsigned int __read_mostly vdso64_enabled = 1;
  #endif
  27. void __init init_vdso_image(const struct vdso_image *image)
  28. {
  29. int i;
  30. int npages = (image->size) / PAGE_SIZE;
  31. BUG_ON(image->size % PAGE_SIZE != 0);
  32. for (i = 0; i < npages; i++)
  33. image->text_mapping.pages[i] =
  34. virt_to_page(image->data + i*PAGE_SIZE);
  35. apply_alternatives((struct alt_instr *)(image->data + image->alt),
  36. (struct alt_instr *)(image->data + image->alt +
  37. image->alt_len));
  38. }
  39. struct linux_binprm;
  40. /*
  41. * Put the vdso above the (randomized) stack with another randomized
  42. * offset. This way there is no hole in the middle of address space.
  43. * To save memory make sure it is still in the same PTE as the stack
  44. * top. This doesn't give that many random bits.
  45. *
  46. * Note that this algorithm is imperfect: the distribution of the vdso
  47. * start address within a PMD is biased toward the end.
  48. *
  49. * Only used for the 64-bit and x32 vdsos.
  50. */
  51. static unsigned long vdso_addr(unsigned long start, unsigned len)
  52. {
  53. #ifdef CONFIG_X86_32
  54. return 0;
  55. #else
  56. unsigned long addr, end;
  57. unsigned offset;
  58. /*
  59. * Round up the start address. It can start out unaligned as a result
  60. * of stack start randomization.
  61. */
  62. start = PAGE_ALIGN(start);
  63. /* Round the lowest possible end address up to a PMD boundary. */
  64. end = (start + len + PMD_SIZE - 1) & PMD_MASK;
  65. if (end >= TASK_SIZE_MAX)
  66. end = TASK_SIZE_MAX;
  67. end -= len;
  68. if (end > start) {
  69. offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
  70. addr = start + (offset << PAGE_SHIFT);
  71. } else {
  72. addr = start;
  73. }
  74. /*
  75. * Forcibly align the final address in case we have a hardware
  76. * issue that requires alignment for performance reasons.
  77. */
  78. addr = align_vdso_addr(addr);
  79. return addr;
  80. #endif
  81. }
  82. static int map_vdso(const struct vdso_image *image, bool calculate_addr)
  83. {
  84. struct mm_struct *mm = current->mm;
  85. struct vm_area_struct *vma;
  86. unsigned long addr, text_start;
  87. int ret = 0;
  88. static struct page *no_pages[] = {NULL};
  89. static struct vm_special_mapping vvar_mapping = {
  90. .name = "[vvar]",
  91. .pages = no_pages,
  92. };
  93. struct pvclock_vsyscall_time_info *pvti;
  94. if (calculate_addr) {
  95. addr = vdso_addr(current->mm->start_stack,
  96. image->size - image->sym_vvar_start);
  97. } else {
  98. addr = 0;
  99. }
  100. down_write(&mm->mmap_sem);
  101. addr = get_unmapped_area(NULL, addr,
  102. image->size - image->sym_vvar_start, 0, 0);
  103. if (IS_ERR_VALUE(addr)) {
  104. ret = addr;
  105. goto up_fail;
  106. }
  107. text_start = addr - image->sym_vvar_start;
  108. current->mm->context.vdso = (void __user *)text_start;
  109. /*
  110. * MAYWRITE to allow gdb to COW and set breakpoints
  111. */
  112. vma = _install_special_mapping(mm,
  113. text_start,
  114. image->size,
  115. VM_READ|VM_EXEC|
  116. VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
  117. &image->text_mapping);
  118. if (IS_ERR(vma)) {
  119. ret = PTR_ERR(vma);
  120. goto up_fail;
  121. }
  122. vma = _install_special_mapping(mm,
  123. addr,
  124. -image->sym_vvar_start,
  125. VM_READ|VM_MAYREAD,
  126. &vvar_mapping);
  127. if (IS_ERR(vma)) {
  128. ret = PTR_ERR(vma);
  129. goto up_fail;
  130. }
  131. if (image->sym_vvar_page)
  132. ret = remap_pfn_range(vma,
  133. text_start + image->sym_vvar_page,
  134. __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
  135. PAGE_SIZE,
  136. PAGE_READONLY);
  137. if (ret)
  138. goto up_fail;
  139. #ifdef CONFIG_HPET_TIMER
  140. if (hpet_address && image->sym_hpet_page) {
  141. ret = io_remap_pfn_range(vma,
  142. text_start + image->sym_hpet_page,
  143. hpet_address >> PAGE_SHIFT,
  144. PAGE_SIZE,
  145. pgprot_noncached(PAGE_READONLY));
  146. if (ret)
  147. goto up_fail;
  148. }
  149. #endif
  150. pvti = pvclock_pvti_cpu0_va();
  151. if (pvti && image->sym_pvclock_page) {
  152. ret = remap_pfn_range(vma,
  153. text_start + image->sym_pvclock_page,
  154. __pa(pvti) >> PAGE_SHIFT,
  155. PAGE_SIZE,
  156. PAGE_READONLY);
  157. if (ret)
  158. goto up_fail;
  159. }
  160. up_fail:
  161. if (ret)
  162. current->mm->context.vdso = NULL;
  163. up_write(&mm->mmap_sem);
  164. return ret;
  165. }
  166. #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
  167. static int load_vdso32(void)
  168. {
  169. if (vdso32_enabled != 1) /* Other values all mean "disabled" */
  170. return 0;
  171. return map_vdso(&vdso_image_32, false);
  172. }
  173. #endif
  174. #ifdef CONFIG_X86_64
  175. int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
  176. {
  177. if (!vdso64_enabled)
  178. return 0;
  179. return map_vdso(&vdso_image_64, true);
  180. }
  181. #ifdef CONFIG_COMPAT
  /*
   * Map the vDSO appropriate for a compat task.
   *
   * With CONFIG_X86_X32_ABI, a task that has TIF_X32 set gets the x32
   * image (or nothing when vdso64_enabled is zero).  Every other task gets
   * the 32-bit vDSO when CONFIG_IA32_EMULATION is configured, and nothing
   * otherwise.  Neither bprm nor uses_interp is consulted.
   */
  int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
                                         int uses_interp)
  {
  #ifdef CONFIG_X86_X32_ABI
      if (test_thread_flag(TIF_X32))
          return vdso64_enabled ? map_vdso(&vdso_image_x32, true) : 0;
  #endif

  #ifdef CONFIG_IA32_EMULATION
      return load_vdso32();
  #else
      return 0;
  #endif
  }
  198. #endif
  199. #else
  /*
   * 32-bit kernels: map the 32-bit vDSO (when enabled) into the current
   * process.  Neither bprm nor uses_interp is consulted.
   */
  int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
  {
      int err = load_vdso32();

      return err;
  }
  204. #endif
  205. #ifdef CONFIG_X86_64
  206. static __init int vdso_setup(char *s)
  207. {
  208. vdso64_enabled = simple_strtoul(s, NULL, 0);
  209. return 0;
  210. }
  211. __setup("vdso=", vdso_setup);
  212. #endif
  213. #ifdef CONFIG_X86_64
  214. static void vgetcpu_cpu_init(void *arg)
  215. {
  216. int cpu = smp_processor_id();
  217. struct desc_struct d = { };
  218. unsigned long node = 0;
  219. #ifdef CONFIG_NUMA
  220. node = cpu_to_node(cpu);
  221. #endif
  222. if (static_cpu_has(X86_FEATURE_RDTSCP))
  223. write_rdtscp_aux((node << 12) | cpu);
  224. /*
  225. * Store cpu number in limit so that it can be loaded
  226. * quickly in user space in vgetcpu. (12 bits for the CPU
  227. * and 8 bits for the node)
  228. */
  229. d.limit0 = cpu | ((node & 0xf) << 12);
  230. d.limit = node >> 4;
  231. d.type = 5; /* RO data, expand down, accessed */
  232. d.dpl = 3; /* Visible to user code */
  233. d.s = 1; /* Not a system segment */
  234. d.p = 1; /* Present */
  235. d.d = 1; /* 32-bit */
  236. write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
  237. }
  238. static int
  239. vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
  240. {
  241. long cpu = (long)arg;
  242. if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
  243. smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
  244. return NOTIFY_DONE;
  245. }
  246. static int __init init_vdso(void)
  247. {
  248. init_vdso_image(&vdso_image_64);
  249. #ifdef CONFIG_X86_X32_ABI
  250. init_vdso_image(&vdso_image_x32);
  251. #endif
  252. cpu_notifier_register_begin();
  253. on_each_cpu(vgetcpu_cpu_init, NULL, 1);
  254. /* notifier priority > KVM */
  255. __hotcpu_notifier(vgetcpu_cpu_notifier, 30);
  256. cpu_notifier_register_done();
  257. return 0;
  258. }
  259. subsys_initcall(init_vdso);
  260. #endif /* CONFIG_X86_64 */