/*
 * mmu_audit.c:
 *
 * Audit code for KVM MMU
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *   Marcelo Tosatti <mtosatti@redhat.com>
 *   Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/ratelimit.h>
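
/*
 * Note: in the kernel tree this file is not compiled standalone; mmu.c
 * #includes it when CONFIG_KVM_MMU_AUDIT is enabled, which is why the
 * only explicit include is ratelimit.h -- the spte/page-header/memslot
 * helpers used below all come from mmu.c's context.
 */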

char const *audit_point_name[] = {
	"pre page fault",
	"post page fault",
	"pre pte write",
	"post pte write",
	"pre sync",
	"post sync"
};
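
/*
 * The strings above are indexed by the audit-point value stored in
 * kvm->arch.audit_point (the AUDIT_PRE_PAGE_FAULT .. AUDIT_POST_SYNC
 * constants), so the two lists must stay in the same order.
 */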

#define audit_printk(kvm, fmt, args...)		\
	printk(KERN_ERR "audit: (%s) error: "	\
	       fmt, audit_point_name[kvm->arch.audit_point], ##args)

typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level);
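
/*
 * Recursively visit every spte in the shadow page @sp: @fn is called on
 * each entry, and entries that are present but not leaves at this level
 * are descended into.
 */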
static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
			    inspect_spte_fn fn, int level)
{
	int i;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		u64 *ent = sp->spt;

		fn(vcpu, ent + i, level);

		if (is_shadow_present_pte(ent[i]) &&
		    !is_last_spte(ent[i], level)) {
			struct kvm_mmu_page *child;

			child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
			__mmu_spte_walk(vcpu, child, fn, level - 1);
		}
	}
}
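
/*
 * Start a walk from the current MMU root(s): the single root page for
 * 4-level paging (PT64_ROOT_LEVEL), or each valid entry of the four
 * PAE roots otherwise.
 */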
static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
{
	int i;
	struct kvm_mmu_page *sp;

	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
		return;

	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
		hpa_t root = vcpu->arch.mmu.root_hpa;

		sp = page_header(root);
		__mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL);
		return;
	}

	for (i = 0; i < 4; ++i) {
		hpa_t root = vcpu->arch.mmu.pae_root[i];

		if (root && VALID_PAGE(root)) {
			root &= PT64_BASE_ADDR_MASK;
			sp = page_header(root);
			__mmu_spte_walk(vcpu, sp, fn, 2);
		}
	}
}

typedef void (*sp_handler) (struct kvm *kvm, struct kvm_mmu_page *sp);

static void walk_all_active_sps(struct kvm *kvm, sp_handler fn)
{
	struct kvm_mmu_page *sp;

	list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link)
		fn(kvm, sp);
}
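
/*
 * Verify a leaf spte's translation: resolve the gfn recorded for the
 * spte back to a pfn and check that the spte actually points at it.
 * An unsync shadow page is only legal at the last level, so one seen
 * above PT_PAGE_TABLE_LEVEL is reported as well.
 */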
static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp;
	gfn_t gfn;
	pfn_t pfn;
	hpa_t hpa;

	sp = page_header(__pa(sptep));

	if (sp->unsync) {
		if (level != PT_PAGE_TABLE_LEVEL) {
			audit_printk(vcpu->kvm, "unsync sp: %p "
				     "level = %d\n", sp, level);
			return;
		}
	}

	if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level))
		return;

	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
	pfn = kvm_vcpu_gfn_to_pfn_atomic(vcpu, gfn);

	if (is_error_pfn(pfn))
		return;

	hpa = pfn << PAGE_SHIFT;

	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
		audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
			     "ent %llx\n", vcpu->arch.mmu.root_level, pfn,
			     hpa, *sptep);
}
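
/*
 * Every mapped spte should be reachable via the rmap of its gfn's
 * memslot; complain (rate-limited, with a stack dump) if either the
 * memslot or the rmap entry is missing.
 */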
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
	unsigned long *rmapp;
	struct kvm_mmu_page *rev_sp;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *slot;
	gfn_t gfn;

	rev_sp = page_header(__pa(sptep));
	gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);

	slots = kvm_memslots_for_spte_role(kvm, rev_sp->role);
	slot = __gfn_to_memslot(slots, gfn);
	if (!slot) {
		if (!__ratelimit(&ratelimit_state))
			return;
		audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
		audit_printk(kvm, "index %ld of sp (gfn=%llx)\n",
			     (long int)(sptep - rev_sp->spt), rev_sp->gfn);
		dump_stack();
		return;
	}

	rmapp = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
	if (!*rmapp) {
		if (!__ratelimit(&ratelimit_state))
			return;
		audit_printk(kvm, "no rmap for writable spte %llx\n",
			     *sptep);
		dump_stack();
	}
}

static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	if (is_shadow_present_pte(*sptep) && is_last_spte(*sptep, level))
		inspect_spte_has_rmap(vcpu->kvm, sptep);
}

static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp = page_header(__pa(sptep));

	if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync)
		audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync "
			     "root.\n", sp);
}

static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	int i;

	if (sp->role.level != PT_PAGE_TABLE_LEVEL)
		return;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		if (!is_rmap_spte(sp->spt[i]))
			continue;

		inspect_spte_has_rmap(kvm, sp->spt + i);
	}
}
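
/*
 * A shadowed guest page table must be write-protected: unless this sp
 * is direct, unsync or invalid, no last-level spte mapping sp->gfn may
 * be writable.
 */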
static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	unsigned long *rmapp;
	u64 *sptep;
	struct rmap_iterator iter;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *slot;

	if (sp->role.direct || sp->unsync || sp->role.invalid)
		return;

	slots = kvm_memslots_for_spte_role(kvm, sp->role);
	slot = __gfn_to_memslot(slots, sp->gfn);
	rmapp = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot);

	for_each_rmap_spte(rmapp, &iter, sptep)
		if (is_writable_pte(*sptep))
			audit_printk(kvm, "shadow page has writable "
				     "mappings: gfn %llx role %x\n",
				     sp->gfn, sp->role.word);
}

static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	check_mappings_rmap(kvm, sp);
	audit_write_protection(kvm, sp);
}

static void audit_all_active_sps(struct kvm *kvm)
{
	walk_all_active_sps(kvm, audit_sp);
}

static void audit_spte(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	audit_sptes_have_rmaps(vcpu, sptep, level);
	audit_mappings(vcpu, sptep, level);
	audit_spte_after_sync(vcpu, sptep, level);
}

static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
{
	mmu_spte_walk(vcpu, audit_spte);
}
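
/*
 * kvm_mmu_audit() is called from the MMU hot paths at each audit point,
 * so it is gated by a static key and patched out to a no-op while
 * auditing is off; the "mmu_audit" parameter below flips the key at
 * runtime.
 */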
static bool mmu_audit;
static struct static_key mmu_audit_key;

static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);

	if (!__ratelimit(&ratelimit_state))
		return;

	vcpu->kvm->arch.audit_point = point;
	audit_all_active_sps(vcpu->kvm);
	audit_vcpu_spte(vcpu);
}

static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
	if (static_key_false(&mmu_audit_key))
		__kvm_mmu_audit(vcpu, point);
}

static void mmu_audit_enable(void)
{
	if (mmu_audit)
		return;

	static_key_slow_inc(&mmu_audit_key);
	mmu_audit = true;
}

static void mmu_audit_disable(void)
{
	if (!mmu_audit)
		return;

	static_key_slow_dec(&mmu_audit_key);
	mmu_audit = false;
}

static int mmu_audit_set(const char *val, const struct kernel_param *kp)
{
	int ret;
	unsigned long enable;

	ret = kstrtoul(val, 10, &enable);
	if (ret < 0)
		return -EINVAL;

	switch (enable) {
	case 0:
		mmu_audit_disable();
		break;
	case 1:
		mmu_audit_enable();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static const struct kernel_param_ops audit_param_ops = {
	.set = mmu_audit_set,
	.get = param_get_bool,
};

arch_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
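
/*
 * Runtime usage (a sketch, assuming KVM is built as the usual "kvm"
 * module so the parameter appears under /sys/module/kvm/parameters):
 *
 *	echo 1 > /sys/module/kvm/parameters/mmu_audit	# enable auditing
 *	echo 0 > /sys/module/kvm/parameters/mmu_audit	# disable auditing
 */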