/******************************************************************************
 * mcelog.c
 * Driver for receiving and transferring machine check error information
 *
 * Copyright (c) 2012 Intel Corporation
 * Author: Liu, Jinsong <jinsong.liu@intel.com>
 * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
 * Author: Ke, Liping <liping.ke@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#define pr_fmt(fmt) "xen_mcelog: " fmt

#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/uaccess.h>
#include <linux/capability.h>
#include <linux/poll.h>
#include <linux/sched.h>

#include <xen/interface/xen.h>
#include <xen/events.h>
#include <xen/interface/vcpu.h>
#include <xen/xen.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

static struct mc_info g_mi;
static struct mcinfo_logical_cpu *g_physinfo;
static uint32_t ncpus;

static DEFINE_MUTEX(mcelog_lock);

static struct xen_mce_log xen_mcelog = {
	.signature = XEN_MCE_LOG_SIGNATURE,
	.len = XEN_MCE_LOG_LEN,
	.recordlen = sizeof(struct xen_mce),
};

static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock);
static int xen_mce_chrdev_open_count;	/* #times opened */
static int xen_mce_chrdev_open_exclu;	/* already open exclusive? */

static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait);
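
/*
 * Open /dev/mcelog.  The spinlock serializes the open/release bookkeeping:
 * any open fails while an O_EXCL opener is active, and an O_EXCL open fails
 * while the device is already open, so at most one exclusive user exists.
 */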
static int xen_mce_chrdev_open(struct inode *inode, struct file *file)
{
	spin_lock(&xen_mce_chrdev_state_lock);

	if (xen_mce_chrdev_open_exclu ||
	    (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
		spin_unlock(&xen_mce_chrdev_state_lock);
		return -EBUSY;
	}

	if (file->f_flags & O_EXCL)
		xen_mce_chrdev_open_exclu = 1;
	xen_mce_chrdev_open_count++;

	spin_unlock(&xen_mce_chrdev_state_lock);

	return nonseekable_open(inode, file);
}

static int xen_mce_chrdev_release(struct inode *inode, struct file *file)
{
	spin_lock(&xen_mce_chrdev_state_lock);

	xen_mce_chrdev_open_count--;
	xen_mce_chrdev_open_exclu = 0;

	spin_unlock(&xen_mce_chrdev_state_lock);

	return 0;
}
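
/*
 * Read the buffered error records.  Only full reads of the whole log are
 * supported: every pending record is copied to userspace under mcelog_lock,
 * then the buffer is cleared and the next-record index reset.
 */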
static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf,
				   size_t usize, loff_t *off)
{
	char __user *buf = ubuf;
	unsigned num;
	int i, err;

	mutex_lock(&mcelog_lock);

	num = xen_mcelog.next;

	/* Only supports full reads right now */
	err = -EINVAL;
	if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce))
		goto out;

	err = 0;
	for (i = 0; i < num; i++) {
		struct xen_mce *m = &xen_mcelog.entry[i];

		err |= copy_to_user(buf, m, sizeof(*m));
		buf += sizeof(*m);
	}

	memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce));
	xen_mcelog.next = 0;

	if (err)
		err = -EFAULT;

out:
	mutex_unlock(&mcelog_lock);

	return err ? err : buf - ubuf;
}
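
/* Poll support: /dev/mcelog becomes readable whenever records are pending. */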
static unsigned int xen_mce_chrdev_poll(struct file *file, poll_table *wait)
{
	poll_wait(file, &xen_mce_chrdev_wait, wait);

	if (xen_mcelog.next)
		return POLLIN | POLLRDNORM;

	return 0;
}
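
/*
 * ioctl interface for /dev/mcelog consumers: report the record and log
 * lengths, and atomically fetch-and-clear the status flags (e.g. overflow).
 * Requires CAP_SYS_ADMIN.
 */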
static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd,
				 unsigned long arg)
{
	int __user *p = (int __user *)arg;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct xen_mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(XEN_MCE_LOG_LEN, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;

		do {
			flags = xen_mcelog.flags;
		} while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags);

		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}

static const struct file_operations xen_mce_chrdev_ops = {
	.open = xen_mce_chrdev_open,
	.release = xen_mce_chrdev_release,
	.read = xen_mce_chrdev_read,
	.poll = xen_mce_chrdev_poll,
	.unlocked_ioctl = xen_mce_chrdev_ioctl,
	.llseek = no_llseek,
};

static struct miscdevice xen_mce_chrdev_device = {
	MISC_MCELOG_MINOR,
	"mcelog",
	&xen_mce_chrdev_ops,
};

/*
 * Caller should hold the mcelog_lock
 */
static void xen_mce_log(struct xen_mce *mce)
{
	unsigned entry;

	entry = xen_mcelog.next;

	/*
	 * When the buffer fills up discard new entries.
	 * Assume that the earlier errors are the more
	 * interesting ones:
	 */
	if (entry >= XEN_MCE_LOG_LEN) {
		set_bit(XEN_MCE_OVERFLOW,
			(unsigned long *)&xen_mcelog.flags);
		return;
	}

	memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce));

	xen_mcelog.next++;
}
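
/*
 * Convert one mc_info telemetry buffer from the hypervisor into xen_mce
 * records: take the global status, match the reporting CPU by APIC ID
 * against the cached physical CPU info, then log one record per bank entry.
 */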
static int convert_log(struct mc_info *mi)
{
	struct mcinfo_common *mic;
	struct mcinfo_global *mc_global;
	struct mcinfo_bank *mc_bank;
	struct xen_mce m;
	uint32_t i;

	mic = NULL;
	x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
	if (unlikely(!mic)) {
		pr_warn("Failed to find global error info\n");
		return -ENODEV;
	}

	memset(&m, 0, sizeof(struct xen_mce));

	mc_global = (struct mcinfo_global *)mic;
	m.mcgstatus = mc_global->mc_gstatus;
	m.apicid = mc_global->mc_apicid;

	for (i = 0; i < ncpus; i++)
		if (g_physinfo[i].mc_apicid == m.apicid)
			break;
	if (unlikely(i == ncpus)) {
		pr_warn("Failed to match cpu with apicid %d\n", m.apicid);
		return -ENODEV;
	}

	m.socketid = g_physinfo[i].mc_chipid;
	m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
	m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
	m.mcgcap = g_physinfo[i].mc_msrvalues[__MC_MSR_MCGCAP].value;

	mic = NULL;
	x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
	if (unlikely(!mic)) {
		pr_warn("Failed to find bank error info\n");
		return -ENODEV;
	}

	do {
		if ((!mic) || (mic->size == 0) ||
		    (mic->type != MC_TYPE_GLOBAL &&
		     mic->type != MC_TYPE_BANK &&
		     mic->type != MC_TYPE_EXTENDED &&
		     mic->type != MC_TYPE_RECOVERY))
			break;

		if (mic->type == MC_TYPE_BANK) {
			mc_bank = (struct mcinfo_bank *)mic;
			m.misc = mc_bank->mc_misc;
			m.status = mc_bank->mc_status;
			m.addr = mc_bank->mc_addr;
			m.tsc = mc_bank->mc_tsc;
			m.bank = mc_bank->mc_bank;
			m.finished = 1;

			/* log this record */
			xen_mce_log(&m);
		}

		mic = x86_mcinfo_next(mic);
	} while (1);

	return 0;
}
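
/*
 * Drain one of the hypervisor's error telemetry queues (urgent or
 * nonurgent): fetch each pending record with XEN_MC_fetch, convert it into
 * the local log, and ack it, until the queue reports no more data or an
 * error occurs.
 */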
static int mc_queue_handle(uint32_t flags)
{
	struct xen_mc mc_op;
	int ret = 0;

	mc_op.cmd = XEN_MC_fetch;
	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
	set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi);
	do {
		mc_op.u.mc_fetch.flags = flags;
		ret = HYPERVISOR_mca(&mc_op);
		if (ret) {
			pr_err("Failed to fetch %surgent error log\n",
			       flags == XEN_MC_URGENT ? "" : "non");
			break;
		}

		if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
		    mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
			break;
		else {
			ret = convert_log(&g_mi);
			if (ret)
				pr_warn("Failed to convert this error log, continue acking it anyway\n");
			mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK;
			ret = HYPERVISOR_mca(&mc_op);
			if (ret) {
				pr_err("Failed to ack previous error log\n");
				break;
			}
		}
	} while (1);

	return ret;
}

/* Deferred work for machine check error info, scheduled by the virq handler */
static void xen_mce_work_fn(struct work_struct *work)
{
	int err;

	mutex_lock(&mcelog_lock);

	/* urgent mc_info */
	err = mc_queue_handle(XEN_MC_URGENT);
	if (err)
		pr_err("Failed to handle urgent mc_info queue, continue handling nonurgent mc_info queue anyway\n");

	/* nonurgent mc_info */
	err = mc_queue_handle(XEN_MC_NONURGENT);
	if (err)
		pr_err("Failed to handle nonurgent mc_info queue\n");

	/* wake processes polling /dev/mcelog */
	wake_up_interruptible(&xen_mce_chrdev_wait);

	mutex_unlock(&mcelog_lock);
}

static DECLARE_WORK(xen_mce_work, xen_mce_work_fn);
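
/*
 * The MCE virq arrives in interrupt context, so only kick the workqueue
 * here; the hypercalls and mcelog_lock are taken in xen_mce_work_fn().
 */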
static irqreturn_t xen_mce_interrupt(int irq, void *dev_id)
{
	schedule_work(&xen_mce_work);
	return IRQ_HANDLED;
}
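
/*
 * Query the hypervisor for the number of physical CPUs, cache the per-CPU
 * info in g_physinfo for the APIC-ID matching done in convert_log(), and
 * bind VIRQ_MCA to the MCE interrupt handler.
 */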
static int bind_virq_for_mce(void)
{
	int ret;
	struct xen_mc mc_op;

	memset(&mc_op, 0, sizeof(struct xen_mc));

	/* Fetch physical CPU Numbers */
	mc_op.cmd = XEN_MC_physcpuinfo;
	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
	ret = HYPERVISOR_mca(&mc_op);
	if (ret) {
		pr_err("Failed to get CPU numbers\n");
		return ret;
	}

	/* Fetch each CPU Physical Info for later reference */
	ncpus = mc_op.u.mc_physcpuinfo.ncpus;
	g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu),
			     GFP_KERNEL);
	if (!g_physinfo)
		return -ENOMEM;
	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
	ret = HYPERVISOR_mca(&mc_op);
	if (ret) {
		pr_err("Failed to get CPU info\n");
		kfree(g_physinfo);
		return ret;
	}

	ret = bind_virq_to_irqhandler(VIRQ_MCA, 0,
				      xen_mce_interrupt, 0, "mce", NULL);
	if (ret < 0) {
		pr_err("Failed to bind virq\n");
		kfree(g_physinfo);
		return ret;
	}

	return 0;
}

static int __init xen_late_init_mcelog(void)
{
	int ret;

	/* Only DOM0 is responsible for MCE logging */
	if (!xen_initial_domain())
		return -ENODEV;

	/* register character device /dev/mcelog for xen mcelog */
	ret = misc_register(&xen_mce_chrdev_device);
	if (ret)
		return ret;

	ret = bind_virq_for_mce();
	if (ret)
		goto deregister;

	return 0;

deregister:
	misc_deregister(&xen_mce_chrdev_device);
	return ret;
}
device_initcall(xen_late_init_mcelog);