/******************************************************************************
 * arch-x86/mca.h
 * Guest OS machine check interface to x86 Xen.
 *
 * Contributed by Advanced Micro Devices, Inc.
 * Author: Christoph Egger <Christoph.Egger@amd.com>
 *
 * Updated by Intel Corporation
 * Author: Liu, Jinsong <jinsong.liu@intel.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
  29. #ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
  30. #define __XEN_PUBLIC_ARCH_X86_MCA_H__
  31. /* Hypercall */
  32. #define __HYPERVISOR_mca __HYPERVISOR_arch_0
  33. #define XEN_MCA_INTERFACE_VERSION 0x01ecc003
  34. /* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
  35. #define XEN_MC_NONURGENT 0x1
  36. /* IN: Dom0 calls hypercall to retrieve urgent error log entry */
  37. #define XEN_MC_URGENT 0x2
  38. /* IN: Dom0 acknowledges previosly-fetched error log entry */
  39. #define XEN_MC_ACK 0x4
  40. /* OUT: All is ok */
  41. #define XEN_MC_OK 0x0
  42. /* OUT: Domain could not fetch data. */
  43. #define XEN_MC_FETCHFAILED 0x1
  44. /* OUT: There was no machine check data to fetch. */
  45. #define XEN_MC_NODATA 0x2
  46. #ifndef __ASSEMBLY__
  47. /* vIRQ injected to Dom0 */
  48. #define VIRQ_MCA VIRQ_ARCH_0
  49. /*
  50. * mc_info entry types
  51. * mca machine check info are recorded in mc_info entries.
  52. * when fetch mca info, it can use MC_TYPE_... to distinguish
  53. * different mca info.
  54. */
  55. #define MC_TYPE_GLOBAL 0
  56. #define MC_TYPE_BANK 1
  57. #define MC_TYPE_EXTENDED 2
  58. #define MC_TYPE_RECOVERY 3
  59. struct mcinfo_common {
  60. uint16_t type; /* structure type */
  61. uint16_t size; /* size of this struct in bytes */
  62. };
  63. #define MC_FLAG_CORRECTABLE (1 << 0)
  64. #define MC_FLAG_UNCORRECTABLE (1 << 1)
  65. #define MC_FLAG_RECOVERABLE (1 << 2)
  66. #define MC_FLAG_POLLED (1 << 3)
  67. #define MC_FLAG_RESET (1 << 4)
  68. #define MC_FLAG_CMCI (1 << 5)
  69. #define MC_FLAG_MCE (1 << 6)
  70. /* contains x86 global mc information */
  71. struct mcinfo_global {
  72. struct mcinfo_common common;
  73. uint16_t mc_domid; /* running domain at the time in error */
  74. uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
  75. uint32_t mc_socketid; /* physical socket of the physical core */
  76. uint16_t mc_coreid; /* physical impacted core */
  77. uint16_t mc_core_threadid; /* core thread of physical core */
  78. uint32_t mc_apicid;
  79. uint32_t mc_flags;
  80. uint64_t mc_gstatus; /* global status */
  81. };
  82. /* contains x86 bank mc information */
  83. struct mcinfo_bank {
  84. struct mcinfo_common common;
  85. uint16_t mc_bank; /* bank nr */
  86. uint16_t mc_domid; /* domain referenced by mc_addr if valid */
  87. uint64_t mc_status; /* bank status */
  88. uint64_t mc_addr; /* bank address */
  89. uint64_t mc_misc;
  90. uint64_t mc_ctrl2;
  91. uint64_t mc_tsc;
  92. };
  93. struct mcinfo_msr {
  94. uint64_t reg; /* MSR */
  95. uint64_t value; /* MSR value */
  96. };
  97. /* contains mc information from other or additional mc MSRs */
  98. struct mcinfo_extended {
  99. struct mcinfo_common common;
  100. uint32_t mc_msrs; /* Number of msr with valid values. */
  101. /*
  102. * Currently Intel extended MSR (32/64) include all gp registers
  103. * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
  104. * useful at present. So expand this array to 16/32 to leave room.
  105. */
  106. struct mcinfo_msr mc_msr[sizeof(void *) * 4];
  107. };
  108. /* Recovery Action flags. Giving recovery result information to DOM0 */
  109. /* Xen takes successful recovery action, the error is recovered */
  110. #define REC_ACTION_RECOVERED (0x1 << 0)
  111. /* No action is performed by XEN */
  112. #define REC_ACTION_NONE (0x1 << 1)
  113. /* It's possible DOM0 might take action ownership in some case */
  114. #define REC_ACTION_NEED_RESET (0x1 << 2)
  115. /*
  116. * Different Recovery Action types, if the action is performed successfully,
  117. * REC_ACTION_RECOVERED flag will be returned.
  118. */
  119. /* Page Offline Action */
  120. #define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
  121. /* CPU offline Action */
  122. #define MC_ACTION_CPU_OFFLINE (0x1 << 1)
  123. /* L3 cache disable Action */
  124. #define MC_ACTION_CACHE_SHRINK (0x1 << 2)
  125. /*
  126. * Below interface used between XEN/DOM0 for passing XEN's recovery action
  127. * information to DOM0.
  128. */
  129. struct page_offline_action {
  130. /* Params for passing the offlined page number to DOM0 */
  131. uint64_t mfn;
  132. uint64_t status;
  133. };
  134. struct cpu_offline_action {
  135. /* Params for passing the identity of the offlined CPU to DOM0 */
  136. uint32_t mc_socketid;
  137. uint16_t mc_coreid;
  138. uint16_t mc_core_threadid;
  139. };
  140. #define MAX_UNION_SIZE 16
  141. struct mcinfo_recovery {
  142. struct mcinfo_common common;
  143. uint16_t mc_bank; /* bank nr */
  144. uint8_t action_flags;
  145. uint8_t action_types;
  146. union {
  147. struct page_offline_action page_retire;
  148. struct cpu_offline_action cpu_offline;
  149. uint8_t pad[MAX_UNION_SIZE];
  150. } action_info;
  151. };
  152. #define MCINFO_MAXSIZE 768
  153. struct mc_info {
  154. /* Number of mcinfo_* entries in mi_data */
  155. uint32_t mi_nentries;
  156. uint32_t flags;
  157. uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
  158. };
  159. DEFINE_GUEST_HANDLE_STRUCT(mc_info);
  160. #define __MC_MSR_ARRAYSIZE 8
  161. #define __MC_MSR_MCGCAP 0
  162. #define __MC_NMSRS 1
  163. #define MC_NCAPS 7
  164. struct mcinfo_logical_cpu {
  165. uint32_t mc_cpunr;
  166. uint32_t mc_chipid;
  167. uint16_t mc_coreid;
  168. uint16_t mc_threadid;
  169. uint32_t mc_apicid;
  170. uint32_t mc_clusterid;
  171. uint32_t mc_ncores;
  172. uint32_t mc_ncores_active;
  173. uint32_t mc_nthreads;
  174. uint32_t mc_cpuid_level;
  175. uint32_t mc_family;
  176. uint32_t mc_vendor;
  177. uint32_t mc_model;
  178. uint32_t mc_step;
  179. char mc_vendorid[16];
  180. char mc_brandid[64];
  181. uint32_t mc_cpu_caps[MC_NCAPS];
  182. uint32_t mc_cache_size;
  183. uint32_t mc_cache_alignment;
  184. uint32_t mc_nmsrvals;
  185. struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
  186. };
  187. DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
  188. /*
  189. * Prototype:
  190. * uint32_t x86_mcinfo_nentries(struct mc_info *mi);
  191. */
  192. #define x86_mcinfo_nentries(_mi) \
  193. ((_mi)->mi_nentries)
  194. /*
  195. * Prototype:
  196. * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
  197. */
  198. #define x86_mcinfo_first(_mi) \
  199. ((struct mcinfo_common *)(_mi)->mi_data)
  200. /*
  201. * Prototype:
  202. * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
  203. */
  204. #define x86_mcinfo_next(_mic) \
  205. ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
  206. /*
  207. * Prototype:
  208. * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
  209. */
  210. static inline void x86_mcinfo_lookup(struct mcinfo_common **ret,
  211. struct mc_info *mi, uint16_t type)
  212. {
  213. uint32_t i;
  214. struct mcinfo_common *mic;
  215. bool found = 0;
  216. if (!ret || !mi)
  217. return;
  218. mic = x86_mcinfo_first(mi);
  219. for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
  220. if (mic->type == type) {
  221. found = 1;
  222. break;
  223. }
  224. mic = x86_mcinfo_next(mic);
  225. }
  226. *ret = found ? mic : NULL;
  227. }
  228. /*
  229. * Fetch machine check data from hypervisor.
  230. */
  231. #define XEN_MC_fetch 1
  232. struct xen_mc_fetch {
  233. /*
  234. * IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
  235. * XEN_MC_ACK if ack'king an earlier fetch
  236. * OUT: XEN_MC_OK, XEN_MC_FETCHAILED, XEN_MC_NODATA
  237. */
  238. uint32_t flags;
  239. uint32_t _pad0;
  240. /* OUT: id for ack, IN: id we are ack'ing */
  241. uint64_t fetch_id;
  242. /* OUT variables. */
  243. GUEST_HANDLE(mc_info) data;
  244. };
  245. DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
  246. /*
  247. * This tells the hypervisor to notify a DomU about the machine check error
  248. */
  249. #define XEN_MC_notifydomain 2
  250. struct xen_mc_notifydomain {
  251. /* IN variables */
  252. uint16_t mc_domid; /* The unprivileged domain to notify */
  253. uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify */
  254. /* IN/OUT variables */
  255. uint32_t flags;
  256. };
  257. DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
  258. #define XEN_MC_physcpuinfo 3
  259. struct xen_mc_physcpuinfo {
  260. /* IN/OUT */
  261. uint32_t ncpus;
  262. uint32_t _pad0;
  263. /* OUT */
  264. GUEST_HANDLE(mcinfo_logical_cpu) info;
  265. };
  266. #define XEN_MC_msrinject 4
  267. #define MC_MSRINJ_MAXMSRS 8
  268. struct xen_mc_msrinject {
  269. /* IN */
  270. uint32_t mcinj_cpunr; /* target processor id */
  271. uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */
  272. uint32_t mcinj_count; /* 0 .. count-1 in array are valid */
  273. uint32_t _pad0;
  274. struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
  275. };
  276. /* Flags for mcinj_flags above; bits 16-31 are reserved */
  277. #define MC_MSRINJ_F_INTERPOSE 0x1
  278. #define XEN_MC_mceinject 5
  279. struct xen_mc_mceinject {
  280. unsigned int mceinj_cpunr; /* target processor id */
  281. };
  282. struct xen_mc {
  283. uint32_t cmd;
  284. uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
  285. union {
  286. struct xen_mc_fetch mc_fetch;
  287. struct xen_mc_notifydomain mc_notifydomain;
  288. struct xen_mc_physcpuinfo mc_physcpuinfo;
  289. struct xen_mc_msrinject mc_msrinject;
  290. struct xen_mc_mceinject mc_mceinject;
  291. } u;
  292. };
  293. DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
  294. /* Fields are zero when not available */
  295. struct xen_mce {
  296. __u64 status;
  297. __u64 misc;
  298. __u64 addr;
  299. __u64 mcgstatus;
  300. __u64 ip;
  301. __u64 tsc; /* cpu time stamp counter */
  302. __u64 time; /* wall time_t when error was detected */
  303. __u8 cpuvendor; /* cpu vendor as encoded in system.h */
  304. __u8 inject_flags; /* software inject flags */
  305. __u16 pad;
  306. __u32 cpuid; /* CPUID 1 EAX */
  307. __u8 cs; /* code segment */
  308. __u8 bank; /* machine check bank */
  309. __u8 cpu; /* cpu number; obsolete; use extcpu now */
  310. __u8 finished; /* entry is valid */
  311. __u32 extcpu; /* linux cpu number that detected the error */
  312. __u32 socketid; /* CPU socket ID */
  313. __u32 apicid; /* CPU initial apic ID */
  314. __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
  315. };
  316. /*
  317. * This structure contains all data related to the MCE log. Also
  318. * carries a signature to make it easier to find from external
  319. * debugging tools. Each entry is only valid when its finished flag
  320. * is set.
  321. */
  322. #define XEN_MCE_LOG_LEN 32
  323. struct xen_mce_log {
  324. char signature[12]; /* "MACHINECHECK" */
  325. unsigned len; /* = XEN_MCE_LOG_LEN */
  326. unsigned next;
  327. unsigned flags;
  328. unsigned recordlen; /* length of struct xen_mce */
  329. struct xen_mce entry[XEN_MCE_LOG_LEN];
  330. };
  331. #define XEN_MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
  332. #define XEN_MCE_LOG_SIGNATURE "MACHINECHECK"
  333. #define MCE_GET_RECORD_LEN _IOR('M', 1, int)
  334. #define MCE_GET_LOG_LEN _IOR('M', 2, int)
  335. #define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)
  336. #endif /* __ASSEMBLY__ */
  337. #endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */