edac_core.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. /*
  2. * Defines, structures, APIs for edac_core module
  3. *
  4. * (C) 2007 Linux Networx (http://lnxi.com)
  5. * This file may be distributed under the terms of the
  6. * GNU General Public License.
  7. *
  8. * Written by Thayne Harbaugh
  9. * Based on work by Dan Hollis <goemon at anime dot net> and others.
  10. * http://www.anime.net/~goemon/linux-ecc/
  11. *
  12. * NMI handling support added by
  13. * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
  14. *
  15. * Refactored for multi-source files:
  16. * Doug Thompson <norsk5@xmission.com>
  17. *
  18. */
  19. #ifndef _EDAC_CORE_H_
  20. #define _EDAC_CORE_H_
  21. #include <linux/kernel.h>
  22. #include <linux/types.h>
  23. #include <linux/module.h>
  24. #include <linux/spinlock.h>
  25. #include <linux/smp.h>
  26. #include <linux/pci.h>
  27. #include <linux/time.h>
  28. #include <linux/nmi.h>
  29. #include <linux/rcupdate.h>
  30. #include <linux/completion.h>
  31. #include <linux/kobject.h>
  32. #include <linux/platform_device.h>
  33. #include <linux/workqueue.h>
  34. #include <linux/edac.h>
  /*
   * Length limits for EDAC names and attribute values.  The name arrays in
   * this header are sized from EDAC_DEVICE_NAME_LEN plus extra bytes.
   */
  #define EDAC_DEVICE_NAME_LEN 31
  #define EDAC_ATTRIB_VALUE_LEN 15

  /*
   * Convert between a number of pages and a number of MiB (2^20 bytes).
   * Which way to shift depends on whether a page is smaller than one MiB;
   * the right shifts discard any remainder.
   */
  #if PAGE_SHIFT < 20
  #define PAGES_TO_MiB(nr_pages) ((nr_pages) >> (20 - PAGE_SHIFT))
  #define MiB_TO_PAGES(nr_mib) ((nr_mib) << (20 - PAGE_SHIFT))
  #else /* PAGE_SHIFT >= 20 */
  #define PAGES_TO_MiB(nr_pages) ((nr_pages) << (PAGE_SHIFT - 20))
  #define MiB_TO_PAGES(nr_mib) ((nr_mib) >> (PAGE_SHIFT - 20))
  #endif
  /*
   * printk() wrappers that tag every message with an "EDAC ..." header.
   *
   * @level and @prefix must be string literals: they are pasted onto the
   * format string by literal concatenation.  @fmt is a printk format string
   * and any further arguments are passed through to printk().
   *
   *   edac_printk()            "EDAC <prefix>: ..."
   *   edac_mc_printk()         "EDAC MC<mci->mc_idx>: ..."
   *   edac_mc_chipset_printk() "EDAC <prefix> MC<mci->mc_idx>: ..."
   *   edac_device_printk()     "EDAC DEVICE<ctl->dev_idx>: ..."
   *   edac_pci_printk()        "EDAC PCI<ctl->pci_idx>: ..."
   *
   * The controller argument is parenthesized before '->' so that any pointer
   * expression (e.g. "base + i" or "&table[i]") can be passed safely.
   */
  #define edac_printk(level, prefix, fmt, arg...) \
      printk(level "EDAC " prefix ": " fmt, ##arg)

  #define edac_mc_printk(mci, level, fmt, arg...) \
      printk(level "EDAC MC%d: " fmt, (mci)->mc_idx, ##arg)

  #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
      printk(level "EDAC " prefix " MC%d: " fmt, (mci)->mc_idx, ##arg)

  #define edac_device_printk(ctl, level, fmt, arg...) \
      printk(level "EDAC DEVICE%d: " fmt, (ctl)->dev_idx, ##arg)

  #define edac_pci_printk(ctl, level, fmt, arg...) \
      printk(level "EDAC PCI%d: " fmt, (ctl)->pci_idx, ##arg)
  /*
   * Subsystem tags meant to be passed as the 'prefix' argument of
   * edac_printk() and edac_mc_chipset_printk().
   */
  #define EDAC_MC "MC"
  #define EDAC_PCI "PCI"
  #define EDAC_DEBUG "DEBUG"

  /* Table of memory type names; defined outside this header. */
  extern const char * const edac_mem_types[];
  /*
   * edac_dbg(level, fmt, ...) - conditional debug message.
   *
   * The message is emitted through edac_printk() at KERN_DEBUG with the
   * EDAC_DEBUG prefix and the calling function's name prepended.
   *
   * With CONFIG_EDAC_DEBUG the message is printed only when @level is less
   * than or equal to edac_debug_level.  @level is parenthesized so that an
   * expression such as "cond ? 1 : 2" is compared as a whole.
   *
   * Without CONFIG_EDAC_DEBUG the call sits behind "if (0)": the arguments
   * are still seen by the compiler but are never evaluated and nothing is
   * printed.
   */
  #ifdef CONFIG_EDAC_DEBUG
  extern int edac_debug_level;
  #define edac_dbg(level, fmt, ...) \
  do { \
      if ((level) <= edac_debug_level) \
          edac_printk(KERN_DEBUG, EDAC_DEBUG, \
                      "%s: " fmt, __func__, ##__VA_ARGS__); \
  } while (0)
  #else /* !CONFIG_EDAC_DEBUG */
  #define edac_dbg(level, fmt, ...) \
  do { \
      if (0) \
          edac_printk(KERN_DEBUG, EDAC_DEBUG, \
                      "%s: " fmt, __func__, ##__VA_ARGS__); \
  } while (0)
  #endif /* !CONFIG_EDAC_DEBUG */
  /*
   * Expand to the "PCI_VENDOR_ID_<vendor>, PCI_DEVICE_ID_<vendor>_<device>"
   * pair.  The result is a comma-separated list, so it is deliberately not
   * wrapped in parentheses.
   */
  #define PCI_VEND_DEV(vendor, device) PCI_VENDOR_ID_ ## vendor, \
      PCI_DEVICE_ID_ ## vendor ## _ ## device

  /* Read the dev_name member of a control structure pointer. */
  #define edac_dev_name(ctl) ((ctl)->dev_name)

  /* Map a pointer to the embedded 'dev' member back to its mem_ctl_info. */
  #define to_mci(kdev) container_of(kdev, struct mem_ctl_info, dev)
  79. /*
  80. * The following are the structures to provide for a generic
  81. * or abstract 'edac_device'. This set of structures and the
  82. * code that implements the APIs for the same, provide for
  83. * registering EDAC type devices which are NOT standard memory.
  84. *
  85. * CPU caches (L1 and L2)
  86. * DMA engines
  87. * Core CPU switches
  88. * Fabric switch units
  89. * PCIe interface controllers
  90. * other EDAC/ECC type devices that can be monitored for
  91. * errors, etc.
  92. *
  93. * It allows for a 2 level set of hierarchy. For example:
  94. *
  95. * cache could be composed of L1, L2 and L3 levels of cache.
  96. * Each CPU core would have its own L1 cache, while sharing
  97. * L2 and maybe L3 caches.
  98. *
   * View them arranged, via the sysfs presentation:
   *   /sys/devices/system/edac/..
   *
   *     mc/              <existing memory device directory>
   *     cpu/cpu0/..      <L1 and L2 block directory>
   *         /L1-cache/ce_count
   *                  /ue_count
   *         /L2-cache/ce_count
   *                  /ue_count
   *     cpu/cpu1/..      <L1 and L2 block directory>
   *         /L1-cache/ce_count
   *                  /ue_count
   *         /L2-cache/ce_count
   *                  /ue_count
   *     ...
   *
   * The L1 and L2 directories would be "edac_device_block"s.
   */
  117. struct edac_device_counter {
  118. u32 ue_count;
  119. u32 ce_count;
  120. };
  121. /* forward reference */
  122. struct edac_device_ctl_info;
  123. struct edac_device_block;
  124. /* edac_dev_sysfs_attribute structure
  125. * used for driver sysfs attributes in mem_ctl_info
  126. * for extra controls and attributes:
  127. * like high level error Injection controls
  128. */
  129. struct edac_dev_sysfs_attribute {
  130. struct attribute attr;
  131. ssize_t (*show)(struct edac_device_ctl_info *, char *);
  132. ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t);
  133. };
  134. /* edac_dev_sysfs_block_attribute structure
  135. *
  136. * used in leaf 'block' nodes for adding controls/attributes
  137. *
  138. * each block in each instance of the containing control structure
  139. * can have an array of the following. The show and store functions
  140. * will be filled in with the show/store function in the
  141. * low level driver.
  142. *
  143. * The 'value' field will be the actual value field used for
  144. * counting
  145. */
  146. struct edac_dev_sysfs_block_attribute {
  147. struct attribute attr;
  148. ssize_t (*show)(struct kobject *, struct attribute *, char *);
  149. ssize_t (*store)(struct kobject *, struct attribute *,
  150. const char *, size_t);
  151. struct edac_device_block *block;
  152. unsigned int value;
  153. };
  154. /* device block control structure */
  155. struct edac_device_block {
  156. struct edac_device_instance *instance; /* Up Pointer */
  157. char name[EDAC_DEVICE_NAME_LEN + 1];
  158. struct edac_device_counter counters; /* basic UE and CE counters */
  159. int nr_attribs; /* how many attributes */
  160. /* this block's attributes, could be NULL */
  161. struct edac_dev_sysfs_block_attribute *block_attributes;
  162. /* edac sysfs device control */
  163. struct kobject kobj;
  164. };
  165. /* device instance control structure */
  166. struct edac_device_instance {
  167. struct edac_device_ctl_info *ctl; /* Up pointer */
  168. char name[EDAC_DEVICE_NAME_LEN + 4];
  169. struct edac_device_counter counters; /* instance counters */
  170. u32 nr_blocks; /* how many blocks */
  171. struct edac_device_block *blocks; /* block array */
  172. /* edac sysfs device control */
  173. struct kobject kobj;
  174. };
  175. /*
  176. * Abstract edac_device control info structure
  177. *
  178. */
  179. struct edac_device_ctl_info {
  180. /* for global list of edac_device_ctl_info structs */
  181. struct list_head link;
  182. struct module *owner; /* Module owner of this control struct */
  183. int dev_idx;
  184. /* Per instance controls for this edac_device */
  185. int log_ue; /* boolean for logging UEs */
  186. int log_ce; /* boolean for logging CEs */
  187. int panic_on_ue; /* boolean for panic'ing on an UE */
  188. unsigned poll_msec; /* number of milliseconds to poll interval */
  189. unsigned long delay; /* number of jiffies for poll_msec */
  190. /* Additional top controller level attributes, but specified
  191. * by the low level driver.
  192. *
  193. * Set by the low level driver to provide attributes at the
  194. * controller level, same level as 'ue_count' and 'ce_count' above.
  195. * An array of structures, NULL terminated
  196. *
  197. * If attributes are desired, then set to array of attributes
  198. * If no attributes are desired, leave NULL
  199. */
  200. struct edac_dev_sysfs_attribute *sysfs_attributes;
  201. /* pointer to main 'edac' subsys in sysfs */
  202. struct bus_type *edac_subsys;
  203. /* the internal state of this controller instance */
  204. int op_state;
  205. /* work struct for this instance */
  206. struct delayed_work work;
  207. /* pointer to edac polling checking routine:
  208. * If NOT NULL: points to polling check routine
  209. * If NULL: Then assumes INTERRUPT operation, where
  210. * MC driver will receive events
  211. */
  212. void (*edac_check) (struct edac_device_ctl_info * edac_dev);
  213. struct device *dev; /* pointer to device structure */
  214. const char *mod_name; /* module name */
  215. const char *ctl_name; /* edac controller name */
  216. const char *dev_name; /* pci/platform/etc... name */
  217. void *pvt_info; /* pointer to 'private driver' info */
  218. unsigned long start_time; /* edac_device load start time (jiffies) */
  219. struct completion removal_complete;
  220. /* sysfs top name under 'edac' directory
  221. * and instance name:
  222. * cpu/cpu0/...
  223. * cpu/cpu1/...
  224. * cpu/cpu2/...
  225. * ...
  226. */
  227. char name[EDAC_DEVICE_NAME_LEN + 1];
  228. /* Number of instances supported on this control structure
  229. * and the array of those instances
  230. */
  231. u32 nr_instances;
  232. struct edac_device_instance *instances;
  233. /* Event counters for the this whole EDAC Device */
  234. struct edac_device_counter counters;
  235. /* edac sysfs device control for the 'name'
  236. * device this structure controls
  237. */
  238. struct kobject kobj;
  239. };
  /*
   * Recover the enclosing control structure from a pointer to its embedded
   * 'work' member.
   */
  #define to_edac_mem_ctl_work(dwork) \
      container_of(dwork, struct mem_ctl_info, work)
  #define to_edac_device_ctl_work(dwork) \
      container_of(dwork, struct edac_device_ctl_info, work)
  /*
   * Allocation and release of the 'edac_device' control info structure.
   * A driver allocates one of these for each edac_device it is going to
   * control/register with the EDAC core.
   *
   * offset_value can be:
   *   BLOCK_OFFSET_VALUE_OFF (-1)  no offset value
   *   0                            zero-based block numbers
   *   1                            one-based block numbers
   *   other                        other-based block numbers
   */
  #define BLOCK_OFFSET_VALUE_OFF ((unsigned int)-1)

  extern struct edac_device_ctl_info *edac_device_alloc_ctl_info(
      unsigned int sizeof_private,
      char *edac_device_name,
      unsigned int nr_instances,
      char *edac_block_name,
      unsigned int nr_blocks,
      unsigned int offset_value,
      struct edac_dev_sysfs_block_attribute *block_attributes,
      unsigned int nr_attribs,
      int device_index);

  extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
  266. #ifdef CONFIG_PCI
  267. struct edac_pci_counter {
  268. atomic_t pe_count;
  269. atomic_t npe_count;
  270. };
  271. /*
  272. * Abstract edac_pci control info structure
  273. *
  274. */
  275. struct edac_pci_ctl_info {
  276. /* for global list of edac_pci_ctl_info structs */
  277. struct list_head link;
  278. int pci_idx;
  279. struct bus_type *edac_subsys; /* pointer to subsystem */
  280. /* the internal state of this controller instance */
  281. int op_state;
  282. /* work struct for this instance */
  283. struct delayed_work work;
  284. /* pointer to edac polling checking routine:
  285. * If NOT NULL: points to polling check routine
  286. * If NULL: Then assumes INTERRUPT operation, where
  287. * MC driver will receive events
  288. */
  289. void (*edac_check) (struct edac_pci_ctl_info * edac_dev);
  290. struct device *dev; /* pointer to device structure */
  291. const char *mod_name; /* module name */
  292. const char *ctl_name; /* edac controller name */
  293. const char *dev_name; /* pci/platform/etc... name */
  294. void *pvt_info; /* pointer to 'private driver' info */
  295. unsigned long start_time; /* edac_pci load start time (jiffies) */
  296. struct completion complete;
  297. /* sysfs top name under 'edac' directory
  298. * and instance name:
  299. * cpu/cpu0/...
  300. * cpu/cpu1/...
  301. * cpu/cpu2/...
  302. * ...
  303. */
  304. char name[EDAC_DEVICE_NAME_LEN + 1];
  305. /* Event counters for the this whole EDAC Device */
  306. struct edac_pci_counter counters;
  307. /* edac sysfs device control for the 'name'
  308. * device this structure controls
  309. */
  310. struct kobject kobj;
  311. struct completion kobj_complete;
  312. };
  /* Recover the edac_pci_ctl_info that embeds the given 'work' member. */
  #define to_edac_pci_ctl_work(dwork) \
      container_of(dwork, struct edac_pci_ctl_info, work)
  315. /* write all or some bits in a byte-register*/
  316. static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value,
  317. u8 mask)
  318. {
  319. if (mask != 0xff) {
  320. u8 buf;
  321. pci_read_config_byte(pdev, offset, &buf);
  322. value &= mask;
  323. buf &= ~mask;
  324. value |= buf;
  325. }
  326. pci_write_config_byte(pdev, offset, value);
  327. }
  328. /* write all or some bits in a word-register*/
  329. static inline void pci_write_bits16(struct pci_dev *pdev, int offset,
  330. u16 value, u16 mask)
  331. {
  332. if (mask != 0xffff) {
  333. u16 buf;
  334. pci_read_config_word(pdev, offset, &buf);
  335. value &= mask;
  336. buf &= ~mask;
  337. value |= buf;
  338. }
  339. pci_write_config_word(pdev, offset, value);
  340. }
  341. /*
  342. * pci_write_bits32
  343. *
  344. * edac local routine to do pci_write_config_dword, but adds
  345. * a mask parameter. If mask is all ones, ignore the mask.
  346. * Otherwise utilize the mask to isolate specified bits
  347. *
  348. * write all or some bits in a dword-register
  349. */
  350. static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
  351. u32 value, u32 mask)
  352. {
  353. if (mask != 0xffffffff) {
  354. u32 buf;
  355. pci_read_config_dword(pdev, offset, &buf);
  356. value &= mask;
  357. buf &= ~mask;
  358. value |= buf;
  359. }
  360. pci_write_config_dword(pdev, offset, value);
  361. }
  362. #endif /* CONFIG_PCI */
  /*
   * Memory controller (mem_ctl_info) allocation, registration and lookup.
   * The functions are implemented outside this header.
   */
  struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
                                     unsigned int n_layers,
                                     struct edac_mc_layer *layers,
                                     unsigned int sz_pvt);
  int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
                                 const struct attribute_group **groups);

  /* Convenience form of edac_mc_add_mc_with_groups() that passes NULL groups. */
  #define edac_mc_add_mc(mci) edac_mc_add_mc_with_groups(mci, NULL)

  void edac_mc_free(struct mem_ctl_info *mci);
  struct mem_ctl_info *edac_mc_find(int idx);
  struct mem_ctl_info *find_mci_by_dev(struct device *dev);
  struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
  int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
                                 unsigned long page);
  376. void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
  377. struct mem_ctl_info *mci,
  378. struct edac_raw_error_desc *e);
  379. void edac_mc_handle_error(const enum hw_event_mc_err_type type,
  380. struct mem_ctl_info *mci,
  381. const u16 error_count,
  382. const unsigned long page_frame_number,
  383. const unsigned long offset_in_page,
  384. const unsigned long syndrome,
  385. const int top_layer,
  386. const int mid_layer,
  387. const int low_layer,
  388. const char *msg,
  389. const char *other_detail);
  /*
   * edac_device APIs: registration, removal, UE/CE event reporting and
   * index allocation for edac_device control structures.
   */
  int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
  struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
  void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
                             int inst_nr, int block_nr, const char *msg);
  void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
                             int inst_nr, int block_nr, const char *msg);
  int edac_device_alloc_index(void);

  /* Table of layer names; defined outside this header. */
  extern const char *edac_layer_name[];
  /*
   * edac_pci APIs: allocation, registration, removal and sysfs handling for
   * edac_pci control structures.
   */
  struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
                                                    const char *edac_pci_name);
  void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci);
  void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci,
                                   unsigned long value);
  int edac_pci_alloc_index(void);
  int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx);
  struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev);
  struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
                                                        const char *mod_name);
  void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci);
  int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci);
  void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);
  /*
   * edac misc APIs
   */

  /* Takes an op_state value and returns a string for it. */
  char *edac_op_state_to_string(int op_state);
  422. #endif /* _EDAC_CORE_H_ */