edac_core.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519
  1. /*
  2. * Defines, structures, APIs for edac_core module
  3. *
  4. * (C) 2007 Linux Networx (http://lnxi.com)
  5. * This file may be distributed under the terms of the
  6. * GNU General Public License.
  7. *
  8. * Written by Thayne Harbaugh
  9. * Based on work by Dan Hollis <goemon at anime dot net> and others.
  10. * http://www.anime.net/~goemon/linux-ecc/
  11. *
  12. * NMI handling support added by
  13. * Dave Peterson <[email protected]> <[email protected]>
  14. *
  15. * Refactored for multi-source files:
  16. * Doug Thompson <[email protected]>
  17. *
  18. */
  19. #ifndef _EDAC_CORE_H_
  20. #define _EDAC_CORE_H_
  21. #include <linux/kernel.h>
  22. #include <linux/types.h>
  23. #include <linux/module.h>
  24. #include <linux/spinlock.h>
  25. #include <linux/smp.h>
  26. #include <linux/pci.h>
  27. #include <linux/time.h>
  28. #include <linux/nmi.h>
  29. #include <linux/rcupdate.h>
  30. #include <linux/completion.h>
  31. #include <linux/kobject.h>
  32. #include <linux/platform_device.h>
  33. #include <linux/workqueue.h>
  34. #include <linux/edac.h>
  /*
   * Length limits for EDAC names and attribute values.  Name buffers in
   * this header are declared with extra bytes on top of
   * EDAC_DEVICE_NAME_LEN, so the limit presumably excludes the NUL.
   */
  #define EDAC_DEVICE_NAME_LEN    31
  #define EDAC_ATTRIB_VALUE_LEN   15
  /*
   * Convert between a page count and a size in MiB (1 MiB == 1 << 20 bytes).
   * The shift direction depends on whether PAGE_SHIFT is below 20.
   */
  #if PAGE_SHIFT >= 20
  #define PAGES_TO_MiB(nr_pages)  ((nr_pages) << (PAGE_SHIFT - 20))
  #define MiB_TO_PAGES(mib)       ((mib) >> (PAGE_SHIFT - 20))
  #else /* PAGE_SHIFT < 20 */
  #define PAGES_TO_MiB(nr_pages)  ((nr_pages) >> (20 - PAGE_SHIFT))
  #define MiB_TO_PAGES(mib)       ((mib) << (20 - PAGE_SHIFT))
  #endif
  /*
   * printk() wrappers that prepend an "EDAC ..." tag to every message.
   *
   * @level:  KERN_* log level string, pasted in front of the format
   * @prefix: string literal naming the source (e.g. EDAC_MC)
   * @mci:    pointer to an object with an 'mc_idx' member
   * @ctl:    pointer to an object with a 'dev_idx' / 'pci_idx' member
   * @fmt:    printk format string literal, followed by its arguments
   *
   * The pointer arguments are parenthesized before '->' so that any
   * pointer expression (e.g. "base + 1") can be passed safely.
   */
  #define edac_printk(level, prefix, fmt, arg...) \
      printk(level "EDAC " prefix ": " fmt, ##arg)

  #define edac_mc_printk(mci, level, fmt, arg...) \
      printk(level "EDAC MC%d: " fmt, (mci)->mc_idx, ##arg)

  #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
      printk(level "EDAC " prefix " MC%d: " fmt, (mci)->mc_idx, ##arg)

  #define edac_device_printk(ctl, level, fmt, arg...) \
      printk(level "EDAC DEVICE%d: " fmt, (ctl)->dev_idx, ##arg)

  #define edac_pci_printk(ctl, level, fmt, arg...) \
      printk(level "EDAC PCI%d: " fmt, (ctl)->pci_idx, ##arg)
  /*
   * Source tags for the 'prefix' argument of edac_printk() and
   * edac_mc_chipset_printk().
   */
  #define EDAC_MC     "MC"
  #define EDAC_PCI    "PCI"
  #define EDAC_DEBUG  "DEBUG"

  /* String table defined elsewhere; presumably indexed by memory type. */
  extern const char * const edac_mem_types[];
  /*
   * edac_dbg() - emit a KERN_DEBUG message prefixed with the caller's name.
   *
   * @level: verbosity of this message
   * @fmt:   printk format string literal, followed by its arguments
   *
   * With CONFIG_EDAC_DEBUG the message is printed only when @level does
   * not exceed edac_debug_level.  @level is parenthesized so that an
   * expression such as "cond ? 1 : 2" compares as a whole.
   *
   * Without CONFIG_EDAC_DEBUG the call is kept behind "if (0)": the
   * compiler still sees it, but it never executes and @level is unused.
   */
  #ifdef CONFIG_EDAC_DEBUG
  extern int edac_debug_level;

  #define edac_dbg(level, fmt, ...)                                   \
  do {                                                                \
      if ((level) <= edac_debug_level)                                \
          edac_printk(KERN_DEBUG, EDAC_DEBUG,                         \
                      "%s: " fmt, __func__, ##__VA_ARGS__);           \
  } while (0)

  #else /* !CONFIG_EDAC_DEBUG */

  #define edac_dbg(level, fmt, ...)                                   \
  do {                                                                \
      if (0)                                                          \
          edac_printk(KERN_DEBUG, EDAC_DEBUG,                         \
                      "%s: " fmt, __func__, ##__VA_ARGS__);           \
  } while (0)

  #endif /* !CONFIG_EDAC_DEBUG */
  /*
   * Expand to the comma-separated pair
   * PCI_VENDOR_ID_<vendor>, PCI_DEVICE_ID_<vendor>_<device>.
   */
  #define PCI_VEND_DEV(vendor, device) \
      PCI_VENDOR_ID_ ## vendor, PCI_DEVICE_ID_ ## vendor ## _ ## device

  /* Read the 'dev_name' member of a control structure pointer. */
  #define edac_dev_name(ctl)  (ctl)->dev_name

  /* Map a pointer to the embedded 'dev' member back to its mem_ctl_info. */
  #define to_mci(kdev)  container_of(kdev, struct mem_ctl_info, dev)
  /*
   * The following structures provide a generic, abstract 'edac_device'.
   * Together with the code that implements their APIs, they allow
   * registering EDAC type devices which are NOT standard memory:
   *
   *      CPU caches (L1 and L2)
   *      DMA engines
   *      Core CPU switches
   *      Fabric switch units
   *      PCIe interface controllers
   *      other EDAC/ECC type devices that can be monitored for
   *              errors, etc.
   *
   * A two-level hierarchy is supported.  For example:
   *
   * A cache could be composed of L1, L2 and L3 levels of cache.
   * Each CPU core would have its own L1 cache, while sharing
   * L2 and maybe L3 caches.
   *
   * View them arranged, via the sysfs presentation:
   *
   *      /sys/devices/system/edac/..
   *
   *              mc/             <existing memory device directory>
   *              cpu/cpu0/..     <L1 and L2 block directory>
   *                      /L1-cache/ce_count
   *                               /ue_count
   *                      /L2-cache/ce_count
   *                               /ue_count
   *              cpu/cpu1/..     <L1 and L2 block directory>
   *                      /L1-cache/ce_count
   *                               /ue_count
   *                      /L2-cache/ce_count
   *                               /ue_count
   *              ...
   *
   * The L1 and L2 directories would be "edac_device_block"s.
   */
  117. struct edac_device_counter {
  118. u32 ue_count;
  119. u32 ce_count;
  120. };
  121. /* forward reference */
  122. struct edac_device_ctl_info;
  123. struct edac_device_block;
  124. /* edac_dev_sysfs_attribute structure
  125. * used for driver sysfs attributes in mem_ctl_info
  126. * for extra controls and attributes:
  127. * like high level error Injection controls
  128. */
  129. struct edac_dev_sysfs_attribute {
  130. struct attribute attr;
  131. ssize_t (*show)(struct edac_device_ctl_info *, char *);
  132. ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t);
  133. };
  134. /* edac_dev_sysfs_block_attribute structure
  135. *
  136. * used in leaf 'block' nodes for adding controls/attributes
  137. *
  138. * each block in each instance of the containing control structure
  139. * can have an array of the following. The show and store functions
  140. * will be filled in with the show/store function in the
  141. * low level driver.
  142. *
  143. * The 'value' field will be the actual value field used for
  144. * counting
  145. */
  146. struct edac_dev_sysfs_block_attribute {
  147. struct attribute attr;
  148. ssize_t (*show)(struct kobject *, struct attribute *, char *);
  149. ssize_t (*store)(struct kobject *, struct attribute *,
  150. const char *, size_t);
  151. struct edac_device_block *block;
  152. unsigned int value;
  153. };
  154. /* device block control structure */
  155. struct edac_device_block {
  156. struct edac_device_instance *instance; /* Up Pointer */
  157. char name[EDAC_DEVICE_NAME_LEN + 1];
  158. struct edac_device_counter counters; /* basic UE and CE counters */
  159. int nr_attribs; /* how many attributes */
  160. /* this block's attributes, could be NULL */
  161. struct edac_dev_sysfs_block_attribute *block_attributes;
  162. /* edac sysfs device control */
  163. struct kobject kobj;
  164. };
  165. /* device instance control structure */
  166. struct edac_device_instance {
  167. struct edac_device_ctl_info *ctl; /* Up pointer */
  168. char name[EDAC_DEVICE_NAME_LEN + 4];
  169. struct edac_device_counter counters; /* instance counters */
  170. u32 nr_blocks; /* how many blocks */
  171. struct edac_device_block *blocks; /* block array */
  172. /* edac sysfs device control */
  173. struct kobject kobj;
  174. };
  175. /*
  176. * Abstract edac_device control info structure
  177. *
  178. */
  179. struct edac_device_ctl_info {
  180. /* for global list of edac_device_ctl_info structs */
  181. struct list_head link;
  182. struct module *owner; /* Module owner of this control struct */
  183. int dev_idx;
  184. /* Per instance controls for this edac_device */
  185. int log_ue; /* boolean for logging UEs */
  186. int log_ce; /* boolean for logging CEs */
  187. int panic_on_ce; /* boolean for panic'ing on an CE */
  188. int panic_on_ue; /* boolean for panic'ing on an UE */
  189. unsigned poll_msec; /* number of milliseconds to poll interval */
  190. unsigned long delay; /* number of jiffies for poll_msec */
  191. bool defer_work; /* Create a deferrable work for polling */
  192. /* Additional top controller level attributes, but specified
  193. * by the low level driver.
  194. *
  195. * Set by the low level driver to provide attributes at the
  196. * controller level, same level as 'ue_count' and 'ce_count' above.
  197. * An array of structures, NULL terminated
  198. *
  199. * If attributes are desired, then set to array of attributes
  200. * If no attributes are desired, leave NULL
  201. */
  202. struct edac_dev_sysfs_attribute *sysfs_attributes;
  203. /* pointer to main 'edac' subsys in sysfs */
  204. struct bus_type *edac_subsys;
  205. /* the internal state of this controller instance */
  206. int op_state;
  207. /* work struct for this instance */
  208. struct delayed_work work;
  209. /* pointer to edac polling checking routine:
  210. * If NOT NULL: points to polling check routine
  211. * If NULL: Then assumes INTERRUPT operation, where
  212. * MC driver will receive events
  213. */
  214. void (*edac_check) (struct edac_device_ctl_info * edac_dev);
  215. struct device *dev; /* pointer to device structure */
  216. const char *mod_name; /* module name */
  217. const char *ctl_name; /* edac controller name */
  218. const char *dev_name; /* pci/platform/etc... name */
  219. void *pvt_info; /* pointer to 'private driver' info */
  220. unsigned long start_time; /* edac_device load start time (jiffies) */
  221. struct completion removal_complete;
  222. /* sysfs top name under 'edac' directory
  223. * and instance name:
  224. * cpu/cpu0/...
  225. * cpu/cpu1/...
  226. * cpu/cpu2/...
  227. * ...
  228. */
  229. char name[EDAC_DEVICE_NAME_LEN + 1];
  230. /* Number of instances supported on this control structure
  231. * and the array of those instances
  232. */
  233. u32 nr_instances;
  234. struct edac_device_instance *instances;
  235. /* Event counters for the this whole EDAC Device */
  236. struct edac_device_counter counters;
  237. /* edac sysfs device control for the 'name'
  238. * device this structure controls
  239. */
  240. struct kobject kobj;
  241. };
  /*
   * Map a pointer to the embedded 'work' member back to the start of the
   * control structure that contains it.
   */
  #define to_edac_mem_ctl_work(wq) \
      container_of(wq, struct mem_ctl_info, work)

  #define to_edac_device_ctl_work(wq) \
      container_of(wq, struct edac_device_ctl_info, work)
  /*
   * Allocation and release of the 'edac_device' control info structure.
   * A driver allocates one of these for each edac_device it is going to
   * control/register with the EDAC core.
   *
   * @offset_value can be:
   *      -1      no offset value (see BLOCK_OFFSET_VALUE_OFF)
   *      0       zero-based block numbers
   *      1       one-based block numbers
   *      other   other-based block numbers
   */
  struct edac_device_ctl_info *edac_device_alloc_ctl_info(
      unsigned int sizeof_private,
      char *edac_device_name, unsigned int nr_instances,
      char *edac_block_name, unsigned int nr_blocks,
      unsigned int offset_value,
      struct edac_dev_sysfs_block_attribute *block_attributes,
      unsigned int nr_attribs,
      int device_index);

  #define BLOCK_OFFSET_VALUE_OFF  ((unsigned int)-1)

  void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
  268. #ifdef CONFIG_PCI
  269. struct edac_pci_counter {
  270. atomic_t pe_count;
  271. atomic_t npe_count;
  272. };
  273. /*
  274. * Abstract edac_pci control info structure
  275. *
  276. */
  277. struct edac_pci_ctl_info {
  278. /* for global list of edac_pci_ctl_info structs */
  279. struct list_head link;
  280. int pci_idx;
  281. struct bus_type *edac_subsys; /* pointer to subsystem */
  282. /* the internal state of this controller instance */
  283. int op_state;
  284. /* work struct for this instance */
  285. struct delayed_work work;
  286. /* pointer to edac polling checking routine:
  287. * If NOT NULL: points to polling check routine
  288. * If NULL: Then assumes INTERRUPT operation, where
  289. * MC driver will receive events
  290. */
  291. void (*edac_check) (struct edac_pci_ctl_info * edac_dev);
  292. struct device *dev; /* pointer to device structure */
  293. const char *mod_name; /* module name */
  294. const char *ctl_name; /* edac controller name */
  295. const char *dev_name; /* pci/platform/etc... name */
  296. void *pvt_info; /* pointer to 'private driver' info */
  297. unsigned long start_time; /* edac_pci load start time (jiffies) */
  298. struct completion complete;
  299. /* sysfs top name under 'edac' directory
  300. * and instance name:
  301. * cpu/cpu0/...
  302. * cpu/cpu1/...
  303. * cpu/cpu2/...
  304. * ...
  305. */
  306. char name[EDAC_DEVICE_NAME_LEN + 1];
  307. /* Event counters for the this whole EDAC Device */
  308. struct edac_pci_counter counters;
  309. /* edac sysfs device control for the 'name'
  310. * device this structure controls
  311. */
  312. struct kobject kobj;
  313. struct completion kobj_complete;
  314. };
  /* Map a pointer to the embedded 'work' member back to its edac_pci_ctl_info. */
  #define to_edac_pci_ctl_work(wq) \
      container_of(wq, struct edac_pci_ctl_info, work)
  317. /* write all or some bits in a byte-register*/
  318. static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value,
  319. u8 mask)
  320. {
  321. if (mask != 0xff) {
  322. u8 buf;
  323. pci_read_config_byte(pdev, offset, &buf);
  324. value &= mask;
  325. buf &= ~mask;
  326. value |= buf;
  327. }
  328. pci_write_config_byte(pdev, offset, value);
  329. }
  330. /* write all or some bits in a word-register*/
  331. static inline void pci_write_bits16(struct pci_dev *pdev, int offset,
  332. u16 value, u16 mask)
  333. {
  334. if (mask != 0xffff) {
  335. u16 buf;
  336. pci_read_config_word(pdev, offset, &buf);
  337. value &= mask;
  338. buf &= ~mask;
  339. value |= buf;
  340. }
  341. pci_write_config_word(pdev, offset, value);
  342. }
  343. /*
  344. * pci_write_bits32
  345. *
  346. * edac local routine to do pci_write_config_dword, but adds
  347. * a mask parameter. If mask is all ones, ignore the mask.
  348. * Otherwise utilize the mask to isolate specified bits
  349. *
  350. * write all or some bits in a dword-register
  351. */
  352. static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
  353. u32 value, u32 mask)
  354. {
  355. if (mask != 0xffffffff) {
  356. u32 buf;
  357. pci_read_config_dword(pdev, offset, &buf);
  358. value &= mask;
  359. buf &= ~mask;
  360. value |= buf;
  361. }
  362. pci_write_config_dword(pdev, offset, value);
  363. }
  364. #endif /* CONFIG_PCI */
  /*
   * Memory controller (mem_ctl_info) allocation, registration and lookup.
   * The functions are implemented outside this header.
   */
  struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
                                     unsigned int n_layers,
                                     struct edac_mc_layer *layers,
                                     unsigned int sz_pvt);
  int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
                                 const struct attribute_group **groups);

  /* Shorthand for edac_mc_add_mc_with_groups() with a NULL groups argument. */
  #define edac_mc_add_mc(mci)  edac_mc_add_mc_with_groups(mci, NULL)

  void edac_mc_free(struct mem_ctl_info *mci);
  struct mem_ctl_info *edac_mc_find(int idx);
  struct mem_ctl_info *find_mci_by_dev(struct device *dev);
  struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
  int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
                                 unsigned long page);
  378. void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
  379. struct mem_ctl_info *mci,
  380. struct edac_raw_error_desc *e);
  381. void edac_mc_handle_error(const enum hw_event_mc_err_type type,
  382. struct mem_ctl_info *mci,
  383. const u16 error_count,
  384. const unsigned long page_frame_number,
  385. const unsigned long offset_in_page,
  386. const unsigned long syndrome,
  387. const int top_layer,
  388. const int mid_layer,
  389. const int low_layer,
  390. const char *msg,
  391. const char *other_detail);
  /*
   * edac_device APIs: add/delete a device, report a UE or CE against an
   * instance/block number pair, and allocate a device index.
   */
  int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
  struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
  void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
                             int inst_nr, int block_nr, const char *msg);
  void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
                             int inst_nr, int block_nr, const char *msg);
  int edac_device_alloc_index(void);

  /* String table defined elsewhere; presumably names of the MC layers. */
  extern const char *edac_layer_name[];
  /*
   * edac_pci APIs: control structure allocation and release, device
   * add/delete, the generic control helpers and sysfs setup/teardown.
   */
  struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
                                                    const char *edac_pci_name);
  void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci);

  void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci,
                                   unsigned long value);

  int edac_pci_alloc_index(void);
  int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx);
  struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev);

  struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
                                                        const char *mod_name);
  void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci);

  int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci);
  void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);
  /*
   * edac misc APIs
   *
   * edac_op_state_to_string() presumably returns a printable name for an
   * 'op_state' value; the implementation is outside this header.
   */
  char *edac_op_state_to_string(int op_state);
  424. #endif /* _EDAC_CORE_H_ */