mce.c 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. /*
  2. * NFIT - Machine Check Handler
  3. *
  4. * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of version 2 of the GNU General Public License as
  8. * published by the Free Software Foundation.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. */
  15. #include <linux/notifier.h>
  16. #include <linux/acpi.h>
  17. #include <linux/nd.h>
  18. #include <asm/mce.h>
  19. #include "nfit.h"
  20. static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
  21. void *data)
  22. {
  23. struct mce *mce = (struct mce *)data;
  24. struct acpi_nfit_desc *acpi_desc;
  25. struct nfit_spa *nfit_spa;
  26. /* We only care about memory errors */
  27. if (!mce_is_memory_error(mce))
  28. return NOTIFY_DONE;
  29. /*
  30. * mce->addr contains the physical addr accessed that caused the
  31. * machine check. We need to walk through the list of NFITs, and see
  32. * if any of them matches that address, and only then start a scrub.
  33. */
  34. mutex_lock(&acpi_desc_lock);
  35. list_for_each_entry(acpi_desc, &acpi_descs, list) {
  36. struct device *dev = acpi_desc->dev;
  37. int found_match = 0;
  38. mutex_lock(&acpi_desc->init_mutex);
  39. list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
  40. struct acpi_nfit_system_address *spa = nfit_spa->spa;
  41. if (nfit_spa_type(spa) != NFIT_SPA_PM)
  42. continue;
  43. /* find the spa that covers the mce addr */
  44. if (spa->address > mce->addr)
  45. continue;
  46. if ((spa->address + spa->length - 1) < mce->addr)
  47. continue;
  48. found_match = 1;
  49. dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
  50. __func__, spa->range_index, spa->address,
  51. spa->length);
  52. /*
  53. * We can break at the first match because we're going
  54. * to rescan all the SPA ranges. There shouldn't be any
  55. * aliasing anyway.
  56. */
  57. break;
  58. }
  59. mutex_unlock(&acpi_desc->init_mutex);
  60. if (!found_match)
  61. continue;
  62. /* If this fails due to an -ENOMEM, there is little we can do */
  63. nvdimm_bus_add_poison(acpi_desc->nvdimm_bus,
  64. ALIGN(mce->addr, L1_CACHE_BYTES),
  65. L1_CACHE_BYTES);
  66. nvdimm_region_notify(nfit_spa->nd_region,
  67. NVDIMM_REVALIDATE_POISON);
  68. if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
  69. /*
  70. * We can ignore an -EBUSY here because if an ARS is
  71. * already in progress, just let that be the last
  72. * authoritative one
  73. */
  74. acpi_nfit_ars_rescan(acpi_desc);
  75. }
  76. break;
  77. }
  78. mutex_unlock(&acpi_desc_lock);
  79. return NOTIFY_DONE;
  80. }
  81. static struct notifier_block nfit_mce_dec = {
  82. .notifier_call = nfit_handle_mce,
  83. };
  84. void nfit_mce_register(void)
  85. {
  86. mce_register_decode_chain(&nfit_mce_dec);
  87. }
  88. void nfit_mce_unregister(void)
  89. {
  90. mce_unregister_decode_chain(&nfit_mce_dec);
  91. }