/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and * only version 2 as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ #include #include #include #include #include #include #include #include #include #include #include "edac_core.h" #ifdef CONFIG_EDAC_KRYO3XX_ARM64_POLL static int poll_msec = 1000; module_param(poll_msec, int, 0444); #endif #ifdef CONFIG_EDAC_KRYO3XX_ARM64_PANIC_ON_CE static bool panic_on_ce = 1; #else static bool panic_on_ce; #endif module_param_named(panic_on_ce, panic_on_ce, bool, 0664); #ifdef CONFIG_EDAC_KRYO3XX_ARM64_PANIC_ON_UE #define ARM64_ERP_PANIC_ON_UE 1 #else #define ARM64_ERP_PANIC_ON_UE 0 #endif #define L1 0x0 #define L2 0x1 #define L3 0x2 #define EDAC_CPU "kryo3xx_edac" #define KRYO3XX_ERRXSTATUS_VALID(a) ((a >> 30) & 0x1) #define KRYO3XX_ERRXSTATUS_UE(a) ((a >> 29) & 0x1) #define KRYO3XX_ERRXSTATUS_SERR(a) (a & 0xFF) #define KRYO3XX_ERRXMISC_LVL(a) ((a >> 1) & 0x7) #define KRYO3XX_ERRXMISC_WAY(a) ((a >> 28) & 0xF) static inline void set_errxctlr_el1(void) { u64 val = 0x10f; asm volatile("msr s3_0_c5_c4_1, %0" : : "r" (val)); } static inline void set_errxmisc_overflow(void) { u64 val = 0x7F7F00000000ULL; asm volatile("msr s3_0_c5_c5_0, %0" : : "r" (val)); } static inline void write_errselr_el1(u64 val) { asm volatile("msr s3_0_c5_c3_1, %0" : : "r" (val)); } static inline u64 read_errxstatus_el1(void) { u64 val; asm volatile("mrs %0, s3_0_c5_c4_2" : "=r" (val)); return val; } static inline u64 read_errxmisc_el1(void) { u64 val; asm volatile("mrs %0, s3_0_c5_c5_0" : "=r" (val)); return val; } static inline void clear_errxstatus_valid(u64 val) { asm volatile("msr s3_0_c5_c4_2, %0" : : "r" (val)); } struct errors_edac { const char * const msg; void (*func)(struct edac_device_ctl_info *edac_dev, int inst_nr, int block_nr, const char *msg); }; static const struct errors_edac errors[] = { {"Kryo3xx L1 Correctable Error", edac_device_handle_ce }, {"Kryo3xx L1 Uncorrectable Error", edac_device_handle_ue }, {"Kryo3xx L2 Correctable Error", edac_device_handle_ce }, {"Kryo3xx L2 Uncorrectable Error", edac_device_handle_ue }, {"L3 Correctable Error", edac_device_handle_ce }, {"L3 Uncorrectable Error", edac_device_handle_ue }, }; #define KRYO3XX_L1_CE 0 #define KRYO3XX_L1_UE 1 #define KRYO3XX_L2_CE 2 #define KRYO3XX_L2_UE 3 #define KRYO3XX_L3_CE 4 #define KRYO3XX_L3_UE 5 #define DATA_BUF_ERR 0x2 #define CACHE_DATA_ERR 0x6 #define CACHE_TAG_DIRTY_ERR 0x7 #define TLB_PARITY_ERR_DATA 0x8 #define TLB_PARITY_ERR_TAG 0x9 #define BUS_ERROR 0x12 struct erp_drvdata { struct edac_device_ctl_info *edev_ctl; struct erp_drvdata __percpu **erp_cpu_drvdata; struct notifier_block nb_pm; int ppi; }; static struct erp_drvdata *panic_handler_drvdata; static DEFINE_SPINLOCK(local_handler_lock); static void l1_l2_irq_enable(void *info) { int irq = *(int *)info; enable_percpu_irq(irq, IRQ_TYPE_LEVEL_HIGH); } static int request_erp_irq(struct platform_device *pdev, const char *propname, const char *desc, irq_handler_t handler, void *ed, int percpu) { int rc; struct resource *r; struct erp_drvdata *drv = ed; r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, propname); if (!r) { pr_err("ARM64 CPU ERP: Could not find <%s> IRQ property. Proceeding anyway.\n", propname); goto out; } if (!percpu) { rc = devm_request_threaded_irq(&pdev->dev, r->start, NULL, handler, IRQF_ONESHOT | IRQF_TRIGGER_HIGH, desc, ed); if (rc) { pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n", (int) r->start, rc, propname, desc); goto out; } } else { drv->erp_cpu_drvdata = alloc_percpu(struct erp_drvdata *); if (!drv->erp_cpu_drvdata) { pr_err("Failed to allocate percpu erp data\n"); goto out; } *raw_cpu_ptr(drv->erp_cpu_drvdata) = drv; rc = request_percpu_irq(r->start, handler, desc, drv->erp_cpu_drvdata); if (rc) { pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n", (int) r->start, rc, propname, desc); goto out_free; } drv->ppi = r->start; on_each_cpu(l1_l2_irq_enable, &(r->start), 1); } return 0; out_free: free_percpu(drv->erp_cpu_drvdata); drv->erp_cpu_drvdata = NULL; out: return -EINVAL; } static void dump_err_reg(int errorcode, int level, u64 errxstatus, u64 errxmisc, struct edac_device_ctl_info *edev_ctl) { edac_printk(KERN_CRIT, EDAC_CPU, "ERRXSTATUS_EL1: %llx\n", errxstatus); edac_printk(KERN_CRIT, EDAC_CPU, "ERRXMISC_EL1: %llx\n", errxmisc); edac_printk(KERN_CRIT, EDAC_CPU, "Cache level: L%d\n", level + 1); switch (KRYO3XX_ERRXSTATUS_SERR(errxstatus)) { case DATA_BUF_ERR: edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from internal data buffer\n"); break; case CACHE_DATA_ERR: edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from cache data RAM\n"); break; case CACHE_TAG_DIRTY_ERR: edac_printk(KERN_CRIT, EDAC_CPU, "ECC Error from cache tag or dirty RAM\n"); break; case TLB_PARITY_ERR_DATA: edac_printk(KERN_CRIT, EDAC_CPU, "Parity error on TLB RAM\n"); break; case TLB_PARITY_ERR_TAG: edac_printk(KERN_CRIT, EDAC_CPU, "Parity error on TLB DATA\n"); case BUS_ERROR: edac_printk(KERN_CRIT, EDAC_CPU, "Bus Error\n"); break; } if (level == L3) edac_printk(KERN_CRIT, EDAC_CPU, "Way: %d\n", (int) KRYO3XX_ERRXMISC_WAY(errxmisc)); else edac_printk(KERN_CRIT, EDAC_CPU, "Way: %d\n", (int) KRYO3XX_ERRXMISC_WAY(errxmisc) >> 2); edev_ctl->panic_on_ce = panic_on_ce; errors[errorcode].func(edev_ctl, smp_processor_id(), level, errors[errorcode].msg); } static void kryo3xx_parse_l1_l2_cache_error(u64 errxstatus, u64 errxmisc, struct edac_device_ctl_info *edev_ctl) { switch (KRYO3XX_ERRXMISC_LVL(errxmisc)) { case L1: if (KRYO3XX_ERRXSTATUS_UE(errxstatus)) dump_err_reg(KRYO3XX_L1_UE, L1, errxstatus, errxmisc, edev_ctl); else dump_err_reg(KRYO3XX_L1_CE, L1, errxstatus, errxmisc, edev_ctl); break; case L2: if (KRYO3XX_ERRXSTATUS_UE(errxstatus)) dump_err_reg(KRYO3XX_L2_UE, L2, errxstatus, errxmisc, edev_ctl); else dump_err_reg(KRYO3XX_L2_CE, L2, errxstatus, errxmisc, edev_ctl); break; } } static void kryo3xx_check_l1_l2_ecc(void *info) { struct edac_device_ctl_info *edev_ctl = info; u64 errxstatus = 0; u64 errxmisc = 0; unsigned long flags; spin_lock_irqsave(&local_handler_lock, flags); write_errselr_el1(0); errxstatus = read_errxstatus_el1(); if (KRYO3XX_ERRXSTATUS_VALID(errxstatus)) { errxmisc = read_errxmisc_el1(); edac_printk(KERN_CRIT, EDAC_CPU, "Kryo3xx CPU%d detected a L1/L2 cache error\n", smp_processor_id()); kryo3xx_parse_l1_l2_cache_error(errxstatus, errxmisc, edev_ctl); clear_errxstatus_valid(errxstatus); } spin_unlock_irqrestore(&local_handler_lock, flags); } static bool l3_is_bus_error(u64 errxstatus) { if (KRYO3XX_ERRXSTATUS_SERR(errxstatus) == BUS_ERROR) { edac_printk(KERN_CRIT, EDAC_CPU, "Bus Error\n"); return true; } return false; } static void kryo3xx_check_l3_scu_error(struct edac_device_ctl_info *edev_ctl) { u64 errxstatus = 0; u64 errxmisc = 0; unsigned long flags; spin_lock_irqsave(&local_handler_lock, flags); write_errselr_el1(1); errxstatus = read_errxstatus_el1(); errxmisc = read_errxmisc_el1(); if (KRYO3XX_ERRXSTATUS_VALID(errxstatus) && KRYO3XX_ERRXMISC_LVL(errxmisc) == L3) { if (l3_is_bus_error(errxstatus)) { if (edev_ctl->panic_on_ue) panic("Causing panic due to Bus Error\n"); goto out; } if (KRYO3XX_ERRXSTATUS_UE(errxstatus)) { edac_printk(KERN_CRIT, EDAC_CPU, "Detected L3 uncorrectable error\n"); dump_err_reg(KRYO3XX_L3_UE, L3, errxstatus, errxmisc, edev_ctl); } else { edac_printk(KERN_CRIT, EDAC_CPU, "Detected L3 correctable error\n"); dump_err_reg(KRYO3XX_L3_CE, L3, errxstatus, errxmisc, edev_ctl); } clear_errxstatus_valid(errxstatus); } out: spin_unlock_irqrestore(&local_handler_lock, flags); } void kryo3xx_poll_cache_errors(struct edac_device_ctl_info *edev_ctl) { int cpu; if (edev_ctl == NULL) edev_ctl = panic_handler_drvdata->edev_ctl; kryo3xx_check_l3_scu_error(edev_ctl); for_each_possible_cpu(cpu) smp_call_function_single(cpu, kryo3xx_check_l1_l2_ecc, edev_ctl, 0); } static irqreturn_t kryo3xx_l1_l2_handler(int irq, void *drvdata) { kryo3xx_check_l1_l2_ecc(panic_handler_drvdata->edev_ctl); return IRQ_HANDLED; } static irqreturn_t kryo3xx_l3_scu_handler(int irq, void *drvdata) { struct erp_drvdata *drv = drvdata; struct edac_device_ctl_info *edev_ctl = drv->edev_ctl; kryo3xx_check_l3_scu_error(edev_ctl); return IRQ_HANDLED; } static void initialize_registers(void *info) { set_errxctlr_el1(); set_errxmisc_overflow(); } static void init_regs_on_cpu(bool all_cpus) { int cpu; write_errselr_el1(0); if (all_cpus) { for_each_possible_cpu(cpu) smp_call_function_single(cpu, initialize_registers, NULL, 1); } else initialize_registers(NULL); write_errselr_el1(1); initialize_registers(NULL); } static int kryo3xx_pmu_cpu_pm_notify(struct notifier_block *self, unsigned long action, void *v) { switch (action) { case CPU_PM_EXIT: init_regs_on_cpu(false); kryo3xx_check_l3_scu_error(panic_handler_drvdata->edev_ctl); kryo3xx_check_l1_l2_ecc(panic_handler_drvdata->edev_ctl); break; } return NOTIFY_OK; } static int kryo3xx_cpu_erp_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct erp_drvdata *drv; int rc = 0; int fail = 0; init_regs_on_cpu(true); drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL); if (!drv) return -ENOMEM; drv->edev_ctl = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(), "L", 3, 1, NULL, 0, edac_device_alloc_index()); if (!drv->edev_ctl) return -ENOMEM; #ifdef CONFIG_EDAC_KRYO3XX_ARM64_POLL drv->edev_ctl->edac_check = kryo3xx_poll_cache_errors; drv->edev_ctl->poll_msec = poll_msec; drv->edev_ctl->defer_work = 1; #endif drv->edev_ctl->dev = dev; drv->edev_ctl->mod_name = dev_name(dev); drv->edev_ctl->dev_name = dev_name(dev); drv->edev_ctl->ctl_name = "cache"; drv->edev_ctl->panic_on_ce = panic_on_ce; drv->edev_ctl->panic_on_ue = ARM64_ERP_PANIC_ON_UE; drv->nb_pm.notifier_call = kryo3xx_pmu_cpu_pm_notify; platform_set_drvdata(pdev, drv); rc = edac_device_add_device(drv->edev_ctl); if (rc) goto out_mem; panic_handler_drvdata = drv; if (request_erp_irq(pdev, "l1-l2-faultirq", "KRYO3XX L1-L2 ECC FAULTIRQ", kryo3xx_l1_l2_handler, drv, 1)) fail++; if (request_erp_irq(pdev, "l3-scu-faultirq", "KRYO3XX L3-SCU ECC FAULTIRQ", kryo3xx_l3_scu_handler, drv, 0)) fail++; if (fail == of_irq_count(dev->of_node)) { pr_err("KRYO3XX ERP: Could not request any IRQs. Giving up.\n"); rc = -ENODEV; goto out_dev; } cpu_pm_register_notifier(&(drv->nb_pm)); return 0; out_dev: edac_device_del_device(dev); out_mem: edac_device_free_ctl_info(drv->edev_ctl); return rc; } static int kryo3xx_cpu_erp_remove(struct platform_device *pdev) { struct erp_drvdata *drv = dev_get_drvdata(&pdev->dev); struct edac_device_ctl_info *edac_ctl = drv->edev_ctl; if (drv->erp_cpu_drvdata != NULL) { free_percpu_irq(drv->ppi, drv->erp_cpu_drvdata); free_percpu(drv->erp_cpu_drvdata); } edac_device_del_device(edac_ctl->dev); edac_device_free_ctl_info(edac_ctl); return 0; } static const struct of_device_id kryo3xx_cpu_erp_match_table[] = { { .compatible = "arm,arm64-kryo3xx-cpu-erp" }, { } }; static struct platform_driver kryo3xx_cpu_erp_driver = { .probe = kryo3xx_cpu_erp_probe, .remove = kryo3xx_cpu_erp_remove, .driver = { .name = "kryo3xx_cpu_cache_erp", .owner = THIS_MODULE, .of_match_table = of_match_ptr(kryo3xx_cpu_erp_match_table), }, }; static int __init kryo3xx_cpu_erp_init(void) { return platform_driver_register(&kryo3xx_cpu_erp_driver); } module_init(kryo3xx_cpu_erp_init); static void __exit kryo3xx_cpu_erp_exit(void) { platform_driver_unregister(&kryo3xx_cpu_erp_driver); } module_exit(kryo3xx_cpu_erp_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Kryo3xx EDAC driver");