These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / arch / powerpc / platforms / powernv / opal-hmi.c
1 /*
2  * OPAL hypervisor Maintenance interrupt handling support in PowerNV.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; If not, see <http://www.gnu.org/licenses/>.
16  *
17  * Copyright 2014 IBM Corporation
18  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
19  */
20
21 #undef DEBUG
22
23 #include <linux/kernel.h>
24 #include <linux/init.h>
25 #include <linux/of.h>
26 #include <linux/mm.h>
27 #include <linux/slab.h>
28
29 #include <asm/opal.h>
30 #include <asm/cputable.h>
31 #include <asm/machdep.h>
32
/*
 * Registration guard: opal_hmi_handler_init() sets this to 1 after the
 * OPAL HMI message notifier has been registered, and skips the
 * registration when it is already non-zero.
 */
static int opal_hmi_handler_nb_init;
34 struct OpalHmiEvtNode {
35         struct list_head list;
36         struct OpalHMIEvent hmi_evt;
37 };
38
/*
 * Row of a checkstop decoding table: maps one reason bit mask to the
 * hardware unit that failed and a human-readable description.
 */
struct xstop_reason {
        /* Mask tested against the (CPU-endian) reason word of the event. */
        uint32_t xstop_reason;
        /* Short name of the failing unit, printed as "[Unit: ...]". */
        const char *unit_failed;
        /* Text printed after the unit name. */
        const char *description;
};
44
/*
 * HMI events waiting to be logged. Nodes are added by
 * opal_handle_hmi_event() and removed by hmi_event_handler().
 */
static LIST_HEAD(opal_hmi_evt_list);

/* Taken (with interrupts disabled) around every access to the list above. */
static DEFINE_SPINLOCK(opal_hmi_evt_lock);
47
48 static void print_core_checkstop_reason(const char *level,
49                                         struct OpalHMIEvent *hmi_evt)
50 {
51         int i;
52         static const struct xstop_reason xstop_reason[] = {
53                 { CORE_CHECKSTOP_IFU_REGFILE, "IFU",
54                                 "RegFile core check stop" },
55                 { CORE_CHECKSTOP_IFU_LOGIC, "IFU", "Logic core check stop" },
56                 { CORE_CHECKSTOP_PC_DURING_RECOV, "PC",
57                                 "Core checkstop during recovery" },
58                 { CORE_CHECKSTOP_ISU_REGFILE, "ISU",
59                                 "RegFile core check stop (mapper error)" },
60                 { CORE_CHECKSTOP_ISU_LOGIC, "ISU", "Logic core check stop" },
61                 { CORE_CHECKSTOP_FXU_LOGIC, "FXU", "Logic core check stop" },
62                 { CORE_CHECKSTOP_VSU_LOGIC, "VSU", "Logic core check stop" },
63                 { CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE, "PC",
64                                 "Recovery in maintenance mode" },
65                 { CORE_CHECKSTOP_LSU_REGFILE, "LSU",
66                                 "RegFile core check stop" },
67                 { CORE_CHECKSTOP_PC_FWD_PROGRESS, "PC",
68                                 "Forward Progress Error" },
69                 { CORE_CHECKSTOP_LSU_LOGIC, "LSU", "Logic core check stop" },
70                 { CORE_CHECKSTOP_PC_LOGIC, "PC", "Logic core check stop" },
71                 { CORE_CHECKSTOP_PC_HYP_RESOURCE, "PC",
72                                 "Hypervisor Resource error - core check stop" },
73                 { CORE_CHECKSTOP_PC_HANG_RECOV_FAILED, "PC",
74                                 "Hang Recovery Failed (core check stop)" },
75                 { CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED, "PC",
76                                 "Ambiguous Hang Detected (unknown source)" },
77                 { CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ, "PC",
78                                 "Debug Trigger Error inject" },
79                 { CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ, "PC",
80                                 "Hypervisor check stop via SPRC/SPRD" },
81         };
82
83         /* Validity check */
84         if (!hmi_evt->u.xstop_error.xstop_reason) {
85                 printk("%s      Unknown Core check stop.\n", level);
86                 return;
87         }
88
89         printk("%s      CPU PIR: %08x\n", level,
90                         be32_to_cpu(hmi_evt->u.xstop_error.u.pir));
91         for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
92                 if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
93                                         xstop_reason[i].xstop_reason)
94                         printk("%s      [Unit: %-3s] %s\n", level,
95                                         xstop_reason[i].unit_failed,
96                                         xstop_reason[i].description);
97 }
98
99 static void print_nx_checkstop_reason(const char *level,
100                                         struct OpalHMIEvent *hmi_evt)
101 {
102         int i;
103         static const struct xstop_reason xstop_reason[] = {
104                 { NX_CHECKSTOP_SHM_INVAL_STATE_ERR, "DMA & Engine",
105                                         "SHM invalid state error" },
106                 { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1, "DMA & Engine",
107                                         "DMA invalid state error bit 15" },
108                 { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2, "DMA & Engine",
109                                         "DMA invalid state error bit 16" },
110                 { NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR, "DMA & Engine",
111                                         "Channel 0 invalid state error" },
112                 { NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR, "DMA & Engine",
113                                         "Channel 1 invalid state error" },
114                 { NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR, "DMA & Engine",
115                                         "Channel 2 invalid state error" },
116                 { NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR, "DMA & Engine",
117                                         "Channel 3 invalid state error" },
118                 { NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR, "DMA & Engine",
119                                         "Channel 4 invalid state error" },
120                 { NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR, "DMA & Engine",
121                                         "Channel 5 invalid state error" },
122                 { NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR, "DMA & Engine",
123                                         "Channel 6 invalid state error" },
124                 { NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR, "DMA & Engine",
125                                         "Channel 7 invalid state error" },
126                 { NX_CHECKSTOP_DMA_CRB_UE, "DMA & Engine",
127                                         "UE error on CRB(CSB address, CCB)" },
128                 { NX_CHECKSTOP_DMA_CRB_SUE, "DMA & Engine",
129                                         "SUE error on CRB(CSB address, CCB)" },
130                 { NX_CHECKSTOP_PBI_ISN_UE, "PowerBus Interface",
131                 "CRB Kill ISN received while holding ISN with UE error" },
132         };
133
134         /* Validity check */
135         if (!hmi_evt->u.xstop_error.xstop_reason) {
136                 printk("%s      Unknown NX check stop.\n", level);
137                 return;
138         }
139
140         printk("%s      NX checkstop on CHIP ID: %x\n", level,
141                         be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
142         for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
143                 if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
144                                         xstop_reason[i].xstop_reason)
145                         printk("%s      [Unit: %-3s] %s\n", level,
146                                         xstop_reason[i].unit_failed,
147                                         xstop_reason[i].description);
148 }
149
150 static void print_checkstop_reason(const char *level,
151                                         struct OpalHMIEvent *hmi_evt)
152 {
153         switch (hmi_evt->u.xstop_error.xstop_type) {
154         case CHECKSTOP_TYPE_CORE:
155                 print_core_checkstop_reason(level, hmi_evt);
156                 break;
157         case CHECKSTOP_TYPE_NX:
158                 print_nx_checkstop_reason(level, hmi_evt);
159                 break;
160         case CHECKSTOP_TYPE_UNKNOWN:
161                 printk("%s      Unknown Malfunction Alert.\n", level);
162                 break;
163         }
164 }
165
166 static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
167 {
168         const char *level, *sevstr, *error_info;
169         static const char *hmi_error_types[] = {
170                 "Malfunction Alert",
171                 "Processor Recovery done",
172                 "Processor recovery occurred again",
173                 "Processor recovery occurred for masked error",
174                 "Timer facility experienced an error",
175                 "TFMR SPR is corrupted",
176                 "UPS (Uniterrupted Power System) Overflow indication",
177                 "An XSCOM operation failure",
178                 "An XSCOM operation completed",
179                 "SCOM has set a reserved FIR bit to cause recovery",
180                 "Debug trigger has set a reserved FIR bit to cause recovery",
181                 "A hypervisor resource error occurred"
182         };
183
184         /* Print things out */
185         if (hmi_evt->version < OpalHMIEvt_V1) {
186                 pr_err("HMI Interrupt, Unknown event version %d !\n",
187                         hmi_evt->version);
188                 return;
189         }
190         switch (hmi_evt->severity) {
191         case OpalHMI_SEV_NO_ERROR:
192                 level = KERN_INFO;
193                 sevstr = "Harmless";
194                 break;
195         case OpalHMI_SEV_WARNING:
196                 level = KERN_WARNING;
197                 sevstr = "";
198                 break;
199         case OpalHMI_SEV_ERROR_SYNC:
200                 level = KERN_ERR;
201                 sevstr = "Severe";
202                 break;
203         case OpalHMI_SEV_FATAL:
204         default:
205                 level = KERN_ERR;
206                 sevstr = "Fatal";
207                 break;
208         }
209
210         printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
211                 level, sevstr,
212                 hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
213                 "Recovered" : "Not recovered");
214         error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ?
215                         hmi_error_types[hmi_evt->type]
216                         : "Unknown";
217         printk("%s Error detail: %s\n", level, error_info);
218         printk("%s      HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer));
219         if ((hmi_evt->type == OpalHMI_ERROR_TFAC) ||
220                 (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
221                 printk("%s      TFMR: %016llx\n", level,
222                                                 be64_to_cpu(hmi_evt->tfmr));
223
224         if (hmi_evt->version < OpalHMIEvt_V2)
225                 return;
226
227         /* OpalHMIEvt_V2 and above provides reason for malfunction alert. */
228         if (hmi_evt->type == OpalHMI_ERROR_MALFUNC_ALERT)
229                 print_checkstop_reason(level, hmi_evt);
230 }
231
232 static void hmi_event_handler(struct work_struct *work)
233 {
234         unsigned long flags;
235         struct OpalHMIEvent *hmi_evt;
236         struct OpalHmiEvtNode *msg_node;
237         uint8_t disposition;
238         struct opal_msg msg;
239         int unrecoverable = 0;
240
241         spin_lock_irqsave(&opal_hmi_evt_lock, flags);
242         while (!list_empty(&opal_hmi_evt_list)) {
243                 msg_node = list_entry(opal_hmi_evt_list.next,
244                                            struct OpalHmiEvtNode, list);
245                 list_del(&msg_node->list);
246                 spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
247
248                 hmi_evt = (struct OpalHMIEvent *) &msg_node->hmi_evt;
249                 print_hmi_event_info(hmi_evt);
250                 disposition = hmi_evt->disposition;
251                 kfree(msg_node);
252
253                 /*
254                  * Check if HMI event has been recovered or not. If not
255                  * then kernel can't continue, we need to panic.
256                  * But before we do that, display all the HMI event
257                  * available on the list and set unrecoverable flag to 1.
258                  */
259                 if (disposition != OpalHMI_DISPOSITION_RECOVERED)
260                         unrecoverable = 1;
261
262                 spin_lock_irqsave(&opal_hmi_evt_lock, flags);
263         }
264         spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
265
266         if (unrecoverable) {
267                 int ret;
268
269                 /* Pull all HMI events from OPAL before we panic. */
270                 while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) {
271                         u32 type;
272
273                         type = be32_to_cpu(msg.msg_type);
274
275                         /* skip if not HMI event */
276                         if (type != OPAL_MSG_HMI_EVT)
277                                 continue;
278
279                         /* HMI event info starts from param[0] */
280                         hmi_evt = (struct OpalHMIEvent *)&msg.params[0];
281                         print_hmi_event_info(hmi_evt);
282                 }
283
284                 /*
285                  * Unrecoverable HMI exception. We need to inform BMC/OCC
286                  * about this error so that it can collect relevant data
287                  * for error analysis before rebooting.
288                  */
289                 ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
290                         "Unrecoverable HMI exception");
291                 if (ret == OPAL_UNSUPPORTED) {
292                         pr_emerg("Reboot type %d not supported\n",
293                                                 OPAL_REBOOT_PLATFORM_ERROR);
294                 }
295
296                 /*
297                  * Fall through and panic if opal_cec_reboot2() returns
298                  * OPAL_UNSUPPORTED.
299                  */
300                 panic("Unrecoverable HMI exception");
301         }
302 }
303
/* Work item that runs hmi_event_handler() to log queued HMI events. */
static DECLARE_WORK(hmi_event_work, hmi_event_handler);
305 /*
306  * opal_handle_hmi_event - notifier handler that queues up HMI events
307  * to be preocessed later.
308  */
309 static int opal_handle_hmi_event(struct notifier_block *nb,
310                           unsigned long msg_type, void *msg)
311 {
312         unsigned long flags;
313         struct OpalHMIEvent *hmi_evt;
314         struct opal_msg *hmi_msg = msg;
315         struct OpalHmiEvtNode *msg_node;
316
317         /* Sanity Checks */
318         if (msg_type != OPAL_MSG_HMI_EVT)
319                 return 0;
320
321         /* HMI event info starts from param[0] */
322         hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0];
323
324         /* Delay the logging of HMI events to workqueue. */
325         msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
326         if (!msg_node) {
327                 pr_err("HMI: out of memory, Opal message event not handled\n");
328                 return -ENOMEM;
329         }
330         memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent));
331
332         spin_lock_irqsave(&opal_hmi_evt_lock, flags);
333         list_add(&msg_node->list, &opal_hmi_evt_list);
334         spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
335
336         schedule_work(&hmi_event_work);
337         return 0;
338 }
339
340 static struct notifier_block opal_hmi_handler_nb = {
341         .notifier_call  = opal_handle_hmi_event,
342         .next           = NULL,
343         .priority       = 0,
344 };
345
346 int __init opal_hmi_handler_init(void)
347 {
348         int ret;
349
350         if (!opal_hmi_handler_nb_init) {
351                 ret = opal_message_notifier_register(
352                                 OPAL_MSG_HMI_EVT, &opal_hmi_handler_nb);
353                 if (ret) {
354                         pr_err("%s: Can't register OPAL event notifier (%d)\n",
355                                __func__, ret);
356                         return ret;
357                 }
358                 opal_hmi_handler_nb_init = 1;
359         }
360         return 0;
361 }