These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / arch / s390 / kernel / nmi.c
1 /*
2  *   Machine check handler
3  *
4  *    Copyright IBM Corp. 2000, 2009
5  *    Author(s): Ingo Adlung <adlung@de.ibm.com>,
6  *               Martin Schwidefsky <schwidefsky@de.ibm.com>,
7  *               Cornelia Huck <cornelia.huck@de.ibm.com>,
8  *               Heiko Carstens <heiko.carstens@de.ibm.com>
9  */
10
11 #include <linux/kernel_stat.h>
12 #include <linux/init.h>
13 #include <linux/errno.h>
14 #include <linux/hardirq.h>
15 #include <linux/time.h>
16 #include <linux/module.h>
17 #include <asm/lowcore.h>
18 #include <asm/smp.h>
19 #include <asm/etr.h>
20 #include <asm/cputime.h>
21 #include <asm/nmi.h>
22 #include <asm/crw.h>
23 #include <asm/switch_to.h>
24 #include <asm/ctl_reg.h>
25
26 struct mcck_struct {
27         unsigned int kill_task : 1;
28         unsigned int channel_report : 1;
29         unsigned int warning : 1;
30         unsigned int etr_queue : 1;
31         unsigned int stp_queue : 1;
32         unsigned long mcck_code;
33 };
34
35 static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
36
/*
 * Unrecoverable machine check: stop the other CPUs, then put this CPU into
 * a disabled wait that carries the caller's address. The trailing endless
 * loop ensures the function never returns to its caller.
 */
static void s390_handle_damage(void)
{
	unsigned long caller;

	smp_send_stop();
	caller = (unsigned long) __builtin_return_address(0);
	disabled_wait(caller);
	for (;;)
		;
}
43
44 /*
45  * Main machine check handler function. Will be called with interrupts enabled
46  * or disabled and machine checks enabled or disabled.
47  */
48 void s390_handle_mcck(void)
49 {
50         unsigned long flags;
51         struct mcck_struct mcck;
52
53         /*
54          * Disable machine checks and get the current state of accumulated
55          * machine checks. Afterwards delete the old state and enable machine
56          * checks again.
57          */
58         local_irq_save(flags);
59         local_mcck_disable();
60         mcck = *this_cpu_ptr(&cpu_mcck);
61         memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
62         clear_cpu_flag(CIF_MCCK_PENDING);
63         local_mcck_enable();
64         local_irq_restore(flags);
65
66         if (mcck.channel_report)
67                 crw_handle_channel_report();
68         /*
69          * A warning may remain for a prolonged period on the bare iron.
70          * (actually until the machine is powered off, or the problem is gone)
71          * So we just stop listening for the WARNING MCH and avoid continuously
72          * being interrupted.  One caveat is however, that we must do this per
73          * processor and cannot use the smp version of ctl_clear_bit().
74          * On VM we only get one interrupt per virtally presented machinecheck.
75          * Though one suffices, we may get one interrupt per (virtual) cpu.
76          */
77         if (mcck.warning) {     /* WARNING pending ? */
78                 static int mchchk_wng_posted = 0;
79
80                 /* Use single cpu clear, as we cannot handle smp here. */
81                 __ctl_clear_bit(14, 24);        /* Disable WARNING MCH */
82                 if (xchg(&mchchk_wng_posted, 1) == 0)
83                         kill_cad_pid(SIGPWR, 1);
84         }
85         if (mcck.etr_queue)
86                 etr_queue_work();
87         if (mcck.stp_queue)
88                 stp_queue_work();
89         if (mcck.kill_task) {
90                 local_irq_enable();
91                 printk(KERN_EMERG "mcck: Terminating task because of machine "
92                        "malfunction (code 0x%016lx).\n", mcck.mcck_code);
93                 printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
94                        current->comm, current->pid);
95                 do_exit(SIGSEGV);
96         }
97 }
98 EXPORT_SYMBOL_GPL(s390_handle_mcck);
99
100 /*
101  * returns 0 if all registers could be validated
102  * returns 1 otherwise
103  */
104 static int notrace s390_validate_registers(union mci mci)
105 {
106         int kill_task;
107         u64 zero;
108         void *fpt_save_area, *fpt_creg_save_area;
109
110         kill_task = 0;
111         zero = 0;
112
113         if (!mci.gr) {
114                 /*
115                  * General purpose registers couldn't be restored and have
116                  * unknown contents. Process needs to be terminated.
117                  */
118                 kill_task = 1;
119         }
120         if (!mci.fp) {
121                 /*
122                  * Floating point registers can't be restored and
123                  * therefore the process needs to be terminated.
124                  */
125                 kill_task = 1;
126         }
127         fpt_save_area = &S390_lowcore.floating_pt_save_area;
128         fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
129         if (!mci.fc) {
130                 /*
131                  * Floating point control register can't be restored.
132                  * Task will be terminated.
133                  */
134                 asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
135                 kill_task = 1;
136         } else
137                 asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
138
139         if (!MACHINE_HAS_VX) {
140                 /* Validate floating point registers */
141                 asm volatile(
142                         "       ld      0,0(%0)\n"
143                         "       ld      1,8(%0)\n"
144                         "       ld      2,16(%0)\n"
145                         "       ld      3,24(%0)\n"
146                         "       ld      4,32(%0)\n"
147                         "       ld      5,40(%0)\n"
148                         "       ld      6,48(%0)\n"
149                         "       ld      7,56(%0)\n"
150                         "       ld      8,64(%0)\n"
151                         "       ld      9,72(%0)\n"
152                         "       ld      10,80(%0)\n"
153                         "       ld      11,88(%0)\n"
154                         "       ld      12,96(%0)\n"
155                         "       ld      13,104(%0)\n"
156                         "       ld      14,112(%0)\n"
157                         "       ld      15,120(%0)\n"
158                         : : "a" (fpt_save_area));
159         } else {
160                 /* Validate vector registers */
161                 union ctlreg0 cr0;
162
163                 if (!mci.vr) {
164                         /*
165                          * Vector registers can't be restored and therefore
166                          * the process needs to be terminated.
167                          */
168                         kill_task = 1;
169                 }
170                 cr0.val = S390_lowcore.cregs_save_area[0];
171                 cr0.afp = cr0.vx = 1;
172                 __ctl_load(cr0.val, 0, 0);
173                 asm volatile(
174                         "       la      1,%0\n"
175                         "       .word   0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
176                         "       .word   0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
177                         : : "Q" (*(struct vx_array *)
178                                  &S390_lowcore.vector_save_area) : "1");
179                 __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
180         }
181         /* Validate access registers */
182         asm volatile(
183                 "       lam     0,15,0(%0)"
184                 : : "a" (&S390_lowcore.access_regs_save_area));
185         if (!mci.ar) {
186                 /*
187                  * Access registers have unknown contents.
188                  * Terminating task.
189                  */
190                 kill_task = 1;
191         }
192         /* Validate control registers */
193         if (!mci.cr) {
194                 /*
195                  * Control registers have unknown contents.
196                  * Can't recover and therefore stopping machine.
197                  */
198                 s390_handle_damage();
199         } else {
200                 asm volatile(
201                         "       lctlg   0,15,0(%0)"
202                         : : "a" (&S390_lowcore.cregs_save_area));
203         }
204         /*
205          * We don't even try to validate the TOD register, since we simply
206          * can't write something sensible into that register.
207          */
208         /*
209          * See if we can validate the TOD programmable register with its
210          * old contents (should be zero) otherwise set it to zero.
211          */
212         if (!mci.pr)
213                 asm volatile(
214                         "       sr      0,0\n"
215                         "       sckpf"
216                         : : : "0", "cc");
217         else
218                 asm volatile(
219                         "       l       0,0(%0)\n"
220                         "       sckpf"
221                         : : "a" (&S390_lowcore.tod_progreg_save_area)
222                         : "0", "cc");
223         /* Validate clock comparator register */
224         set_clock_comparator(S390_lowcore.clock_comparator);
225         /* Check if old PSW is valid */
226         if (!mci.wp)
227                 /*
228                  * Can't tell if we come from user or kernel mode
229                  * -> stopping machine.
230                  */
231                 s390_handle_damage();
232
233         if (!mci.ms || !mci.pm || !mci.ia)
234                 kill_task = 1;
235
236         return kill_task;
237 }
238
239 #define MAX_IPD_COUNT   29
240 #define MAX_IPD_TIME    (5 * 60 * USEC_PER_SEC) /* 5 minutes */
241
242 #define ED_STP_ISLAND   6       /* External damage STP island check */
243 #define ED_STP_SYNC     7       /* External damage STP sync check */
244 #define ED_ETR_SYNC     12      /* External damage ETR sync check */
245 #define ED_ETR_SWITCH   13      /* External damage ETR switch to local */
246
247 /*
248  * machine check handler.
249  */
250 void notrace s390_do_machine_check(struct pt_regs *regs)
251 {
252         static int ipd_count;
253         static DEFINE_SPINLOCK(ipd_lock);
254         static unsigned long long last_ipd;
255         struct mcck_struct *mcck;
256         unsigned long long tmp;
257         union mci mci;
258         int umode;
259
260         nmi_enter();
261         inc_irq_stat(NMI_NMI);
262         mci.val = S390_lowcore.mcck_interruption_code;
263         mcck = this_cpu_ptr(&cpu_mcck);
264         umode = user_mode(regs);
265
266         if (mci.sd) {
267                 /* System damage -> stopping machine */
268                 s390_handle_damage();
269         }
270         if (mci.pd) {
271                 if (mci.b) {
272                         /* Processing backup -> verify if we can survive this */
273                         u64 z_mcic, o_mcic, t_mcic;
274                         z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
275                         o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
276                                   1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
277                                   1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
278                                   1ULL<<16);
279                         t_mcic = mci.val;
280
281                         if (((t_mcic & z_mcic) != 0) ||
282                             ((t_mcic & o_mcic) != o_mcic)) {
283                                 s390_handle_damage();
284                         }
285
286                         /*
287                          * Nullifying exigent condition, therefore we might
288                          * retry this instruction.
289                          */
290                         spin_lock(&ipd_lock);
291                         tmp = get_tod_clock();
292                         if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
293                                 ipd_count++;
294                         else
295                                 ipd_count = 1;
296                         last_ipd = tmp;
297                         if (ipd_count == MAX_IPD_COUNT)
298                                 s390_handle_damage();
299                         spin_unlock(&ipd_lock);
300                 } else {
301                         /* Processing damage -> stopping machine */
302                         s390_handle_damage();
303                 }
304         }
305         if (s390_validate_registers(mci)) {
306                 if (umode) {
307                         /*
308                          * Couldn't restore all register contents while in
309                          * user mode -> mark task for termination.
310                          */
311                         mcck->kill_task = 1;
312                         mcck->mcck_code = mci.val;
313                         set_cpu_flag(CIF_MCCK_PENDING);
314                 } else {
315                         /*
316                          * Couldn't restore all register contents while in
317                          * kernel mode -> stopping machine.
318                          */
319                         s390_handle_damage();
320                 }
321         }
322         if (mci.cd) {
323                 /* Timing facility damage */
324                 s390_handle_damage();
325         }
326         if (mci.ed && mci.ec) {
327                 /* External damage */
328                 if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
329                         mcck->etr_queue |= etr_sync_check();
330                 if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
331                         mcck->etr_queue |= etr_switch_to_local();
332                 if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
333                         mcck->stp_queue |= stp_sync_check();
334                 if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
335                         mcck->stp_queue |= stp_island_check();
336                 if (mcck->etr_queue || mcck->stp_queue)
337                         set_cpu_flag(CIF_MCCK_PENDING);
338         }
339         if (mci.se)
340                 /* Storage error uncorrected */
341                 s390_handle_damage();
342         if (mci.ke)
343                 /* Storage key-error uncorrected */
344                 s390_handle_damage();
345         if (mci.ds && mci.fa)
346                 /* Storage degradation */
347                 s390_handle_damage();
348         if (mci.cp) {
349                 /* Channel report word pending */
350                 mcck->channel_report = 1;
351                 set_cpu_flag(CIF_MCCK_PENDING);
352         }
353         if (mci.w) {
354                 /* Warning pending */
355                 mcck->warning = 1;
356                 set_cpu_flag(CIF_MCCK_PENDING);
357         }
358         nmi_exit();
359 }
360
361 static int __init machine_check_init(void)
362 {
363         ctl_set_bit(14, 25);    /* enable external damage MCH */
364         ctl_set_bit(14, 27);    /* enable system recovery MCH */
365         ctl_set_bit(14, 24);    /* enable warning MCH */
366         return 0;
367 }
368 early_initcall(machine_check_init);