2 * Copyright © 2012 Raspberry Pi Foundation
3 * Copyright © 2012 RISC OS Open Ltd
5 * Permission to use, copy, modify, distribute, and sell this software and its
6 * documentation for any purpose is hereby granted without fee, provided that
7 * the above copyright notice appear in all copies and that both that
8 * copyright notice and this permission notice appear in supporting
9 * documentation, and that the name of the copyright holders not be used in
10 * advertising or publicity pertaining to distribution of the software without
11 * specific, written prior permission. The copyright holders make no
12 * representations about the suitability of this software for any purpose. It
13 * is provided "as is" without express or implied warranty.
15 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
20 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
21 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
24 * Author: Ben Avison (bavison@riscosopen.org)
28 /* Prevent the stack from becoming executable */
29 #if defined(__linux__) && defined(__ELF__)
30 .section .note.GNU-stack,"",%progbits
40 #include "pixman-arm-simd-asm.h"
42 /* A head macro should do all processing which results in an output of up to
43 * 16 bytes, as far as the final load instruction. The corresponding tail macro
44 * should complete the processing of the up-to-16 bytes. The calling macro will
45 * sometimes choose to insert a preload or a decrement of X between them.
46 * cond ARM condition code for code block
47 * numbytes Number of output bytes that should be generated this time
48 * firstreg First WK register in which to place output
49 * unaligned_src Whether to use non-wordaligned loads of source image
50 * unaligned_mask Whether to use non-wordaligned loads of mask image
51 * preload If outputting 16 bytes causes 64 bytes to be read, whether an extra preload should be output
/* NOTE(review): the .macro header that encloses this line is not present in
 * this excerpt. The line passes STRIDE_D and STRIDE_S to line_saved_regs,
 * which is defined outside this file (presumably in pixman-arm-simd-asm.h). */
55 line_saved_regs STRIDE_D, STRIDE_S
/* Head step for plain blits: forwards cond, numbytes, firstreg and
 * unaligned_src to pixld with SRC as the pointer register, i.e. it issues the
 * source load for this chunk. unaligned_mask and preload are not referenced
 * by the visible body. NOTE(review): the closing .endm is not present in this
 * excerpt. */
58 .macro blit_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
59 pixld cond, numbytes, firstreg, SRC, unaligned_src
/* Hand-written inner loop for blits. The visible lines load two 16-byte
 * groups from SRC (first-register indices 0 and 4) at local label 110, then
 * subtract the number of pixels contained in 32 bytes (32*8/src_bpp) from X,
 * updating the flags. process_head, process_tail and dst_alignment are not
 * referenced by the visible lines.
 * NOTE(review): several lines of this macro (including the stores, the loop
 * branch and .endm) are not present in this excerpt. */
62 .macro blit_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment
67 110: pixld , 16, 0, SRC, unaligned_src
68 pixld , 16, 4, SRC, unaligned_src
72 subs X, X, #32*8/src_bpp
/* Instantiate pixman_composite_src_8888_8888_asm_armv6 from the
 * generate_composite_function template. The numeric arguments 32, 0, 32 are
 * presumably the source, mask and destination bits per pixel (confirm against
 * the template definition). The remaining arguments are labelled inline.
 * NOTE(review): the init, process-head and final continuation lines are not
 * present in this excerpt. */
80 generate_composite_function \
81 pixman_composite_src_8888_8888_asm_armv6, 32, 0, 32, \
82 FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
83 4, /* prefetch distance */ \
85 nop_macro, /* newline */ \
86 nop_macro, /* cleanup */ \
88 nop_macro, /* process tail */ \
/* Instantiate pixman_composite_src_0565_0565_asm_armv6 from the
 * generate_composite_function template. The numeric arguments 16, 0, 16 are
 * presumably the source, mask and destination bits per pixel (confirm against
 * the template definition). The remaining arguments are labelled inline.
 * NOTE(review): the init, process-head and final continuation lines are not
 * present in this excerpt. */
91 generate_composite_function \
92 pixman_composite_src_0565_0565_asm_armv6, 16, 0, 16, \
93 FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
94 4, /* prefetch distance */ \
96 nop_macro, /* newline */ \
97 nop_macro, /* cleanup */ \
99 nop_macro, /* process tail */ \
/* Instantiate pixman_composite_src_8_8_asm_armv6 from the
 * generate_composite_function template. The numeric arguments 8, 0, 8 are
 * presumably the source, mask and destination bits per pixel (confirm against
 * the template definition). Unlike the two wider copy variants, the prefetch
 * distance here is 3.
 * NOTE(review): the init, process-head and final continuation lines are not
 * present in this excerpt. */
102 generate_composite_function \
103 pixman_composite_src_8_8_asm_armv6, 8, 0, 8, \
104 FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
105 3, /* prefetch distance */ \
107 nop_macro, /* newline */ \
108 nop_macro, /* cleanup */ \
110 nop_macro, /* process tail */ \
113 /******************************************************************************/
/* Init step for the 32bpp solid fill: loads the word stored at
 * [sp, #ARGS_STACK_OFFSET] into SRC (presumably the solid source colour
 * passed on the stack - confirm against the caller).
 * NOTE(review): any further lines of this macro, including .endm, are not
 * present in this excerpt. */
115 .macro src_n_8888_init
116 ldr SRC, [sp, #ARGS_STACK_OFFSET]
/* Init step for the 16bpp solid fill: loads the halfword stored at
 * [sp, #ARGS_STACK_OFFSET] into SRC (zero-extended by ldrh), then ORs in a
 * copy shifted left by 16 so both halfwords of SRC hold the same value.
 * NOTE(review): any further lines of this macro, including .endm, are not
 * present in this excerpt. */
122 .macro src_n_0565_init
123 ldrh SRC, [sp, #ARGS_STACK_OFFSET]
124 orr SRC, SRC, lsl #16
/* NOTE(review): the .macro header for these lines is not present in this
 * excerpt; by analogy with its neighbours it is presumably the 8bpp solid
 * fill init. Visible behaviour: loads the byte at [sp, #ARGS_STACK_OFFSET]
 * into SRC (zero-extended by ldrb) and ORs in a copy of SRC shifted left by
 * 16. A line between these two appears to be missing as well. */
131 ldrb SRC, [sp, #ARGS_STACK_OFFSET]
133 orr SRC, SRC, lsl #16
/* Tail step shared by the solid-fill functions: hands cond and numbytes to
 * pixst with first-register index 4 and DST as the pointer register. The
 * firstreg argument is not used by the visible line; the register index is
 * hard-coded.
 * NOTE(review): the lines between the header and the store, and .endm, are
 * not present in this excerpt. */
139 .macro fill_process_tail cond, numbytes, firstreg
144 pixst cond, numbytes, 4, DST
/* Instantiate pixman_composite_src_n_8888_asm_armv6 from the
 * generate_composite_function template. The numeric arguments 0, 0, 32 are
 * presumably the source, mask and destination bits per pixel (confirm against
 * the template definition). The remaining arguments are labelled inline.
 * Every argument is terminated by an explicit comma, matching the other
 * invocations in this file, rather than relying on whitespace to separate
 * macro arguments.
 * NOTE(review): the init and final (process tail) continuation lines are not
 * present in this excerpt. */
151 generate_composite_function \
152 pixman_composite_src_n_8888_asm_armv6, 0, 0, 32, \
153 FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH, \
154 0, /* prefetch distance doesn't apply */ \
156 nop_macro, /* newline */ \
157 nop_macro, /* cleanup */ \
158 nop_macro, /* process head */ \
/* Instantiate pixman_composite_src_n_0565_asm_armv6 from the
 * generate_composite_function template. The numeric arguments 0, 0, 16 are
 * presumably the source, mask and destination bits per pixel (confirm against
 * the template definition). The remaining arguments are labelled inline.
 * Every argument is terminated by an explicit comma, matching the other
 * invocations in this file, rather than relying on whitespace to separate
 * macro arguments.
 * NOTE(review): the init and final (process tail) continuation lines are not
 * present in this excerpt. */
161 generate_composite_function \
162 pixman_composite_src_n_0565_asm_armv6, 0, 0, 16, \
163 FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH, \
164 0, /* prefetch distance doesn't apply */ \
166 nop_macro, /* newline */ \
167 nop_macro, /* cleanup */ \
168 nop_macro, /* process head */ \
/* Instantiate pixman_composite_src_n_8_asm_armv6 from the
 * generate_composite_function template. The numeric arguments 0, 0, 8 are
 * presumably the source, mask and destination bits per pixel (confirm against
 * the template definition). The remaining arguments are labelled inline.
 * Every argument is terminated by an explicit comma, matching the other
 * invocations in this file, rather than relying on whitespace to separate
 * macro arguments.
 * NOTE(review): the init and final (process tail) continuation lines are not
 * present in this excerpt. */
171 generate_composite_function \
172 pixman_composite_src_n_8_asm_armv6, 0, 0, 8, \
173 FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH, \
174 0, /* prefetch distance doesn't apply */ \
176 nop_macro, /* newline */ \
177 nop_macro, /* cleanup */ \
178 nop_macro, /* process head */ \
181 /******************************************************************************/
/* Force the top byte of one pixel to 0xFF, leaving the lower 24 bits
 * untouched.
 *   cond  ARM condition code appended to the instruction
 *   reg   index of the WK register holding the pixel (modified in place)
 * The register is spelled WK&reg so that the assembler substitutes the macro
 * argument into the register name.
 * NOTE(review): the closing .endm is not present in this excerpt. */
183 .macro src_x888_8888_pixel, cond, reg
184 orr&cond WK&reg, WK&reg, #0xFF000000
/* Head step for x888 -> 8888: forwards cond, numbytes, firstreg and
 * unaligned_src to pixld with SRC as the pointer register. unaligned_mask and
 * preload are not referenced by the visible body.
 * NOTE(review): the closing .endm is not present in this excerpt. */
187 .macro pixman_composite_src_x888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
188 pixld cond, numbytes, firstreg, SRC, unaligned_src
/* Tail step for x888 -> 8888: applies src_x888_8888_pixel (set the top byte
 * to 0xFF) to the WK registers firstreg+0 .. firstreg+3.
 * NOTE(review): lines are missing between the invocations in this excerpt;
 * they presumably make the later invocations conditional on numbytes, so do
 * not assume all four always run. .endm is also not present. */
191 .macro pixman_composite_src_x888_8888_process_tail cond, numbytes, firstreg
192 src_x888_8888_pixel cond, %(firstreg+0)
194 src_x888_8888_pixel cond, %(firstreg+1)
196 src_x888_8888_pixel cond, %(firstreg+2)
197 src_x888_8888_pixel cond, %(firstreg+3)
/* Instantiate pixman_composite_src_x888_8888_asm_armv6 from the
 * generate_composite_function template, using the x888 -> 8888 head and tail
 * macros named on the last two argument lines and no init/newline/cleanup
 * work. The numeric arguments 32, 0, 32 are presumably the source, mask and
 * destination bits per pixel (confirm against the template definition). */
202 generate_composite_function \
203 pixman_composite_src_x888_8888_asm_armv6, 32, 0, 32, \
204 FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_SCRATCH, \
205 3, /* prefetch distance */ \
206 nop_macro, /* init */ \
207 nop_macro, /* newline */ \
208 nop_macro, /* cleanup */ \
209 pixman_composite_src_x888_8888_process_head, \
210 pixman_composite_src_x888_8888_process_tail
212 /******************************************************************************/
/* Init step for r5g6b5 -> a8r8g8b8: sets up the constants the per-pixel
 * conversion macros rely on.
 *   MASK     = 0x07E007E0 (the 6-bit green field of each of two packed
 *              16-bit pixels)
 *   STRIDE_M = 0xFF000000 (ORed into each result as the top byte)
 *   GE[3:0]  = 1010
 * Clobbers SCRATCH.
 * NOTE(review): the closing .endm is not present in this excerpt. */
214 .macro src_0565_8888_init
215 /* Hold loop invariants in MASK and STRIDE_M */
216 ldr MASK, =0x07E007E0
217 mov STRIDE_M, #0xFF000000
218 /* Set GE[3:0] to 1010 so SEL instructions do what we want */
219 ldr SCRATCH, =0x80008000
/* Adding 0x80+0x80 in bytes 3 and 1 carries out (GE set); bytes 2 and 0 add
 * 0+0 (GE clear), which yields the 1010 pattern. */
220 uadd8 SCRATCH, SCRATCH, SCRATCH
/* Convert two r5g6b5 pixels packed in one register into two a8r8g8b8 pixels.
 *   reg1  index of the WK register holding both 16-bit pixels on entry; on
 *         exit it holds the converted pixel from the lower halfword
 *         (lower-case letters in the bit diagrams)
 *   reg2  index of the WK register that receives the converted pixel from
 *         the upper halfword (upper-case letters in the bit diagrams)
 * Relies on the invariants established by src_0565_8888_init:
 * MASK = 0x07E007E0, STRIDE_M = 0xFF000000 and GE[3:0] = 1010.
 * Clobbers SCRATCH.
 * The registers are spelled WK&reg1 / WK&reg2 so that the assembler
 * substitutes the macro arguments into the register names.
 * The trailing @ comments show the bit layout of the destination register
 * after each instruction.
 * NOTE(review): the closing .endm is not present in this excerpt. */
223 .macro src_0565_8888_2pixels, reg1, reg2
224 and SCRATCH, WK&reg1, MASK @ 00000GGGGGG0000000000gggggg00000
225 bic WK&reg2, WK&reg1, MASK @ RRRRR000000BBBBBrrrrr000000bbbbb
226 orr SCRATCH, SCRATCH, SCRATCH, lsr #6 @ 00000GGGGGGGGGGGG0000ggggggggggg
227 mov WK&reg1, WK&reg2, lsl #16 @ rrrrr000000bbbbb0000000000000000
228 mov SCRATCH, SCRATCH, ror #19 @ GGGG0000ggggggggggg00000GGGGGGGG
229 bic WK&reg2, WK&reg2, WK&reg1, lsr #16 @ RRRRR000000BBBBB0000000000000000
230 orr WK&reg1, WK&reg1, WK&reg1, lsr #5 @ rrrrrrrrrr0bbbbbbbbbb00000000000
231 orr WK&reg2, WK&reg2, WK&reg2, lsr #5 @ RRRRRRRRRR0BBBBBBBBBB00000000000
232 pkhtb WK&reg1, WK&reg1, WK&reg1, asr #5 @ rrrrrrrr--------bbbbbbbb--------
233 sel WK&reg1, WK&reg1, SCRATCH @ rrrrrrrrggggggggbbbbbbbb--------
234 mov SCRATCH, SCRATCH, ror #16 @ ggg00000GGGGGGGGGGGG0000gggggggg
235 pkhtb WK&reg2, WK&reg2, WK&reg2, asr #5 @ RRRRRRRR--------BBBBBBBB--------
236 sel WK&reg2, WK&reg2, SCRATCH @ RRRRRRRRGGGGGGGGBBBBBBBB--------
237 orr WK&reg1, STRIDE_M, WK&reg1, lsr #8 @ 11111111rrrrrrrrggggggggbbbbbbbb
238 orr WK&reg2, STRIDE_M, WK&reg2, lsr #8 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
241 /* This version doesn't need STRIDE_M, but is one instruction longer.
242 It would however be preferable for an XRGB target, since we could knock off the last 2 instructions, but is that a common case?
243 and SCRATCH, WK&reg1, MASK @ 00000GGGGGG0000000000gggggg00000
244 bic WK&reg1, WK&reg1, MASK @ RRRRR000000BBBBBrrrrr000000bbbbb
245 orr SCRATCH, SCRATCH, SCRATCH, lsr #6 @ 00000GGGGGGGGGGGG0000ggggggggggg
246 mov WK&reg2, WK&reg1, lsr #16 @ 0000000000000000RRRRR000000BBBBB
247 mov SCRATCH, SCRATCH, ror #27 @ GGGGGGGGGGGG0000ggggggggggg00000
248 bic WK&reg1, WK&reg1, WK&reg2, lsl #16 @ 0000000000000000rrrrr000000bbbbb
249 mov WK&reg2, WK&reg2, lsl #3 @ 0000000000000RRRRR000000BBBBB000
250 mov WK&reg1, WK&reg1, lsl #3 @ 0000000000000rrrrr000000bbbbb000
251 orr WK&reg2, WK&reg2, WK&reg2, lsr #5 @ 0000000000000RRRRRRRRRR0BBBBBBBB
252 orr WK&reg1, WK&reg1, WK&reg1, lsr #5 @ 0000000000000rrrrrrrrrr0bbbbbbbb
253 pkhbt WK&reg2, WK&reg2, WK&reg2, lsl #5 @ --------RRRRRRRR--------BBBBBBBB
254 pkhbt WK&reg1, WK&reg1, WK&reg1, lsl #5 @ --------rrrrrrrr--------bbbbbbbb
255 sel WK&reg2, SCRATCH, WK&reg2 @ --------RRRRRRRRGGGGGGGGBBBBBBBB
256 sel WK&reg1, SCRATCH, WK&reg1 @ --------rrrrrrrrggggggggbbbbbbbb
257 orr WK&reg2, WK&reg2, #0xFF000000 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
258 orr WK&reg1, WK&reg1, #0xFF000000 @ 11111111rrrrrrrrggggggggbbbbbbbb
/* Convert a single r5g6b5 pixel held in the low halfword of a register into
 * an a8r8g8b8 pixel, in place.
 *   reg  index of the WK register holding the pixel (the bit diagrams show
 *        the upper halfword as zero on entry)
 * Relies on the invariants established by src_0565_8888_init:
 * MASK = 0x07E007E0 and GE[3:0] = 1010. The top byte is set with an
 * immediate, so STRIDE_M is not needed here. Clobbers SCRATCH.
 * The register is spelled WK&reg so that the assembler substitutes the macro
 * argument into the register name.
 * The trailing @ comments show the bit layout of the destination register
 * after each instruction.
 * NOTE(review): the closing .endm is not present in this excerpt. */
261 .macro src_0565_8888_1pixel, reg
262 bic SCRATCH, WK&reg, MASK @ 0000000000000000rrrrr000000bbbbb
263 and WK&reg, WK&reg, MASK @ 000000000000000000000gggggg00000
264 mov SCRATCH, SCRATCH, lsl #3 @ 0000000000000rrrrr000000bbbbb000
265 mov WK&reg, WK&reg, lsl #5 @ 0000000000000000gggggg0000000000
266 orr SCRATCH, SCRATCH, SCRATCH, lsr #5 @ 0000000000000rrrrrrrrrr0bbbbbbbb
267 orr WK&reg, WK&reg, WK&reg, lsr #6 @ 000000000000000gggggggggggg00000
268 pkhbt SCRATCH, SCRATCH, SCRATCH, lsl #5 @ --------rrrrrrrr--------bbbbbbbb
269 sel WK&reg, WK&reg, SCRATCH @ --------rrrrrrrrggggggggbbbbbbbb
270 orr WK&reg, WK&reg, #0xFF000000 @ 11111111rrrrrrrrggggggggbbbbbbbb
/* Head step for r5g6b5 -> a8r8g8b8: loads the source pixels for this chunk
 * from SRC. Each visible load fetches half as many bytes as the numbytes
 * value it is paired with (8, then 4 for numbytes == 8, then 2 for
 * numbytes == 4). The 8-byte load names firstreg and firstreg+2 as its
 * registers, which matches the registers the tail macro passes as reg1 to
 * src_0565_8888_2pixels. cond, unaligned_mask and preload are not referenced
 * by the visible lines.
 * NOTE(review): the opening .if, the closing .endif and .endm are not
 * present in this excerpt. */
273 .macro src_0565_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
275 pixldst ld,, 8, firstreg, %(firstreg+2),,, SRC, unaligned_src
276 .elseif numbytes == 8
277 pixld , 4, firstreg, SRC, unaligned_src
278 .elseif numbytes == 4
279 pixld , 2, firstreg, SRC, unaligned_src
/* Tail step for r5g6b5 -> a8r8g8b8: expands the loaded 16-bit pixels into
 * 32-bit pixels in the WK registers. The first visible branch converts two
 * pairs (firstreg/firstreg+1 and firstreg+2/firstreg+3), the numbytes == 8
 * branch converts one pair, and the last visible line converts a single
 * pixel. cond is not referenced by the visible lines.
 * NOTE(review): the opening .if, the final .else/.endif and .endm are not
 * present in this excerpt. */
283 .macro src_0565_8888_process_tail cond, numbytes, firstreg
285 src_0565_8888_2pixels firstreg, %(firstreg+1)
286 src_0565_8888_2pixels %(firstreg+2), %(firstreg+3)
287 .elseif numbytes == 8
288 src_0565_8888_2pixels firstreg, %(firstreg+1)
290 src_0565_8888_1pixel firstreg
/* Instantiate pixman_composite_src_0565_8888_asm_armv6 from the
 * generate_composite_function template, using src_0565_8888_init and the
 * 0565 -> 8888 head/tail macros. The numeric arguments 16, 0, 32 are
 * presumably the source, mask and destination bits per pixel (confirm against
 * the template definition). This variant passes FLAG_BRANCH_OVER instead of
 * FLAG_COND_EXEC; its head and tail macros do not use their cond argument. */
294 generate_composite_function \
295 pixman_composite_src_0565_8888_asm_armv6, 16, 0, 32, \
296 FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER, \
297 3, /* prefetch distance */ \
298 src_0565_8888_init, \
299 nop_macro, /* newline */ \
300 nop_macro, /* cleanup */ \
301 src_0565_8888_process_head, \
302 src_0565_8888_process_tail
304 /******************************************************************************/
/* Saturating per-byte add for eight 8-bit pixels: adds MASK into WK&dst1 and
 * STRIDE_M into WK&dst2 with UQADD8 (each byte clamps at 0xFF).
 *   cond        ARM condition code appended to both instructions
 *   dst1, dst2  indices of the WK registers updated in place
 * NOTE(review): MASK and STRIDE_M are evidently used as data registers here;
 * presumably they receive the source pixels loaded by the head macro -
 * confirm against the WK register mapping. .endm is not present in this
 * excerpt. */
306 .macro add_8_8_8pixels cond, dst1, dst2
307 uqadd8&cond WK&dst1, WK&dst1, MASK
308 uqadd8&cond WK&dst2, WK&dst2, STRIDE_M
/* Saturating per-byte add for four 8-bit pixels: adds MASK into WK&dst with
 * UQADD8 (each byte clamps at 0xFF).
 *   cond  ARM condition code appended to the instruction
 *   dst   index of the WK register updated in place
 * NOTE(review): .endm is not present in this excerpt. */
311 .macro add_8_8_4pixels cond, dst
312 uqadd8&cond WK&dst, WK&dst, MASK
/* Head step for ADD 8 -> 8. The first group of visible lines loads 8 source
 * bytes (first-register index 4) and 16 destination bytes (starting at
 * firstreg), adds the first eight pixels, then loads the next 8 source bytes
 * into the same registers. The second group loads numbytes of source
 * (index 4) and numbytes of destination (starting at firstreg).
 * unaligned_mask and preload are not referenced by the visible lines.
 * NOTE(review): the .if/.else/.endif lines that select between the two groups
 * and .endm are not present in this excerpt. */
315 .macro add_8_8_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
319 pixld cond, 8, 4, SRC, unaligned_src
320 pixld cond, 16, firstreg, DST, 0
321 add_8_8_8pixels cond, firstreg, %(firstreg+1)
322 pixld cond, 8, 4, SRC, unaligned_src
324 pixld cond, numbytes, 4, SRC, unaligned_src
325 pixld cond, numbytes, firstreg, DST, 0
/* Tail step for ADD 8 -> 8: performs the saturating adds not already done by
 * the head. The first visible line adds the second eight pixels
 * (firstreg+2/firstreg+3), the numbytes == 8 branch adds
 * firstreg/firstreg+1, and the last visible line adds four pixels in
 * firstreg.
 * NOTE(review): the opening .if, the final .else/.endif and .endm are not
 * present in this excerpt. */
331 .macro add_8_8_process_tail cond, numbytes, firstreg
333 add_8_8_8pixels cond, %(firstreg+2), %(firstreg+3)
334 .elseif numbytes == 8
335 add_8_8_8pixels cond, firstreg, %(firstreg+1)
337 add_8_8_4pixels cond, firstreg
/* Instantiate pixman_composite_add_8_8_asm_armv6 from the
 * generate_composite_function template. The destination is declared
 * read/write (FLAG_DST_READWRITE) because the head macro loads from DST. The
 * numeric arguments 8, 0, 8 are presumably the source, mask and destination
 * bits per pixel (confirm against the template definition).
 * NOTE(review): the final (process tail) continuation line is not present in
 * this excerpt. */
341 generate_composite_function \
342 pixman_composite_add_8_8_asm_armv6, 8, 0, 8, \
343 FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_PRESERVES_SCRATCH, \
344 2, /* prefetch distance */ \
345 nop_macro, /* init */ \
346 nop_macro, /* newline */ \
347 nop_macro, /* cleanup */ \
348 add_8_8_process_head, \
351 /******************************************************************************/
/* Init step for OVER 8888 -> 8888.
 *   MASK    = 0x00800080 (the rounding constant added by the MLA
 *             instructions in over_8888_8888_1pixel)
 *   GE[3:0] = 0101
 * Also passes STRIDE_D, STRIDE_S and ORIG_W to line_saved_regs. Clobbers
 * SCRATCH.
 * NOTE(review): the closing .endm is not present in this excerpt. */
353 .macro over_8888_8888_init
354 /* Hold loop invariant in MASK */
355 ldr MASK, =0x00800080
356 /* Set GE[3:0] to 0101 so SEL instructions do what we want */
/* 0x80+0x80 carries out of bytes 2 and 0 (GE set); bytes 3 and 1 add 0+0. */
357 uadd8 SCRATCH, MASK, MASK
358 line_saved_regs STRIDE_D, STRIDE_S, ORIG_W
/* Head step for OVER 8888 -> 8888: loads numbytes of source into the WK
 * registers starting at 4+firstreg and numbytes of destination into the WK
 * registers starting at firstreg. Both loads are unconditional (empty cond
 * argument to pixld).
 * NOTE(review): the lines surrounding the two loads, and .endm, are not
 * present in this excerpt. */
361 .macro over_8888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
366 pixld , numbytes, %(4+firstreg), SRC, unaligned_src
367 pixld , numbytes, firstreg, DST, 0
/* Tests whether the source pixels in reg0..reg3 are all fully transparent.
 * NOTE(review): the instructions of this macro and its .endm are not present
 * in this excerpt, so the exact test and which flags it sets cannot be
 * confirmed here. */
374 .macro over_8888_8888_check_transparent numbytes, reg0, reg1, reg2, reg3
375 /* Since these colours are premultiplied by alpha, only 0 indicates transparent (any other colour with 0 in the alpha byte is luminous) */
/* Reduce the pixel in WK&next to its top byte by shifting it right 24 bits,
 * ready for use as a multiplier by over_8888_8888_1pixel (presumably the
 * alpha channel of an a8r8g8b8 source pixel).
 * NOTE(review): the closing .endm is not present in this excerpt. */
386 .macro over_8888_8888_prepare next
387 mov WK&next, WK&next, lsr #24
/* Composite one source pixel OVER one destination pixel.
 *   src     index of the WK register holding the source's top byte shifted
 *           down to bits 0-7 on entry (see over_8888_8888_prepare); it is
 *           reloaded with the full source pixel part-way through
 *   dst     index of the WK register holding the destination pixel; receives
 *           the result
 *   offset  byte offset from SRC at which the full source pixel is re-read
 *   next    index of the WK register holding the next source pixel, which is
 *           shifted down by 24 here to prepare the following call
 * Requires MASK = 0x00800080 and GE[3:0] = 0101 (see over_8888_8888_init).
 * Clobbers SCRATCH. The instruction order is arranged to fill pipeline
 * stalls, so it should not be reordered casually.
 * NOTE(review): the lines around the "next" shift (presumably a conditional
 * guard) and .endm are not present in this excerpt. */
390 .macro over_8888_8888_1pixel src, dst, offset, next
391 /* src = destination component multiplier */
392 rsb WK&src, WK&src, #255
393 /* Split even/odd bytes of dst into SCRATCH/dst */
394 uxtb16 SCRATCH, WK&dst
395 uxtb16 WK&dst, WK&dst, ror #8
396 /* Multiply through, adding 0.5 to the upper byte of result for rounding */
397 mla SCRATCH, SCRATCH, WK&src, MASK
398 mla WK&dst, WK&dst, WK&src, MASK
399 /* Where we would have had a stall between the result of the first MLA and the shifter input,
400 * reload the complete source pixel */
401 ldr WK&src, [SRC, #offset]
402 /* Multiply by 257/256 to approximate 256/255 */
403 uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
404 /* In this stall, start processing the next pixel */
406 mov WK&next, WK&next, lsr #24
408 uxtab16 WK&dst, WK&dst, WK&dst, ror #8
409 /* Recombine even/odd bytes of multiplied destination */
410 mov SCRATCH, SCRATCH, ror #8
411 sel WK&dst, SCRATCH, WK&dst
412 /* Saturated add of source to multiplied destination */
413 uqadd8 WK&dst, WK&dst, WK&src
/* Tail step for OVER 8888 -> 8888. Visible sequence: run the transparency
 * check on the source registers 4+firstreg .. 7+firstreg, prepare the first
 * source pixel, then call over_8888_8888_1pixel with PROCESS_REG starting at
 * firstreg and PROCESS_OFF starting at -numbytes (advancing by one register
 * and 4 bytes each time), and finally store numbytes from firstreg to DST.
 * NOTE(review): the repeat construct around the per-pixel call, any branch
 * taken after the transparency check, and .endm are not present in this
 * excerpt. */
416 .macro over_8888_8888_process_tail cond, numbytes, firstreg
421 over_8888_8888_check_transparent numbytes, %(4+firstreg), %(5+firstreg), %(6+firstreg), %(7+firstreg)
423 over_8888_8888_prepare %(4+firstreg)
424 .set PROCESS_REG, firstreg
425 .set PROCESS_OFF, -numbytes
427 over_8888_8888_1pixel %(4+PROCESS_REG), %(0+PROCESS_REG), PROCESS_OFF, %(5+PROCESS_REG)
428 .set PROCESS_REG, PROCESS_REG+1
429 .set PROCESS_OFF, PROCESS_OFF+4
431 pixst , numbytes, firstreg, DST
/* Instantiate pixman_composite_over_8888_8888_asm_armv6 from the
 * generate_composite_function template, using over_8888_8888_init and the
 * OVER 8888 -> 8888 head/tail macros. The numeric arguments 32, 0, 32 are
 * presumably the source, mask and destination bits per pixel (confirm against
 * the template definition).
 * Every argument is terminated by an explicit comma, matching the other
 * invocations in this file, rather than relying on whitespace to separate
 * the bpp value, the flags expression and the prefetch distance. */
439 generate_composite_function \
440 pixman_composite_over_8888_8888_asm_armv6, 32, 0, 32, \
441 FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS, \
442 2, /* prefetch distance */ \
443 over_8888_8888_init, \
444 nop_macro, /* newline */ \
445 nop_macro, /* cleanup */ \
446 over_8888_8888_process_head, \
447 over_8888_8888_process_tail
449 /******************************************************************************/
451 /* Multiply each byte of a word by a byte.
452 * Useful when there aren't any obvious ways to fill the stalls with other instructions.
453 * word Register containing 4 bytes
454 * byte Register containing byte multiplier (bits 8-31 must be 0)
455 * tmp Scratch register
456 * half Register containing the constant 0x00800080
457 * GE[3:0] bits must contain 0101
/* Multiply each byte of "word" by the byte in "byte"; the result replaces
 * "word". "half" must hold 0x00800080 and is added by each MLA for rounding.
 * "tmp" is clobbered.
 * NOTE(review): the instruction that initialises tmp, the final recombination
 * instructions and .endm are not present in this excerpt. */
459 .macro mul_8888_8 word, byte, tmp, half
460 /* Split even/odd bytes of word apart */
462 uxtb16 word, word, ror #8
463 /* Multiply bytes together with rounding, then by 257/256 */
464 mla tmp, tmp, byte, half
465 mla word, word, byte, half /* 1 stall follows */
466 uxtab16 tmp, tmp, tmp, ror #8 /* 1 stall follows */
467 uxtab16 word, word, word, ror #8
468 /* Recombine bytes */
473 /******************************************************************************/
/* Init step for OVER 8888 x constant-mask -> 8888.
 *   MASK     = top byte of the word at [sp, #ARGS_STACK_OFFSET+8], shifted
 *              down to bits 0-7
 *   STRIDE_M = 0x00800080 (rounding constant handed to mul_8888_8)
 *   GE[3:0]  = 0101
 * Also passes Y, STRIDE_D, STRIDE_S and ORIG_W to line_saved_regs. Clobbers
 * SCRATCH.
 * NOTE(review): the closing .endm is not present in this excerpt. */
475 .macro over_8888_n_8888_init
476 /* Mask is constant */
477 ldr MASK, [sp, #ARGS_STACK_OFFSET+8]
478 /* Hold loop invariant in STRIDE_M */
479 ldr STRIDE_M, =0x00800080
480 /* We only want the alpha bits of the constant mask */
481 mov MASK, MASK, lsr #24
482 /* Set GE[3:0] to 0101 so SEL instructions do what we want */
483 uadd8 SCRATCH, STRIDE_M, STRIDE_M
484 line_saved_regs Y, STRIDE_D, STRIDE_S, ORIG_W
/* Head step for OVER 8888 x n -> 8888: loads numbytes of source into the WK
 * registers starting at 4+(firstreg%2) and numbytes of destination into the
 * WK registers starting at firstreg. Both loads are unconditional.
 * NOTE(review): the lines surrounding the two loads, and .endm, are not
 * present in this excerpt. */
487 .macro over_8888_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
492 pixld , numbytes, %(4+(firstreg%2)), SRC, unaligned_src
493 pixld , numbytes, firstreg, DST, 0
/* Composite one masked source pixel OVER one destination pixel.
 *   src  index of the WK register holding the source pixel (overwritten with
 *        the source multiplied by MASK)
 *   dst  index of the WK register holding the destination pixel; receives
 *        the result
 * Steps: scale the source by MASK; compute WK7 = WK6 - (top byte of the
 * scaled source); scale the destination by WK7; saturating-add the scaled
 * source. Uses WK6 and WK7 as temporaries and clobbers SCRATCH.
 * NOTE(review): WK6 is presumably preloaded with 255 by a line not present in
 * this excerpt - confirm. .endm is also not present. */
500 .macro over_8888_n_8888_1pixel src, dst
501 mul_8888_8 WK&src, MASK, SCRATCH, STRIDE_M
502 sub WK7, WK6, WK&src, lsr #24
503 mul_8888_8 WK&dst, WK7, SCRATCH, STRIDE_M
504 uqadd8 WK&dst, WK&dst, WK&src
/* Tail step for OVER 8888 x n -> 8888. Visible sequence: run the
 * transparency check on the loaded source registers, then call
 * over_8888_n_8888_1pixel with PROCESS_REG starting at firstreg (source
 * register 4+(PROCESS_REG%2)), and finally store numbytes from firstreg to
 * DST. In the 16-byte case, when PROCESS_REG reaches 2, the two source words
 * immediately below SRC are reloaded into WK4/WK5.
 * NOTE(review): the repeat construct around the per-pixel call, the matching
 * .endif, any branch taken after the transparency check, and .endm are not
 * present in this excerpt. */
507 .macro over_8888_n_8888_process_tail cond, numbytes, firstreg
512 over_8888_8888_check_transparent numbytes, %(4+(firstreg%2)), %(5+(firstreg%2)), %(6+firstreg), %(7+firstreg)
515 .set PROCESS_REG, firstreg
517 .if numbytes == 16 && PROCESS_REG == 2
518 /* We're using WK6 and WK7 as temporaries, so half way through
519 * 4 pixels, reload the second two source pixels but this time
520 * into WK4 and WK5 */
521 ldmdb SRC, {WK4, WK5}
523 over_8888_n_8888_1pixel %(4+(PROCESS_REG%2)), %(PROCESS_REG)
524 .set PROCESS_REG, PROCESS_REG+1
526 pixst , numbytes, firstreg, DST
/* Instantiate pixman_composite_over_8888_n_8888_asm_armv6 from the
 * generate_composite_function template, using over_8888_n_8888_init and the
 * matching head/tail macros. The numeric arguments 32, 0, 32 are presumably
 * the source, mask and destination bits per pixel (confirm against the
 * template definition); the mask is a constant fetched by the init macro.
 * Every argument is terminated by an explicit comma, matching the other
 * invocations in this file, rather than relying on whitespace to separate
 * the bpp value, the flags expression and the prefetch distance. */
534 generate_composite_function \
535 pixman_composite_over_8888_n_8888_asm_armv6, 32, 0, 32, \
536 FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS, \
537 2, /* prefetch distance */ \
538 over_8888_n_8888_init, \
539 nop_macro, /* newline */ \
540 nop_macro, /* cleanup */ \
541 over_8888_n_8888_process_head, \
542 over_8888_n_8888_process_tail
544 /******************************************************************************/
/* Init step for OVER solid x 8-bit mask -> 8888.
 *   SRC     = bytes 1 and 3 of the word at [sp, #ARGS_STACK_OFFSET], each
 *             zero-extended to 16 bits
 *   GE[3:0] = 0101
 * Also passes Y, STRIDE_D, STRIDE_M and ORIG_W to line_saved_regs. Clobbers
 * SCRATCH.
 * NOTE(review): a line between the SCRATCH load and the uxtb16 is not present
 * in this excerpt (presumably the extraction of bytes 0 and 2 into another
 * register); .endm is also missing. */
546 .macro over_n_8_8888_init
547 /* Source is constant, but splitting it into even/odd bytes is a loop invariant */
548 ldr SRC, [sp, #ARGS_STACK_OFFSET]
549 /* Not enough registers to hold this constant, but we still use it here to set GE[3:0] */
550 ldr SCRATCH, =0x00800080
552 uxtb16 SRC, SRC, ror #8
553 /* Set GE[3:0] to 0101 so SEL instructions do what we want */
554 uadd8 SCRATCH, SCRATCH, SCRATCH
555 line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W
/* Per-scanline step for OVER n x 8 -> 8888: loads the rounding constant
 * 0x00800080 into STRIDE_D, which the per-pixel macro uses as the MLA addend.
 * NOTE(review): any further lines of this macro, including .endm, are not
 * present in this excerpt. */
558 .macro over_n_8_8888_newline
559 ldr STRIDE_D, =0x00800080
/* Head step for OVER n x 8 -> 8888: loads numbytes/4 bytes of mask (one mask
 * byte per 4 bytes of output) using first-register index 4 and the MASK
 * pointer, then numbytes of destination into the WK registers starting at
 * firstreg. Both loads are unconditional.
 * NOTE(review): the lines surrounding the two loads, and .endm, are not
 * present in this excerpt. */
565 .macro over_n_8_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
567 pixld , numbytes/4, 4, MASK, unaligned_mask
568 pixld , numbytes, firstreg, DST, 0
/* Composite the solid source, scaled by one mask byte, OVER one destination
 * pixel.
 *   src  byte number (0-3) of the mask value within WK4
 *   dst  index of the WK register holding the destination pixel; receives
 *        the result
 * Visible steps: extract the mask byte into Y; multiply STRIDE_S and SRC by
 * it (adding STRIDE_D, which holds 0x00800080, for rounding) and apply the
 * 257/256 correction; subtract the top byte of Y from ORIG_W; scale the
 * destination by ORIG_W; saturating-add Y. Clobbers Y, SCRATCH and ORIG_W.
 * NOTE(review): two lines are not present in this excerpt - presumably the
 * initialisation of ORIG_W and the recombination of SCRATCH and Y into the
 * masked source pixel. STRIDE_S presumably holds the even bytes of the
 * source colour. .endm is also missing. */
572 .macro over_n_8_8888_1pixel src, dst
573 uxtb Y, WK4, ror #src*8
574 /* Trailing part of multiplication of source */
575 mla SCRATCH, STRIDE_S, Y, STRIDE_D
576 mla Y, SRC, Y, STRIDE_D
578 uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
579 uxtab16 Y, Y, Y, ror #8
580 mov SCRATCH, SCRATCH, ror #8
581 sub ORIG_W, ORIG_W, Y, lsr #24
583 /* Then multiply the destination */
584 mul_8888_8 WK&dst, ORIG_W, SCRATCH, STRIDE_D
585 uqadd8 WK&dst, WK&dst, Y
/* Tail step for OVER n x 8 -> 8888: calls over_n_8_8888_1pixel with
 * PROCESS_REG starting at firstreg, passing PROCESS_REG-firstreg as the mask
 * byte number and PROCESS_REG as the destination register, then stores
 * numbytes from firstreg to DST.
 * NOTE(review): the repeat construct around the per-pixel call, any lines
 * before the first .set, and .endm are not present in this excerpt. */
588 .macro over_n_8_8888_process_tail cond, numbytes, firstreg
592 .set PROCESS_REG, firstreg
594 over_n_8_8888_1pixel %(PROCESS_REG-firstreg), %(PROCESS_REG)
595 .set PROCESS_REG, PROCESS_REG+1
597 pixst , numbytes, firstreg, DST
/* Instantiate pixman_composite_over_n_8_8888_asm_armv6 from the
 * generate_composite_function template, using over_n_8_8888_init, the
 * per-scanline over_n_8_8888_newline and the matching head/tail macros. The
 * numeric arguments 0, 8, 32 are presumably the source, mask and destination
 * bits per pixel (confirm against the template definition).
 * Every argument is terminated by an explicit comma, matching the other
 * invocations in this file, rather than relying on whitespace to separate
 * the bpp value, the flags expression and the prefetch distance. */
602 generate_composite_function \
603 pixman_composite_over_n_8_8888_asm_armv6, 0, 8, 32, \
604 FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS, \
605 2, /* prefetch distance */ \
606 over_n_8_8888_init, \
607 over_n_8_8888_newline, \
608 nop_macro, /* cleanup */ \
609 over_n_8_8888_process_head, \
610 over_n_8_8888_process_tail
612 /******************************************************************************/