2 * Copyright © 2008 Mozilla Corporation
3 * Copyright © 2010 Nokia Corporation
5 * Permission to use, copy, modify, distribute, and sell this software and its
6 * documentation for any purpose is hereby granted without fee, provided that
7 * the above copyright notice appear in all copies and that both that
8 * copyright notice and this permission notice appear in supporting
9 * documentation, and that the name of Mozilla Corporation not be used in
10 * advertising or publicity pertaining to distribution of the software without
11 * specific, written prior permission. Mozilla Corporation makes no
12 * representations about the suitability of this software for any purpose. It
13 * is provided "as is" without express or implied warranty.
15 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
20 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
21 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
24 * Author: Jeff Muizelaar (jeff@infidigm.net)
28 /* Prevent the stack from becoming executable */
/*
 * The section below is only emitted when both __linux__ and __ELF__ are
 * defined. It is declared with an empty flags string (so no "x" flag) and
 * type %progbits, and this file places nothing inside it.
 */
29 #if defined(__linux__) && defined(__ELF__)
30 .section .note.GNU-stack,"",%progbits
40 /* Supplementary macro for setting function attributes */
/*
 * Takes one argument:
 *   fname - the symbol the attributes are applied to.
 * The generate_nearest_scanline_func template invokes this macro with the
 * name of the function it is generating.
 */
41 .macro pixman_asm_function fname
/* Mark the symbol as a function in the object file's symbol table. */
46 .type fname, %function
52 * Note: This code is only using armv5te instructions (not even armv6),
53 * but is scheduled for ARM Cortex-A8 pipeline. So it might need to
54 * be split into a few variants, tuned for each microarchitecture.
56 * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
57 * have efficient write combining), it needs to be changed to use 16-byte
58 * aligned writes using STM instruction.
60 * Nearest scanline scaler macro template uses the following arguments:
61 * fname - name of the function to generate
62 * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes
63 * t - type suffix for LDR/STR instructions
64 * prefetch_distance - prefetch in the source image by that many pixels ahead
66 * prefetch_braking_distance - stop prefetching when that many pixels are
67 * remaining before the end of scanline
70 .macro generate_nearest_scanline_func fname, bpp_shift, t, \
72 prefetch_braking_distance
/*
 * Template body. Registers are referred to through symbolic aliases
 * (W, DST, SRC, VX, UNIT_X, TMP1, TMP2, VXMASK, PF_OFFS, SRC_WIDTH_FIXED).
 * Pixels are copied from SRC to DST one at a time; the byte offset of each
 * source pixel is derived from VX, and DST advances by one pixel per store.
 */
/* Apply the function attributes to the symbol being generated. */
74 pixman_asm_function fname
/* SRC_WIDTH_FIXED is an alias for r8 (released by the .unreq further down). */
84 SRC_WIDTH_FIXED .req r8
/* Save six registers (24 bytes); the same list is restored by the final pop. */
87 push {r4, r5, r6, r7, r8, r10}
/*
 * VXMASK = ~((1 << bpp_shift) - 1): an AND with it clears the low bpp_shift
 * bits, so every masked offset is a multiple of the pixel size.
 */
88 mvn VXMASK, #((1 << bpp_shift) - 1)
/*
 * Read from sp + 28, i.e. 4 bytes above where sp pointed before the 24-byte
 * push. NOTE(review): presumably the second stack-passed argument of the
 * C-callable function - confirm against the caller's prototype.
 */
89 ldr SRC_WIDTH_FIXED, [sp, #28]
91 /* define helper macro */
/*
 * The helper (scale_2_pixels, per the .purgem below) handles two pixels and
 * ping-pongs between TMP1 and TMP2: each half loads the pixel whose offset
 * was prepared earlier, prepares the offset for the following pixel in the
 * other temporary, and then stores the loaded pixel.
 */
/* First half: TMP1 holds a byte offset on entry and the pixel value after. */
93 ldr&t TMP1, [SRC, TMP1]
/*
 * TMP2 = (VX asr (16 - bpp_shift)) & VXMASK, a pixel-aligned byte offset.
 * NOTE(review): the 16-bit shift suggests VX is 16.16 fixed point - confirm.
 */
94 and TMP2, VXMASK, VX, asr #(16 - bpp_shift)
/* Store the pixel, then post-increment DST by one pixel (1 << bpp_shift bytes). */
96 str&t TMP1, [DST], #(1 << bpp_shift)
/*
 * Executed only when the N flag is clear (PL): subtract SRC_WIDTH_FIXED from
 * VX and update the flags. NOTE(review): presumably this wraps VX once it
 * has run past the source width - confirm.
 */
97 9: subpls VX, VX, SRC_WIDTH_FIXED
/* Second half: same sequence with the roles of TMP1 and TMP2 swapped. */
100 ldr&t TMP2, [SRC, TMP2]
101 and TMP1, VXMASK, VX, asr #(16 - bpp_shift)
103 str&t TMP2, [DST], #(1 << bpp_shift)
104 9: subpls VX, VX, SRC_WIDTH_FIXED
108 /* now do the scaling */
/* Prime TMP1 with the byte offset of the first pixel the helper will load. */
109 and TMP1, VXMASK, VX, asr #(16 - bpp_shift)
/* Same conditional subtract-and-set-flags step as inside the helper. */
111 9: subpls VX, VX, SRC_WIDTH_FIXED
/* Bias W down by 8 + prefetch_braking_distance and set the flags. */
113 subs W, W, #(8 + prefetch_braking_distance)
115 /* calculate prefetch offset */
/* PF_OFFS = UNIT_X * prefetch_distance + VX (mla: Rd = Rn * Rm + Ra). */
116 mov PF_OFFS, #prefetch_distance
117 mla PF_OFFS, UNIT_X, PF_OFFS, VX
118 1: /* main loop, process 8 pixels per iteration with prefetch */
/* Prefetch hint for the address SRC + (PF_OFFS asr (16 - bpp_shift)). */
119 pld [SRC, PF_OFFS, asr #(16 - bpp_shift)]
/* PF_OFFS += UNIT_X * 8, matching the 8 pixels handled per iteration. */
120 add PF_OFFS, UNIT_X, lsl #3
/*
 * Subtracting (4 - 8 - prefetch_braking_distance) adds back the earlier bias
 * of 8 + prefetch_braking_distance and subtracts 4, in one flag-setting
 * instruction.
 */
128 subs W, W, #(4 - 8 - prefetch_braking_distance)
130 1: /* process the remaining pixels */
/* Conditional (NE) load of one more pixel from byte offset TMP1. */
141 ldrne&t TMP1, [SRC, TMP1]
143 /* cleanup helper macro */
/*
 * Drop the helper definition; this template is instantiated more than once,
 * and each instantiation defines the helper again.
 */
144 .purgem scale_2_pixels
/* Release the r8 alias created by the .req above. */
154 .unreq SRC_WIDTH_FIXED
/* Restore the registers saved by the push at entry. */
156 pop {r4, r5, r6, r7, r8, r10}
/*
 * 0565 -> 0565 variant: bpp_shift = 1, so a pixel is 1 << 1 = 2 bytes;
 * t = h is the type suffix appended to the LDR/STR instructions;
 * prefetch_distance = 80, prefetch_braking_distance = 32.
 */
161 generate_nearest_scanline_func \
162 pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
/*
 * 8888 -> 8888 variant: bpp_shift = 2, so a pixel is 1 << 2 = 4 bytes;
 * t is left empty, so LDR/STR get no type suffix;
 * prefetch_distance = 48, prefetch_braking_distance = 32.
 */
164 generate_nearest_scanline_func \
165 pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32