Add qemu 2.4.0
[kvmfornfv.git] / qemu / pixman / pixman / pixman-inlines.h
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
28
29 #include "pixman-private.h"
30
/* Pseudo repeat mode with the value -1. The templates in this file compare
 * PIXMAN_REPEAT_ ## repeat_mode against the PIXMAN_REPEAT_* values, so this
 * name is presumably selected by passing COVER as repeat_mode (confirm
 * against the instantiating files).
 *
 * The replacement list is parenthesized so that the negative literal is
 * always treated as a single operand, whatever expression the macro is
 * expanded into.
 */
#define PIXMAN_REPEAT_COVER (-1)
32
/*
 * Flags describing the input parameters of a fast path macro template.
 *
 * Setting a flag either tells the template that some property is available
 * for it to exploit, or that some property is present and must be handled
 * by the template.
 *
 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually exclusive:
 * turning both of them on at the same time is not allowed.
 */

/* No flag bits set. */
#define FLAG_NONE                   (0)

/* The input mask is solid, so the template should handle this. */
#define FLAG_HAVE_SOLID_MASK        (1 << 1)

/* The input mask is a bits mask, so the template should handle this. */
#define FLAG_HAVE_NON_SOLID_MASK    (1 << 2)
50
/*
 * Source scanlines narrower than this constant are extended, so that the
 * scanline function is not called over and over again for very short runs.
 */
#define REPEAT_NORMAL_MIN_WIDTH 64
55
56 static force_inline pixman_bool_t
57 repeat (pixman_repeat_t repeat, int *c, int size)
58 {
59     if (repeat == PIXMAN_REPEAT_NONE)
60     {
61         if (*c < 0 || *c >= size)
62             return FALSE;
63     }
64     else if (repeat == PIXMAN_REPEAT_NORMAL)
65     {
66         while (*c >= size)
67             *c -= size;
68         while (*c < 0)
69             *c += size;
70     }
71     else if (repeat == PIXMAN_REPEAT_PAD)
72     {
73         *c = CLIP (*c, 0, size - 1);
74     }
75     else /* REFLECT */
76     {
77         *c = MOD (*c, size * 2);
78         if (*c >= size)
79             *c = size * 2 - *c - 1;
80     }
81     return TRUE;
82 }
83
84 static force_inline int
85 pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
86 {
87     return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
88            ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
89 }
90
91 #if BILINEAR_INTERPOLATION_BITS <= 4
92 /* Inspired by Filter_32_opaque from Skia */
93 static force_inline uint32_t
94 bilinear_interpolation (uint32_t tl, uint32_t tr,
95                         uint32_t bl, uint32_t br,
96                         int distx, int disty)
97 {
98     int distxy, distxiy, distixy, distixiy;
99     uint32_t lo, hi;
100
101     distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
102     disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
103
104     distxy = distx * disty;
105     distxiy = (distx << 4) - distxy;    /* distx * (16 - disty) */
106     distixy = (disty << 4) - distxy;    /* disty * (16 - distx) */
107     distixiy =
108         16 * 16 - (disty << 4) -
109         (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
110
111     lo = (tl & 0xff00ff) * distixiy;
112     hi = ((tl >> 8) & 0xff00ff) * distixiy;
113
114     lo += (tr & 0xff00ff) * distxiy;
115     hi += ((tr >> 8) & 0xff00ff) * distxiy;
116
117     lo += (bl & 0xff00ff) * distixy;
118     hi += ((bl >> 8) & 0xff00ff) * distixy;
119
120     lo += (br & 0xff00ff) * distxy;
121     hi += ((br >> 8) & 0xff00ff) * distxy;
122
123     return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
124 }
125
126 #else
127 #if SIZEOF_LONG > 4
128
129 static force_inline uint32_t
130 bilinear_interpolation (uint32_t tl, uint32_t tr,
131                         uint32_t bl, uint32_t br,
132                         int distx, int disty)
133 {
134     uint64_t distxy, distxiy, distixy, distixiy;
135     uint64_t tl64, tr64, bl64, br64;
136     uint64_t f, r;
137
138     distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
139     disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
140
141     distxy = distx * disty;
142     distxiy = distx * (256 - disty);
143     distixy = (256 - distx) * disty;
144     distixiy = (256 - distx) * (256 - disty);
145
146     /* Alpha and Blue */
147     tl64 = tl & 0xff0000ff;
148     tr64 = tr & 0xff0000ff;
149     bl64 = bl & 0xff0000ff;
150     br64 = br & 0xff0000ff;
151
152     f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
153     r = f & 0x0000ff0000ff0000ull;
154
155     /* Red and Green */
156     tl64 = tl;
157     tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
158
159     tr64 = tr;
160     tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
161
162     bl64 = bl;
163     bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
164
165     br64 = br;
166     br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
167
168     f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
169     r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
170
171     return (uint32_t)(r >> 16);
172 }
173
174 #else
175
176 static force_inline uint32_t
177 bilinear_interpolation (uint32_t tl, uint32_t tr,
178                         uint32_t bl, uint32_t br,
179                         int distx, int disty)
180 {
181     int distxy, distxiy, distixy, distixiy;
182     uint32_t f, r;
183
184     distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
185     disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
186
187     distxy = distx * disty;
188     distxiy = (distx << 8) - distxy;    /* distx * (256 - disty) */
189     distixy = (disty << 8) - distxy;    /* disty * (256 - distx) */
190     distixiy =
191         256 * 256 - (disty << 8) -
192         (distx << 8) + distxy;          /* (256 - distx) * (256 - disty) */
193
194     /* Blue */
195     r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
196       + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
197
198     /* Green */
199     f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
200       + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
201     r |= f & 0xff000000;
202
203     tl >>= 16;
204     tr >>= 16;
205     bl >>= 16;
206     br >>= 16;
207     r >>= 16;
208
209     /* Red */
210     f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
211       + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
212     r |= f & 0x00ff0000;
213
214     /* Alpha */
215     f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
216       + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
217     r |= f & 0xff000000;
218
219     return r;
220 }
221
222 #endif
223 #endif // BILINEAR_INTERPOLATION_BITS <= 4
224
225 /*
226  * For each scanline fetched from source image with PAD repeat:
227  * - calculate how many pixels need to be padded on the left side
228  * - calculate how many pixels need to be padded on the right side
229  * - update width to only count pixels which are fetched from the image
230  * All this information is returned via 'width', 'left_pad', 'right_pad'
231  * arguments. The code is assuming that 'unit_x' is positive.
232  *
233  * Note: 64-bit math is used in order to avoid potential overflows, which
234  *       is probably excessive in many cases. This particular function
235  *       may need its own correctness test and performance tuning.
236  */
237 static force_inline void
238 pad_repeat_get_scanline_bounds (int32_t         source_image_width,
239                                 pixman_fixed_t  vx,
240                                 pixman_fixed_t  unit_x,
241                                 int32_t *       width,
242                                 int32_t *       left_pad,
243                                 int32_t *       right_pad)
244 {
245     int64_t max_vx = (int64_t) source_image_width << 16;
246     int64_t tmp;
247     if (vx < 0)
248     {
249         tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
250         if (tmp > *width)
251         {
252             *left_pad = *width;
253             *width = 0;
254         }
255         else
256         {
257             *left_pad = (int32_t) tmp;
258             *width -= (int32_t) tmp;
259         }
260     }
261     else
262     {
263         *left_pad = 0;
264     }
265     tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
266     if (tmp < 0)
267     {
268         *right_pad = *width;
269         *width = 0;
270     }
271     else if (tmp >= *width)
272     {
273         *right_pad = 0;
274     }
275     else
276     {
277         *right_pad = *width - (int32_t) tmp;
278         *width = (int32_t) tmp;
279     }
280 }
281
/* A macroified version of specialized nearest scalers for some
 * common 8888 and 565 formats. It supports the SRC and OVER operators.
 *
 * There are two repeat versions: one that handles NORMAL repeat, and one
 * without repeat handling, which only works if the source region used is
 * completely covered by the pre-repeated source samples.
 *
 * The loops are unrolled to process two pixels per iteration for better
 * performance on most CPU architectures (superscalar processors can issue
 * several operations simultaneously, other processors can hide instruction
 * latencies by pipelining operations). Unrolling further does not make much
 * sense, because the compiler would soon start running out of spare
 * registers.
 */
296
/* Alpha accessors, looked up by the scanline template through
 * GET_ ## SRC_FORMAT ## _ALPHA. */

/* 8888: the alpha value is whatever lies above bit 23 of the pixel. */
#define GET_8888_ALPHA(s) ((s) >> 24)

/* x888: the pixel is always reported as fully opaque. */
#define GET_x888_ALPHA(s) 0xff

/* 0565: always fully opaque as well. This one is not actually used, since
 * there is no OVER with a 565 source, but it is needed to build. */
#define GET_0565_ALPHA(s) 0xff
302
/*
 * FAST_NEAREST_SCANLINE defines a force_inline function called
 * 'scanline_func_name' that composites one scanline with nearest
 * neighbour sampling.
 *
 * Macro arguments:
 *   SRC_FORMAT, DST_FORMAT - format names pasted into the convert_*_to_*
 *                            helpers and into GET_*_ALPHA
 *   src_type_t, dst_type_t - pixel storage types of source and destination
 *   OP                     - SRC or OVER; anything else calls abort ()
 *   repeat_mode            - pasted into PIXMAN_REPEAT_*; only NORMAL gets
 *                            special treatment in here
 *
 * Arguments of the generated function:
 *   dst                   - first destination pixel to write
 *   src                   - base pointer that pixman_fixed_to_int (vx) is
 *                           added to in order to find a source pixel
 *   w                     - number of destination pixels
 *   vx, unit_x            - fixed point source coordinate of the first
 *                           pixel and step per destination pixel
 *   src_width_fixed       - for NORMAL repeat, subtracted from vx after
 *                           every step for as long as vx is not negative
 *   fully_transparent_src - for OVER, makes the function return at once
 *
 * NOTE(review): with NORMAL repeat vx is kept negative after each step, so
 * 'src' is presumably expected to point past the end of the source line;
 * confirm against the callers.
 *
 * For OVER, a source pixel with alpha 0xff is converted and stored
 * directly, a source pixel that is entirely zero leaves the destination
 * alone, and every other pixel is blended in 8888 space.
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,       \
                              src_type_t, dst_type_t, OP, repeat_mode)          \
static force_inline void                                                        \
scanline_func_name (dst_type_t       *dst,                                      \
                    const src_type_t *src,                                      \
                    int32_t           w,                                        \
                    pixman_fixed_t    vx,                                       \
                    pixman_fixed_t    unit_x,                                   \
                    pixman_fixed_t    src_width_fixed,                          \
                    pixman_bool_t     fully_transparent_src)                    \
{                                                                               \
    uint32_t   dst_px;                                                          \
    src_type_t src_a, src_b;                                                    \
    uint8_t    alpha_a, alpha_b;                                                \
    int        idx_a, idx_b;                                                    \
                                                                                \
    /* OVER with a fully transparent source changes nothing */                  \
    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src)            \
        return;                                                                 \
                                                                                \
    /* Only the SRC and OVER operators are implemented */                       \
    if (!(PIXMAN_OP_ ## OP == PIXMAN_OP_SRC ||                                  \
          PIXMAN_OP_ ## OP == PIXMAN_OP_OVER))                                  \
        abort ();                                                               \
                                                                                \
    /* Main loop, two pixels per iteration */                                   \
    for (w -= 2; w >= 0; w -= 2)                                                \
    {                                                                           \
        idx_a = pixman_fixed_to_int (vx);                                       \
        vx += unit_x;                                                           \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)              \
        {                                                                       \
            /* This works because we know that unit_x is positive */            \
            while (vx >= 0)                                                     \
                vx -= src_width_fixed;                                          \
        }                                                                       \
        src_a = src[idx_a];                                                     \
                                                                                \
        idx_b = pixman_fixed_to_int (vx);                                       \
        vx += unit_x;                                                           \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)              \
        {                                                                       \
            /* This works because we know that unit_x is positive */            \
            while (vx >= 0)                                                     \
                vx -= src_width_fixed;                                          \
        }                                                                       \
        src_b = src[idx_b];                                                     \
                                                                                \
        if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                 \
        {                                                                       \
            alpha_a = GET_ ## SRC_FORMAT ## _ALPHA(src_a);                      \
            alpha_b = GET_ ## SRC_FORMAT ## _ALPHA(src_b);                      \
                                                                                \
            if (alpha_a == 0xff)                                                \
            {                                                                   \
                dst[0] = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (src_a);  \
            }                                                                   \
            else if (src_a)                                                     \
            {                                                                   \
                dst_px = convert_ ## DST_FORMAT ## _to_8888 (dst[0]);           \
                src_a = convert_ ## SRC_FORMAT ## _to_8888 (src_a);             \
                alpha_a ^= 0xff;                                                \
                UN8x4_MUL_UN8_ADD_UN8x4 (dst_px, alpha_a, src_a);               \
                dst[0] = convert_8888_to_ ## DST_FORMAT (dst_px);               \
            }                                                                   \
                                                                                \
            if (alpha_b == 0xff)                                                \
            {                                                                   \
                dst[1] = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (src_b);  \
            }                                                                   \
            else if (src_b)                                                     \
            {                                                                   \
                dst_px = convert_ ## DST_FORMAT ## _to_8888 (dst[1]);           \
                src_b = convert_ ## SRC_FORMAT ## _to_8888 (src_b);             \
                alpha_b ^= 0xff;                                                \
                UN8x4_MUL_UN8_ADD_UN8x4 (dst_px, alpha_b, src_b);               \
                dst[1] = convert_8888_to_ ## DST_FORMAT (dst_px);               \
            }                                                                   \
        }                                                                       \
        else /* PIXMAN_OP_SRC */                                                \
        {                                                                       \
            dst[0] = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (src_a);      \
            dst[1] = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (src_b);      \
        }                                                                       \
        dst += 2;                                                               \
    }                                                                           \
                                                                                \
    /* w is now -1 if one pixel is left over, -2 if none is */                  \
    if (w & 1)                                                                  \
    {                                                                           \
        idx_a = pixman_fixed_to_int (vx);                                       \
        src_a = src[idx_a];                                                     \
                                                                                \
        if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                 \
        {                                                                       \
            alpha_a = GET_ ## SRC_FORMAT ## _ALPHA(src_a);                      \
                                                                                \
            if (alpha_a == 0xff)                                                \
            {                                                                   \
                *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (src_a);    \
            }                                                                   \
            else if (src_a)                                                     \
            {                                                                   \
                dst_px = convert_ ## DST_FORMAT ## _to_8888 (*dst);             \
                src_a = convert_ ## SRC_FORMAT ## _to_8888 (src_a);             \
                alpha_a ^= 0xff;                                                \
                UN8x4_MUL_UN8_ADD_UN8x4 (dst_px, alpha_a, src_a);               \
                *dst = convert_8888_to_ ## DST_FORMAT (dst_px);                 \
            }                                                                   \
            dst++;                                                              \
        }                                                                       \
        else /* PIXMAN_OP_SRC */                                                \
        {                                                                       \
            *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (src_a);      \
        }                                                                       \
    }                                                                           \
}
416
417 #define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,      \
418                                   dst_type_t, repeat_mode, have_mask, mask_is_solid)            \
419 static void                                                                                     \
420 fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,                \
421                                                    pixman_composite_info_t *info)               \
422 {                                                                                               \
423     PIXMAN_COMPOSITE_ARGS (info);                                                               \
424     dst_type_t *dst_line;                                                                       \
425     mask_type_t *mask_line;                                                                     \
426     src_type_t *src_first_line;                                                                 \
427     int       y;                                                                                \
428     pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width);               \
429     pixman_fixed_t max_vy;                                                                      \
430     pixman_vector_t v;                                                                          \
431     pixman_fixed_t vx, vy;                                                                      \
432     pixman_fixed_t unit_x, unit_y;                                                              \
433     int32_t left_pad, right_pad;                                                                \
434                                                                                                 \
435     src_type_t *src;                                                                            \
436     dst_type_t *dst;                                                                            \
437     mask_type_t solid_mask;                                                                     \
438     const mask_type_t *mask = &solid_mask;                                                      \
439     int src_stride, mask_stride, dst_stride;                                                    \
440                                                                                                 \
441     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);    \
442     if (have_mask)                                                                              \
443     {                                                                                           \
444         if (mask_is_solid)                                                                      \
445             solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);    \
446         else                                                                                    \
447             PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,                     \
448                                    mask_stride, mask_line, 1);                                  \
449     }                                                                                           \
450     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                  \
451      * transformed from destination space to source space */                                    \
452     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);         \
453                                                                                                 \
454     /* reference point is the center of the pixel */                                            \
455     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                             \
456     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                             \
457     v.vector[2] = pixman_fixed_1;                                                               \
458                                                                                                 \
459     if (!pixman_transform_point_3d (src_image->common.transform, &v))                           \
460         return;                                                                                 \
461                                                                                                 \
462     unit_x = src_image->common.transform->matrix[0][0];                                         \
463     unit_y = src_image->common.transform->matrix[1][1];                                         \
464                                                                                                 \
465     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */                   \
466     v.vector[0] -= pixman_fixed_e;                                                              \
467     v.vector[1] -= pixman_fixed_e;                                                              \
468                                                                                                 \
469     vx = v.vector[0];                                                                           \
470     vy = v.vector[1];                                                                           \
471                                                                                                 \
472     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                  \
473     {                                                                                           \
474         max_vy = pixman_int_to_fixed (src_image->bits.height);                                  \
475                                                                                                 \
476         /* Clamp repeating positions inside the actual samples */                               \
477         repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);                                    \
478         repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                             \
479     }                                                                                           \
480                                                                                                 \
481     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||                                   \
482         PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                                    \
483     {                                                                                           \
484         pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,                      \
485                                         &width, &left_pad, &right_pad);                         \
486         vx += left_pad * unit_x;                                                                \
487     }                                                                                           \
488                                                                                                 \
489     while (--height >= 0)                                                                       \
490     {                                                                                           \
491         dst = dst_line;                                                                         \
492         dst_line += dst_stride;                                                                 \
493         if (have_mask && !mask_is_solid)                                                        \
494         {                                                                                       \
495             mask = mask_line;                                                                   \
496             mask_line += mask_stride;                                                           \
497         }                                                                                       \
498                                                                                                 \
499         y = pixman_fixed_to_int (vy);                                                           \
500         vy += unit_y;                                                                           \
501         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                              \
502             repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                         \
503         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
504         {                                                                                       \
505             repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);                             \
506             src = src_first_line + src_stride * y;                                              \
507             if (left_pad > 0)                                                                   \
508             {                                                                                   \
509                 scanline_func (mask, dst,                                                       \
510                                src + src_image->bits.width - src_image->bits.width + 1,         \
511                                left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);           \
512             }                                                                                   \
513             if (width > 0)                                                                      \
514             {                                                                                   \
515                 scanline_func (mask + (mask_is_solid ? 0 : left_pad),                           \
516                                dst + left_pad, src + src_image->bits.width, width,              \
517                                vx - src_width_fixed, unit_x, src_width_fixed, FALSE);           \
518             }                                                                                   \
519             if (right_pad > 0)                                                                  \
520             {                                                                                   \
521                 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),                   \
522                                dst + left_pad + width, src + src_image->bits.width,             \
523                                right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);          \
524             }                                                                                   \
525         }                                                                                       \
526         else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                           \
527         {                                                                                       \
528             static const src_type_t zero[1] = { 0 };                                            \
529             if (y < 0 || y >= src_image->bits.height)                                           \
530             {                                                                                   \
531                 scanline_func (mask, dst, zero + 1, left_pad + width + right_pad,               \
532                                -pixman_fixed_e, 0, src_width_fixed, TRUE);                      \
533                 continue;                                                                       \
534             }                                                                                   \
535             src = src_first_line + src_stride * y;                                              \
536             if (left_pad > 0)                                                                   \
537             {                                                                                   \
538                 scanline_func (mask, dst, zero + 1, left_pad,                                   \
539                                -pixman_fixed_e, 0, src_width_fixed, TRUE);                      \
540             }                                                                                   \
541             if (width > 0)                                                                      \
542             {                                                                                   \
543                 scanline_func (mask + (mask_is_solid ? 0 : left_pad),                           \
544                                dst + left_pad, src + src_image->bits.width, width,              \
545                                vx - src_width_fixed, unit_x, src_width_fixed, FALSE);           \
546             }                                                                                   \
547             if (right_pad > 0)                                                                  \
548             {                                                                                   \
549                 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),                   \
550                                dst + left_pad + width, zero + 1, right_pad,                     \
551                                -pixman_fixed_e, 0, src_width_fixed, TRUE);                      \
552             }                                                                                   \
553         }                                                                                       \
554         else                                                                                    \
555         {                                                                                       \
556             src = src_first_line + src_stride * y;                                              \
557             scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \
558                            unit_x, src_width_fixed, FALSE);                                     \
559         }                                                                                       \
560     }                                                                                           \
561 }
562
/*
 * Instantiate the nearest-neighbour main loop through
 * FAST_NEAREST_MAINLOOP_INT, prepending '_' to scale_func_name and
 * forwarding every other argument unchanged.
 *
 * This extra level of macro expansion is a workaround for old Sun Studio
 * compilers, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764
 */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
                                     dst_type_t, repeat_mode, have_mask, mask_is_solid) \
    FAST_NEAREST_MAINLOOP_INT (_ ## scale_func_name, \
                               scanline_func, \
                               src_type_t, mask_type_t, dst_type_t, \
                               repeat_mode, have_mask, mask_is_solid)
568
/*
 * Instantiate a nearest-neighbour main loop for a scanline function that
 * does not take a mask argument.
 *
 * Two definitions are emitted:
 *  1. <scanline_func><scale_func_name>_wrapper, an adapter that accepts the
 *     mask-carrying argument list, ignores 'mask' and calls scanline_func
 *     with the remaining seven arguments;
 *  2. the main loop itself, produced by FAST_NEAREST_MAINLOOP_INT with the
 *     adapter as its scanline function, uint8_t as the mask type and both
 *     mask-related switches set to FALSE.
 */
#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
                                     repeat_mode) \
    static force_inline void \
    scanline_func##scale_func_name##_wrapper (const uint8_t *mask, \
                                              dst_type_t *dst, \
                                              const src_type_t *src, \
                                              int32_t w, \
                                              pixman_fixed_t vx, \
                                              pixman_fixed_t unit_x, \
                                              pixman_fixed_t max_vx, \
                                              pixman_bool_t zero_src) \
    { \
        /* the mask pointer is accepted but deliberately not forwarded */ \
        scanline_func (dst, src, w, vx, unit_x, max_vx, zero_src); \
    } \
    FAST_NEAREST_MAINLOOP_INT (scale_func_name, \
                               scanline_func##scale_func_name##_wrapper, \
                               src_type_t, uint8_t, dst_type_t, \
                               repeat_mode, FALSE, FALSE)
586
/*
 * Convenience front end for FAST_NEAREST_MAINLOOP_NOMASK: prepends '_' to
 * scale_func_name and passes the remaining arguments through unchanged.
 */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
                              repeat_mode) \
    FAST_NEAREST_MAINLOOP_NOMASK (_ ## scale_func_name, \
                                  scanline_func, \
                                  src_type_t, dst_type_t, \
                                  repeat_mode)
591
/*
 * Generate a complete unmasked nearest-neighbour fast path for one
 * combination of formats, operator and repeat mode:
 *
 *  - FAST_NEAREST_SCANLINE defines the scanline function
 *    scaled_nearest_scanline_<scale_func_name>_<OP>;
 *  - FAST_NEAREST_MAINLOOP_NOMASK wraps that scanline function in a main
 *    loop, using _<scale_func_name>_<OP> as the name suffix.
 */
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
                     src_type_t, dst_type_t, OP, repeat_mode) \
    FAST_NEAREST_SCANLINE (scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
                           SRC_FORMAT, DST_FORMAT, \
                           src_type_t, dst_type_t, \
                           OP, repeat_mode) \
    FAST_NEAREST_MAINLOOP_NOMASK (_ ## scale_func_name ## _ ## OP, \
                                  scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
                                  src_type_t, dst_type_t, \
                                  repeat_mode)
600
601
/*
 * Source image flags shared by all nearest-neighbour scaling fast path
 * entries below; each entry ORs in its own repeat/coverage flags.
 */
#define SCALED_NEAREST_FLAGS \
    (  FAST_PATH_SCALE_TRANSFORM \
     | FAST_PATH_NO_ALPHA_MAP \
     | FAST_PATH_NEAREST_FILTER \
     | FAST_PATH_NO_ACCESSORS \
     | FAST_PATH_NARROW_FORMAT)
608
/*
 * Brace-enclosed table entry for unmasked nearest scaling with NORMAL
 * repeat. In order, the initializer lists: PIXMAN_OP_<op>, the source format
 * PIXMAN_<s> and its required flags, PIXMAN_null with flags 0 in the mask
 * position, the destination format PIXMAN_<d> with
 * FAST_PATH_STD_DEST_FLAGS, and the function
 * fast_composite_scaled_nearest_<func>_normal_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op, \
    }
619
/*
 * Brace-enclosed table entry for unmasked nearest scaling with PAD repeat.
 * Same layout as the other SIMPLE_NEAREST_FAST_PATH_* entries; the source
 * flags add FAST_PATH_PAD_REPEAT and FAST_PATH_X_UNIT_POSITIVE, and the
 * function is fast_composite_scaled_nearest_<func>_pad_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op, \
    }
630
/*
 * Brace-enclosed table entry for unmasked nearest scaling with NONE repeat.
 * Same layout as the other SIMPLE_NEAREST_FAST_PATH_* entries; the source
 * flags add FAST_PATH_NONE_REPEAT and FAST_PATH_X_UNIT_POSITIVE, and the
 * function is fast_composite_scaled_nearest_<func>_none_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op, \
    }
641
/*
 * Brace-enclosed table entry for unmasked nearest scaling in the 'cover'
 * case. Unlike the repeat-specific entries, the source flags are just
 * SCALED_NEAREST_FLAGS plus FAST_PATH_SAMPLES_COVER_CLIP_NEAREST (no
 * FAST_PATH_X_UNIT_POSITIVE), and the function is
 * fast_composite_scaled_nearest_<func>_cover_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op, \
    }
650
/*
 * Brace-enclosed table entry for nearest scaling with NORMAL repeat and an
 * a8 mask. Identical to SIMPLE_NEAREST_FAST_PATH_NORMAL except that the mask
 * position holds PIXMAN_a8 with MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA).
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8, \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op, \
    }
661
/*
 * Brace-enclosed table entry for nearest scaling with PAD repeat and an a8
 * mask. Identical to SIMPLE_NEAREST_FAST_PATH_PAD except that the mask
 * position holds PIXMAN_a8 with MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA).
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8, \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op, \
    }
672
/*
 * Brace-enclosed table entry for nearest scaling with NONE repeat and an a8
 * mask. Identical to SIMPLE_NEAREST_FAST_PATH_NONE except that the mask
 * position holds PIXMAN_a8 with MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA).
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8, \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op, \
    }
683
/*
 * Brace-enclosed table entry for nearest scaling in the 'cover' case with an
 * a8 mask. Identical to SIMPLE_NEAREST_FAST_PATH_COVER except that the mask
 * position holds PIXMAN_a8 with MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA).
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST), \
        PIXMAN_a8, \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op, \
    }
692
/*
 * Brace-enclosed table entry for nearest scaling with NORMAL repeat and a
 * solid mask. Identical to SIMPLE_NEAREST_FAST_PATH_NORMAL except that the
 * mask position holds PIXMAN_solid with
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA).
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid, \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op, \
    }
703
/*
 * Brace-enclosed table entry for nearest scaling with PAD repeat and a solid
 * mask. Identical to SIMPLE_NEAREST_FAST_PATH_PAD except that the mask
 * position holds PIXMAN_solid with
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA).
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid, \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op, \
    }
714
/*
 * Brace-enclosed table entry for nearest scaling with NONE repeat and a
 * solid mask. Identical to SIMPLE_NEAREST_FAST_PATH_NONE except that the
 * mask position holds PIXMAN_solid with
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA).
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid, \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op, \
    }
725
/*
 * Brace-enclosed table entry for nearest scaling in the 'cover' case with a
 * solid mask. Identical to SIMPLE_NEAREST_FAST_PATH_COVER except that the
 * mask position holds PIXMAN_solid with
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA).
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST), \
        PIXMAN_solid, \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op, \
    }
734
/*
 * Expands to four comma-separated unmasked table entries for one
 * (op, s, d, func) combination, in the order COVER, NONE, PAD, NORMAL.
 * The 'cover' variant is listed first so that it is preferred, because it
 * is faster.
 */
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_FAST_PATH_COVER(op, s, d, func), \
    SIMPLE_NEAREST_FAST_PATH_NONE(op, s, d, func), \
    SIMPLE_NEAREST_FAST_PATH_PAD(op, s, d, func), \
    SIMPLE_NEAREST_FAST_PATH_NORMAL(op, s, d, func)
741
/*
 * Expands to three comma-separated a8-mask table entries for one
 * (op, s, d, func) combination, in the order COVER, NONE, PAD.
 * The NORMAL repeat entry is not part of this list.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op, s, d, func), \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op, s, d, func), \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op, s, d, func)
746
/*
 * Expands to three comma-separated solid-mask table entries for one
 * (op, s, d, func) combination, in the order COVER, NONE, PAD.
 * The NORMAL repeat entry is not part of this list.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op, s, d, func), \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op, s, d, func), \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op, s, d, func)
751
752 /*****************************************************************************/
753
754 /*
755  * Identify 5 zones in each scanline for bilinear scaling. Depending on
756  * whether 2 pixels to be interpolated are fetched from the image itself,
757  * from the padding area around it or from both image and padding area.
758  */
759 static force_inline void
760 bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
761                                          pixman_fixed_t  vx,
762                                          pixman_fixed_t  unit_x,
763                                          int32_t *       left_pad,
764                                          int32_t *       left_tz,
765                                          int32_t *       width,
766                                          int32_t *       right_tz,
767                                          int32_t *       right_pad)
768 {
769         int width1 = *width, left_pad1, right_pad1;
770         int width2 = *width, left_pad2, right_pad2;
771
772         pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
773                                         &width1, &left_pad1, &right_pad1);
774         pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
775                                         unit_x, &width2, &left_pad2, &right_pad2);
776
777         *left_pad = left_pad2;
778         *left_tz = left_pad1 - left_pad2;
779         *right_tz = right_pad2 - right_pad1;
780         *right_pad = right_pad1;
781         *width -= *left_pad + *left_tz + *right_tz + *right_pad;
782 }
783
784 /*
785  * Main loop template for single pass bilinear scaling. It needs to be
786  * provided with 'scanline_func' which should do the compositing operation.
787  * The needed function has the following prototype:
788  *
789  *      scanline_func (dst_type_t *       dst,
 *                     const mask_type_t * mask,
791  *                     const src_type_t * src_top,
792  *                     const src_type_t * src_bottom,
793  *                     int32_t            width,
794  *                     int                weight_top,
795  *                     int                weight_bottom,
796  *                     pixman_fixed_t     vx,
797  *                     pixman_fixed_t     unit_x,
798  *                     pixman_fixed_t     max_vx,
799  *                     pixman_bool_t      zero_src)
800  *
801  * Where:
802  *  dst                 - destination scanline buffer for storing results
803  *  mask                - mask buffer (or single value for solid mask)
804  *  src_top, src_bottom - two source scanlines
805  *  width               - number of pixels to process
806  *  weight_top          - weight of the top row for interpolation
807  *  weight_bottom       - weight of the bottom row for interpolation
808  *  vx                  - initial position for fetching the first pair of
809  *                        pixels from the source buffer
810  *  unit_x              - position increment needed to move to the next pair
811  *                        of pixels
812  *  max_vx              - image size as a fixed point value, can be used for
813  *                        implementing NORMAL repeat (when it is supported)
814  *  zero_src            - boolean hint variable, which is set to TRUE when
815  *                        all source pixels are fetched from zero padding
816  *                        zone for NONE repeat
817  *
 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
 *       BILINEAR_INTERPOLATION_RANGE, but it may be less than that for NONE
 *       repeat when handling fuzzy antialiased top or bottom image edges.
 *       Both the top and the bottom weight are also guaranteed to be
 *       strictly less than BILINEAR_INTERPOLATION_RANGE. For example, with
 *       8-bit interpolation precision the weights fit into an unsigned byte
 *       and can be used with 8-bit SIMD multiplication instructions.
826  */
827 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,     \
828                                   dst_type_t, repeat_mode, flags)                               \
829 static void                                                                                     \
830 fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,                \
831                                                    pixman_composite_info_t *info)               \
832 {                                                                                               \
833     PIXMAN_COMPOSITE_ARGS (info);                                                               \
834     dst_type_t *dst_line;                                                                       \
835     mask_type_t *mask_line;                                                                     \
836     src_type_t *src_first_line;                                                                 \
837     int       y1, y2;                                                                           \
838     pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */            \
839     pixman_vector_t v;                                                                          \
840     pixman_fixed_t vx, vy;                                                                      \
841     pixman_fixed_t unit_x, unit_y;                                                              \
842     int32_t left_pad, left_tz, right_tz, right_pad;                                             \
843                                                                                                 \
844     dst_type_t *dst;                                                                            \
845     mask_type_t solid_mask;                                                                     \
846     const mask_type_t *mask = &solid_mask;                                                      \
847     int src_stride, mask_stride, dst_stride;                                                    \
848                                                                                                 \
849     int src_width;                                                                              \
850     pixman_fixed_t src_width_fixed;                                                             \
851     int max_x;                                                                                  \
852     pixman_bool_t need_src_extension;                                                           \
853                                                                                                 \
854     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);    \
855     if (flags & FLAG_HAVE_SOLID_MASK)                                                           \
856     {                                                                                           \
857         solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);        \
858         mask_stride = 0;                                                                        \
859     }                                                                                           \
860     else if (flags & FLAG_HAVE_NON_SOLID_MASK)                                                  \
861     {                                                                                           \
862         PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,                         \
863                                mask_stride, mask_line, 1);                                      \
864     }                                                                                           \
865                                                                                                 \
866     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                  \
867      * transformed from destination space to source space */                                    \
868     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);         \
869                                                                                                 \
870     /* reference point is the center of the pixel */                                            \
871     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                             \
872     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                             \
873     v.vector[2] = pixman_fixed_1;                                                               \
874                                                                                                 \
875     if (!pixman_transform_point_3d (src_image->common.transform, &v))                           \
876         return;                                                                                 \
877                                                                                                 \
878     unit_x = src_image->common.transform->matrix[0][0];                                         \
879     unit_y = src_image->common.transform->matrix[1][1];                                         \
880                                                                                                 \
881     v.vector[0] -= pixman_fixed_1 / 2;                                                          \
882     v.vector[1] -= pixman_fixed_1 / 2;                                                          \
883                                                                                                 \
884     vy = v.vector[1];                                                                           \
885                                                                                                 \
886     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||                                   \
887         PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                                    \
888     {                                                                                           \
889         bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,    \
890                                         &left_pad, &left_tz, &width, &right_tz, &right_pad);    \
891         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
892         {                                                                                       \
893             /* PAD repeat does not need special handling for 'transition zones' and */          \
894             /* they can be combined with 'padding zones' safely */                              \
895             left_pad += left_tz;                                                                \
896             right_pad += right_tz;                                                              \
897             left_tz = right_tz = 0;                                                             \
898         }                                                                                       \
899         v.vector[0] += left_pad * unit_x;                                                       \
900     }                                                                                           \
901                                                                                                 \
902     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                  \
903     {                                                                                           \
904         vx = v.vector[0];                                                                       \
905         repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width));         \
906         max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1;                   \
907                                                                                                 \
908         if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH)                                    \
909         {                                                                                       \
910             src_width = 0;                                                                      \
911                                                                                                 \
912             while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x)                   \
913                 src_width += src_image->bits.width;                                             \
914                                                                                                 \
915             need_src_extension = TRUE;                                                          \
916         }                                                                                       \
917         else                                                                                    \
918         {                                                                                       \
919             src_width = src_image->bits.width;                                                  \
920             need_src_extension = FALSE;                                                         \
921         }                                                                                       \
922                                                                                                 \
923         src_width_fixed = pixman_int_to_fixed (src_width);                                      \
924     }                                                                                           \
925                                                                                                 \
926     while (--height >= 0)                                                                       \
927     {                                                                                           \
928         int weight1, weight2;                                                                   \
929         dst = dst_line;                                                                         \
930         dst_line += dst_stride;                                                                 \
931         vx = v.vector[0];                                                                       \
932         if (flags & FLAG_HAVE_NON_SOLID_MASK)                                                   \
933         {                                                                                       \
934             mask = mask_line;                                                                   \
935             mask_line += mask_stride;                                                           \
936         }                                                                                       \
937                                                                                                 \
938         y1 = pixman_fixed_to_int (vy);                                                          \
939         weight2 = pixman_fixed_to_bilinear_weight (vy);                                         \
940         if (weight2)                                                                            \
941         {                                                                                       \
942             /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */        \
943             y2 = y1 + 1;                                                                        \
944             weight1 = BILINEAR_INTERPOLATION_RANGE - weight2;                                   \
945         }                                                                                       \
946         else                                                                                    \
947         {                                                                                       \
948             /* set both top and bottom row to the same scanline and tweak weights */            \
949             y2 = y1;                                                                            \
950             weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2;                               \
951         }                                                                                       \
952         vy += unit_y;                                                                           \
953         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
954         {                                                                                       \
955             src_type_t *src1, *src2;                                                            \
956             src_type_t buf1[2];                                                                 \
957             src_type_t buf2[2];                                                                 \
958             repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);                            \
959             repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);                            \
960             src1 = src_first_line + src_stride * y1;                                            \
961             src2 = src_first_line + src_stride * y2;                                            \
962                                                                                                 \
963             if (left_pad > 0)                                                                   \
964             {                                                                                   \
965                 buf1[0] = buf1[1] = src1[0];                                                    \
966                 buf2[0] = buf2[1] = src2[0];                                                    \
967                 scanline_func (dst, mask,                                                       \
968                                buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE);         \
969                 dst += left_pad;                                                                \
970                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
971                     mask += left_pad;                                                           \
972             }                                                                                   \
973             if (width > 0)                                                                      \
974             {                                                                                   \
975                 scanline_func (dst, mask,                                                       \
976                                src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);      \
977                 dst += width;                                                                   \
978                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
979                     mask += width;                                                              \
980             }                                                                                   \
981             if (right_pad > 0)                                                                  \
982             {                                                                                   \
983                 buf1[0] = buf1[1] = src1[src_image->bits.width - 1];                            \
984                 buf2[0] = buf2[1] = src2[src_image->bits.width - 1];                            \
985                 scanline_func (dst, mask,                                                       \
986                                buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE);        \
987             }                                                                                   \
988         }                                                                                       \
989         else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                           \
990         {                                                                                       \
991             src_type_t *src1, *src2;                                                            \
992             src_type_t buf1[2];                                                                 \
993             src_type_t buf2[2];                                                                 \
994             /* handle top/bottom zero padding by just setting weights to 0 if needed */         \
995             if (y1 < 0)                                                                         \
996             {                                                                                   \
997                 weight1 = 0;                                                                    \
998                 y1 = 0;                                                                         \
999             }                                                                                   \
1000             if (y1 >= src_image->bits.height)                                                   \
1001             {                                                                                   \
1002                 weight1 = 0;                                                                    \
1003                 y1 = src_image->bits.height - 1;                                                \
1004             }                                                                                   \
1005             if (y2 < 0)                                                                         \
1006             {                                                                                   \
1007                 weight2 = 0;                                                                    \
1008                 y2 = 0;                                                                         \
1009             }                                                                                   \
1010             if (y2 >= src_image->bits.height)                                                   \
1011             {                                                                                   \
1012                 weight2 = 0;                                                                    \
1013                 y2 = src_image->bits.height - 1;                                                \
1014             }                                                                                   \
1015             src1 = src_first_line + src_stride * y1;                                            \
1016             src2 = src_first_line + src_stride * y2;                                            \
1017                                                                                                 \
1018             if (left_pad > 0)                                                                   \
1019             {                                                                                   \
1020                 buf1[0] = buf1[1] = 0;                                                          \
1021                 buf2[0] = buf2[1] = 0;                                                          \
1022                 scanline_func (dst, mask,                                                       \
1023                                buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE);          \
1024                 dst += left_pad;                                                                \
1025                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
1026                     mask += left_pad;                                                           \
1027             }                                                                                   \
1028             if (left_tz > 0)                                                                    \
1029             {                                                                                   \
1030                 buf1[0] = 0;                                                                    \
1031                 buf1[1] = src1[0];                                                              \
1032                 buf2[0] = 0;                                                                    \
1033                 buf2[1] = src2[0];                                                              \
1034                 scanline_func (dst, mask,                                                       \
1035                                buf1, buf2, left_tz, weight1, weight2,                           \
1036                                pixman_fixed_frac (vx), unit_x, 0, FALSE);                       \
1037                 dst += left_tz;                                                                 \
1038                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
1039                     mask += left_tz;                                                            \
1040                 vx += left_tz * unit_x;                                                         \
1041             }                                                                                   \
1042             if (width > 0)                                                                      \
1043             {                                                                                   \
1044                 scanline_func (dst, mask,                                                       \
1045                                src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);      \
1046                 dst += width;                                                                   \
1047                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
1048                     mask += width;                                                              \
1049                 vx += width * unit_x;                                                           \
1050             }                                                                                   \
1051             if (right_tz > 0)                                                                   \
1052             {                                                                                   \
1053                 buf1[0] = src1[src_image->bits.width - 1];                                      \
1054                 buf1[1] = 0;                                                                    \
1055                 buf2[0] = src2[src_image->bits.width - 1];                                      \
1056                 buf2[1] = 0;                                                                    \
1057                 scanline_func (dst, mask,                                                       \
1058                                buf1, buf2, right_tz, weight1, weight2,                          \
1059                                pixman_fixed_frac (vx), unit_x, 0, FALSE);                       \
1060                 dst += right_tz;                                                                \
1061                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
1062                     mask += right_tz;                                                           \
1063             }                                                                                   \
1064             if (right_pad > 0)                                                                  \
1065             {                                                                                   \
1066                 buf1[0] = buf1[1] = 0;                                                          \
1067                 buf2[0] = buf2[1] = 0;                                                          \
1068                 scanline_func (dst, mask,                                                       \
1069                                buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE);         \
1070             }                                                                                   \
1071         }                                                                                       \
1072         else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                         \
1073         {                                                                                       \
1074             int32_t         num_pixels;                                                         \
1075             int32_t         width_remain;                                                       \
1076             src_type_t *    src_line_top;                                                       \
1077             src_type_t *    src_line_bottom;                                                    \
1078             src_type_t      buf1[2];                                                            \
1079             src_type_t      buf2[2];                                                            \
1080             src_type_t      extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2];                      \
1081             src_type_t      extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2];                      \
1082             int             i, j;                                                               \
1083                                                                                                 \
1084             repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);                         \
1085             repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);                         \
1086             src_line_top = src_first_line + src_stride * y1;                                    \
1087             src_line_bottom = src_first_line + src_stride * y2;                                 \
1088                                                                                                 \
1089             if (need_src_extension)                                                             \
1090             {                                                                                   \
1091                 for (i=0; i<src_width;)                                                         \
1092                 {                                                                               \
1093                     for (j=0; j<src_image->bits.width; j++, i++)                                \
1094                     {                                                                           \
1095                         extended_src_line0[i] = src_line_top[j];                                \
1096                         extended_src_line1[i] = src_line_bottom[j];                             \
1097                     }                                                                           \
1098                 }                                                                               \
1099                                                                                                 \
1100                 src_line_top = &extended_src_line0[0];                                          \
1101                 src_line_bottom = &extended_src_line1[0];                                       \
1102             }                                                                                   \
1103                                                                                                 \
1104             /* Top & Bottom wrap around buffer */                                               \
1105             buf1[0] = src_line_top[src_width - 1];                                              \
1106             buf1[1] = src_line_top[0];                                                          \
1107             buf2[0] = src_line_bottom[src_width - 1];                                           \
1108             buf2[1] = src_line_bottom[0];                                                       \
1109                                                                                                 \
1110             width_remain = width;                                                               \
1111                                                                                                 \
1112             while (width_remain > 0)                                                            \
1113             {                                                                                   \
1114                 /* We use src_width_fixed because it can make vx in original source range */    \
1115                 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);                            \
1116                                                                                                 \
1117                 /* Wrap around part */                                                          \
1118                 if (pixman_fixed_to_int (vx) == src_width - 1)                                  \
1119                 {                                                                               \
1120                     /* for positive unit_x                                                      \
1121                      * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed           \
1122                      *                                                                          \
1123                      * vx is in range [0, src_width_fixed - pixman_fixed_e]                     \
1124                      * So we are safe from overflow.                                            \
1125                      */                                                                         \
1126                     num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1;        \
1127                                                                                                 \
1128                     if (num_pixels > width_remain)                                              \
1129                         num_pixels = width_remain;                                              \
1130                                                                                                 \
1131                     scanline_func (dst, mask, buf1, buf2, num_pixels,                           \
1132                                    weight1, weight2, pixman_fixed_frac(vx),                     \
1133                                    unit_x, src_width_fixed, FALSE);                             \
1134                                                                                                 \
1135                     width_remain -= num_pixels;                                                 \
1136                     vx += num_pixels * unit_x;                                                  \
1137                     dst += num_pixels;                                                          \
1138                                                                                                 \
1139                     if (flags & FLAG_HAVE_NON_SOLID_MASK)                                       \
1140                         mask += num_pixels;                                                     \
1141                                                                                                 \
1142                     repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);                        \
1143                 }                                                                               \
1144                                                                                                 \
1145                 /* Normal scanline composite */                                                 \
1146                 if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0)              \
1147                 {                                                                               \
1148                     /* for positive unit_x                                                      \
1149                      * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1)     \
1150                      *                                                                          \
1151                      * vx is in range [0, src_width_fixed - pixman_fixed_e]                     \
1152                      * So we are safe from overflow here.                                       \
1153                      */                                                                         \
1154                     num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e)      \
1155                                   / unit_x) + 1;                                                \
1156                                                                                                 \
1157                     if (num_pixels > width_remain)                                              \
1158                         num_pixels = width_remain;                                              \
1159                                                                                                 \
1160                     scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels,        \
1161                                    weight1, weight2, vx, unit_x, src_width_fixed, FALSE);       \
1162                                                                                                 \
1163                     width_remain -= num_pixels;                                                 \
1164                     vx += num_pixels * unit_x;                                                  \
1165                     dst += num_pixels;                                                          \
1166                                                                                                 \
1167                     if (flags & FLAG_HAVE_NON_SOLID_MASK)                                       \
1168                         mask += num_pixels;                                                     \
1169                 }                                                                               \
1170             }                                                                                   \
1171         }                                                                                       \
1172         else                                                                                    \
1173         {                                                                                       \
1174             scanline_func (dst, mask, src_first_line + src_stride * y1,                         \
1175                            src_first_line + src_stride * y2, width,                             \
1176                            weight1, weight2, vx, unit_x, max_vx, FALSE);                        \
1177         }                                                                                       \
1178     }                                                                                           \
1179 }
1180
/*
 * Thin wrapper around FAST_BILINEAR_MAINLOOP_INT: every argument is forwarded
 * unchanged except scale_func_name, which gets a leading underscore pasted on
 * before it is handed over.
 *
 * The extra macro level is a workaround for old sun studio, see:
 * https://bugs.freedesktop.org/show_bug.cgi?id=32764
 */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,  \
                                  dst_type_t, repeat_mode, flags)                               \
        FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
                                  dst_type_t, repeat_mode, flags)
1186
/*
 * Bitwise OR of the five FAST_PATH_* flags shared by every bilinear fast path
 * table entry macro that follows. The expansion is fully parenthesized so it
 * can be combined with further flags by the user of the macro.
 */
#define SCALED_BILINEAR_FLAGS                                           \
    (FAST_PATH_SCALE_TRANSFORM  |                                       \
     FAST_PATH_NO_ALPHA_MAP     |                                       \
     FAST_PATH_BILINEAR_FILTER  |                                       \
     FAST_PATH_NO_ACCESSORS     |                                       \
     FAST_PATH_NARROW_FORMAT)
1193
/*
 * Expands to an eight-element brace initializer for an unmasked bilinear
 * fast path using PAD repeat (the entry type is declared outside this
 * section; presumably a fast path table element):
 *
 *   PIXMAN_OP_<op>,
 *   PIXMAN_<s>, SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT |
 *               FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_null, 0,
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_pad_<op>
 */
#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)                      \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_PAD_REPEAT           |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,     \
    }
1204
/*
 * Expands to an eight-element brace initializer for an unmasked bilinear
 * fast path using NONE repeat (the entry type is declared outside this
 * section; presumably a fast path table element):
 *
 *   PIXMAN_OP_<op>,
 *   PIXMAN_<s>, SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT |
 *               FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_null, 0,
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_none_<op>
 */
#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)                     \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NONE_REPEAT          |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,    \
    }
1215
/*
 * Expands to an eight-element brace initializer for an unmasked bilinear
 * fast path in the COVER case (the entry type is declared outside this
 * section; presumably a fast path table element):
 *
 *   PIXMAN_OP_<op>,
 *   PIXMAN_<s>, SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,
 *   PIXMAN_null, 0,
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_cover_<op>
 *
 * Unlike the PAD/NONE/NORMAL variants, no repeat flag and no
 * FAST_PATH_X_UNIT_POSITIVE flag is included.
 */
#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)                    \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,  \
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,   \
    }
1224
/*
 * Expands to an eight-element brace initializer for an unmasked bilinear
 * fast path using NORMAL repeat (the entry type is declared outside this
 * section; presumably a fast path table element):
 *
 *   PIXMAN_OP_<op>,
 *   PIXMAN_<s>, SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |
 *               FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_null, 0,
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_normal_<op>
 */
#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func)                   \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NORMAL_REPEAT        |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,  \
    }
1235
/*
 * Expands to an eight-element brace initializer for a bilinear fast path
 * with an a8 mask and PAD repeat (the entry type is declared outside this
 * section; presumably a fast path table element):
 *
 *   PIXMAN_OP_<op>,
 *   PIXMAN_<s>, SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT |
 *               FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_pad_<op>
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)              \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_PAD_REPEAT           |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,     \
    }
1246
/*
 * Expands to an eight-element brace initializer for a bilinear fast path
 * with an a8 mask and NONE repeat (the entry type is declared outside this
 * section; presumably a fast path table element):
 *
 *   PIXMAN_OP_<op>,
 *   PIXMAN_<s>, SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT |
 *               FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_none_<op>
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)             \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NONE_REPEAT          |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,    \
    }
1257
/*
 * Expands to an eight-element brace initializer for a bilinear fast path
 * with an a8 mask in the COVER case (the entry type is declared outside this
 * section; presumably a fast path table element):
 *
 *   PIXMAN_OP_<op>,
 *   PIXMAN_<s>, SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,
 *   PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_cover_<op>
 *
 * Unlike the PAD/NONE/NORMAL variants, no repeat flag and no
 * FAST_PATH_X_UNIT_POSITIVE flag is included.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)            \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,  \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,   \
    }
1266
/*
 * Expands to an eight-element brace initializer for a bilinear fast path
 * with an a8 mask and NORMAL repeat (the entry type is declared outside this
 * section; presumably a fast path table element):
 *
 *   PIXMAN_OP_<op>,
 *   PIXMAN_<s>, SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |
 *               FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_normal_<op>
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)           \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NORMAL_REPEAT        |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,  \
    }
1277
/*
 * Table-entry initializer for the solid-mask bilinear variant that requires
 * FAST_PATH_PAD_REPEAT on the source.
 *
 * Expands to one brace-enclosed initializer whose elements are, in order:
 *   - PIXMAN_OP_<op>
 *   - PIXMAN_<s>, then the source flags
 *     (SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT |
 *      FAST_PATH_X_UNIT_POSITIVE)
 *   - PIXMAN_solid, then MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 *   - PIXMAN_<d>, then FAST_PATH_STD_DEST_FLAGS
 *   - the identifier fast_composite_scaled_bilinear_<func>_pad_<op>
 *
 * op, s, d and func are token-pasted, so they must be bare name fragments
 * rather than expressions, and the pasted identifiers must already be
 * declared at the point of expansion.
 *
 * NOTE(review): the element order presumably mirrors the fast path entry
 * struct declared in pixman-private.h -- confirm against that definition.
 */
1278 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)           \
1279     {   PIXMAN_OP_ ## op,                                               \
1280         PIXMAN_ ## s,                                                   \
1281         (SCALED_BILINEAR_FLAGS          |                               \
1282          FAST_PATH_PAD_REPEAT           |                               \
1283          FAST_PATH_X_UNIT_POSITIVE),                                    \
1284         PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
1285         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1286         fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,     \
1287     }
1288
/*
 * Table-entry initializer for the solid-mask bilinear variant that requires
 * FAST_PATH_NONE_REPEAT on the source.
 *
 * Expands to one brace-enclosed initializer whose elements are, in order:
 *   - PIXMAN_OP_<op>
 *   - PIXMAN_<s>, then the source flags
 *     (SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT |
 *      FAST_PATH_X_UNIT_POSITIVE)
 *   - PIXMAN_solid, then MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 *   - PIXMAN_<d>, then FAST_PATH_STD_DEST_FLAGS
 *   - the identifier fast_composite_scaled_bilinear_<func>_none_<op>
 *
 * op, s, d and func are token-pasted, so they must be bare name fragments
 * rather than expressions, and the pasted identifiers must already be
 * declared at the point of expansion.
 *
 * NOTE(review): the element order presumably mirrors the fast path entry
 * struct declared in pixman-private.h -- confirm against that definition.
 */
1289 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)          \
1290     {   PIXMAN_OP_ ## op,                                               \
1291         PIXMAN_ ## s,                                                   \
1292         (SCALED_BILINEAR_FLAGS          |                               \
1293          FAST_PATH_NONE_REPEAT          |                               \
1294          FAST_PATH_X_UNIT_POSITIVE),                                    \
1295         PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
1296         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1297         fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,    \
1298     }
1299
/*
 * Table-entry initializer for the solid-mask bilinear "cover" variant.
 *
 * Expands to one brace-enclosed initializer whose elements are, in order:
 *   - PIXMAN_OP_<op>
 *   - PIXMAN_<s>, then the source flags
 *     SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR
 *   - PIXMAN_solid, then MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 *   - PIXMAN_<d>, then FAST_PATH_STD_DEST_FLAGS
 *   - the identifier fast_composite_scaled_bilinear_<func>_cover_<op>
 *
 * Unlike the PAD, NONE and NORMAL solid-mask variants, the source flags here
 * name no repeat flag and omit FAST_PATH_X_UNIT_POSITIVE.
 *
 * op, s, d and func are token-pasted, so they must be bare name fragments
 * rather than expressions, and the pasted identifiers must already be
 * declared at the point of expansion.
 *
 * NOTE(review): the element order presumably mirrors the fast path entry
 * struct declared in pixman-private.h -- confirm against that definition.
 */
1300 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)         \
1301     {   PIXMAN_OP_ ## op,                                               \
1302         PIXMAN_ ## s,                                                   \
1303         SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,  \
1304         PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
1305         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1306         fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,   \
1307     }
1308
/*
 * Table-entry initializer for the solid-mask bilinear variant that requires
 * FAST_PATH_NORMAL_REPEAT on the source.
 *
 * Expands to one brace-enclosed initializer whose elements are, in order:
 *   - PIXMAN_OP_<op>
 *   - PIXMAN_<s>, then the source flags
 *     (SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |
 *      FAST_PATH_X_UNIT_POSITIVE)
 *   - PIXMAN_solid, then MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 *   - PIXMAN_<d>, then FAST_PATH_STD_DEST_FLAGS
 *   - the identifier fast_composite_scaled_bilinear_<func>_normal_<op>
 *
 * op, s, d and func are token-pasted, so they must be bare name fragments
 * rather than expressions, and the pasted identifiers must already be
 * declared at the point of expansion.
 *
 * NOTE(review): the element order presumably mirrors the fast path entry
 * struct declared in pixman-private.h -- confirm against that definition.
 */
1309 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)        \
1310     {   PIXMAN_OP_ ## op,                                               \
1311         PIXMAN_ ## s,                                                   \
1312         (SCALED_BILINEAR_FLAGS          |                               \
1313          FAST_PATH_NORMAL_REPEAT        |                               \
1314          FAST_PATH_X_UNIT_POSITIVE),                                    \
1315         PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
1316         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1317         fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,  \
1318     }
1319
/*
 * Convenience wrapper: expands to four comma-separated table entries for the
 * same (op, s, d, func), in the order COVER, NONE, PAD, NORMAL, by forwarding
 * the arguments unchanged to the matching SIMPLE_BILINEAR_FAST_PATH_<variant>
 * macros (expected to be defined before this point).
 *
 * No comma follows the final entry, so the code using this macro has to
 * supply the separator before whatever comes next.
 *
 * NOTE(review): COVER is listed first; presumably the table is searched in
 * order so that the earlier entry wins when several match -- confirm against
 * the lookup code.
 */
1320 /* Prefer the use of 'cover' variant, because it is faster */
1321 #define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func)                          \
1322     SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func),                      \
1323     SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func),                       \
1324     SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func),                        \
1325     SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
1326
/*
 * Convenience wrapper for the a8-mask entries: expands to four
 * comma-separated table entries for the same (op, s, d, func), in the order
 * COVER, NONE, PAD, NORMAL, by forwarding the arguments unchanged to the
 * matching SIMPLE_BILINEAR_A8_MASK_FAST_PATH_<variant> macros.
 *
 * Because every variant is emitted, all four pasted function identifiers
 * (..._<func>_cover_<op>, _none_, _pad_, _normal_) must be declared wherever
 * this macro is expanded.
 *
 * No comma follows the final entry, so the code using this macro has to
 * supply the separator before whatever comes next.
 */
1327 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func)                  \
1328     SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func),              \
1329     SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func),               \
1330     SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func),                \
1331     SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
1332
/*
 * Convenience wrapper for the solid-mask entries: expands to four
 * comma-separated table entries for the same (op, s, d, func), in the order
 * COVER, NONE, PAD, NORMAL, by forwarding the arguments unchanged to the
 * matching SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_<variant> macros.
 *
 * Because every variant is emitted, all four pasted function identifiers
 * (..._<func>_cover_<op>, _none_, _pad_, _normal_) must be declared wherever
 * this macro is expanded.
 *
 * No comma follows the final entry, so the code using this macro has to
 * supply the separator before whatever comes next.
 */
1333 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func)               \
1334     SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),           \
1335     SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),            \
1336     SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),             \
1337     SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
1338
1339 #endif