/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
 * Copyright © 2000 SuSE, Inc.
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of SuSE not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission.  SuSE makes no representations about the
 * suitability of this software for any purpose.  It is provided "as is"
 * without express or implied warranty.
 *
 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Author:  Keith Packard, SuSE, Inc.
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <stdlib.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"

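/* fetch_24: load one 24-bit (0888) pixel from a possibly unaligned byte
 * address, using one 8-bit and one 16-bit access chosen by the address
 * alignment and the host byte order.
 */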
static force_inline uint32_t
fetch_24 (uint8_t *a)
{
    if (((uintptr_t)a) & 1)
    {
#ifdef WORDS_BIGENDIAN
        return (*a << 16) | (*(uint16_t *)(a + 1));
#else
        return *a | (*(uint16_t *)(a + 1) << 8);
#endif
    }
    else
    {
#ifdef WORDS_BIGENDIAN
        return (*(uint16_t *)a << 8) | *(a + 2);
#else
        return *(uint16_t *)a | (*(a + 2) << 16);
#endif
    }
}

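/* store_24: write a 24-bit (0888) pixel back to a possibly unaligned byte
 * address, mirroring the alignment and byte-order handling of fetch_24.
 */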
static force_inline void
store_24 (uint8_t *a,
          uint32_t v)
{
    if (((uintptr_t)a) & 1)
    {
#ifdef WORDS_BIGENDIAN
        *a = (uint8_t) (v >> 16);
        *(uint16_t *)(a + 1) = (uint16_t) (v);
#else
        *a = (uint8_t) (v);
        *(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
#endif
    }
    else
    {
#ifdef WORDS_BIGENDIAN
        *(uint16_t *)a = (uint16_t)(v >> 8);
        *(a + 2) = (uint8_t)v;
#else
        *(uint16_t *)a = (uint16_t)v;
        *(a + 2) = (uint8_t)(v >> 16);
#endif
    }
}

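/* over: Porter-Duff OVER for one premultiplied a8r8g8b8 pixel:
 * dest = src + dest * (255 - alpha (src)) / 255, per channel.
 */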
static force_inline uint32_t
over (uint32_t src,
      uint32_t dest)
{
    uint32_t a = ~src >> 24;

    UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);

    return dest;
}

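/* in: scale all four channels of the pixel x by the 8-bit value y
 * (Porter-Duff IN against a mask/alpha value).
 */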
static force_inline uint32_t
in (uint32_t x,
    uint8_t  y)
{
    uint16_t a = y;

    UN8x4_MUL_UN8 (x, a);

    return x;
}

/*
 * Naming convention:
 *
 *  op_src_mask_dest
 */
static void
fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
                                 pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t    *src, *src_line;
    uint32_t    *dst, *dst_line;
    uint8_t     *mask, *mask_line;
    int src_stride, mask_stride, dst_stride;
    uint8_t m;
    uint32_t s, d;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
        src = src_line;
        src_line += src_stride;
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;

        w = width;
        while (w--)
        {
            m = *mask++;
            if (m)
            {
                s = *src | 0xff000000;

                if (m == 0xff)
                {
                    *dst = s;
                }
                else
                {
                    d = in (s, m);
                    *dst = over (d, *dst);
                }
            }
            src++;
            dst++;
        }
    }
}

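/* IN with a solid source, a8 mask and a8 destination:
 * dst = dst * (mask * source alpha), with a shortcut for the common case
 * where the source alpha is 0xff.
 */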
static void
fast_composite_in_n_8_8 (pixman_implementation_t *imp,
                         pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint8_t     *dst_line, *dst;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;
    uint16_t t;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    if (srca == 0xff)
    {
        while (height--)
        {
            dst = dst_line;
            dst_line += dst_stride;
            mask = mask_line;
            mask_line += mask_stride;
            w = width;

            while (w--)
            {
                m = *mask++;

                if (m == 0)
                    *dst = 0;
                else if (m != 0xff)
                    *dst = MUL_UN8 (m, *dst, t);

                dst++;
            }
        }
    }
    else
    {
        while (height--)
        {
            dst = dst_line;
            dst_line += dst_stride;
            mask = mask_line;
            mask_line += mask_stride;
            w = width;

            while (w--)
            {
                m = *mask++;
                m = MUL_UN8 (m, srca, t);

                if (m == 0)
                    *dst = 0;
                else if (m != 0xff)
                    *dst = MUL_UN8 (m, *dst, t);

                dst++;
            }
        }
    }
}

static void
fast_composite_in_8_8 (pixman_implementation_t *imp,
                       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint8_t     *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint8_t s;
    uint16_t t;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;

            if (s == 0)
                *dst = 0;
            else if (s != 0xff)
                *dst = MUL_UN8 (s, *dst, t);

            dst++;
        }
    }
}

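/* OVER with a solid source, a8 mask and 8888 destination; fully opaque
 * mask pixels are copied or composited directly, partial ones go through
 * in() and over().
 */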
static void
fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint32_t    *dst_line, *dst, d;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            m = *mask++;
            if (m == 0xff)
            {
                if (srca == 0xff)
                    *dst = src;
                else
                    *dst = over (src, *dst);
            }
            else if (m)
            {
                d = in (src, m);
                *dst = over (d, *dst);
            }
            dst++;
        }
    }
}

static void
fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
                                   pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, s;
    uint32_t    *dst_line, *dst, d;
    uint32_t    *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            ma = *mask++;

            if (ma)
            {
                d = *dst;
                s = src;

                UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);

                *dst = s;
            }

            dst++;
        }
    }
}

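/* Component-alpha OVER: solid source, per-channel 8888 mask, 8888
 * destination.  The mask scales the source and, multiplied by the source
 * alpha, determines how much of the destination shows through.
 */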
static void
fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca, s;
    uint32_t    *dst_line, *dst, d;
    uint32_t    *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            ma = *mask++;
            if (ma == 0xffffffff)
            {
                if (srca == 0xff)
                    *dst = src;
                else
                    *dst = over (src, *dst);
            }
            else if (ma)
            {
                d = *dst;
                s = src;

                UN8x4_MUL_UN8x4 (s, ma);
                UN8x4_MUL_UN8 (ma, srca);
                ma = ~ma;
                UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);

                *dst = d;
            }

            dst++;
        }
    }
}

static void
fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint8_t     *dst_line, *dst;
    uint32_t d;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            m = *mask++;
            if (m == 0xff)
            {
                if (srca == 0xff)
                {
                    d = src;
                }
                else
                {
                    d = fetch_24 (dst);
                    d = over (src, d);
                }
                store_24 (dst, d);
            }
            else if (m)
            {
                d = over (in (src, m), fetch_24 (dst));
                store_24 (dst, d);
            }
            dst += 3;
        }
    }
}

static void
fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint16_t    *dst_line, *dst;
    uint32_t d;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            m = *mask++;
            if (m == 0xff)
            {
                if (srca == 0xff)
                {
                    d = src;
                }
                else
                {
                    d = *dst;
                    d = over (src, convert_0565_to_0888 (d));
                }
                *dst = convert_8888_to_0565 (d);
            }
            else if (m)
            {
                d = *dst;
                d = over (in (src, m), convert_0565_to_0888 (d));
                *dst = convert_8888_to_0565 (d);
            }
            dst++;
        }
    }
}

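/* Component-alpha OVER of a solid source into an r5g6b5 destination;
 * destination pixels are expanded to 8888, composited, then converted
 * back to 0565.
 */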
static void
fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t  src, srca, s;
    uint16_t  src16;
    uint16_t *dst_line, *dst;
    uint32_t  d;
    uint32_t *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
        return;

    src16 = convert_8888_to_0565 (src);

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            ma = *mask++;
            if (ma == 0xffffffff)
            {
                if (srca == 0xff)
                {
                    *dst = src16;
                }
                else
                {
                    d = *dst;
                    d = over (src, convert_0565_to_0888 (d));
                    *dst = convert_8888_to_0565 (d);
                }
            }
            else if (ma)
            {
                d = *dst;
                d = convert_0565_to_0888 (d);

                s = src;

                UN8x4_MUL_UN8x4 (s, ma);
                UN8x4_MUL_UN8 (ma, srca);
                ma = ~ma;
                UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);

                *dst = convert_8888_to_0565 (d);
            }
            dst++;
        }
    }
}

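/* OVER of a premultiplied 8888 source onto an 8888 destination, skipping
 * fully transparent source pixels and copying fully opaque ones.
 */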
static void
fast_composite_over_8888_8888 (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t    *dst_line, *dst;
    uint32_t    *src_line, *src, s;
    int dst_stride, src_stride;
    uint8_t a;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;
            a = s >> 24;
            if (a == 0xff)
                *dst = s;
            else if (s)
                *dst = over (s, *dst);
            dst++;
        }
    }
}

static void
fast_composite_src_x888_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t    *dst_line, *dst;
    uint32_t    *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
            *dst++ = (*src++) | 0xff000000;
    }
}

#if 0
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint32_t d;
    uint32_t    *src_line, *src, s;
    uint8_t a;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;
            a = s >> 24;
            if (a)
            {
                if (a == 0xff)
                    d = s;
                else
                    d = over (s, fetch_24 (dst));

                store_24 (dst, d);
            }
            dst += 3;
        }
    }
}
#endif

static void
fast_composite_over_8888_0565 (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint16_t    *dst_line, *dst;
    uint32_t d;
    uint32_t    *src_line, *src, s;
    uint8_t a;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;
            a = s >> 24;
            if (s)
            {
                if (a == 0xff)
                {
                    d = s;
                }
                else
                {
                    d = *dst;
                    d = over (s, convert_0565_to_0888 (d));
                }
                *dst = convert_8888_to_0565 (d);
            }
            dst++;
        }
    }
}

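/* Saturating ADD of an a8 source onto an a8 destination. */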
static void
fast_composite_add_8_8 (pixman_implementation_t *imp,
                        pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint8_t     *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint8_t s, d;
    uint16_t t;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;
            if (s)
            {
                if (s != 0xff)
                {
                    d = *dst;
                    t = d + s;
                    s = t | (0 - (t >> 8));
                }
                *dst = s;
            }
            dst++;
        }
    }
}

static void
fast_composite_add_0565_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint16_t    *dst_line, *dst;
    uint32_t    d;
    uint16_t    *src_line, *src;
    uint32_t    s;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;
            if (s)
            {
                d = *dst;
                s = convert_0565_to_8888 (s);
                if (d)
                {
                    d = convert_0565_to_8888 (d);
                    UN8x4_ADD_UN8x4 (s, d);
                }
                *dst = convert_8888_to_0565 (s);
            }
            dst++;
        }
    }
}

static void
fast_composite_add_8888_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t    *dst_line, *dst;
    uint32_t    *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint32_t s, d;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;
            if (s)
            {
                if (s != 0xffffffff)
                {
                    d = *dst;
                    if (d)
                        UN8x4_ADD_UN8x4 (s, d);
                }
                *dst = s;
            }
            dst++;
        }
    }
}

static void
fast_composite_add_n_8_8 (pixman_implementation_t *imp,
                          pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint8_t     *mask_line, *mask;
    int dst_stride, mask_stride;
    int32_t w;
    uint32_t src;
    uint8_t sa;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    sa = (src >> 24);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            uint16_t tmp;
            uint16_t a;
            uint32_t m, d;
            uint32_t r;

            a = *mask++;
            d = *dst;

            m = MUL_UN8 (sa, a, tmp);
            r = ADD_UN8 (m, d, tmp);

            *dst++ = r;
        }
    }
}

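/* Helpers for addressing individual bits inside the 32-bit words of a1
 * images; the bit order within a word depends on the host byte order.
 */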
#ifdef WORDS_BIGENDIAN
#define CREATE_BITMASK(n) (0x80000000 >> (n))
#define UPDATE_BITMASK(n) ((n) >> 1)
#else
#define CREATE_BITMASK(n) (1 << (n))
#define UPDATE_BITMASK(n) ((n) << 1)
#endif

#define TEST_BIT(p, n)                                  \
    (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
#define SET_BIT(p, n)                                                   \
    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);

static void
fast_composite_add_1_1 (pixman_implementation_t *imp,
                        pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t     *dst_line, *dst;
    uint32_t     *src_line, *src;
    int           dst_stride, src_stride;
    int32_t       w;

    PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
                           src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
                           dst_stride, dst_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            /*
             * TODO: improve performance by processing uint32_t data instead
             *       of individual bits
             */
            if (TEST_BIT (src, src_x + w))
                SET_BIT (dst, dest_x + w);
        }
    }
}

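/* OVER with a solid source and an a1 mask into an 8888 destination:
 * wherever a mask bit is set the source is either copied (opaque source)
 * or composited with over().
 */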
static void
fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t     src, srca;
    uint32_t    *dst, *dst_line;
    uint32_t    *mask, *mask_line;
    int          mask_stride, dst_stride;
    uint32_t     bitcache, bitmask;
    int32_t      w;

    if (width <= 0)
        return;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    srca = src >> 24;
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
                           dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
                           mask_stride, mask_line, 1);
    mask_line += mask_x >> 5;

    if (srca == 0xff)
    {
        while (height--)
        {
            dst = dst_line;
            dst_line += dst_stride;
            mask = mask_line;
            mask_line += mask_stride;
            w = width;

            bitcache = *mask++;
            bitmask = CREATE_BITMASK (mask_x & 31);

            while (w--)
            {
                if (bitmask == 0)
                {
                    bitcache = *mask++;
                    bitmask = CREATE_BITMASK (0);
                }
                if (bitcache & bitmask)
                    *dst = src;
                bitmask = UPDATE_BITMASK (bitmask);
                dst++;
            }
        }
    }
    else
    {
        while (height--)
        {
            dst = dst_line;
            dst_line += dst_stride;
            mask = mask_line;
            mask_line += mask_stride;
            w = width;

            bitcache = *mask++;
            bitmask = CREATE_BITMASK (mask_x & 31);

            while (w--)
            {
                if (bitmask == 0)
                {
                    bitcache = *mask++;
                    bitmask = CREATE_BITMASK (0);
                }
                if (bitcache & bitmask)
                    *dst = over (src, *dst);
                bitmask = UPDATE_BITMASK (bitmask);
                dst++;
            }
        }
    }
}

static void
fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t     src, srca;
    uint16_t    *dst, *dst_line;
    uint32_t    *mask, *mask_line;
    int          mask_stride, dst_stride;
    uint32_t     bitcache, bitmask;
    int32_t      w;
    uint32_t     d;
    uint16_t     src565;

    if (width <= 0)
        return;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    srca = src >> 24;
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
                           dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
                           mask_stride, mask_line, 1);
    mask_line += mask_x >> 5;

    if (srca == 0xff)
    {
        src565 = convert_8888_to_0565 (src);
        while (height--)
        {
            dst = dst_line;
            dst_line += dst_stride;
            mask = mask_line;
            mask_line += mask_stride;
            w = width;

            bitcache = *mask++;
            bitmask = CREATE_BITMASK (mask_x & 31);

            while (w--)
            {
                if (bitmask == 0)
                {
                    bitcache = *mask++;
                    bitmask = CREATE_BITMASK (0);
                }
                if (bitcache & bitmask)
                    *dst = src565;
                bitmask = UPDATE_BITMASK (bitmask);
                dst++;
            }
        }
    }
    else
    {
        while (height--)
        {
            dst = dst_line;
            dst_line += dst_stride;
            mask = mask_line;
            mask_line += mask_stride;
            w = width;

            bitcache = *mask++;
            bitmask = CREATE_BITMASK (mask_x & 31);

            while (w--)
            {
                if (bitmask == 0)
                {
                    bitcache = *mask++;
                    bitmask = CREATE_BITMASK (0);
                }
                if (bitcache & bitmask)
                {
                    d = over (src, convert_0565_to_0888 (*dst));
                    *dst = convert_8888_to_0565 (d);
                }
                bitmask = UPDATE_BITMASK (bitmask);
                dst++;
            }
        }
    }
}

/*
 * Simple bitblt
 */

static void
fast_composite_solid_fill (pixman_implementation_t *imp,
                           pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    if (dest_image->bits.format == PIXMAN_a1)
    {
        src = src >> 31;
    }
    else if (dest_image->bits.format == PIXMAN_a8)
    {
        src = src >> 24;
    }
    else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
             dest_image->bits.format == PIXMAN_b5g6r5)
    {
        src = convert_8888_to_0565 (src);
    }

    pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
                 PIXMAN_FORMAT_BPP (dest_image->bits.format),
                 dest_x, dest_y,
                 width, height,
                 src);
}

static void
fast_composite_src_memcpy (pixman_implementation_t *imp,
                           pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
    uint32_t n_bytes = width * bpp;
    int dst_stride, src_stride;
    uint8_t    *dst;
    uint8_t    *src;

    src_stride = src_image->bits.rowstride * 4;
    dst_stride = dest_image->bits.rowstride * 4;

    src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
    dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;

    while (height--)
    {
        memcpy (dst, src, n_bytes);

        dst += dst_stride;
        src += src_stride;
    }
}

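/* Instantiate nearest-neighbour scaling fast paths for the listed
 * source/destination formats, operators and repeat modes.
 */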
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)

#define REPEAT_MIN_WIDTH    32

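/* Handle NORMAL (tiled) repeat by looking up a non-repeating fast path
 * and calling it once per horizontal tile of every scanline.  Very narrow
 * sources are first replicated into the extended_src scratch buffer so
 * that each call covers at least REPEAT_MIN_WIDTH pixels.
 */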
static void
fast_composite_tiled_repeat (pixman_implementation_t *imp,
                             pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    pixman_composite_func_t func;
    pixman_format_code_t mask_format;
    uint32_t src_flags, mask_flags;
    int32_t sx, sy;
    int32_t width_remain;
    int32_t num_pixels;
    int32_t src_width;
    int32_t i, j;
    pixman_image_t extended_src_image;
    uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
    pixman_bool_t need_src_extension;
    uint32_t *src_line;
    int32_t src_stride;
    int32_t src_bpp;
    pixman_composite_info_t info2 = *info;

    src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
                    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;

    if (mask_image)
    {
        mask_format = mask_image->common.extended_format_code;
        mask_flags = info->mask_flags;
    }
    else
    {
        mask_format = PIXMAN_null;
        mask_flags = FAST_PATH_IS_OPAQUE;
    }

    _pixman_implementation_lookup_composite (
        imp->toplevel, info->op,
        src_image->common.extended_format_code, src_flags,
        mask_format, mask_flags,
        dest_image->common.extended_format_code, info->dest_flags,
        &imp, &func);

    src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);

    if (src_image->bits.width < REPEAT_MIN_WIDTH                &&
        (src_bpp == 32 || src_bpp == 16 || src_bpp == 8)        &&
        !src_image->bits.indexed)
    {
        sx = src_x;
        sx = MOD (sx, src_image->bits.width);
        sx += width;
        src_width = 0;

        while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
            src_width += src_image->bits.width;

        src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);

        /* Initialize/validate stack-allocated temporary image */
        _pixman_bits_image_init (&extended_src_image, src_image->bits.format,
                                 src_width, 1, &extended_src[0], src_stride,
                                 FALSE);
        _pixman_image_validate (&extended_src_image);

        info2.src_image = &extended_src_image;
        need_src_extension = TRUE;
    }
    else
    {
        src_width = src_image->bits.width;
        need_src_extension = FALSE;
    }

    sx = src_x;
    sy = src_y;

    while (--height >= 0)
    {
        sx = MOD (sx, src_width);
        sy = MOD (sy, src_image->bits.height);

        if (need_src_extension)
        {
            if (src_bpp == 32)
            {
                PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);

                for (i = 0; i < src_width; )
                {
                    for (j = 0; j < src_image->bits.width; j++, i++)
                        extended_src[i] = src_line[j];
                }
            }
            else if (src_bpp == 16)
            {
                uint16_t *src_line_16;

                PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
                                       src_line_16, 1);
                src_line = (uint32_t*)src_line_16;

                for (i = 0; i < src_width; )
                {
                    for (j = 0; j < src_image->bits.width; j++, i++)
                        ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
                }
            }
            else if (src_bpp == 8)
            {
                uint8_t *src_line_8;

                PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
                                       src_line_8, 1);
                src_line = (uint32_t*)src_line_8;

                for (i = 0; i < src_width; )
                {
                    for (j = 0; j < src_image->bits.width; j++, i++)
                        ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
                }
            }

            info2.src_y = 0;
        }
        else
        {
            info2.src_y = sy;
        }

        width_remain = width;

        while (width_remain > 0)
        {
            num_pixels = src_width - sx;

            if (num_pixels > width_remain)
                num_pixels = width_remain;

            info2.src_x = sx;
            info2.width = num_pixels;
            info2.height = 1;

            func (imp, &info2);

            width_remain -= num_pixels;
            info2.mask_x += num_pixels;
            info2.dest_x += num_pixels;
            sx = 0;
        }

        sx = src_x;
        sy++;
        info2.mask_x = info->mask_x;
        info2.mask_y++;
        info2.dest_x = info->dest_x;
        info2.dest_y++;
    }

    if (need_src_extension)
        _pixman_image_fini (&extended_src_image);
}

/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
static force_inline void
scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
                                     const uint16_t * src,
                                     int32_t          w,
                                     pixman_fixed_t   vx,
                                     pixman_fixed_t   unit_x,
                                     pixman_fixed_t   max_vx,
                                     pixman_bool_t    fully_transparent_src)
{
    uint16_t tmp1, tmp2, tmp3, tmp4;
    while ((w -= 4) >= 0)
    {
        tmp1 = *(src + pixman_fixed_to_int (vx));
        vx += unit_x;
        tmp2 = *(src + pixman_fixed_to_int (vx));
        vx += unit_x;
        tmp3 = *(src + pixman_fixed_to_int (vx));
        vx += unit_x;
        tmp4 = *(src + pixman_fixed_to_int (vx));
        vx += unit_x;
        *dst++ = tmp1;
        *dst++ = tmp2;
        *dst++ = tmp3;
        *dst++ = tmp4;
    }
    if (w & 2)
    {
        tmp1 = *(src + pixman_fixed_to_int (vx));
        vx += unit_x;
        tmp2 = *(src + pixman_fixed_to_int (vx));
        vx += unit_x;
        *dst++ = tmp1;
        *dst++ = tmp2;
    }
    if (w & 1)
        *dst = *(src + pixman_fixed_to_int (vx));
}

FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
                       scaled_nearest_scanline_565_565_SRC,
                       uint16_t, uint16_t, COVER)
FAST_NEAREST_MAINLOOP (565_565_none_SRC,
                       scaled_nearest_scanline_565_565_SRC,
                       uint16_t, uint16_t, NONE)
FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
                       scaled_nearest_scanline_565_565_SRC,
                       uint16_t, uint16_t, PAD)

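/* Fetch one source pixel for nearest-neighbour sampling, applying the
 * repeat mode to x; x8r8g8b8/x8b8g8r8 pixels get their alpha forced to
 * 0xff, and out-of-bounds reads of a non-repeating source return 0.
 */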
static force_inline uint32_t
fetch_nearest (pixman_repeat_t src_repeat,
               pixman_format_code_t format,
               uint32_t *src, int x, int src_width)
{
    if (repeat (src_repeat, &x, src_width))
    {
        if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
            return *(src + x) | 0xff000000;
        else
            return *(src + x);
    }
    else
    {
        return 0;
    }
}

static force_inline void
combine_over (uint32_t s, uint32_t *dst)
{
    if (s)
    {
        uint8_t ia = 0xff - (s >> 24);

        if (ia)
            UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
        else
            *dst = s;
    }
}

static force_inline void
combine_src (uint32_t s, uint32_t *dst)
{
    *dst = s;
}

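/* General nearest-neighbour scaling path for 8888 sources and
 * destinations with SRC or OVER; it only handles scale + translate
 * transforms, taking the step sizes from the matrix diagonal.
 */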
1457 static void
1458 fast_composite_scaled_nearest (pixman_implementation_t *imp,
1459                                pixman_composite_info_t *info)
1460 {
1461     PIXMAN_COMPOSITE_ARGS (info);
1462     uint32_t       *dst_line;
1463     uint32_t       *src_line;
1464     int             dst_stride, src_stride;
1465     int             src_width, src_height;
1466     pixman_repeat_t src_repeat;
1467     pixman_fixed_t unit_x, unit_y;
1468     pixman_format_code_t src_format;
1469     pixman_vector_t v;
1470     pixman_fixed_t vy;
1471
1472     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
1473     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
1474      * transformed from destination space to source space
1475      */
1476     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
1477
1478     /* reference point is the center of the pixel */
1479     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
1480     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
1481     v.vector[2] = pixman_fixed_1;
1482
1483     if (!pixman_transform_point_3d (src_image->common.transform, &v))
1484         return;
1485
1486     unit_x = src_image->common.transform->matrix[0][0];
1487     unit_y = src_image->common.transform->matrix[1][1];
1488
1489     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
1490     v.vector[0] -= pixman_fixed_e;
1491     v.vector[1] -= pixman_fixed_e;
1492
1493     src_height = src_image->bits.height;
1494     src_width = src_image->bits.width;
1495     src_repeat = src_image->common.repeat;
1496     src_format = src_image->bits.format;
1497
1498     vy = v.vector[1];
1499     while (height--)
1500     {
1501         pixman_fixed_t vx = v.vector[0];
1502         int y = pixman_fixed_to_int (vy);
1503         uint32_t *dst = dst_line;
1504
1505         dst_line += dst_stride;
1506
1507         /* adjust the y location by a unit vector in the y direction
1508          * this is equivalent to transforming y+1 of the destination point to source space */
1509         vy += unit_y;
1510
1511         if (!repeat (src_repeat, &y, src_height))
1512         {
1513             if (op == PIXMAN_OP_SRC)
1514                 memset (dst, 0, sizeof (*dst) * width);
1515         }
1516         else
1517         {
1518             int w = width;
1519
1520             uint32_t *src = src_line + y * src_stride;
1521
1522             while (w >= 2)
1523             {
1524                 uint32_t s1, s2;
1525                 int x1, x2;
1526
1527                 x1 = pixman_fixed_to_int (vx);
1528                 vx += unit_x;
1529
1530                 x2 = pixman_fixed_to_int (vx);
1531                 vx += unit_x;
1532
1533                 w -= 2;
1534
1535                 s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
1536                 s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
1537
1538                 if (op == PIXMAN_OP_OVER)
1539                 {
1540                     combine_over (s1, dst++);
1541                     combine_over (s2, dst++);
1542                 }
1543                 else
1544                 {
1545                     combine_src (s1, dst++);
1546                     combine_src (s2, dst++);
1547                 }
1548             }
1549
1550             while (w--)
1551             {
1552                 uint32_t s;
1553                 int x;
1554
1555                 x = pixman_fixed_to_int (vx);
1556                 vx += unit_x;
1557
1558                 s = fetch_nearest (src_repeat, src_format, src, x, src_width);
1559
1560                 if (op == PIXMAN_OP_OVER)
1561                     combine_over (s, dst++);
1562                 else
1563                     combine_src (s, dst++);
1564             }
1565         }
1566     }
1567 }
1568
1569 #define CACHE_LINE_SIZE 64
1570
1571 #define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
1572                                                                               \
1573 static void                                                                   \
1574 blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
1575                                  int             dst_stride,                  \
1576                                  const pix_type *src,                         \
1577                                  int             src_stride,                  \
1578                                  int             w,                           \
1579                                  int             h)                           \
1580 {                                                                             \
1581     int x, y;                                                                 \
1582     for (y = 0; y < h; y++)                                                   \
1583     {                                                                         \
1584         const pix_type *s = src + (h - y - 1);                                \
1585         pix_type *d = dst + dst_stride * y;                                   \
1586         for (x = 0; x < w; x++)                                               \
1587         {                                                                     \
1588             *d++ = *s;                                                        \
1589             s += src_stride;                                                  \
1590         }                                                                     \
1591     }                                                                         \
1592 }                                                                             \
1593                                                                               \
1594 static void                                                                   \
1595 blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
1596                                   int             dst_stride,                 \
1597                                   const pix_type *src,                        \
1598                                   int             src_stride,                 \
1599                                   int             w,                          \
1600                                   int             h)                          \
1601 {                                                                             \
1602     int x, y;                                                                 \
1603     for (y = 0; y < h; y++)                                                   \
1604     {                                                                         \
1605         const pix_type *s = src + src_stride * (w - 1) + y;                   \
1606         pix_type *d = dst + dst_stride * y;                                   \
1607         for (x = 0; x < w; x++)                                               \
1608         {                                                                     \
1609             *d++ = *s;                                                        \
1610             s -= src_stride;                                                  \
1611         }                                                                     \
1612     }                                                                         \
1613 }                                                                             \
1614                                                                               \
1615 static void                                                                   \
1616 blt_rotated_90_##suffix (pix_type       *dst,                                 \
1617                          int             dst_stride,                          \
1618                          const pix_type *src,                                 \
1619                          int             src_stride,                          \
1620                          int             W,                                   \
1621                          int             H)                                   \
1622 {                                                                             \
1623     int x;                                                                    \
1624     int leading_pixels = 0, trailing_pixels = 0;                              \
1625     const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
1626                                                                               \
1627     /*                                                                        \
1628      * split processing into handling destination as TILE_SIZExH cache line   \
1629      * aligned vertical stripes (optimistically assuming that destination     \
1630      * stride is a multiple of cache line, if not - it will be just a bit     \
1631      * slower)                                                                \
1632      */                                                                       \
1633                                                                               \
1634     if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
1635     {                                                                         \
1636         leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
1637                             (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
1638         if (leading_pixels > W)                                               \
1639             leading_pixels = W;                                               \
1640                                                                               \
1641         /* unaligned leading part NxH (where N < TILE_SIZE) */                \
1642         blt_rotated_90_trivial_##suffix (                                     \
1643             dst,                                                              \
1644             dst_stride,                                                       \
1645             src,                                                              \
1646             src_stride,                                                       \
1647             leading_pixels,                                                   \
1648             H);                                                               \
1649                                                                               \
1650         dst += leading_pixels;                                                \
1651         src += leading_pixels * src_stride;                                   \
1652         W -= leading_pixels;                                                  \
1653     }                                                                         \
1654                                                                               \
1655     if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
1656     {                                                                         \
1657         trailing_pixels = (((uintptr_t)(dst + W) &                            \
1658                             (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
1659         if (trailing_pixels > W)                                              \
1660             trailing_pixels = W;                                              \
1661         W -= trailing_pixels;                                                 \
1662     }                                                                         \
1663                                                                               \
1664     for (x = 0; x < W; x += TILE_SIZE)                                        \
1665     {                                                                         \
1666         /* aligned middle part TILE_SIZExH */                                 \
1667         blt_rotated_90_trivial_##suffix (                                     \
1668             dst + x,                                                          \
1669             dst_stride,                                                       \
1670             src + src_stride * x,                                             \
1671             src_stride,                                                       \
1672             TILE_SIZE,                                                        \
1673             H);                                                               \
1674     }                                                                         \
1675                                                                               \
1676     if (trailing_pixels)                                                      \
1677     {                                                                         \
1678         /* unaligned trailing part NxH (where N < TILE_SIZE) */               \
1679         blt_rotated_90_trivial_##suffix (                                     \
1680             dst + W,                                                          \
1681             dst_stride,                                                       \
1682             src + W * src_stride,                                             \
1683             src_stride,                                                       \
1684             trailing_pixels,                                                  \
1685             H);                                                               \
1686     }                                                                         \
1687 }                                                                             \
1688                                                                               \
1689 static void                                                                   \
1690 blt_rotated_270_##suffix (pix_type       *dst,                                \
1691                           int             dst_stride,                         \
1692                           const pix_type *src,                                \
1693                           int             src_stride,                         \
1694                           int             W,                                  \
1695                           int             H)                                  \
1696 {                                                                             \
1697     int x;                                                                    \
1698     int leading_pixels = 0, trailing_pixels = 0;                              \
1699     const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
1700                                                                               \
1701     /*                                                                        \
1702      * split processing of the destination into TILE_SIZExH vertical       \
1703      * stripes that are cache line aligned (optimistically assuming that    \
1704      * the destination stride is a multiple of the cache line size; if it   \
1705      * is not, the code still works, just a bit slower)                     \
1706      */                                                                       \
1707                                                                               \
1708     if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
1709     {                                                                         \
1710         leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
1711                             (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
1712         if (leading_pixels > W)                                               \
1713             leading_pixels = W;                                               \
1714                                                                               \
1715         /* unaligned leading part NxH (where N < TILE_SIZE) */                \
1716         blt_rotated_270_trivial_##suffix (                                    \
1717             dst,                                                              \
1718             dst_stride,                                                       \
1719             src + src_stride * (W - leading_pixels),                          \
1720             src_stride,                                                       \
1721             leading_pixels,                                                   \
1722             H);                                                               \
1723                                                                               \
1724         dst += leading_pixels;                                                \
1725         W -= leading_pixels;                                                  \
1726     }                                                                         \
1727                                                                               \
1728     if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
1729     {                                                                         \
1730         trailing_pixels = (((uintptr_t)(dst + W) &                            \
1731                             (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
1732         if (trailing_pixels > W)                                              \
1733             trailing_pixels = W;                                              \
1734         W -= trailing_pixels;                                                 \
1735         src += trailing_pixels * src_stride;                                  \
1736     }                                                                         \
1737                                                                               \
1738     for (x = 0; x < W; x += TILE_SIZE)                                        \
1739     {                                                                         \
1740         /* aligned middle part TILE_SIZExH */                                 \
1741         blt_rotated_270_trivial_##suffix (                                    \
1742             dst + x,                                                          \
1743             dst_stride,                                                       \
1744             src + src_stride * (W - x - TILE_SIZE),                           \
1745             src_stride,                                                       \
1746             TILE_SIZE,                                                        \
1747             H);                                                               \
1748     }                                                                         \
1749                                                                               \
1750     if (trailing_pixels)                                                      \
1751     {                                                                         \
1752         /* unaligned trailing part NxH (where N < TILE_SIZE) */               \
1753         blt_rotated_270_trivial_##suffix (                                    \
1754             dst + W,                                                          \
1755             dst_stride,                                                       \
1756             src - trailing_pixels * src_stride,                               \
1757             src_stride,                                                       \
1758             trailing_pixels,                                                  \
1759             H);                                                               \
1760     }                                                                         \
1761 }                                                                             \
1762                                                                               \
1763 static void                                                                   \
1764 fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,              \
1765                                    pixman_composite_info_t *info)             \
1766 {                                                                             \
1767     PIXMAN_COMPOSITE_ARGS (info);                                             \
1768     pix_type       *dst_line;                                                 \
1769     pix_type       *src_line;                                                 \
1770     int             dst_stride, src_stride;                                   \
1771     int             src_x_t, src_y_t;                                         \
1772                                                                               \
1773     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
1774                            dst_stride, dst_line, 1);                          \
1775     src_x_t = -src_y + pixman_fixed_to_int (                                  \
1776                                 src_image->common.transform->matrix[0][2] +   \
1777                                 pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
1778     src_y_t = src_x + pixman_fixed_to_int (                                   \
1779                                 src_image->common.transform->matrix[1][2] +   \
1780                                 pixman_fixed_1 / 2 - pixman_fixed_e);         \
1781     PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
1782                            src_stride, src_line, 1);                          \
1783     blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
1784                              width, height);                                  \
1785 }                                                                             \
1786                                                                               \
1787 static void                                                                   \
1788 fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,             \
1789                                     pixman_composite_info_t *info)            \
1790 {                                                                             \
1791     PIXMAN_COMPOSITE_ARGS (info);                                             \
1792     pix_type       *dst_line;                                                 \
1793     pix_type       *src_line;                                                 \
1794     int             dst_stride, src_stride;                                   \
1795     int             src_x_t, src_y_t;                                         \
1796                                                                               \
1797     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
1798                            dst_stride, dst_line, 1);                          \
1799     src_x_t = src_y + pixman_fixed_to_int (                                   \
1800                                 src_image->common.transform->matrix[0][2] +   \
1801                                 pixman_fixed_1 / 2 - pixman_fixed_e);         \
1802     src_y_t = -src_x + pixman_fixed_to_int (                                  \
1803                                 src_image->common.transform->matrix[1][2] +   \
1804                                 pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
1805     PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
1806                            src_stride, src_line, 1);                          \
1807     blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
1808                               width, height);                                 \
1809 }
1810
1811 FAST_SIMPLE_ROTATE (8, uint8_t)
1812 FAST_SIMPLE_ROTATE (565, uint16_t)
1813 FAST_SIMPLE_ROTATE (8888, uint32_t)
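
/*
 * Illustrative sketch, not part of the original code: the blt_rotated_90/270
 * helpers generated above split the destination into cache-line-aligned
 * vertical stripes.  Assuming, purely for the sake of the example, a 64-byte
 * CACHE_LINE_SIZE and 32bpp pixels, TILE_SIZE is 64 / 4 = 16 pixels.  For a
 * destination pointer at address 0x1008 (8 bytes past a cache line boundary)
 * the unaligned leading stripe is
 *
 *     TILE_SIZE - (0x1008 & 63) / sizeof (uint32_t) = 16 - 2 = 14 pixels,
 *
 * after which every middle stripe starts on a cache line boundary and is
 * exactly TILE_SIZE pixels wide; whatever remains at the right edge forms
 * the trailing stripe.
 */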
1814
1815 static const pixman_fast_path_t c_fast_paths[] =
1816 {
1817     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
1818     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
1819     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
1820     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
1821     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
1822     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
1823     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
1824     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
1825     PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
1826     PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
1827     PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
1828     PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
1829     PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5,   fast_composite_over_n_1_0565),
1830     PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5,   fast_composite_over_n_1_0565),
1831     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
1832     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
1833     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
1834     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
1835     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
1836     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
1837     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
1838     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
1839     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
1840     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
1841     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
1842     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
1843     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
1844     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
1845     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
1846     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
1847     PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
1848     PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
1849     PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
1850     PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
1851     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
1852     PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
1853     PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
1854     PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
1855     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
1856     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
1857     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
1858     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
1859     PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
1860     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
1861     PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
1862     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
1863     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
1864     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
1865     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
1866     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
1867     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
1868     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
1869     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
1870     PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
1871     PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
1872     PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
1873     PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
1874     PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
1875     PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
1876     PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
1877     PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
1878     PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
1879     PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
1880     PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
1881     PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
1882
1883     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
1884     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
1885     SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
1886     SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
1887
1888     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
1889     SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
1890
1891     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
1892     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
1893
1894     SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
1895
1896     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
1897     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
1898     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
1899     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
1900     SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
1901     SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
1902
1903     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
1904     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
1905     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
1906     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
1907
1908     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
1909
1910 #define NEAREST_FAST_PATH(op,s,d)               \
1911     {   PIXMAN_OP_ ## op,                       \
1912         PIXMAN_ ## s, SCALED_NEAREST_FLAGS,     \
1913         PIXMAN_null, 0,                         \
1914         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1915         fast_composite_scaled_nearest,          \
1916     }
1917
1918     NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
1919     NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
1920     NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
1921     NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
1922
1923     NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
1924     NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
1925     NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
1926     NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
1927
1928     NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
1929     NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
1930     NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
1931     NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
1932
1933     NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
1934     NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
1935     NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
1936     NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
1937
1938 #define SIMPLE_ROTATE_FLAGS(angle)                                        \
1939     (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM   |                         \
1940      FAST_PATH_NEAREST_FILTER                   |                         \
1941      FAST_PATH_SAMPLES_COVER_CLIP_NEAREST       |                         \
1942      FAST_PATH_STANDARD_FLAGS)
1943
1944 #define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)                            \
1945     {   PIXMAN_OP_ ## op,                                                 \
1946         PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),                           \
1947         PIXMAN_null, 0,                                                   \
1948         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                           \
1949         fast_composite_rotate_90_##suffix,                                \
1950     },                                                                    \
1951     {   PIXMAN_OP_ ## op,                                                 \
1952         PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),                          \
1953         PIXMAN_null, 0,                                                   \
1954         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                           \
1955         fast_composite_rotate_270_##suffix,                               \
1956     }
1957
1958     SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
1959     SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
1960     SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
1961     SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
1962     SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
1963
1964     /* Simple repeat fast path entry. */
1965     {   PIXMAN_OP_any,
1966         PIXMAN_any,
1967         (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
1968          FAST_PATH_NORMAL_REPEAT),
1969         PIXMAN_any, 0,
1970         PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
1971         fast_composite_tiled_repeat
1972     },
1973
1974     {   PIXMAN_OP_NONE  },
1975 };
1976
1977 #ifdef WORDS_BIGENDIAN
1978 #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
1979 #else
1980 #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
1981 #endif
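
/*
 * Worked example, added for clarity: on a little-endian target
 * A1_FILL_MASK (3, 5) evaluates to ((1U << 3) - 1) << 5 = 0x7 << 5 = 0xE0,
 * i.e. a run of three consecutive 1 bits starting at bit offset 5 of the
 * 32-bit word.  On a big-endian target the same call yields
 * 0x7 << (32 - 5 - 3) = 0x07000000, because a1 pixels are counted from the
 * most significant bit there.
 */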
1982
1983 static force_inline void
1984 pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
1985 {
1986     if (offs)
1987     {
1988         int leading_pixels = 32 - offs;
1989         if (leading_pixels >= width)
1990         {
1991             if (v)
1992                 *dst |= A1_FILL_MASK (width, offs);
1993             else
1994                 *dst &= ~A1_FILL_MASK (width, offs);
1995             return;
1996         }
1997         else
1998         {
1999             if (v)
2000                 *dst++ |= A1_FILL_MASK (leading_pixels, offs);
2001             else
2002                 *dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
2003             width -= leading_pixels;
2004         }
2005     }
2006     while (width >= 32)
2007     {
2008         if (v)
2009             *dst++ = 0xFFFFFFFF;
2010         else
2011             *dst++ = 0;
2012         width -= 32;
2013     }
2014     if (width > 0)
2015     {
2016         if (v)
2017             *dst |= A1_FILL_MASK (width, 0);
2018         else
2019             *dst &= ~A1_FILL_MASK (width, 0);
2020     }
2021 }
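
/*
 * Worked example, added for clarity: filling 40 a1 pixels starting at bit
 * offset 27 touches three words.  The leading partial word takes
 * A1_FILL_MASK (5, 27) (5 = 32 - 27 pixels), the next word is written whole
 * (32 pixels), and the remaining 3 pixels use A1_FILL_MASK (3, 0) in the
 * third word.
 */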
2022
2023 static void
2024 pixman_fill1 (uint32_t *bits,
2025               int       stride,
2026               int       x,
2027               int       y,
2028               int       width,
2029               int       height,
2030               uint32_t  filler)
2031 {
2032     uint32_t *dst = bits + y * stride + (x >> 5);
2033     int offs = x & 31;
2034
2035     if (filler & 1)
2036     {
2037         while (height--)
2038         {
2039             pixman_fill1_line (dst, offs, width, 1);
2040             dst += stride;
2041         }
2042     }
2043     else
2044     {
2045         while (height--)
2046         {
2047             pixman_fill1_line (dst, offs, width, 0);
2048             dst += stride;
2049         }
2050     }
2051 }
2052
2053 static void
2054 pixman_fill8 (uint32_t *bits,
2055               int       stride,
2056               int       x,
2057               int       y,
2058               int       width,
2059               int       height,
2060               uint32_t  filler)
2061 {
2062     int byte_stride = stride * (int) sizeof (uint32_t);
2063     uint8_t *dst = (uint8_t *) bits;
2064     uint8_t v = filler & 0xff;
2065     int i;
2066
2067     dst = dst + y * byte_stride + x;
2068
2069     while (height--)
2070     {
2071         for (i = 0; i < width; ++i)
2072             dst[i] = v;
2073
2074         dst += byte_stride;
2075     }
2076 }
2077
2078 static void
2079 pixman_fill16 (uint32_t *bits,
2080                int       stride,
2081                int       x,
2082                int       y,
2083                int       width,
2084                int       height,
2085                uint32_t  filler)
2086 {
2087     int short_stride =
2088         (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
2089     uint16_t *dst = (uint16_t *)bits;
2090     uint16_t v = filler & 0xffff;
2091     int i;
2092
2093     dst = dst + y * short_stride + x;
2094
2095     while (height--)
2096     {
2097         for (i = 0; i < width; ++i)
2098             dst[i] = v;
2099
2100         dst += short_stride;
2101     }
2102 }
2103
2104 static void
2105 pixman_fill32 (uint32_t *bits,
2106                int       stride,
2107                int       x,
2108                int       y,
2109                int       width,
2110                int       height,
2111                uint32_t  filler)
2112 {
2113     int i;
2114
2115     bits = bits + y * stride + x;
2116
2117     while (height--)
2118     {
2119         for (i = 0; i < width; ++i)
2120             bits[i] = filler;
2121
2122         bits += stride;
2123     }
2124 }
2125
2126 static pixman_bool_t
2127 fast_path_fill (pixman_implementation_t *imp,
2128                 uint32_t *               bits,
2129                 int                      stride,
2130                 int                      bpp,
2131                 int                      x,
2132                 int                      y,
2133                 int                      width,
2134                 int                      height,
2135                 uint32_t                 filler)
2136 {
2137     switch (bpp)
2138     {
2139     case 1:
2140         pixman_fill1 (bits, stride, x, y, width, height, filler);
2141         break;
2142
2143     case 8:
2144         pixman_fill8 (bits, stride, x, y, width, height, filler);
2145         break;
2146
2147     case 16:
2148         pixman_fill16 (bits, stride, x, y, width, height, filler);
2149         break;
2150
2151     case 32:
2152         pixman_fill32 (bits, stride, x, y, width, height, filler);
2153         break;
2154
2155     default:
2156         return FALSE;
2157     }
2158
2159     return TRUE;
2160 }
2161
2162 /*****************************************************************************/
2163
2164 static uint32_t *
2165 fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
2166 {
2167     int32_t w = iter->width;
2168     uint32_t *dst = iter->buffer;
2169     const uint16_t *src = (const uint16_t *)iter->bits;
2170
2171     iter->bits += iter->stride;
2172
2173     /* Align the source buffer at a 4-byte boundary */
2174     if (w > 0 && ((uintptr_t)src & 3))
2175     {
2176         *dst++ = convert_0565_to_8888 (*src++);
2177         w--;
2178     }
2179     /* Process two pixels per iteration */
2180     while ((w -= 2) >= 0)
2181     {
2182         uint32_t sr, sb, sg, t0, t1;
2183         uint32_t s = *(const uint32_t *)src;
2184         src += 2;
2185         sr = (s >> 8) & 0x00F800F8;
2186         sb = (s << 3) & 0x00F800F8;
2187         sg = (s >> 3) & 0x00FC00FC;
2188         sr |= sr >> 5;
2189         sb |= sb >> 5;
2190         sg |= sg >> 6;
2191         t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
2192              (sb & 0xFF) | 0xFF000000;
2193         t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
2194              (sb >> 16) | 0xFF000000;
2195 #ifdef WORDS_BIGENDIAN
2196         *dst++ = t1;
2197         *dst++ = t0;
2198 #else
2199         *dst++ = t0;
2200         *dst++ = t1;
2201 #endif
2202     }
2203     if (w & 1)
2204     {
2205         *dst = convert_0565_to_8888 (*src);
2206     }
2207
2208     return iter->buffer;
2209 }
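
/*
 * Worked example, added for clarity: the pair-wise r5g6b5 -> a8r8g8b8
 * expansion above widens each 5-bit channel c5 to (c5 << 3) | (c5 >> 2) and
 * the 6-bit green g6 to (g6 << 2) | (g6 >> 4), replicating the top bits into
 * the low bits so that 0x1F maps to 0xFF and 0x00 stays 0x00.  For instance
 * the 0565 value 0xF81F (r = 0x1F, g = 0x00, b = 0x1F) becomes 0xFFFF00FF,
 * matching what convert_0565_to_8888() produces for the scalar leftover
 * pixels.  The scalar sketch below is illustrative only and never compiled;
 * the function name is made up for the example.
 */
#if 0
static uint32_t
fetch_r5g6b5_reference (uint16_t s)
{
    uint32_t r = (s >> 11) & 0x1f;
    uint32_t g = (s >> 5) & 0x3f;
    uint32_t b = s & 0x1f;

    /* replicate the top bits into the low bits of each widened channel */
    r = (r << 3) | (r >> 2);
    g = (g << 2) | (g >> 4);
    b = (b << 3) | (b >> 2);

    return 0xff000000 | (r << 16) | (g << 8) | b;
}
#endif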
2210
2211 static uint32_t *
2212 fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
2213 {
2214     iter->bits += iter->stride;
2215     return iter->buffer;
2216 }
2217
2218 /* Helper function for a workaround, which tries to ensure that the 0x1F001F
2219  * constant is always allocated in a register on RISC architectures.
2220  */
2221 static force_inline uint32_t
2222 convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
2223 {
2224     uint32_t a, b;
2225     a = (s >> 3) & x1F001F;
2226     b = s & 0xFC00;
2227     a |= a >> 5;
2228     a |= b >> 5;
2229     return a;
2230 }
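
/*
 * Worked example, added for clarity: for s = 0xFF336699 the helper above
 * computes a = (s >> 3) & 0x1F001F = 0x00060013 (red in bits 16-20, blue in
 * bits 0-4) and b = s & 0xFC00 = 0x6400 (green in bits 10-15); a |= a >> 5
 * copies red down into bits 11-15 and a |= b >> 5 slots green into bits
 * 5-10, giving 0x00063333.  Truncating to 16 bits yields the r5g6b5 value
 * 0x3333, i.e. r = 0x06, g = 0x19, b = 0x13.
 */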
2231
2232 static void
2233 fast_write_back_r5g6b5 (pixman_iter_t *iter)
2234 {
2235     int32_t w = iter->width;
2236     uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
2237     const uint32_t *src = iter->buffer;
2238     /* Workaround to ensure that the x1F001F variable is allocated in a register */
2239     static volatile uint32_t volatile_x1F001F = 0x1F001F;
2240     uint32_t x1F001F = volatile_x1F001F;
2241
2242     while ((w -= 4) >= 0)
2243     {
2244         uint32_t s1 = *src++;
2245         uint32_t s2 = *src++;
2246         uint32_t s3 = *src++;
2247         uint32_t s4 = *src++;
2248         *dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
2249         *dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
2250         *dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
2251         *dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
2252     }
2253     if (w & 2)
2254     {
2255         *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
2256         *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
2257     }
2258     if (w & 1)
2259     {
2260         *dst = convert_8888_to_0565_workaround (*src, x1F001F);
2261     }
2262 }
2263
2264 typedef struct
2265 {
2266     int         y;
2267     uint64_t *  buffer;
2268 } line_t;
2269
2270 typedef struct
2271 {
2272     line_t              lines[2];
2273     pixman_fixed_t      y;
2274     pixman_fixed_t      x;
2275     uint64_t            data[1];
2276 } bilinear_info_t;
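
/*
 * Note, added for clarity: 'lines' is a two-entry cache of horizontally
 * interpolated source rows, indexed by the row number's lowest bit (y & 1).
 * When the bilinear cover iterator advances to the next output scanline, at
 * most one of the two source rows it needs is new, so the row whose line->y
 * still matches is reused and only the other one is refetched by
 * fetch_horizontal().  'data' is the storage backing both line buffers;
 * fast_bilinear_cover_iter_init() allocates sizeof (*info) plus
 * (2 * width - 1) extra uint64_t, the "- 1" accounting for the data[1]
 * element already contained in the struct.
 */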
2277
2278 static void
2279 fetch_horizontal (bits_image_t *image, line_t *line,
2280                   int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
2281 {
2282     uint32_t *bits = image->bits + y * image->rowstride;
2283     int i;
2284
2285     for (i = 0; i < n; ++i)
2286     {
2287         int x0 = pixman_fixed_to_int (x);
2288         int x1 = x0 + 1;
2289         int32_t dist_x;
2290
2291         uint32_t left = *(bits + x0);
2292         uint32_t right = *(bits + x1);
2293
2294         dist_x = pixman_fixed_to_bilinear_weight (x);
2295         dist_x <<= (8 - BILINEAR_INTERPOLATION_BITS);
2296
2297 #if SIZEOF_LONG <= 4
2298         {
2299             uint32_t lag, rag, ag;
2300             uint32_t lrb, rrb, rb;
2301
2302             lag = (left & 0xff00ff00) >> 8;
2303             rag = (right & 0xff00ff00) >> 8;
2304             ag = (lag << 8) + dist_x * (rag - lag);
2305
2306             lrb = (left & 0x00ff00ff);
2307             rrb = (right & 0x00ff00ff);
2308             rb = (lrb << 8) + dist_x * (rrb - lrb);
2309
2310             *((uint32_t *)(line->buffer + i)) = ag;
2311             *((uint32_t *)(line->buffer + i) + 1) = rb;
2312         }
2313 #else
2314         {
2315             uint64_t lagrb, ragrb;
2316             uint32_t lag, rag;
2317             uint32_t lrb, rrb;
2318
2319             lag = (left & 0xff00ff00);
2320             lrb = (left & 0x00ff00ff);
2321             rag = (right & 0xff00ff00);
2322             rrb = (right & 0x00ff00ff);
2323             lagrb = (((uint64_t)lag) << 24) | lrb;
2324             ragrb = (((uint64_t)rag) << 24) | rrb;
2325
2326             line->buffer[i] = (lagrb << 8) + dist_x * (ragrb - lagrb);
2327         }
2328 #endif
2329
2330         x += ux;
2331     }
2332
2333     line->y = y;
2334 }
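
/*
 * Note, added for clarity: fetch_horizontal() does not store a8r8g8b8
 * pixels.  Each channel of the horizontally interpolated result is kept as
 * an 8.8 fixed point value: (lag << 8) + dist_x * (rag - lag) is plain
 * linear interpolation with a weight in [0, 256).  The channels live in
 * alternating 16-bit slots (alpha/green in one 32-bit half and red/blue in
 * the other, or all four packed into a single uint64_t on 64-bit builds),
 * so the vertical pass in fast_fetch_bilinear_cover() can interpolate two
 * channels per multiplication without them carrying into each other.
 */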
2335
2336 static uint32_t *
2337 fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
2338 {
2339     pixman_fixed_t fx, ux;
2340     bilinear_info_t *info = iter->data;
2341     line_t *line0, *line1;
2342     int y0, y1;
2343     int32_t dist_y;
2344     int i;
2345
2346     fx = info->x;
2347     ux = iter->image->common.transform->matrix[0][0];
2348
2349     y0 = pixman_fixed_to_int (info->y);
2350     y1 = y0 + 1;
2351     dist_y = pixman_fixed_to_bilinear_weight (info->y);
2352     dist_y <<= (8 - BILINEAR_INTERPOLATION_BITS);
2353
2354     line0 = &info->lines[y0 & 0x01];
2355     line1 = &info->lines[y1 & 0x01];
2356
2357     if (line0->y != y0)
2358     {
2359         fetch_horizontal (
2360             &iter->image->bits, line0, y0, fx, ux, iter->width);
2361     }
2362
2363     if (line1->y != y1)
2364     {
2365         fetch_horizontal (
2366             &iter->image->bits, line1, y1, fx, ux, iter->width);
2367     }
2368
2369     for (i = 0; i < iter->width; ++i)
2370     {
2371 #if SIZEOF_LONG <= 4
2372         uint32_t ta, tr, tg, tb;
2373         uint32_t ba, br, bg, bb;
2374         uint32_t tag, trb;
2375         uint32_t bag, brb;
2376         uint32_t a, r, g, b;
2377
2378         tag = *((uint32_t *)(line0->buffer + i));
2379         trb = *((uint32_t *)(line0->buffer + i) + 1);
2380         bag = *((uint32_t *)(line1->buffer + i));
2381         brb = *((uint32_t *)(line1->buffer + i) + 1);
2382
2383         ta = tag >> 16;
2384         ba = bag >> 16;
2385         a = (ta << 8) + dist_y * (ba - ta);
2386
2387         tr = trb >> 16;
2388         br = brb >> 16;
2389         r = (tr << 8) + dist_y * (br - tr);
2390
2391         tg = tag & 0xffff;
2392         bg = bag & 0xffff;
2393         g = (tg << 8) + dist_y * (bg - tg);
2394         
2395         tb = trb & 0xffff;
2396         bb = brb & 0xffff;
2397         b = (tb << 8) + dist_y * (bb - tb);
2398
2399         a = (a <<  8) & 0xff000000;
2400         r = (r <<  0) & 0x00ff0000;
2401         g = (g >>  8) & 0x0000ff00;
2402         b = (b >> 16) & 0x000000ff;
2403 #else
2404         uint64_t top = line0->buffer[i];
2405         uint64_t bot = line1->buffer[i];
2406         uint64_t tar = (top & 0xffff0000ffff0000ULL) >> 16;
2407         uint64_t bar = (bot & 0xffff0000ffff0000ULL) >> 16;
2408         uint64_t tgb = (top & 0x0000ffff0000ffffULL);
2409         uint64_t bgb = (bot & 0x0000ffff0000ffffULL);
2410         uint64_t ar, gb;
2411         uint32_t a, r, g, b;
2412
2413         ar = (tar << 8) + dist_y * (bar - tar);
2414         gb = (tgb << 8) + dist_y * (bgb - tgb);
2415
2416         a = ((ar >> 24) & 0xff000000);
2417         r = ((ar >>  0) & 0x00ff0000);
2418         g = ((gb >> 40) & 0x0000ff00);
2419         b = ((gb >> 16) & 0x000000ff);
2420 #endif
2421
2422         iter->buffer[i] = a | r | g | b;
2423     }
2424
2425     info->y += iter->image->common.transform->matrix[1][1];
2426
2427     return iter->buffer;
2428 }
2429
2430 static void
2431 bilinear_cover_iter_fini (pixman_iter_t *iter)
2432 {
2433     free (iter->data);
2434 }
2435
2436 static void
2437 fast_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info)
2438 {
2439     int width = iter->width;
2440     bilinear_info_t *info;
2441     pixman_vector_t v;
2442
2443     /* Reference point is the center of the pixel */
2444     v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
2445     v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
2446     v.vector[2] = pixman_fixed_1;
2447
2448     if (!pixman_transform_point_3d (iter->image->common.transform, &v))
2449         goto fail;
2450
2451     info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t));
2452     if (!info)
2453         goto fail;
2454
2455     info->x = v.vector[0] - pixman_fixed_1 / 2;
2456     info->y = v.vector[1] - pixman_fixed_1 / 2;
2457
2458     /* It is safe to set the y coordinates to -1 initially
2459      * because COVER_CLIP_BILINEAR ensures that we will only
2460      * be asked to fetch lines in the [0, height) interval
2461      */
2462     info->lines[0].y = -1;
2463     info->lines[0].buffer = &(info->data[0]);
2464     info->lines[1].y = -1;
2465     info->lines[1].buffer = &(info->data[width]);
2466
2467     iter->get_scanline = fast_fetch_bilinear_cover;
2468     iter->fini = bilinear_cover_iter_fini;
2469
2470     iter->data = info;
2471     return;
2472
2473 fail:
2474     /* Something went wrong, either a bad matrix or OOM; in such cases,
2475      * we don't guarantee any particular rendering.
2476      */
2477     _pixman_log_error (
2478         FUNC, "Allocation failure or bad matrix, skipping rendering\n");
2479     
2480     iter->get_scanline = _pixman_iter_get_scanline_noop;
2481     iter->fini = NULL;
2482 }
2483
2484 static uint32_t *
2485 bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter,
2486                                           const uint32_t *mask)
2487 {
2488
2489     pixman_image_t * ima = iter->image;
2490     int              offset = iter->x;
2491     int              line = iter->y++;
2492     int              width = iter->width;
2493     uint32_t *       buffer = iter->buffer;
2494
2495     bits_image_t *bits = &ima->bits;
2496     pixman_fixed_t x_top, x_bottom, x;
2497     pixman_fixed_t ux_top, ux_bottom, ux;
2498     pixman_vector_t v;
2499     uint32_t top_mask, bottom_mask;
2500     uint32_t *top_row;
2501     uint32_t *bottom_row;
2502     uint32_t *end;
2503     uint32_t zero[2] = { 0, 0 };
2504     uint32_t one = 1;
2505     int y, y1, y2;
2506     int disty;
2507     int mask_inc;
2508     int w;
2509
2510     /* reference point is the center of the pixel */
2511     v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
2512     v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
2513     v.vector[2] = pixman_fixed_1;
2514
2515     if (!pixman_transform_point_3d (bits->common.transform, &v))
2516         return iter->buffer;
2517
2518     ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0];
2519     x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2;
2520
2521     y = v.vector[1] - pixman_fixed_1/2;
2522     disty = pixman_fixed_to_bilinear_weight (y);
2523
2524     /* Load the pointers to the first and second lines from the source
2525      * image that bilinear code must read.
2526      *
2527      * The main trick in this code is the check for source lines that
2528      * fall outside of the image:
2529      *
2530      * when a line (either one) is outside, its pointer is redirected to
2531      * a dummy area filled with zeros.  To make sure that pointer never
2532      * advances afterwards, the corresponding x offset and increment
2533      * variables used inside the loop are set to zero as well.
2534      */
2535     y1 = pixman_fixed_to_int (y);
2536     y2 = y1 + 1;
2537
2538     if (y1 < 0 || y1 >= bits->height)
2539     {
2540         top_row = zero;
2541         x_top = 0;
2542         ux_top = 0;
2543     }
2544     else
2545     {
2546         top_row = bits->bits + y1 * bits->rowstride;
2547         x_top = x;
2548         ux_top = ux;
2549     }
2550
2551     if (y2 < 0 || y2 >= bits->height)
2552     {
2553         bottom_row = zero;
2554         x_bottom = 0;
2555         ux_bottom = 0;
2556     }
2557     else
2558     {
2559         bottom_row = bits->bits + y2 * bits->rowstride;
2560         x_bottom = x;
2561         ux_bottom = ux;
2562     }
2563
2564     /* Instead of checking whether the operation uses the mask in
2565      * each loop iteration, verify this only once and prepare the
2566      * variables to make the code smaller inside the loop.
2567      */
2568     if (!mask)
2569     {
2570         mask_inc = 0;
2571         mask = &one;
2572     }
2573     else
2574     {
2575         /* If we have a mask, prepare the variables to check it */
2576         mask_inc = 1;
2577     }
2578
2579     /* If both are zero, then the whole thing is zero */
2580     if (top_row == zero && bottom_row == zero)
2581     {
2582         memset (buffer, 0, width * sizeof (uint32_t));
2583         return iter->buffer;
2584     }
2585     else if (bits->format == PIXMAN_x8r8g8b8)
2586     {
2587         if (top_row == zero)
2588         {
2589             top_mask = 0;
2590             bottom_mask = 0xff000000;
2591         }
2592         else if (bottom_row == zero)
2593         {
2594             top_mask = 0xff000000;
2595             bottom_mask = 0;
2596         }
2597         else
2598         {
2599             top_mask = 0xff000000;
2600             bottom_mask = 0xff000000;
2601         }
2602     }
2603     else
2604     {
2605         top_mask = 0;
2606         bottom_mask = 0;
2607     }
2608
2609     end = buffer + width;
2610
2611     /* Zero fill to the left of the image */
2612     while (buffer < end && x < pixman_fixed_minus_1)
2613     {
2614         *buffer++ = 0;
2615         x += ux;
2616         x_top += ux_top;
2617         x_bottom += ux_bottom;
2618         mask += mask_inc;
2619     }
2620
2621     /* Left edge
2622      */
2623     while (buffer < end && x < 0)
2624     {
2625         uint32_t tr, br;
2626         int32_t distx;
2627
2628         tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask;
2629         br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
2630
2631         distx = pixman_fixed_to_bilinear_weight (x);
2632
2633         *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty);
2634
2635         x += ux;
2636         x_top += ux_top;
2637         x_bottom += ux_bottom;
2638         mask += mask_inc;
2639     }
2640
2641     /* Main part */
2642     w = pixman_int_to_fixed (bits->width - 1);
2643
2644     while (buffer < end  &&  x < w)
2645     {
2646         if (*mask)
2647         {
2648             uint32_t tl, tr, bl, br;
2649             int32_t distx;
2650
2651             tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
2652             tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask;
2653             bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
2654             br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
2655
2656             distx = pixman_fixed_to_bilinear_weight (x);
2657
2658             *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty);
2659         }
2660
2661         buffer++;
2662         x += ux;
2663         x_top += ux_top;
2664         x_bottom += ux_bottom;
2665         mask += mask_inc;
2666     }
2667
2668     /* Right Edge */
2669     w = pixman_int_to_fixed (bits->width);
2670     while (buffer < end  &&  x < w)
2671     {
2672         if (*mask)
2673         {
2674             uint32_t tl, bl;
2675             int32_t distx;
2676
2677             tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
2678             bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
2679
2680             distx = pixman_fixed_to_bilinear_weight (x);
2681
2682             *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty);
2683         }
2684
2685         buffer++;
2686         x += ux;
2687         x_top += ux_top;
2688         x_bottom += ux_bottom;
2689         mask += mask_inc;
2690     }
2691
2692     /* Zero fill to the right of the image */
2693     while (buffer < end)
2694         *buffer++ = 0;
2695
2696     return iter->buffer;
2697 }
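
/*
 * Note, added for clarity: the fetcher above walks the destination span in
 * five phases: pixels whose source footprint lies entirely left of the image
 * are zero filled, pixels straddling the left edge blend the first source
 * column with transparent black, the main loop samples two neighbouring
 * columns per pixel, pixels straddling the right edge blend the last column
 * with transparent black, and everything past that is zero filled.
 */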
2698
2699 typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
2700
2701 static force_inline void
2702 bits_image_fetch_separable_convolution_affine (pixman_image_t * image,
2703                                                int              offset,
2704                                                int              line,
2705                                                int              width,
2706                                                uint32_t *       buffer,
2707                                                const uint32_t * mask,
2708
2709                                                convert_pixel_t  convert_pixel,
2710                                                pixman_format_code_t     format,
2711                                                pixman_repeat_t  repeat_mode)
2712 {
2713     bits_image_t *bits = &image->bits;
2714     pixman_fixed_t *params = image->common.filter_params;
2715     int cwidth = pixman_fixed_to_int (params[0]);
2716     int cheight = pixman_fixed_to_int (params[1]);
2717     int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
2718     int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
2719     int x_phase_bits = pixman_fixed_to_int (params[2]);
2720     int y_phase_bits = pixman_fixed_to_int (params[3]);
2721     int x_phase_shift = 16 - x_phase_bits;
2722     int y_phase_shift = 16 - y_phase_bits;
2723     pixman_fixed_t vx, vy;
2724     pixman_fixed_t ux, uy;
2725     pixman_vector_t v;
2726     int k;
2727
2728     /* reference point is the center of the pixel */
2729     v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
2730     v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
2731     v.vector[2] = pixman_fixed_1;
2732
2733     if (!pixman_transform_point_3d (image->common.transform, &v))
2734         return;
2735
2736     ux = image->common.transform->matrix[0][0];
2737     uy = image->common.transform->matrix[1][0];
2738
2739     vx = v.vector[0];
2740     vy = v.vector[1];
2741
2742     for (k = 0; k < width; ++k)
2743     {
2744         pixman_fixed_t *y_params;
2745         int satot, srtot, sgtot, sbtot;
2746         pixman_fixed_t x, y;
2747         int32_t x1, x2, y1, y2;
2748         int32_t px, py;
2749         int i, j;
2750
2751         if (mask && !mask[k])
2752             goto next;
2753
2754         /* Round x and y to the middle of the closest phase before continuing. This
2755          * ensures that the convolution matrix is aligned correctly, since it was
2756          * positioned relative to a particular phase (and not relative to whatever
2757          * exact fraction we happen to get here).
2758          */
2759         x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
2760         y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
2761
2762         px = (x & 0xffff) >> x_phase_shift;
2763         py = (y & 0xffff) >> y_phase_shift;
2764
2765         x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
2766         y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
2767         x2 = x1 + cwidth;
2768         y2 = y1 + cheight;
2769
2770         satot = srtot = sgtot = sbtot = 0;
2771
2772         y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
2773
2774         for (i = y1; i < y2; ++i)
2775         {
2776             pixman_fixed_t fy = *y_params++;
2777
2778             if (fy)
2779             {
2780                 pixman_fixed_t *x_params = params + 4 + px * cwidth;
2781
2782                 for (j = x1; j < x2; ++j)
2783                 {
2784                     pixman_fixed_t fx = *x_params++;
2785                     int rx = j;
2786                     int ry = i;
2787                     
2788                     if (fx)
2789                     {
2790                         pixman_fixed_t f;
2791                         uint32_t pixel, mask;
2792                         uint8_t *row;
2793
2794                         mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
2795
2796                         if (repeat_mode != PIXMAN_REPEAT_NONE)
2797                         {
2798                             repeat (repeat_mode, &rx, bits->width);
2799                             repeat (repeat_mode, &ry, bits->height);
2800
2801                             row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry;
2802                             pixel = convert_pixel (row, rx) | mask;
2803                         }
2804                         else
2805                         {
2806                             if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height)
2807                             {
2808                                 pixel = 0;
2809                             }
2810                             else
2811                             {
2812                                 row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry;
2813                                 pixel = convert_pixel (row, rx) | mask;
2814                             }
2815                         }
2816
2817                         f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16;
2818                         srtot += (int)RED_8 (pixel) * f;
2819                         sgtot += (int)GREEN_8 (pixel) * f;
2820                         sbtot += (int)BLUE_8 (pixel) * f;
2821                         satot += (int)ALPHA_8 (pixel) * f;
2822                     }
2823                 }
2824             }
2825         }
2826
2827         satot = (satot + 0x8000) >> 16;
2828         srtot = (srtot + 0x8000) >> 16;
2829         sgtot = (sgtot + 0x8000) >> 16;
2830         sbtot = (sbtot + 0x8000) >> 16;
2831
2832         satot = CLIP (satot, 0, 0xff);
2833         srtot = CLIP (srtot, 0, 0xff);
2834         sgtot = CLIP (sgtot, 0, 0xff);
2835         sbtot = CLIP (sbtot, 0, 0xff);
2836
2837         buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0);
2838
2839     next:
2840         vx += ux;
2841         vy += uy;
2842     }
2843 }
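
/*
 * Worked example, added for clarity: with x_phase_bits = 4 the filter
 * provides 2^4 = 16 sub-pixel phases, so x_phase_shift = 12.  A source
 * coordinate whose 16.16 fractional part is 0x3ABC is first snapped to the
 * start of its phase (0x3000) and then offset to the middle of that phase
 * (+ 0x800 -> 0x3800); px = 0x3800 >> 12 = 3 then selects the matching row
 * of horizontal filter coefficients at params + 4 + px * cwidth.
 */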
2844
2845 static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
2846
2847 static force_inline void
2848 bits_image_fetch_bilinear_affine (pixman_image_t * image,
2849                                   int              offset,
2850                                   int              line,
2851                                   int              width,
2852                                   uint32_t *       buffer,
2853                                   const uint32_t * mask,
2854
2855                                   convert_pixel_t       convert_pixel,
2856                                   pixman_format_code_t  format,
2857                                   pixman_repeat_t       repeat_mode)
2858 {
2859     pixman_fixed_t x, y;
2860     pixman_fixed_t ux, uy;
2861     pixman_vector_t v;
2862     bits_image_t *bits = &image->bits;
2863     int i;
2864
2865     /* reference point is the center of the pixel */
2866     v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
2867     v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
2868     v.vector[2] = pixman_fixed_1;
2869
2870     if (!pixman_transform_point_3d (image->common.transform, &v))
2871         return;
2872
2873     ux = image->common.transform->matrix[0][0];
2874     uy = image->common.transform->matrix[1][0];
2875
2876     x = v.vector[0];
2877     y = v.vector[1];
2878
2879     for (i = 0; i < width; ++i)
2880     {
2881         int x1, y1, x2, y2;
2882         uint32_t tl, tr, bl, br;
2883         int32_t distx, disty;
2884         int width = image->bits.width;
2885         int height = image->bits.height;
2886         const uint8_t *row1;
2887         const uint8_t *row2;
2888
2889         if (mask && !mask[i])
2890             goto next;
2891
2892         x1 = x - pixman_fixed_1 / 2;
2893         y1 = y - pixman_fixed_1 / 2;
2894
2895         distx = pixman_fixed_to_bilinear_weight (x1);
2896         disty = pixman_fixed_to_bilinear_weight (y1);
2897
2898         y1 = pixman_fixed_to_int (y1);
2899         y2 = y1 + 1;
2900         x1 = pixman_fixed_to_int (x1);
2901         x2 = x1 + 1;
2902
2903         if (repeat_mode != PIXMAN_REPEAT_NONE)
2904         {
2905             uint32_t mask;
2906
2907             mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
2908
2909             repeat (repeat_mode, &x1, width);
2910             repeat (repeat_mode, &y1, height);
2911             repeat (repeat_mode, &x2, width);
2912             repeat (repeat_mode, &y2, height);
2913
2914             row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
2915             row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
2916
2917             tl = convert_pixel (row1, x1) | mask;
2918             tr = convert_pixel (row1, x2) | mask;
2919             bl = convert_pixel (row2, x1) | mask;
2920             br = convert_pixel (row2, x2) | mask;
2921         }
2922         else
2923         {
2924             uint32_t mask1, mask2;
2925             int bpp;
2926
2927             /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value,
2928              * which means if you use it in expressions, those
2929              * expressions become unsigned themselves. Since
2930              * the variables below can be negative in some cases,
2931              * that will lead to crashes on 64 bit architectures.
2932              *
2933              * So this line makes sure bpp is signed
2934              */
2935             bpp = PIXMAN_FORMAT_BPP (format);
2936
2937             if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0)
2938             {
2939                 buffer[i] = 0;
2940                 goto next;
2941             }
2942
2943             if (y2 == 0)
2944             {
2945                 row1 = zero;
2946                 mask1 = 0;
2947             }
2948             else
2949             {
2950                 row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
2951                 row1 += bpp / 8 * x1;
2952
2953                 mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
2954             }
2955
2956             if (y1 == height - 1)
2957             {
2958                 row2 = zero;
2959                 mask2 = 0;
2960             }
2961             else
2962             {
2963                 row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
2964                 row2 += bpp / 8 * x1;
2965
2966                 mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
2967             }
2968
2969             if (x2 == 0)
2970             {
2971                 tl = 0;
2972                 bl = 0;
2973             }
2974             else
2975             {
2976                 tl = convert_pixel (row1, 0) | mask1;
2977                 bl = convert_pixel (row2, 0) | mask2;
2978             }
2979
2980             if (x1 == width - 1)
2981             {
2982                 tr = 0;
2983                 br = 0;
2984             }
2985             else
2986             {
2987                 tr = convert_pixel (row1, 1) | mask1;
2988                 br = convert_pixel (row2, 1) | mask2;
2989             }
2990         }
2991
2992         buffer[i] = bilinear_interpolation (
2993             tl, tr, bl, br, distx, disty);
2994
2995     next:
2996         x += ux;
2997         y += uy;
2998     }
2999 }
3000
3001 static force_inline void
3002 bits_image_fetch_nearest_affine (pixman_image_t * image,
3003                                  int              offset,
3004                                  int              line,
3005                                  int              width,
3006                                  uint32_t *       buffer,
3007                                  const uint32_t * mask,
3008                                  
3009                                  convert_pixel_t        convert_pixel,
3010                                  pixman_format_code_t   format,
3011                                  pixman_repeat_t        repeat_mode)
3012 {
3013     pixman_fixed_t x, y;
3014     pixman_fixed_t ux, uy;
3015     pixman_vector_t v;
3016     bits_image_t *bits = &image->bits;
3017     int i;
3018
3019     /* reference point is the center of the pixel */
3020     v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
3021     v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
3022     v.vector[2] = pixman_fixed_1;
3023
3024     if (!pixman_transform_point_3d (image->common.transform, &v))
3025         return;
3026
3027     ux = image->common.transform->matrix[0][0];
3028     uy = image->common.transform->matrix[1][0];
3029
3030     x = v.vector[0];
3031     y = v.vector[1];
3032
3033     for (i = 0; i < width; ++i)
3034     {
3035         int width, height, x0, y0;
3036         const uint8_t *row;
3037
3038         if (mask && !mask[i])
3039             goto next;
3040         
3041         width = image->bits.width;
3042         height = image->bits.height;
3043         x0 = pixman_fixed_to_int (x - pixman_fixed_e);
3044         y0 = pixman_fixed_to_int (y - pixman_fixed_e);
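        /* Annotation: pixman_fixed_e is the smallest 16.16 increment
         * (1/65536).  Because the walk starts at the pixel centre
         * (offset + pixman_fixed_1 / 2), flooring x - pixman_fixed_e maps a
         * coordinate that lands exactly on a pixel boundary to the pixel on
         * its upper/left side, matching the nearest fetchers elsewhere in
         * pixman.
         */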
3045
3046         if (repeat_mode == PIXMAN_REPEAT_NONE &&
3047             (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width))
3048         {
3049             buffer[i] = 0;
3050         }
3051         else
3052         {
3053             uint32_t mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
3054
3055             if (repeat_mode != PIXMAN_REPEAT_NONE)
3056             {
3057                 repeat (repeat_mode, &x0, width);
3058                 repeat (repeat_mode, &y0, height);
3059             }
3060
3061             row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0;
3062
3063             buffer[i] = convert_pixel (row, x0) | mask;
3064         }
3065
3066     next:
3067         x += ux;
3068         y += uy;
3069     }
3070 }
3071
3072 static force_inline uint32_t
3073 convert_a8r8g8b8 (const uint8_t *row, int x)
3074 {
3075     return *(((uint32_t *)row) + x);
3076 }
3077
3078 static force_inline uint32_t
3079 convert_x8r8g8b8 (const uint8_t *row, int x)
3080 {
3081     return *(((uint32_t *)row) + x);
3082 }
3083
3084 static force_inline uint32_t
3085 convert_a8 (const uint8_t *row, int x)
3086 {
3087     return *(row + x) << 24;
3088 }
3089
3090 static force_inline uint32_t
3091 convert_r5g6b5 (const uint8_t *row, int x)
3092 {
3093     return convert_0565_to_0888 (*((uint16_t *)row + x));
3094 }
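
/* Annotation: the convert_* helpers above differ only in how one source
 * pixel is widened to a8r8g8b8.  convert_a8 moves the 8-bit alpha into bits
 * 24-31, and convert_r5g6b5 relies on convert_0565_to_0888 (declared in
 * pixman-private.h) to widen each 5/6-bit channel; with the usual
 * bit-replication widening, 0x07e0 (pure green in r5g6b5) becomes
 * 0x0000ff00, for example.  For formats without an alpha channel the callers
 * OR in 0xff000000 through their mask variables, so the destination buffer
 * always holds fully formed a8r8g8b8 pixels.
 */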
3095
3096 #define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode)  \
3097     static uint32_t *                                                   \
3098     bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t   *iter, \
3099                                                             const uint32_t * mask) \
3100     {                                                                   \
3101         bits_image_fetch_separable_convolution_affine (                 \
3102             iter->image,                                                \
3103             iter->x, iter->y++,                                         \
3104             iter->width,                                                \
3105             iter->buffer, mask,                                         \
3106             convert_ ## format,                                         \
3107             PIXMAN_ ## format,                                          \
3108             repeat_mode);                                               \
3109                                                                         \
3110         return iter->buffer;                                            \
3111     }
3112
3113 #define MAKE_BILINEAR_FETCHER(name, format, repeat_mode)                \
3114     static uint32_t *                                                   \
3115     bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t   *iter,   \
3116                                                const uint32_t * mask)   \
3117     {                                                                   \
3118         bits_image_fetch_bilinear_affine (iter->image,                  \
3119                                           iter->x, iter->y++,           \
3120                                           iter->width,                  \
3121                                           iter->buffer, mask,           \
3122                                           convert_ ## format,           \
3123                                           PIXMAN_ ## format,            \
3124                                           repeat_mode);                 \
3125         return iter->buffer;                                            \
3126     }
3127
3128 #define MAKE_NEAREST_FETCHER(name, format, repeat_mode)                 \
3129     static uint32_t *                                                   \
3130     bits_image_fetch_nearest_affine_ ## name (pixman_iter_t   *iter,    \
3131                                               const uint32_t * mask)    \
3132     {                                                                   \
3133         bits_image_fetch_nearest_affine (iter->image,                   \
3134                                          iter->x, iter->y++,            \
3135                                          iter->width,                   \
3136                                          iter->buffer, mask,            \
3137                                          convert_ ## format,            \
3138                                          PIXMAN_ ## format,             \
3139                                          repeat_mode);                  \
3140         return iter->buffer;                                            \
3141     }
3142
3143 #define MAKE_FETCHERS(name, format, repeat_mode)                        \
3144     MAKE_NEAREST_FETCHER (name, format, repeat_mode)                    \
3145     MAKE_BILINEAR_FETCHER (name, format, repeat_mode)                   \
3146     MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode)
3147
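/* Annotation (illustrative sketch): each MAKE_FETCHERS() line below expands,
 * via the three macros above, into a trio of thin wrappers.  For instance
 * MAKE_BILINEAR_FETCHER (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD) produces
 * roughly:
 *
 *     static uint32_t *
 *     bits_image_fetch_bilinear_affine_pad_a8r8g8b8 (pixman_iter_t  *iter,
 *                                                    const uint32_t *mask)
 *     {
 *         bits_image_fetch_bilinear_affine (iter->image,
 *                                           iter->x, iter->y++,
 *                                           iter->width,
 *                                           iter->buffer, mask,
 *                                           convert_a8r8g8b8,
 *                                           PIXMAN_a8r8g8b8,
 *                                           PIXMAN_REPEAT_PAD);
 *         return iter->buffer;
 *     }
 *
 * Because the generic fetchers are force_inline and their convert_pixel,
 * format and repeat_mode arguments are compile-time constants here, the
 * compiler can specialize each wrapper into a flattened fast path.
 */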
3148 MAKE_FETCHERS (pad_a8r8g8b8,     a8r8g8b8, PIXMAN_REPEAT_PAD)
3149 MAKE_FETCHERS (none_a8r8g8b8,    a8r8g8b8, PIXMAN_REPEAT_NONE)
3150 MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT)
3151 MAKE_FETCHERS (normal_a8r8g8b8,  a8r8g8b8, PIXMAN_REPEAT_NORMAL)
3152 MAKE_FETCHERS (pad_x8r8g8b8,     x8r8g8b8, PIXMAN_REPEAT_PAD)
3153 MAKE_FETCHERS (none_x8r8g8b8,    x8r8g8b8, PIXMAN_REPEAT_NONE)
3154 MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT)
3155 MAKE_FETCHERS (normal_x8r8g8b8,  x8r8g8b8, PIXMAN_REPEAT_NORMAL)
3156 MAKE_FETCHERS (pad_a8,           a8,       PIXMAN_REPEAT_PAD)
3157 MAKE_FETCHERS (none_a8,          a8,       PIXMAN_REPEAT_NONE)
3158 MAKE_FETCHERS (reflect_a8,       a8,       PIXMAN_REPEAT_REFLECT)
3159 MAKE_FETCHERS (normal_a8,        a8,       PIXMAN_REPEAT_NORMAL)
3160 MAKE_FETCHERS (pad_r5g6b5,       r5g6b5,   PIXMAN_REPEAT_PAD)
3161 MAKE_FETCHERS (none_r5g6b5,      r5g6b5,   PIXMAN_REPEAT_NONE)
3162 MAKE_FETCHERS (reflect_r5g6b5,   r5g6b5,   PIXMAN_REPEAT_REFLECT)
3163 MAKE_FETCHERS (normal_r5g6b5,    r5g6b5,   PIXMAN_REPEAT_NORMAL)
3164
3165 #define IMAGE_FLAGS                                                     \
3166     (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |                \
3167      FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
3168
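/* Annotation: each pixman_iter_info_t entry below pairs a pixel format and a
 * set of required image/iter flags with an optional initializer, a
 * get_scanline callback and (for destination iterators) a write_back
 * callback.  The generic iterator-initialization code in
 * pixman-implementation.c walks this table and uses the first entry whose
 * format and flags match, so more specific fast paths are listed before more
 * general ones.
 */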
3169 static const pixman_iter_info_t fast_iters[] = 
3170 {
3171     { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC,
3172       _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL },
3173
3174     { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
3175       ITER_NARROW | ITER_DEST,
3176       _pixman_iter_init_bits_stride,
3177       fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
3178     
3179     { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
3180       ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA,
3181       _pixman_iter_init_bits_stride,
3182       fast_dest_fetch_noop, fast_write_back_r5g6b5 },
3183
3184     { PIXMAN_a8r8g8b8,
3185       (FAST_PATH_STANDARD_FLAGS                 |
3186        FAST_PATH_SCALE_TRANSFORM                |
3187        FAST_PATH_BILINEAR_FILTER                |
3188        FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),
3189       ITER_NARROW | ITER_SRC,
3190       fast_bilinear_cover_iter_init,
3191       NULL, NULL
3192     },
3193
3194 #define FAST_BILINEAR_FLAGS                                             \
3195     (FAST_PATH_NO_ALPHA_MAP             |                               \
3196      FAST_PATH_NO_ACCESSORS             |                               \
3197      FAST_PATH_HAS_TRANSFORM            |                               \
3198      FAST_PATH_AFFINE_TRANSFORM         |                               \
3199      FAST_PATH_X_UNIT_POSITIVE          |                               \
3200      FAST_PATH_Y_UNIT_ZERO              |                               \
3201      FAST_PATH_NONE_REPEAT              |                               \
3202      FAST_PATH_BILINEAR_FILTER)
3203
3204     { PIXMAN_a8r8g8b8,
3205       FAST_BILINEAR_FLAGS,
3206       ITER_NARROW | ITER_SRC,
3207       NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL
3208     },
3209
3210     { PIXMAN_x8r8g8b8,
3211       FAST_BILINEAR_FLAGS,
3212       ITER_NARROW | ITER_SRC,
3213       NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL
3214     },
3215
3216 #define GENERAL_BILINEAR_FLAGS                                          \
3217     (FAST_PATH_NO_ALPHA_MAP             |                               \
3218      FAST_PATH_NO_ACCESSORS             |                               \
3219      FAST_PATH_HAS_TRANSFORM            |                               \
3220      FAST_PATH_AFFINE_TRANSFORM         |                               \
3221      FAST_PATH_BILINEAR_FILTER)
3222
3223 #define GENERAL_NEAREST_FLAGS                                           \
3224     (FAST_PATH_NO_ALPHA_MAP             |                               \
3225      FAST_PATH_NO_ACCESSORS             |                               \
3226      FAST_PATH_HAS_TRANSFORM            |                               \
3227      FAST_PATH_AFFINE_TRANSFORM         |                               \
3228      FAST_PATH_NEAREST_FILTER)
3229
3230 #define GENERAL_SEPARABLE_CONVOLUTION_FLAGS                             \
3231     (FAST_PATH_NO_ALPHA_MAP            |                                \
3232      FAST_PATH_NO_ACCESSORS            |                                \
3233      FAST_PATH_HAS_TRANSFORM           |                                \
3234      FAST_PATH_AFFINE_TRANSFORM        |                                \
3235      FAST_PATH_SEPARABLE_CONVOLUTION_FILTER)
3236     
3237 #define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)   \
3238     { PIXMAN_ ## format,                                                \
3239       GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
3240       ITER_NARROW | ITER_SRC,                                           \
3241       NULL, bits_image_fetch_separable_convolution_affine_ ## name, NULL \
3242     },
3243
3244 #define BILINEAR_AFFINE_FAST_PATH(name, format, repeat)                 \
3245     { PIXMAN_ ## format,                                                \
3246       GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,         \
3247       ITER_NARROW | ITER_SRC,                                           \
3248       NULL, bits_image_fetch_bilinear_affine_ ## name, NULL,            \
3249     },
3250
3251 #define NEAREST_AFFINE_FAST_PATH(name, format, repeat)                  \
3252     { PIXMAN_ ## format,                                                \
3253       GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,          \
3254       ITER_NARROW | ITER_SRC,                                           \
3255       NULL, bits_image_fetch_nearest_affine_ ## name, NULL              \
3256     },
3257
3258 #define AFFINE_FAST_PATHS(name, format, repeat)                         \
3259     SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)        \
3260     BILINEAR_AFFINE_FAST_PATH(name, format, repeat)                     \
3261     NEAREST_AFFINE_FAST_PATH(name, format, repeat)
3262     
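    /* Annotation (illustrative sketch): each AFFINE_FAST_PATHS() line below
     * contributes three iterator entries.  For example the bilinear entry of
     * AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD) expands roughly to:
     *
     *     { PIXMAN_a8r8g8b8,
     *       GENERAL_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT,
     *       ITER_NARROW | ITER_SRC,
     *       NULL, bits_image_fetch_bilinear_affine_pad_a8r8g8b8, NULL,
     *     },
     *
     * tying the flag combination to the specialized fetcher generated by
     * MAKE_FETCHERS above.
     */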
3263     AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD)
3264     AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE)
3265     AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
3266     AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL)
3267     AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD)
3268     AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE)
3269     AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
3270     AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL)
3271     AFFINE_FAST_PATHS (pad_a8, a8, PAD)
3272     AFFINE_FAST_PATHS (none_a8, a8, NONE)
3273     AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT)
3274     AFFINE_FAST_PATHS (normal_a8, a8, NORMAL)
3275     AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD)
3276     AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE)
3277     AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT)
3278     AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL)
3279
3280     { PIXMAN_null },
3281 };
3282
3283 pixman_implementation_t *
3284 _pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
3285 {
3286     pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
3287
3288     imp->fill = fast_path_fill;
3289     imp->iter_info = fast_iters;
3290
3291     return imp;
3292 }
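
/* Annotation: a minimal sketch of how this constructor is typically slotted
 * into pixman's implementation delegation chain (the exact call site lives
 * in pixman-implementation.c and may differ between versions):
 *
 *     pixman_implementation_t *imp;
 *
 *     imp = _pixman_implementation_create_general ();
 *     imp = _pixman_implementation_create_fast_path (imp);
 *     // ...CPU-specific implementations (MMX/SSE2/NEON/...) wrap imp next
 *
 * Operations not covered by c_fast_paths or fast_iters fall through to the
 * fallback implementation passed in here.
 */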