/*
 * Copyright © 2007 Luca Barbato
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Luca Barbato not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission. Luca Barbato makes no representations about the
 * suitability of this software for any purpose. It is provided "as is"
 * without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author: Luca Barbato (lu_zero@gentoo.org)
 *
 * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "pixman-private.h"
#include "pixman-combine32.h"
#include <altivec.h>

#define AVV(x...) {x}
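/*
 * AVV(...) expands to a brace-enclosed vector literal; everything below
 * depends on it and on <altivec.h>.
 *
 * splat_alpha broadcasts each pixel's alpha byte across all four channel
 * bytes of that pixel.  The permute indices assume big-endian lane order,
 * with alpha in byte 0 of each 32-bit ARGB pixel.
 */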
static force_inline vector unsigned int
splat_alpha (vector unsigned int pix)
{
    return vec_perm (pix, pix,
                     (vector unsigned char)AVV (
                         0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
                         0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
}
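/*
 * Per-channel multiply of two pixels, treating each byte as a value in
 * [0, 255] and dividing the product by 255 with round-to-nearest.  A
 * scalar sketch of the same computation on one pair of bytes:
 *
 *     uint32_t t = p * a + 0x80;
 *     result = (t + (t >> 8)) >> 8;
 *
 * The vector code below performs this on eight 16-bit lanes at a time,
 * once for the high halves and once for the low halves of the pixels.
 */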
static force_inline vector unsigned int
pix_multiply (vector unsigned int p, vector unsigned int a)
{
    vector unsigned short hi, lo, mod;

    hi = (vector unsigned short)
        vec_mergeh ((vector unsigned char)AVV (0),
                    (vector unsigned char)p);

    mod = (vector unsigned short)
        vec_mergeh ((vector unsigned char)AVV (0),
                    (vector unsigned char)a);

    hi = vec_mladd (hi, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));

    hi = vec_sr (hi, vec_splat_u16 (8));

    lo = (vector unsigned short)
        vec_mergel ((vector unsigned char)AVV (0),
                    (vector unsigned char)p);

    mod = (vector unsigned short)
        vec_mergel ((vector unsigned char)AVV (0),
                    (vector unsigned char)a);

    lo = vec_mladd (lo, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));

    lo = vec_sr (lo, vec_splat_u16 (8));

    return (vector unsigned int)vec_packsu (hi, lo);
}
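/*
 * Saturating per-byte add: channels clamp at 255 instead of wrapping,
 * which is what the ADD operator requires.
 */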
static force_inline vector unsigned int
pix_add (vector unsigned int a, vector unsigned int b)
{
    return (vector unsigned int)vec_adds ((vector unsigned char)a,
                                          (vector unsigned char)b);
}
static force_inline vector unsigned int
pix_add_mul (vector unsigned int x,
             vector unsigned int a,
             vector unsigned int y,
             vector unsigned int b)
{
    vector unsigned int t1, t2;

    t1 = pix_multiply (x, a);
    t2 = pix_multiply (y, b);

    return pix_add (t1, t2);
}
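/*
 * Bitwise complement.  For 8-bit channels, ~x is 255 - x, so negating a
 * splatted alpha vector yields the "one minus alpha" factor used by the
 * OVER, OUT and XOR operators.
 */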
static force_inline vector unsigned int
negate (vector unsigned int src)
{
    return vec_nor (src, src);
}
/* dest*~srca + src */
static force_inline vector unsigned int
over (vector unsigned int src,
      vector unsigned int srca,
      vector unsigned int dest)
{
    vector unsigned char tmp = (vector unsigned char)
        pix_multiply (dest, negate (srca));

    tmp = vec_adds ((vector unsigned char)src, tmp);
    return (vector unsigned int)tmp;
}
/* in == pix_multiply */
#define in_over(src, srca, mask, dest) \
    over (pix_multiply (src, mask),    \
          pix_multiply (srca, mask), dest)
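/*
 * in_over fuses IN and OVER for the component-alpha case: both the
 * source and its alpha are first multiplied by the per-channel mask,
 * then composited over the destination.
 */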
#define COMPUTE_SHIFT_MASK(source) \
    source ## _mask = vec_lvsl (0, source);

#define COMPUTE_SHIFT_MASKS(dest, source) \
    source ## _mask = vec_lvsl (0, source);

#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
    mask ## _mask = vec_lvsl (0, mask);         \
    source ## _mask = vec_lvsl (0, source);

/* notice you have to declare temp vars...
 * Note: tmp3 and tmp4 must remain untouched!
 */
#define LOAD_VECTORS(dest, source)                \
    tmp1 = (typeof(tmp1))vec_ld (0, source);      \
    tmp2 = (typeof(tmp2))vec_ld (15, source);     \
    v ## source = (typeof(v ## source))           \
        vec_perm (tmp1, tmp2, source ## _mask);   \
    v ## dest = (typeof(v ## dest))vec_ld (0, dest);

#define LOAD_VECTORSC(dest, source, mask)         \
    tmp1 = (typeof(tmp1))vec_ld (0, source);      \
    tmp2 = (typeof(tmp2))vec_ld (15, source);     \
    v ## source = (typeof(v ## source))           \
        vec_perm (tmp1, tmp2, source ## _mask);   \
    tmp1 = (typeof(tmp1))vec_ld (0, mask);        \
    v ## dest = (typeof(v ## dest))vec_ld (0, dest); \
    tmp2 = (typeof(tmp2))vec_ld (15, mask);       \
    v ## mask = (typeof(v ## mask))               \
        vec_perm (tmp1, tmp2, mask ## _mask);

#define LOAD_VECTORSM(dest, source, mask)         \
    LOAD_VECTORSC (dest, source, mask)            \
    v ## source = pix_multiply (v ## source,      \
                                splat_alpha (v ## mask));

#define STORE_VECTOR(dest) \
    vec_st ((vector unsigned int) v ## dest, 0, dest);
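/*
 * Every combiner below follows the same three-phase shape: a scalar loop
 * that advances dest to a 16-byte boundary, a vector loop that processes
 * four pixels per iteration (sources and masks are loaded unaligned via
 * the vec_lvsl/vec_perm shift masks computed above, while dest is loaded
 * and stored aligned), and a scalar loop for the remaining width % 4
 * pixels.
 */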
static void
vmx_combine_over_u_no_mask (uint32_t *      dest,
                            const uint32_t *src,
                            int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = over (vsrc, splat_alpha (vsrc), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);

        dest[i] = d;
    }
}
static void
vmx_combine_over_u_mask (uint32_t *      dest,
                         const uint32_t *src,
                         const uint32_t *mask,
                         int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, src_mask, mask_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t m = ALPHA_8 (*mask++);
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t ia;

        UN8x4_MUL_UN8 (s, m);

        ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = over (vsrc, splat_alpha (vsrc), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia;

        UN8x4_MUL_UN8 (s, m);

        ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);

        dest[i] = d;
    }
}
static void
vmx_combine_over_u (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    if (mask)
        vmx_combine_over_u_mask (dest, src, mask, width);
    else
        vmx_combine_over_u_no_mask (dest, src, width);
}
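/* OVER_REVERSE: dest + src * ~dest.alpha (destination kept on top) */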
static void
vmx_combine_over_reverse_u_no_mask (uint32_t *      dest,
                                    const uint32_t *src,
                                    int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = over (vdest, splat_alpha (vdest), vsrc);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);

        dest[i] = s;
    }
}
static void
vmx_combine_over_reverse_u_mask (uint32_t *      dest,
                                 const uint32_t *src,
                                 const uint32_t *mask,
                                 int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, src_mask, mask_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t m = ALPHA_8 (*mask++);
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = over (vdest, splat_alpha (vdest), vsrc);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, m);

        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);

        dest[i] = s;
    }
}
static void
vmx_combine_over_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t              op,
                            uint32_t *               dest,
                            const uint32_t *         src,
                            const uint32_t *         mask,
                            int                      width)
{
    if (mask)
        vmx_combine_over_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_over_reverse_u_no_mask (dest, src, width);
}
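/* IN: src * dest.alpha */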
static void
vmx_combine_in_u_no_mask (uint32_t *      dest,
                          const uint32_t *src,
                          int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t s = *src++;
        uint32_t a = ALPHA_8 (*dest);

        UN8x4_MUL_UN8 (s, a);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vsrc, splat_alpha (vdest));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}
static void
vmx_combine_in_u_mask (uint32_t *      dest,
                       const uint32_t *src,
                       const uint32_t *mask,
                       int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, src_mask, mask_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t m = ALPHA_8 (*mask++);
        uint32_t s = *src++;
        uint32_t a = ALPHA_8 (*dest);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vsrc, splat_alpha (vdest));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}
static void
vmx_combine_in_u (pixman_implementation_t *imp,
                  pixman_op_t              op,
                  uint32_t *               dest,
                  const uint32_t *         src,
                  const uint32_t *         mask,
                  int                      width)
{
    if (mask)
        vmx_combine_in_u_mask (dest, src, mask, width);
    else
        vmx_combine_in_u_no_mask (dest, src, width);
}
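/* IN_REVERSE: dest * src.alpha */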
static void
vmx_combine_in_reverse_u_no_mask (uint32_t *      dest,
                                  const uint32_t *src,
                                  int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t d = *dest;
        uint32_t a = ALPHA_8 (*src++);

        UN8x4_MUL_UN8 (d, a);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vdest, splat_alpha (vsrc));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}
static void
vmx_combine_in_reverse_u_mask (uint32_t *      dest,
                               const uint32_t *src,
                               const uint32_t *mask,
                               int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, src_mask, mask_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t m = ALPHA_8 (*mask++);
        uint32_t d = *dest;
        uint32_t a = ALPHA_8 (*src++);

        UN8x4_MUL_UN8 (a, m);

        UN8x4_MUL_UN8 (d, a);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vdest, splat_alpha (vsrc));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (a, m);

        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}
static void
vmx_combine_in_reverse_u (pixman_implementation_t *imp,
                          pixman_op_t              op,
                          uint32_t *               dest,
                          const uint32_t *         src,
                          const uint32_t *         mask,
                          int                      width)
{
    if (mask)
        vmx_combine_in_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_in_reverse_u_no_mask (dest, src, width);
}
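/* OUT: src * ~dest.alpha */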
static void
vmx_combine_out_u_no_mask (uint32_t *      dest,
                           const uint32_t *src,
                           int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t s = *src++;
        uint32_t a = ALPHA_8 (~(*dest));

        UN8x4_MUL_UN8 (s, a);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}
static void
vmx_combine_out_u_mask (uint32_t *      dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, src_mask, mask_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t m = ALPHA_8 (*mask++);
        uint32_t s = *src++;
        uint32_t a = ALPHA_8 (~(*dest));

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}
static void
vmx_combine_out_u (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               dest,
                   const uint32_t *         src,
                   const uint32_t *         mask,
                   int                      width)
{
    if (mask)
        vmx_combine_out_u_mask (dest, src, mask, width);
    else
        vmx_combine_out_u_no_mask (dest, src, width);
}
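/* OUT_REVERSE: dest * ~src.alpha */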
static void
vmx_combine_out_reverse_u_no_mask (uint32_t *      dest,
                                   const uint32_t *src,
                                   int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t d = *dest;
        uint32_t a = ALPHA_8 (~(*src++));

        UN8x4_MUL_UN8 (d, a);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (~src[i]);

        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}
static void
vmx_combine_out_reverse_u_mask (uint32_t *      dest,
                                const uint32_t *src,
                                const uint32_t *mask,
                                int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, src_mask, mask_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t m = ALPHA_8 (*mask++);
        uint32_t d = *dest;
        uint32_t a = *src++;

        UN8x4_MUL_UN8 (a, m);

        a = ALPHA_8 (~a);
        UN8x4_MUL_UN8 (d, a);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t d = dest[i];
        uint32_t a = src[i];

        UN8x4_MUL_UN8 (a, m);

        a = ALPHA_8 (~a);
        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}
static void
vmx_combine_out_reverse_u (pixman_implementation_t *imp,
                           pixman_op_t              op,
                           uint32_t *               dest,
                           const uint32_t *         src,
                           const uint32_t *         mask,
                           int                      width)
{
    if (mask)
        vmx_combine_out_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_out_reverse_u_no_mask (dest, src, width);
}
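/* ATOP: src * dest.alpha + dest * ~src.alpha */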
static void
vmx_combine_atop_u_no_mask (uint32_t *      dest,
                            const uint32_t *src,
                            int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t dest_a = ALPHA_8 (d);
        uint32_t src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t dest_a = ALPHA_8 (d);
        uint32_t src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);

        dest[i] = s;
    }
}
static void
vmx_combine_atop_u_mask (uint32_t *      dest,
                         const uint32_t *src,
                         const uint32_t *mask,
                         int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, src_mask, mask_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t m = ALPHA_8 (*mask++);
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t dest_a = ALPHA_8 (d);
        uint32_t src_ia;

        UN8x4_MUL_UN8 (s, m);

        src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t dest_a = ALPHA_8 (d);
        uint32_t src_ia;

        UN8x4_MUL_UN8 (s, m);

        src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);

        dest[i] = s;
    }
}
static void
vmx_combine_atop_u (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    if (mask)
        vmx_combine_atop_u_mask (dest, src, mask, width);
    else
        vmx_combine_atop_u_no_mask (dest, src, width);
}
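/* ATOP_REVERSE: dest * src.alpha + src * ~dest.alpha */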
static void
vmx_combine_atop_reverse_u_no_mask (uint32_t *      dest,
                                    const uint32_t *src,
                                    int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t src_a = ALPHA_8 (s);
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
                             vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_a = ALPHA_8 (s);
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);

        dest[i] = s;
    }
}
static void
vmx_combine_atop_reverse_u_mask (uint32_t *      dest,
                                 const uint32_t *src,
                                 const uint32_t *mask,
                                 int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, src_mask, mask_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t m = ALPHA_8 (*mask++);
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t src_a;
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        src_a = ALPHA_8 (s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
                             vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_a;
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        src_a = ALPHA_8 (s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);

        dest[i] = s;
    }
}
static void
vmx_combine_atop_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t              op,
                            uint32_t *               dest,
                            const uint32_t *         src,
                            const uint32_t *         mask,
                            int                      width)
{
    if (mask)
        vmx_combine_atop_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_atop_reverse_u_no_mask (dest, src, width);
}
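/* XOR: src * ~dest.alpha + dest * ~src.alpha */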
static void
vmx_combine_xor_u_no_mask (uint32_t *      dest,
                           const uint32_t *src,
                           int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t src_ia = ALPHA_8 (~s);
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_ia = ALPHA_8 (~s);
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);

        dest[i] = s;
    }
}
static void
vmx_combine_xor_u_mask (uint32_t *      dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, src_mask, mask_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t m = ALPHA_8 (*mask++);
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t src_ia;
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_ia;
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);

        dest[i] = s;
    }
}
static void
vmx_combine_xor_u (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               dest,
                   const uint32_t *         src,
                   const uint32_t *         mask,
                   int                      width)
{
    if (mask)
        vmx_combine_xor_u_mask (dest, src, mask, width);
    else
        vmx_combine_xor_u_no_mask (dest, src, width);
}
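/* ADD: src + dest, saturating per channel */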
static void
vmx_combine_add_u_no_mask (uint32_t *      dest,
                           const uint32_t *src,
                           int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t s = *src++;
        uint32_t d = *dest;

        UN8x4_ADD_UN8x4 (d, s);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKS (dest, src);
    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add (vsrc, vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_ADD_UN8x4 (d, s);

        dest[i] = d;
    }
}
static void
vmx_combine_add_u_mask (uint32_t *      dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, src_mask, mask_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t m = ALPHA_8 (*mask++);
        uint32_t s = *src++;
        uint32_t d = *dest;

        UN8x4_MUL_UN8 (s, m);
        UN8x4_ADD_UN8x4 (d, s);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add (vsrc, vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_MUL_UN8 (s, m);
        UN8x4_ADD_UN8x4 (d, s);

        dest[i] = d;
    }
}
static void
vmx_combine_add_u (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               dest,
                   const uint32_t *         src,
                   const uint32_t *         mask,
                   int                      width)
{
    if (mask)
        vmx_combine_add_u_mask (dest, src, mask, width);
    else
        vmx_combine_add_u_no_mask (dest, src, width);
}
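/*
 * Component-alpha ("ca") combiners: the mask carries an independent
 * alpha per channel, so it is applied with full 8x4 multiplies rather
 * than a single splatted alpha.
 */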
static void
vmx_combine_src_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, mask_mask, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t a = *mask++;
        uint32_t s = *src++;

        UN8x4_MUL_UN8x4 (s, a);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (vsrc, vmask);

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];

        UN8x4_MUL_UN8x4 (s, a);

        dest[i] = s;
    }
}
static void
vmx_combine_over_ca (pixman_implementation_t *imp,
                     pixman_op_t              op,
                     uint32_t *               dest,
                     const uint32_t *         src,
                     const uint32_t *         mask,
                     int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, mask_mask, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t a = *mask++;
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);

        dest[i] = d;
    }
}
static void
vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
                             pixman_op_t              op,
                             uint32_t *               dest,
                             const uint32_t *         src,
                             const uint32_t *         mask,
                             int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, mask_mask, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t a = *mask++;
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t ida = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ida = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);

        dest[i] = s;
    }
}
static void
vmx_combine_in_ca (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               dest,
                   const uint32_t *         src,
                   const uint32_t *         mask,
                   int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, mask_mask, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t a = *mask++;
        uint32_t s = *src++;
        uint32_t da = ALPHA_8 (*dest);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (s, da);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t da = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (s, da);

        dest[i] = s;
    }
}
static void
vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
                           pixman_op_t              op,
                           uint32_t *               dest,
                           const uint32_t *         src,
                           const uint32_t *         mask,
                           int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, mask_mask, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t a = *mask++;
        uint32_t d = *dest;
        uint32_t sa = ALPHA_8 (*src++);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, a);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, a);

        dest[i] = d;
    }
}
static void
vmx_combine_out_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, mask_mask, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t a = *mask++;
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (s, da);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (
            pix_multiply (vsrc, vmask), splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (s, da);

        dest[i] = s;
    }
}
static void
vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
                            pixman_op_t              op,
                            uint32_t *               dest,
                            const uint32_t *         src,
                            const uint32_t *         mask,
                            int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, mask_mask, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t a = *mask++;
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, ~a);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (
            vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))));

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, ~a);

        dest[i] = d;
    }
}
static void
vmx_combine_atop_ca (pixman_implementation_t *imp,
                     pixman_op_t              op,
                     uint32_t *               dest,
                     const uint32_t *         src,
                     const uint32_t *         mask,
                     int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask, vsrca;
    vector unsigned char tmp1, tmp2, mask_mask, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t a = *mask++;
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vsrca = splat_alpha (vsrc);

        vsrc = pix_multiply (vsrc, vmask);
        vmask = pix_multiply (vmask, vsrca);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             negate (vmask), vdest);

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

        dest[i] = d;
    }
}
static void
vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
                             pixman_op_t              op,
                             uint32_t *               dest,
                             const uint32_t *         src,
                             const uint32_t *         mask,
                             int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, mask_mask, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t a = *mask++;
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add_mul (vdest,
                             pix_multiply (vmask, splat_alpha (vsrc)),
                             pix_multiply (vsrc, vmask),
                             negate (splat_alpha (vdest)));

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);

        dest[i] = d;
    }
}
static void
vmx_combine_xor_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, mask_mask, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t a = *mask++;
        uint32_t s = *src++;
        uint32_t d = *dest;
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

        *dest++ = d;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add_mul (vdest,
                             negate (pix_multiply (vmask, splat_alpha (vsrc))),
                             pix_multiply (vsrc, vmask),
                             negate (splat_alpha (vdest)));

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

        dest[i] = d;
    }
}
static void
vmx_combine_add_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, mask_mask, src_mask;

    while (width && ((uintptr_t)dest & 15))
    {
        uint32_t a = *mask++;
        uint32_t s = *src++;
        uint32_t d = *dest;

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_ADD_UN8x4 (s, d);

        *dest++ = s;
        width--;
    }

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add (pix_multiply (vsrc, vmask), vdest);

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_ADD_UN8x4 (s, d);

        dest[i] = s;
    }
}
static const pixman_fast_path_t vmx_fast_paths[] =
{
    { PIXMAN_OP_NONE },
};
pixman_implementation_t *
_pixman_implementation_create_vmx (pixman_implementation_t *fallback)
{
    pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);

    /* Set up function pointers */

    imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u;
    imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u;
    imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u;
    imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u;
    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u;
    imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u;
    imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;

    imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u;

    imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca;
    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca;
    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca;
    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
    imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;

    return imp;
}
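/*
 * A minimal sketch of how this implementation is meant to be used
 * (assuming the usual delegation setup in pixman's CPU detection code):
 * the VMX implementation wraps a fallback, overrides only the combiners
 * above, and delegates everything else.
 *
 *     pixman_implementation_t *imp =
 *         _pixman_implementation_create_vmx (fallback);
 */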