mirror of
https://github.com/SwallowOS/image_pixman
synced 2025-12-26 15:21:04 +08:00
vmx: add helper functions
This patch adds the following helper functions to promote code reuse,
hide BE/LE differences, and improve maintainability.
All of the functions were defined as static force_inline.
Names were copied from pixman-sse2.c so conversion of fast-paths between
sse2 and vmx would be easier from now on. Therefore, I tried to keep the
input/output of the functions to be as close as possible to the sse2
definitions.
The functions are:
- load_128_aligned : load 128-bit from a 16-byte aligned memory
address into a vector
- load_128_unaligned : load 128-bit from memory into a vector,
without guarantee of alignment for the
source pointer
- save_128_aligned : save 128-bit vector into a 16-byte aligned
memory address
- create_mask_16_128 : take a 16-bit value and fill with it
a new vector
- create_mask_1x32_128 : take a 32-bit pointer and fill a new
vector with the 32-bit value from that pointer
- create_mask_32_128 : take a 32-bit value and fill with it
a new vector
- unpack_32_1x128 : unpack 32-bit value into a vector
- unpacklo_128_16x8 : unpack the eight low 8-bit values of a vector
- unpackhi_128_16x8 : unpack the eight high 8-bit values of a vector
- unpacklo_128_8x16 : unpack the four low 16-bit values of a vector
- unpackhi_128_8x16 : unpack the four high 16-bit values of a vector
- unpack_128_2x128 : unpack the eight low 8-bit values of a vector
into one vector and the eight high 8-bit
values into another vector
- unpack_128_2x128_16 : unpack the four low 16-bit values of a vector
into one vector and the four high 16-bit
values into another vector
- unpack_565_to_8888 : unpack an RGB_565 vector to 8888 vector
- pack_1x128_32 : pack a vector and return the LSB 32-bit of it
- pack_2x128_128 : pack two vectors into one and return it
- negate_2x128 : xor two vectors with mask_00ff (separately)
- is_opaque : returns whether all the pixels contained in
the vector are opaque
- is_zero : returns whether the vector equals 0
- is_transparent : returns whether all the pixels
contained in the vector are transparent
- expand_pixel_8_1x128 : expand an 8-bit pixel into lower 8 bytes of a
vector
- expand_alpha_1x128 : expand alpha from vector and return the new
vector
- expand_alpha_2x128 : expand alpha from one vector and another alpha
from a second vector
- expand_alpha_rev_2x128 : expand a reversed alpha from one vector and
another reversed alpha from a second vector
- pix_multiply_2x128 : do pix_multiply for two vectors (separately)
- over_2x128 : perform over op. on two vectors
- in_over_2x128 : perform in-over op. on two vectors
v2: removed expand_pixel_32_1x128 as it was not used by any function and
its implementation was erroneous
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Acked-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
This commit is contained in:
@@ -30,10 +30,19 @@
|
||||
#endif
|
||||
#include "pixman-private.h"
|
||||
#include "pixman-combine32.h"
|
||||
#include "pixman-inlines.h"
|
||||
#include <altivec.h>
|
||||
|
||||
#define AVV(x...) {x}
|
||||
|
||||
static vector unsigned int mask_00ff;
|
||||
static vector unsigned int mask_ff000000;
|
||||
static vector unsigned int mask_red;
|
||||
static vector unsigned int mask_green;
|
||||
static vector unsigned int mask_blue;
|
||||
static vector unsigned int mask_565_fix_rb;
|
||||
static vector unsigned int mask_565_fix_g;
|
||||
|
||||
static force_inline vector unsigned int
|
||||
splat_alpha (vector unsigned int pix)
|
||||
{
|
||||
@@ -233,6 +242,464 @@ do \
|
||||
#define STORE_VECTOR(dest) \
|
||||
vec_st ((vector unsigned int) v ## dest, 0, dest);
|
||||
|
||||
/* load 4 pixels from a 16-byte boundary aligned address */
|
||||
static force_inline vector unsigned int
|
||||
load_128_aligned (const uint32_t* src)
|
||||
{
|
||||
return *((vector unsigned int *) src);
|
||||
}
|
||||
|
||||
/* load 4 pixels from a unaligned address */
|
||||
static force_inline vector unsigned int
|
||||
load_128_unaligned (const uint32_t* src)
|
||||
{
|
||||
vector unsigned int vsrc;
|
||||
DECLARE_SRC_MASK_VAR;
|
||||
|
||||
COMPUTE_SHIFT_MASK (src);
|
||||
LOAD_VECTOR (src);
|
||||
|
||||
return vsrc;
|
||||
}
|
||||
|
||||
/* save 4 pixels on a 16-byte boundary aligned address */
|
||||
static force_inline void
|
||||
save_128_aligned (uint32_t* data,
|
||||
vector unsigned int vdata)
|
||||
{
|
||||
STORE_VECTOR(data)
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
create_mask_16_128 (uint16_t mask)
|
||||
{
|
||||
uint16_t* src;
|
||||
vector unsigned short vsrc;
|
||||
DECLARE_SRC_MASK_VAR;
|
||||
|
||||
src = &mask;
|
||||
|
||||
COMPUTE_SHIFT_MASK (src);
|
||||
LOAD_VECTOR (src);
|
||||
return (vector unsigned int) vec_splat(vsrc, 0);
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
create_mask_1x32_128 (const uint32_t *src)
|
||||
{
|
||||
vector unsigned int vsrc;
|
||||
DECLARE_SRC_MASK_VAR;
|
||||
|
||||
COMPUTE_SHIFT_MASK (src);
|
||||
LOAD_VECTOR (src);
|
||||
return vec_splat(vsrc, 0);
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
create_mask_32_128 (uint32_t mask)
|
||||
{
|
||||
return create_mask_1x32_128(&mask);
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
unpack_32_1x128 (uint32_t data)
|
||||
{
|
||||
vector unsigned int vdata = {0, 0, 0, data};
|
||||
vector unsigned short lo;
|
||||
|
||||
lo = (vector unsigned short)
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
vec_mergel ((vector unsigned char) AVV(0),
|
||||
(vector unsigned char) vdata);
|
||||
#else
|
||||
vec_mergel ((vector unsigned char) vdata,
|
||||
(vector unsigned char) AVV(0));
|
||||
#endif
|
||||
|
||||
return (vector unsigned int) lo;
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2)
|
||||
{
|
||||
vector unsigned char lo;
|
||||
|
||||
/* unpack to short */
|
||||
lo = (vector unsigned char)
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
vec_mergel ((vector unsigned char) data2,
|
||||
(vector unsigned char) data1);
|
||||
#else
|
||||
vec_mergel ((vector unsigned char) data1,
|
||||
(vector unsigned char) data2);
|
||||
#endif
|
||||
|
||||
return (vector unsigned int) lo;
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2)
|
||||
{
|
||||
vector unsigned char hi;
|
||||
|
||||
/* unpack to short */
|
||||
hi = (vector unsigned char)
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
vec_mergeh ((vector unsigned char) data2,
|
||||
(vector unsigned char) data1);
|
||||
#else
|
||||
vec_mergeh ((vector unsigned char) data1,
|
||||
(vector unsigned char) data2);
|
||||
#endif
|
||||
|
||||
return (vector unsigned int) hi;
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2)
|
||||
{
|
||||
vector unsigned short lo;
|
||||
|
||||
/* unpack to char */
|
||||
lo = (vector unsigned short)
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
vec_mergel ((vector unsigned short) data2,
|
||||
(vector unsigned short) data1);
|
||||
#else
|
||||
vec_mergel ((vector unsigned short) data1,
|
||||
(vector unsigned short) data2);
|
||||
#endif
|
||||
|
||||
return (vector unsigned int) lo;
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2)
|
||||
{
|
||||
vector unsigned short hi;
|
||||
|
||||
/* unpack to char */
|
||||
hi = (vector unsigned short)
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
vec_mergeh ((vector unsigned short) data2,
|
||||
(vector unsigned short) data1);
|
||||
#else
|
||||
vec_mergeh ((vector unsigned short) data1,
|
||||
(vector unsigned short) data2);
|
||||
#endif
|
||||
|
||||
return (vector unsigned int) hi;
|
||||
}
|
||||
|
||||
static force_inline void
|
||||
unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2,
|
||||
vector unsigned int* data_lo, vector unsigned int* data_hi)
|
||||
{
|
||||
*data_lo = unpacklo_128_16x8(data1, data2);
|
||||
*data_hi = unpackhi_128_16x8(data1, data2);
|
||||
}
|
||||
|
||||
static force_inline void
|
||||
unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2,
|
||||
vector unsigned int* data_lo, vector unsigned int* data_hi)
|
||||
{
|
||||
*data_lo = unpacklo_128_8x16(data1, data2);
|
||||
*data_hi = unpackhi_128_8x16(data1, data2);
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
unpack_565_to_8888 (vector unsigned int lo)
|
||||
{
|
||||
vector unsigned int r, g, b, rb, t;
|
||||
|
||||
r = vec_and (vec_sl(lo, create_mask_32_128(8)), mask_red);
|
||||
g = vec_and (vec_sl(lo, create_mask_32_128(5)), mask_green);
|
||||
b = vec_and (vec_sl(lo, create_mask_32_128(3)), mask_blue);
|
||||
|
||||
rb = vec_or (r, b);
|
||||
t = vec_and (rb, mask_565_fix_rb);
|
||||
t = vec_sr (t, create_mask_32_128(5));
|
||||
rb = vec_or (rb, t);
|
||||
|
||||
t = vec_and (g, mask_565_fix_g);
|
||||
t = vec_sr (t, create_mask_32_128(6));
|
||||
g = vec_or (g, t);
|
||||
|
||||
return vec_or (rb, g);
|
||||
}
|
||||
|
||||
static force_inline uint32_t
|
||||
pack_1x128_32 (vector unsigned int data)
|
||||
{
|
||||
vector unsigned char vpack;
|
||||
|
||||
vpack = vec_packsu((vector unsigned short) data,
|
||||
(vector unsigned short) AVV(0));
|
||||
|
||||
return vec_extract((vector unsigned int) vpack, 1);
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
pack_2x128_128 (vector unsigned int lo, vector unsigned int hi)
|
||||
{
|
||||
vector unsigned char vpack;
|
||||
|
||||
vpack = vec_packsu((vector unsigned short) hi,
|
||||
(vector unsigned short) lo);
|
||||
|
||||
return (vector unsigned int) vpack;
|
||||
}
|
||||
|
||||
static force_inline void
|
||||
negate_2x128 (vector unsigned int data_lo,
|
||||
vector unsigned int data_hi,
|
||||
vector unsigned int* neg_lo,
|
||||
vector unsigned int* neg_hi)
|
||||
{
|
||||
*neg_lo = vec_xor (data_lo, mask_00ff);
|
||||
*neg_hi = vec_xor (data_hi, mask_00ff);
|
||||
}
|
||||
|
||||
static force_inline int
|
||||
is_opaque (vector unsigned int x)
|
||||
{
|
||||
uint32_t cmp_result;
|
||||
vector bool int ffs = vec_cmpeq(x, x);
|
||||
|
||||
cmp_result = vec_all_eq(x, ffs);
|
||||
|
||||
return (cmp_result & 0x8888) == 0x8888;
|
||||
}
|
||||
|
||||
static force_inline int
|
||||
is_zero (vector unsigned int x)
|
||||
{
|
||||
uint32_t cmp_result;
|
||||
|
||||
cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));
|
||||
|
||||
return cmp_result == 0xffff;
|
||||
}
|
||||
|
||||
static force_inline int
|
||||
is_transparent (vector unsigned int x)
|
||||
{
|
||||
uint32_t cmp_result;
|
||||
|
||||
cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));
|
||||
return (cmp_result & 0x8888) == 0x8888;
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
expand_pixel_8_1x128 (uint8_t data)
|
||||
{
|
||||
vector unsigned int vdata;
|
||||
|
||||
vdata = unpack_32_1x128 ((uint32_t) data);
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
return vec_perm (vdata, vdata,
|
||||
(vector unsigned char)AVV (
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
|
||||
#else
|
||||
return vec_perm (vdata, vdata,
|
||||
(vector unsigned char)AVV (
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
|
||||
#endif
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
expand_alpha_1x128 (vector unsigned int data)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
return vec_perm (data, data,
|
||||
(vector unsigned char)AVV (
|
||||
0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
|
||||
0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
|
||||
#else
|
||||
return vec_perm (data, data,
|
||||
(vector unsigned char)AVV (
|
||||
0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
|
||||
0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
|
||||
#endif
|
||||
}
|
||||
|
||||
static force_inline void
|
||||
expand_alpha_2x128 (vector unsigned int data_lo,
|
||||
vector unsigned int data_hi,
|
||||
vector unsigned int* alpha_lo,
|
||||
vector unsigned int* alpha_hi)
|
||||
{
|
||||
|
||||
*alpha_lo = expand_alpha_1x128(data_lo);
|
||||
*alpha_hi = expand_alpha_1x128(data_hi);
|
||||
}
|
||||
|
||||
static force_inline void
|
||||
expand_alpha_rev_2x128 (vector unsigned int data_lo,
|
||||
vector unsigned int data_hi,
|
||||
vector unsigned int* alpha_lo,
|
||||
vector unsigned int* alpha_hi)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
*alpha_lo = vec_perm (data_lo, data_lo,
|
||||
(vector unsigned char)AVV (
|
||||
0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
|
||||
0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
|
||||
|
||||
*alpha_hi = vec_perm (data_hi, data_hi,
|
||||
(vector unsigned char)AVV (
|
||||
0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
|
||||
0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
|
||||
#else
|
||||
*alpha_lo = vec_perm (data_lo, data_lo,
|
||||
(vector unsigned char)AVV (
|
||||
0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
|
||||
0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
|
||||
|
||||
*alpha_hi = vec_perm (data_hi, data_hi,
|
||||
(vector unsigned char)AVV (
|
||||
0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
|
||||
0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
|
||||
#endif
|
||||
}
|
||||
|
||||
static force_inline void
|
||||
pix_multiply_2x128 (vector unsigned int* data_lo,
|
||||
vector unsigned int* data_hi,
|
||||
vector unsigned int* alpha_lo,
|
||||
vector unsigned int* alpha_hi,
|
||||
vector unsigned int* ret_lo,
|
||||
vector unsigned int* ret_hi)
|
||||
{
|
||||
*ret_lo = pix_multiply(*data_lo, *alpha_lo);
|
||||
*ret_hi = pix_multiply(*data_hi, *alpha_hi);
|
||||
}
|
||||
|
||||
static force_inline void
|
||||
over_2x128 (vector unsigned int* src_lo,
|
||||
vector unsigned int* src_hi,
|
||||
vector unsigned int* alpha_lo,
|
||||
vector unsigned int* alpha_hi,
|
||||
vector unsigned int* dst_lo,
|
||||
vector unsigned int* dst_hi)
|
||||
{
|
||||
vector unsigned int t1, t2;
|
||||
|
||||
negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);
|
||||
|
||||
pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);
|
||||
|
||||
*dst_lo = (vector unsigned int)
|
||||
vec_adds ((vector unsigned char) *src_lo,
|
||||
(vector unsigned char) *dst_lo);
|
||||
|
||||
*dst_hi = (vector unsigned int)
|
||||
vec_adds ((vector unsigned char) *src_hi,
|
||||
(vector unsigned char) *dst_hi);
|
||||
}
|
||||
|
||||
static force_inline void
|
||||
in_over_2x128 (vector unsigned int* src_lo,
|
||||
vector unsigned int* src_hi,
|
||||
vector unsigned int* alpha_lo,
|
||||
vector unsigned int* alpha_hi,
|
||||
vector unsigned int* mask_lo,
|
||||
vector unsigned int* mask_hi,
|
||||
vector unsigned int* dst_lo,
|
||||
vector unsigned int* dst_hi)
|
||||
{
|
||||
vector unsigned int s_lo, s_hi;
|
||||
vector unsigned int a_lo, a_hi;
|
||||
|
||||
pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
|
||||
pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
|
||||
|
||||
over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
|
||||
}
|
||||
|
||||
static force_inline uint32_t
|
||||
core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst)
|
||||
{
|
||||
uint8_t a;
|
||||
vector unsigned int vmxs;
|
||||
|
||||
a = src >> 24;
|
||||
|
||||
if (a == 0xff)
|
||||
{
|
||||
return src;
|
||||
}
|
||||
else if (src)
|
||||
{
|
||||
vmxs = unpack_32_1x128 (src);
|
||||
return pack_1x128_32(
|
||||
over(vmxs, expand_alpha_1x128 (vmxs), unpack_32_1x128 (dst)));
|
||||
}
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
static force_inline uint32_t
|
||||
combine1 (const uint32_t *ps, const uint32_t *pm)
|
||||
{
|
||||
uint32_t s = *ps;
|
||||
|
||||
if (pm)
|
||||
{
|
||||
vector unsigned int ms, mm;
|
||||
|
||||
mm = unpack_32_1x128 (*pm);
|
||||
mm = expand_alpha_1x128 (mm);
|
||||
|
||||
ms = unpack_32_1x128 (s);
|
||||
ms = pix_multiply (ms, mm);
|
||||
|
||||
s = pack_1x128_32 (ms);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static force_inline vector unsigned int
|
||||
combine4 (const uint32_t* ps, const uint32_t* pm)
|
||||
{
|
||||
vector unsigned int vmx_src_lo, vmx_src_hi;
|
||||
vector unsigned int vmx_msk_lo, vmx_msk_hi;
|
||||
vector unsigned int s;
|
||||
|
||||
if (pm)
|
||||
{
|
||||
vmx_msk_lo = load_128_unaligned(pm);
|
||||
|
||||
if (is_transparent(vmx_msk_lo))
|
||||
return (vector unsigned int) AVV(0);
|
||||
}
|
||||
|
||||
s = load_128_unaligned(ps);
|
||||
|
||||
if (pm)
|
||||
{
|
||||
unpack_128_2x128(s, (vector unsigned int) AVV(0),
|
||||
&vmx_src_lo, &vmx_src_hi);
|
||||
|
||||
unpack_128_2x128(vmx_msk_lo, (vector unsigned int) AVV(0),
|
||||
&vmx_msk_lo, &vmx_msk_hi);
|
||||
|
||||
expand_alpha_2x128(vmx_msk_lo, vmx_msk_hi, &vmx_msk_lo, &vmx_msk_hi);
|
||||
|
||||
pix_multiply_2x128(&vmx_src_lo, &vmx_src_hi,
|
||||
&vmx_msk_lo, &vmx_msk_hi,
|
||||
&vmx_src_lo, &vmx_src_hi);
|
||||
|
||||
s = pack_2x128_128(vmx_src_lo, vmx_src_hi);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static void
|
||||
vmx_combine_over_u_no_mask (uint32_t * dest,
|
||||
const uint32_t *src,
|
||||
@@ -2080,6 +2547,15 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
|
||||
{
|
||||
pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);
|
||||
|
||||
/* VMX constants */
|
||||
mask_00ff = create_mask_16_128 (0x00ff);
|
||||
mask_ff000000 = create_mask_32_128 (0xff000000);
|
||||
mask_red = create_mask_32_128 (0x00f80000);
|
||||
mask_green = create_mask_32_128 (0x0000fc00);
|
||||
mask_blue = create_mask_32_128 (0x000000f8);
|
||||
mask_565_fix_rb = create_mask_32_128 (0x00e000e0);
|
||||
mask_565_fix_g = create_mask_32_128 (0x0000c000);
|
||||
|
||||
/* Set up function pointers */
|
||||
|
||||
imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
|
||||
|
||||
Reference in New Issue
Block a user