vmx: add helper functions

This patch adds the following helper functions to enable code reuse,
hide BE/LE differences and improve maintainability.

All of the functions were defined as static force_inline.

Names were copied from pixman-sse2.c so conversion of fast-paths between
sse2 and vmx would be easier from now on. Therefore, I tried to keep the
input/output of the functions to be as close as possible to the sse2
definitions.

The functions are:

- load_128_aligned       : load 128-bit from a 16-byte aligned memory
                           address into a vector

- load_128_unaligned     : load 128-bit from memory into a vector,
                           without guarantee of alignment for the
                           source pointer

- save_128_aligned       : save 128-bit vector into a 16-byte aligned
                           memory address

- create_mask_16_128     : take a 16-bit value and fill a new vector
                           with it

- create_mask_1x32_128   : take a pointer to a 32-bit value and fill a
                           new vector with the value it points to

- create_mask_32_128     : take a 32-bit value and fill a new vector
                           with it

- unpack_32_1x128        : unpack 32-bit value into a vector

- unpacklo_128_16x8      : unpack the eight low 8-bit values of a vector

- unpackhi_128_16x8      : unpack the eight high 8-bit values of a vector

- unpacklo_128_8x16      : unpack the four low 16-bit values of a vector

- unpackhi_128_8x16      : unpack the four high 16-bit values of a vector

- unpack_128_2x128       : unpack the eight low 8-bit values of a vector
                           into one vector and the eight high 8-bit
                           values into another vector

- unpack_128_2x128_16    : unpack the four low 16-bit values of a vector
                           into one vector and the four high 16-bit
                           values into another vector

- unpack_565_to_8888     : unpack an RGB_565 vector to 8888 vector

- pack_1x128_32          : pack a vector and return the LSB 32-bit of it

- pack_2x128_128         : pack two vectors into one and return it

- negate_2x128           : xor two vectors with mask_00ff (separately)

- is_opaque              : returns whether all the pixels contained in
                           the vector are opaque

- is_zero                : returns whether the vector equals 0

- is_transparent         : returns whether all the pixels
                           contained in the vector are transparent

- expand_pixel_8_1x128   : expand an 8-bit pixel into lower 8 bytes of a
                           vector

- expand_alpha_1x128     : expand alpha from vector and return the new
                           vector

- expand_alpha_2x128     : expand alpha from one vector and another alpha
                           from a second vector

- expand_alpha_rev_2x128 : expand a reversed alpha from one vector and
                           another reversed alpha from a second vector

- pix_multiply_2x128     : do pix_multiply for two vectors (separately)

- over_2x128             : perform over op. on two vectors

- in_over_2x128          : perform in-over op. on two vectors

v2: removed expand_pixel_32_1x128 as it was not used by any function and
its implementation was erroneous

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Acked-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
This commit is contained in:
Oded Gabbay
2015-06-28 09:42:08 +03:00
parent 034149537b
commit c12ee95089

View File

@@ -30,10 +30,19 @@
#endif
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
#include <altivec.h>
#define AVV(x...) {x}
/* 128-bit constant masks used by the helpers below.  They are
 * initialized once in _pixman_implementation_create_vmx () (see the
 * bottom of this file) before any fast path can run. */
static vector unsigned int mask_00ff;       /* 0x00ff in each 16-bit lane */
static vector unsigned int mask_ff000000;   /* 0xff000000: alpha byte of each a8r8g8b8 pixel */
static vector unsigned int mask_red;        /* 0x00f80000 */
static vector unsigned int mask_green;      /* 0x0000fc00 */
static vector unsigned int mask_blue;       /* 0x000000f8 */
static vector unsigned int mask_565_fix_rb; /* 0x00e000e0: low bits to replicate for r/b */
static vector unsigned int mask_565_fix_g;  /* 0x0000c000: low bits to replicate for g */
static force_inline vector unsigned int
splat_alpha (vector unsigned int pix)
{
@@ -233,6 +242,464 @@ do \
#define STORE_VECTOR(dest) \
vec_st ((vector unsigned int) v ## dest, 0, dest);
/* load 4 pixels from a 16-byte boundary aligned address */
static force_inline vector unsigned int
load_128_aligned (const uint32_t* src)
{
return *((vector unsigned int *) src);
}
/* load 4 pixels from an unaligned address */
static force_inline vector unsigned int
load_128_unaligned (const uint32_t* src)
{
    vector unsigned int vsrc;
    DECLARE_SRC_MASK_VAR;

    /* COMPUTE_SHIFT_MASK / LOAD_VECTOR are file-local macros defined
     * earlier in this file; presumably they implement the usual AltiVec
     * aligned-load + vec_perm sequence for unaligned access — confirm
     * against the macro definitions. */
    COMPUTE_SHIFT_MASK (src);
    LOAD_VECTOR (src);

    return vsrc;
}
/* save 4 pixels on a 16-byte boundary aligned address */
static force_inline void
save_128_aligned (uint32_t* data,
                  vector unsigned int vdata)
{
    /* Expands to vec_st ((vector unsigned int) vvdata, 0, data);
     * vec_st ignores the low address bits, hence the alignment
     * requirement on 'data'. */
    STORE_VECTOR(data)
}
/* Replicate a 16-bit value into all eight 16-bit lanes of a vector. */
static force_inline vector unsigned int
create_mask_16_128 (uint16_t mask)
{
    uint16_t* src;
    vector unsigned short vsrc;
    DECLARE_SRC_MASK_VAR;

    src = &mask;

    /* Load the value through the unaligned-load macros, then splat
     * element 0 across the vector.
     * NOTE(review): this relies on LOAD_VECTOR placing *src in lane 0
     * and on reading past the 2-byte object being harmless (vec_ld
     * works on 16-byte units) — confirm against the macro bodies. */
    COMPUTE_SHIFT_MASK (src);
    LOAD_VECTOR (src);
    return (vector unsigned int) vec_splat(vsrc, 0);
}
/* Replicate the 32-bit value pointed to by 'src' into all four lanes. */
static force_inline vector unsigned int
create_mask_1x32_128 (const uint32_t *src)
{
    vector unsigned int vsrc;
    DECLARE_SRC_MASK_VAR;

    /* Unaligned load of *src followed by a splat of lane 0.
     * NOTE(review): assumes LOAD_VECTOR leaves *src in lane 0 — confirm
     * against the macro definition earlier in this file. */
    COMPUTE_SHIFT_MASK (src);
    LOAD_VECTOR (src);
    return vec_splat(vsrc, 0);
}
/* Replicate a 32-bit value into all four lanes of a vector. */
static force_inline vector unsigned int
create_mask_32_128 (uint32_t mask)
{
    /* Take the address of a local copy and reuse the pointer-based
     * helper. */
    uint32_t value = mask;

    return create_mask_1x32_128 (&value);
}
/* Widen one 32-bit pixel to four zero-extended 16-bit components by
 * interleaving its bytes with zeros.  The merge operand order is
 * swapped between BE and LE so the result matches the SSE2
 * unpack_32_1x128 layout.
 * NOTE(review): exact lane placement depends on vec_mergel endian
 * remapping — confirm on both targets. */
static force_inline vector unsigned int
unpack_32_1x128 (uint32_t data)
{
    /* place 'data' in the last element so the low-half merge sees it */
    vector unsigned int vdata = {0, 0, 0, data};
    vector unsigned short lo;

    lo = (vector unsigned short)
#ifdef WORDS_BIGENDIAN
        vec_mergel ((vector unsigned char) AVV(0),
                    (vector unsigned char) vdata);
#else
        vec_mergel ((vector unsigned char) vdata,
                    (vector unsigned char) AVV(0));
#endif

    return (vector unsigned int) lo;
}
/* Interleave the low eight bytes of data1 and data2 into 16-bit lanes
 * (SSE2 _mm_unpacklo_epi8 semantics; see pixman-sse2.c).  Only the
 * operand order differs between BE and LE. */
static force_inline vector unsigned int
unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2)
{
    vector unsigned char lo;

    /* unpack to short */
    lo = (vector unsigned char)
#ifdef WORDS_BIGENDIAN
        vec_mergel ((vector unsigned char) data2,
                    (vector unsigned char) data1);
#else
        vec_mergel ((vector unsigned char) data1,
                    (vector unsigned char) data2);
#endif

    return (vector unsigned int) lo;
}
/* Interleave the high eight bytes of data1 and data2 into 16-bit lanes
 * (SSE2 _mm_unpackhi_epi8 semantics).  Note this uses vec_mergeh: the
 * "hi/lo" naming follows SSE2, which is reversed relative to the
 * AltiVec mergeh/mergel naming. */
static force_inline vector unsigned int
unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2)
{
    vector unsigned char hi;

    /* unpack to short */
    hi = (vector unsigned char)
#ifdef WORDS_BIGENDIAN
        vec_mergeh ((vector unsigned char) data2,
                    (vector unsigned char) data1);
#else
        vec_mergeh ((vector unsigned char) data1,
                    (vector unsigned char) data2);
#endif

    return (vector unsigned int) hi;
}
/* Interleave the low four 16-bit lanes of data1 and data2 into 32-bit
 * lanes (SSE2 _mm_unpacklo_epi16 semantics). */
static force_inline vector unsigned int
unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2)
{
    vector unsigned short lo;

    /* unpack to char */
    lo = (vector unsigned short)
#ifdef WORDS_BIGENDIAN
        vec_mergel ((vector unsigned short) data2,
                    (vector unsigned short) data1);
#else
        vec_mergel ((vector unsigned short) data1,
                    (vector unsigned short) data2);
#endif

    return (vector unsigned int) lo;
}
/* Interleave the high four 16-bit lanes of data1 and data2 into 32-bit
 * lanes (SSE2 _mm_unpackhi_epi16 semantics; vec_mergeh because the
 * SSE2 hi/lo naming is reversed relative to AltiVec). */
static force_inline vector unsigned int
unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2)
{
    vector unsigned short hi;

    /* unpack to char */
    hi = (vector unsigned short)
#ifdef WORDS_BIGENDIAN
        vec_mergeh ((vector unsigned short) data2,
                    (vector unsigned short) data1);
#else
        vec_mergeh ((vector unsigned short) data1,
                    (vector unsigned short) data2);
#endif

    return (vector unsigned int) hi;
}
/* Widen the 8-bit lanes of (data1, data2) to 16-bit lanes: the low
 * half goes to *data_lo, the high half to *data_hi. */
static force_inline void
unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2,
                  vector unsigned int* data_lo, vector unsigned int* data_hi)
{
    vector unsigned int lo = unpacklo_128_16x8 (data1, data2);
    vector unsigned int hi = unpackhi_128_16x8 (data1, data2);

    *data_lo = lo;
    *data_hi = hi;
}
/* Widen the 16-bit lanes of (data1, data2) to 32-bit lanes: the low
 * half goes to *data_lo, the high half to *data_hi. */
static force_inline void
unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2,
                     vector unsigned int* data_lo, vector unsigned int* data_hi)
{
    vector unsigned int lo = unpacklo_128_8x16 (data1, data2);
    vector unsigned int hi = unpackhi_128_8x16 (data1, data2);

    *data_lo = lo;
    *data_hi = hi;
}
/* Expand r5g6b5 pixels to 8888: shift each field into its 888
 * position, then replicate the top field bits into the freshly opened
 * low bits so full-intensity values map to 0xff. */
static force_inline vector unsigned int
unpack_565_to_8888 (vector unsigned int lo)
{
    vector unsigned int red, green, blue, redblue, fix;

    red   = vec_and (vec_sl (lo, create_mask_32_128 (8)), mask_red);
    green = vec_and (vec_sl (lo, create_mask_32_128 (5)), mask_green);
    blue  = vec_and (vec_sl (lo, create_mask_32_128 (3)), mask_blue);

    redblue = vec_or (red, blue);
    fix     = vec_sr (vec_and (redblue, mask_565_fix_rb),
                      create_mask_32_128 (5));
    redblue = vec_or (redblue, fix);

    fix   = vec_sr (vec_and (green, mask_565_fix_g), create_mask_32_128 (6));
    green = vec_or (green, fix);

    return vec_or (redblue, green);
}
/* Pack the four 16-bit components of 'data' back into one 32-bit pixel
 * using saturation. */
static force_inline uint32_t
pack_1x128_32 (vector unsigned int data)
{
    vector unsigned char vpack;

    /* a zero vector supplies the other half of the pack */
    vpack = vec_packsu((vector unsigned short) data,
                        (vector unsigned short) AVV(0));

    /* NOTE(review): extracting element 1 assumes the packed pixel lands
     * in the second 32-bit lane on both BE and LE — confirm against the
     * unpack_32_1x128 layout. */
    return vec_extract((vector unsigned int) vpack, 1);
}
/* Saturate-pack the 16-bit lanes of 'hi' and 'lo' back into one vector
 * of bytes (inverse of unpack_128_2x128; note the hi-first operand
 * order). */
static force_inline vector unsigned int
pack_2x128_128 (vector unsigned int lo, vector unsigned int hi)
{
    return (vector unsigned int) vec_packsu ((vector unsigned short) hi,
                                             (vector unsigned short) lo);
}
/* Complement each 8-bit-in-16-bit component of both halves:
 * x -> 0xff - x (xor with mask_00ff). */
static force_inline void
negate_2x128 (vector unsigned int data_lo,
              vector unsigned int data_hi,
              vector unsigned int* neg_lo,
              vector unsigned int* neg_hi)
{
    vector unsigned int lo = vec_xor (data_lo, mask_00ff);
    vector unsigned int hi = vec_xor (data_hi, mask_00ff);

    *neg_lo = lo;
    *neg_hi = hi;
}
/* Return non-zero when every pixel in 'x' has alpha == 0xff.
 *
 * Fix: the previous code did
 *     cmp_result = vec_all_eq (x, ffs);
 *     return (cmp_result & 0x8888) == 0x8888;
 * which ports the SSE2 _mm_movemask_epi8 idiom literally.  But the
 * AltiVec vec_all_eq predicate returns 0 or 1, not a per-byte bitmask,
 * so the expression was always false and the opaque fast path could
 * never be taken.  Test the alpha byte of each 32-bit pixel directly. */
static force_inline int
is_opaque (vector unsigned int x)
{
    return vec_all_eq (vec_and (x, mask_ff000000), mask_ff000000);
}
/* Return non-zero when the whole vector is zero.
 *
 * Fix: vec_all_eq returns 1 when every element matches and 0 otherwise;
 * the previous code compared that against 0xffff (an SSE2 movemask
 * value), so this predicate was always false. */
static force_inline int
is_zero (vector unsigned int x)
{
    return vec_all_eq (x, (vector unsigned int) AVV (0));
}
/* Return non-zero when every pixel in 'x' has alpha == 0 (matching the
 * SSE2 is_transparent, which masks the movemask with 0x8888 to test
 * only the alpha bytes).
 *
 * Fix: the previous code masked the 0/1 result of the vec_all_eq
 * predicate with 0x8888, so it was always false. */
static force_inline int
is_transparent (vector unsigned int x)
{
    return vec_all_eq (vec_and (x, mask_ff000000),
                       (vector unsigned int) AVV (0));
}
/* Expand an 8-bit value into a vector of 16-bit components: widen it
 * with unpack_32_1x128, then use vec_perm to replicate the 16-bit lane
 * holding the value across one half of the vector while copying the
 * other half unchanged.
 * NOTE(review): intended to mirror the SSE2 expand_pixel_8_1x128
 * (shufflelo with 0,0,0,0) — confirm the permute indices pick the lane
 * that actually holds the widened byte on each endianness. */
static force_inline vector unsigned int
expand_pixel_8_1x128 (uint8_t data)
{
    vector unsigned int vdata;

    vdata = unpack_32_1x128 ((uint32_t) data);

#ifdef WORDS_BIGENDIAN
    return vec_perm (vdata, vdata,
                     (vector unsigned char)AVV (
                         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                         0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
#else
    return vec_perm (vdata, vdata,
                     (vector unsigned char)AVV (
                         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                         0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
#endif
}
/* Replicate the alpha component of each pixel across all four 16-bit
 * lanes of that pixel's half of the vector (input is in unpacked
 * 16-bit-per-component form).  The permute indices differ per
 * endianness because the alpha lane sits at the opposite end of each
 * 8-byte half. */
static force_inline vector unsigned int
expand_alpha_1x128 (vector unsigned int data)
{
#ifdef WORDS_BIGENDIAN
    return vec_perm (data, data,
                     (vector unsigned char)AVV (
                         0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
                         0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
#else
    return vec_perm (data, data,
                     (vector unsigned char)AVV (
                         0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
                         0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
#endif
}
/* Expand alpha on both halves of an unpacked pixel pair; in-place
 * operation (alpha_lo/alpha_hi may point at the inputs' storage in the
 * caller). */
static force_inline void
expand_alpha_2x128 (vector unsigned int data_lo,
                    vector unsigned int data_hi,
                    vector unsigned int* alpha_lo,
                    vector unsigned int* alpha_hi)
{
    vector unsigned int lo = expand_alpha_1x128 (data_lo);
    vector unsigned int hi = expand_alpha_1x128 (data_hi);

    *alpha_lo = lo;
    *alpha_hi = hi;
}
/* Like expand_alpha_2x128 but for pixels whose alpha sits at the
 * opposite (reversed) end of each pixel: the BE permute table here is
 * the LE table of expand_alpha_1x128 and vice versa. */
static force_inline void
expand_alpha_rev_2x128 (vector unsigned int data_lo,
                        vector unsigned int data_hi,
                        vector unsigned int* alpha_lo,
                        vector unsigned int* alpha_hi)
{
#ifdef WORDS_BIGENDIAN
    *alpha_lo = vec_perm (data_lo, data_lo,
                          (vector unsigned char)AVV (
                              0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
                              0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));

    *alpha_hi = vec_perm (data_hi, data_hi,
                          (vector unsigned char)AVV (
                              0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
                              0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
#else
    *alpha_lo = vec_perm (data_lo, data_lo,
                          (vector unsigned char)AVV (
                              0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
                              0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));

    *alpha_hi = vec_perm (data_hi, data_hi,
                          (vector unsigned char)AVV (
                              0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
                              0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
#endif
}
/* Per-component multiply of data by alpha on both halves.  The ret
 * pointers may alias the corresponding data/alpha pointers (callers
 * pass dst as both input and output). */
static force_inline void
pix_multiply_2x128 (vector unsigned int* data_lo,
                    vector unsigned int* data_hi,
                    vector unsigned int* alpha_lo,
                    vector unsigned int* alpha_hi,
                    vector unsigned int* ret_lo,
                    vector unsigned int* ret_hi)
{
    vector unsigned int lo = pix_multiply (*data_lo, *alpha_lo);
    vector unsigned int hi = pix_multiply (*data_hi, *alpha_hi);

    *ret_lo = lo;
    *ret_hi = hi;
}
/* In-place OVER on both halves of an unpacked pixel pair:
 * dst = src + dst * (255 - alpha), using saturating byte adds.
 * 'src' is expected to be premultiplied; dst_lo/dst_hi are updated
 * in place. */
static force_inline void
over_2x128 (vector unsigned int* src_lo,
            vector unsigned int* src_hi,
            vector unsigned int* alpha_lo,
            vector unsigned int* alpha_hi,
            vector unsigned int* dst_lo,
            vector unsigned int* dst_hi)
{
    vector unsigned int t1, t2;

    /* t1/t2 = 255 - alpha */
    negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);

    /* dst *= (255 - alpha), written back through dst_lo/dst_hi */
    pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);

    /* dst += src with per-byte saturation */
    *dst_lo = (vector unsigned int)
                  vec_adds ((vector unsigned char) *src_lo,
                            (vector unsigned char) *dst_lo);

    *dst_hi = (vector unsigned int)
                  vec_adds ((vector unsigned char) *src_hi,
                            (vector unsigned char) *dst_hi);
}
/* IN-OVER on both halves: multiply src and its alpha by the mask, then
 * OVER the result onto dst (dst_lo/dst_hi updated in place). */
static force_inline void
in_over_2x128 (vector unsigned int* src_lo,
               vector unsigned int* src_hi,
               vector unsigned int* alpha_lo,
               vector unsigned int* alpha_hi,
               vector unsigned int* mask_lo,
               vector unsigned int* mask_hi,
               vector unsigned int* dst_lo,
               vector unsigned int* dst_hi)
{
    vector unsigned int masked_src_lo, masked_src_hi;
    vector unsigned int masked_alpha_lo, masked_alpha_hi;

    /* src IN mask */
    pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi,
                        &masked_src_lo, &masked_src_hi);

    /* alpha IN mask */
    pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi,
                        &masked_alpha_lo, &masked_alpha_hi);

    /* (src IN mask) OVER dst */
    over_2x128 (&masked_src_lo, &masked_src_hi,
                &masked_alpha_lo, &masked_alpha_hi,
                dst_lo, dst_hi);
}
/* Scalar OVER of one a8r8g8b8 pixel onto another, with the two trivial
 * fast paths: fully-opaque source wins outright, zero source leaves
 * dst untouched. */
static force_inline uint32_t
core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst)
{
    uint8_t a = src >> 24;

    if (a == 0xff)
        return src;

    if (src)
    {
        vector unsigned int vsrc = unpack_32_1x128 (src);

        return pack_1x128_32 (
            over (vsrc, expand_alpha_1x128 (vsrc), unpack_32_1x128 (dst)));
    }

    return dst;
}
/* Load one source pixel; when a mask is present, multiply the pixel by
 * the mask's expanded alpha. */
static force_inline uint32_t
combine1 (const uint32_t *ps, const uint32_t *pm)
{
    uint32_t s = *ps;

    if (pm)
    {
        vector unsigned int vmask, vsrc;

        vmask = expand_alpha_1x128 (unpack_32_1x128 (*pm));
        vsrc  = unpack_32_1x128 (s);

        s = pack_1x128_32 (pix_multiply (vsrc, vmask));
    }

    return s;
}
/* Load four source pixels; when a mask is present, multiply each pixel
 * by its mask pixel's alpha.  Returns the combined source vector. */
static force_inline vector unsigned int
combine4 (const uint32_t* ps, const uint32_t* pm)
{
    vector unsigned int vmx_src_lo, vmx_src_hi;
    vector unsigned int vmx_msk_lo, vmx_msk_hi;
    vector unsigned int s;

    if (pm)
    {
        vmx_msk_lo = load_128_unaligned(pm);

        /* all four mask pixels transparent: combined source is zero,
         * skip loading/processing the source entirely */
        if (is_transparent(vmx_msk_lo))
            return (vector unsigned int) AVV(0);
    }

    s = load_128_unaligned(ps);

    if (pm)
    {
        /* widen source and mask to 16-bit components */
        unpack_128_2x128(s, (vector unsigned int) AVV(0),
                            &vmx_src_lo, &vmx_src_hi);

        unpack_128_2x128(vmx_msk_lo, (vector unsigned int) AVV(0),
                            &vmx_msk_lo, &vmx_msk_hi);

        /* src *= mask.alpha, then repack to 8-bit components */
        expand_alpha_2x128(vmx_msk_lo, vmx_msk_hi, &vmx_msk_lo, &vmx_msk_hi);

        pix_multiply_2x128(&vmx_src_lo, &vmx_src_hi,
                           &vmx_msk_lo, &vmx_msk_hi,
                           &vmx_src_lo, &vmx_src_hi);

        s = pack_2x128_128(vmx_src_lo, vmx_src_hi);
    }

    return s;
}
static void
vmx_combine_over_u_no_mask (uint32_t * dest,
const uint32_t *src,
@@ -2080,6 +2547,15 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);
/* VMX constants */
mask_00ff = create_mask_16_128 (0x00ff);
mask_ff000000 = create_mask_32_128 (0xff000000);
mask_red = create_mask_32_128 (0x00f80000);
mask_green = create_mask_32_128 (0x0000fc00);
mask_blue = create_mask_32_128 (0x000000f8);
mask_565_fix_rb = create_mask_32_128 (0x00e000e0);
mask_565_fix_g = create_mask_32_128 (0x0000c000);
/* Set up function pointers */
imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;