Commit 6455817

Keep fix_eqMask in vectors for RVV decoding
Avoids vector<->int transitions
1 parent de6122a commit 6455817
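
This change replaces the word-at-a-time fix_eqMask loop with an equivalent computation done entirely in vector registers. The underlying bit trick resolves runs of '=' characters: in yEnc, '=' escapes the byte that follows it, so within a run of consecutive '=' bytes only every other one is a real escape character. A minimal scalar sketch of the trick (illustrative only; the upstream fix_eqMask template's exact signature isn't shown in this diff):

#include <cstdint>

// Given a bitmask of '=' positions, plus the same mask shifted left by one
// bit (bit 0 carrying the escape state from the previous block), clear the
// '=' bits which are themselves escaped, keeping only real escape characters.
static uint64_t fix_eqMask_sketch(uint64_t mask, uint64_t maskShift1) {
    const uint64_t evenBits = 0x5555555555555555ULL;  // bits 0, 2, 4, ...
    uint64_t groupStart = mask & ~maskShift1;         // first '=' of each run
    uint64_t evenStart = groupStart & evenBits;       // runs starting on even positions
    // adding evenStart carries through each such run, flipping alternate bits
    uint64_t oddGroups = mask + evenStart;
    return (oddGroups ^ evenBits) & mask;             // every other '=' of each run survives
}

The vector code below performs the same steps with mask registers; the one complication is that `mask + evenStart` must behave as a single wide integer add, which is where the new carry loop and mask_lshift overload come in.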

File tree

1 file changed: +42 -38 lines
src/decoder_rvv.cc (+42 -38)

@@ -29,6 +29,17 @@ static inline vbool4_t mask_lshift(vbool4_t m, unsigned shiftIn, size_t vl) {
         RV_MASK_CAST(4, 8, mvl), RV_MASK_CAST(4, 8, mvr), vl
     );
 }
+template<int shift>
+static inline vbool64_t mask_lshift(vbool64_t m, unsigned shiftIn, size_t vl) {
+    vuint8m1_t mv = RV_VEC_CAST(64, 8, m);
+    vuint8m1_t mvl = RV(vsll_vx_u8m1)(mv, shift, (vl+7)/8); // the vl mask bits occupy (vl+7)/8 bytes
+    vuint8m1_t mvr = RV(vsrl_vx_u8m1)(mv, 8-shift, (vl+7)/8);
+    mvr = RV(vslide1up_vx_u8m1)(mvr, shiftIn, (vl+7)/8); // insert shiftIn at the bottom
+
+    return RV(vmor_mm_b64)(
+        RV_MASK_CAST(64, 8, mvl), RV_MASK_CAST(64, 8, mvr), vl
+    );
+}
 
 static inline vuint8m2_t set_first_vu8(vuint8m2_t src, uint8_t item, size_t vl) {
 #ifdef __riscv_v_intrinsic
@@ -195,48 +206,41 @@ HEDLEY_ALWAYS_INLINE void do_decode_rvv(const uint8_t* src, long& len, unsigned
     // the yEnc specification requires any character following = to be unescaped, not skipped over, so we'll deal with that
     // firstly, check for invalid sequences of = (we assume that these are rare, as a spec compliant yEnc encoder should not generate these)
     if(LIKELIHOOD(0.0001, RV(vcpop_m_b4)(RV(vmandn_mm_b4)(cmpEqShift1, cmp, vl2), vl2) != 0)) {
-        // note: we assume that uintptr_t corresponds with __riscv_xlen
-        #if __riscv_xlen == 64
-        vuint64m1_t cmpEqW = RV_VEC_CAST(4, 64, cmpEq);
-        #else
-        vuint32m1_t cmpEqW = RV_VEC_CAST(4, 32, cmpEq);
-        #endif
-        size_t nextShiftDown = (vl2 > sizeof(uintptr_t)*8 ? sizeof(uintptr_t)*8 : vl2) - 1;
-        size_t wvl = (vl2 + sizeof(uintptr_t)*8 -1) / (sizeof(uintptr_t)*8);
-        for(size_t w=0; w<vl2; w+=sizeof(uintptr_t)*8) {
-            // extract bottom word
-            #if __riscv_xlen == 64
-            uintptr_t maskW = RV(vmv_x_s_u64m1_u64)(cmpEqW);
-            #else
-            uintptr_t maskW = RV(vmv_x_s_u32m1_u32)(cmpEqW);
-            #endif
-
-            // fix it
-            maskW = fix_eqMask<uintptr_t>(maskW, (maskW << 1) | escFirst);
-            uint8_t nextEscFirst = (maskW >> nextShiftDown) & 1;
-
-            // shift it up (will be used for cmpEqShift1)
-            maskW = (maskW<<1) | escFirst; // TODO: should this be done using mask_lshift<1> instead?
-            escFirst = nextEscFirst;
-
-            // slide the new value in from the top
-            #if __riscv_xlen == 64
-            cmpEqW = RV(vslide1down_vx_u64m1)(cmpEqW, maskW, wvl);
-            #else
-            cmpEqW = RV(vslide1down_vx_u32m1)(cmpEqW, maskW, wvl);
-            #endif
+        // replicate fix_eqMask, but in vector form
+        vbool4_t groupStart = RV(vmandn_mm_b4)(cmpEq, cmpEqShift1, vl2);
+        vbool4_t evenBits = RV_MASK_CAST(4, 8, RV(vmv_v_x_u8m1)(0x55, vl2));
+        vbool4_t evenStart = RV(vmand_mm_b4)(groupStart, evenBits, vl2);
+
+        // compute `cmpEq + evenStart` to obtain oddGroups
+        vbool4_t oddGroups;
+        vuint64m1_t cmpEq64 = RV_VEC_CAST(4, 64, cmpEq);
+        vuint64m1_t evenStart64 = RV_VEC_CAST(4, 64, evenStart);
+        vuint64m1_t oddGroups64;
+        if(vl2 <= 64) {
+            // no loop needed - single 64b add will work
+            oddGroups64 = RV(vadd_vv_u64m1)(cmpEq64, evenStart64, 1);
+        } else {
+            // need to loop whilst the add causes a carry
+            unsigned vl64 = vl2/64;
+            vbool64_t carry = RV(vmadc_vv_u64m1_b64)(cmpEq64, evenStart64, vl64);
+            carry = mask_lshift<1>(carry, 0, vl64);
+            oddGroups64 = RV(vadd_vv_u64m1)(cmpEq64, evenStart64, vl64);
+            while(RV(vcpop_m_b64)(carry, vl64)) {
+                vbool64_t nextCarry = RV(vmand_mm_b64)(RV(vmadc_vx_u64m1_b64)(oddGroups64, 1, vl64), carry, vl64); // carry-out only from words actually incremented
+                oddGroups64 = RV(vadd_vx_u64m1_mu)(carry, oddGroups64, oddGroups64, 1, vl64);
+                carry = mask_lshift<1>(nextCarry, 0, vl64);
+            }
         }
-        #if __riscv_xlen == 64
-        cmpEqShift1 = RV_MASK_CAST(4, 64, cmpEqW);
-        #else
-        cmpEqShift1 = RV_MASK_CAST(4, 32, cmpEqW);
-        #endif
+        oddGroups = RV_MASK_CAST(4, 64, oddGroups64);
+
+        cmpEq = RV(vmand_mm_b4)(RV(vmxor_mm_b4)(oddGroups, evenBits, vl2), cmpEq, vl2);
+
+        cmpEqShift1 = mask_lshift<1>(cmpEq, escFirst, vl2);
         cmp = RV(vmor_mm_b4)(cmpEqShift1, cmp, vl2); // ~(~cmp & ~cmpEqShift1)
         numOutputChars = RV(vcpop_m_b4)(cmp, vl2);
-    } else {
-        // no invalid = sequences found - don't need to fix up cmpEq
-        escFirst = RV(vcpop_m_b4)(RV(vmand_mm_b4)(cmpEq, lastBit, vl2), vl2);
     }
+    escFirst = RV(vcpop_m_b4)(RV(vmand_mm_b4)(cmpEq, lastBit, vl2), vl2);
+
     data = RV(vsub_vv_u8m2)(data, RV_vmerge_vxm_u8m2(yencOffset, 64+42, cmpEqShift1, vl2), vl2);
     yencOffset = set_first_vu8(yencOffset, 42 | (escFirst<<6), vl2);
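
The new vbool64_t mask_lshift overload treats the mask register as a byte array: vsll shifts each byte left, vsrl recovers the bit that falls off the top of each byte, vslide1up moves those spill bits up one byte lane (inserting shiftIn at the bottom), and vmor combines the two halves. A scalar model of the shift=1 case (a sketch; the function and parameter names are illustrative):

#include <cstdint>
#include <cstddef>

// Shift a bit-array (stored LSB-first in bytes) left by one bit, inserting
// shiftIn at bit 0 - the scalar equivalent of mask_lshift<1>(m, shiftIn, vl).
static void mask_lshift1_model(uint8_t* m, size_t nbytes, unsigned shiftIn) {
    uint8_t carry = shiftIn & 1;              // what vslide1up inserts at the bottom
    for(size_t i = 0; i < nbytes; i++) {
        uint8_t spill = m[i] >> 7;            // vsrl: bit crossing into the next byte
        m[i] = (uint8_t)(m[i] << 1) | carry;  // vsll, vmor'd with the slid spill bits
        carry = spill;
    }
}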

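The `cmpEq + evenStart` add in the second hunk has to act as one integer spanning the whole mask, so when the mask is longer than 64 bits the per-word carry-outs (vmadc) are shifted up one word and re-added until vcpop reports none remain. A scalar model of that carry-ripple loop (a sketch, assuming at most 64 words):

#include <cstdint>
#include <cstddef>

// Multi-word add with rippled carries, mirroring the vector loop:
// vmadc -> per-word carry-out, mask_lshift<1> -> move carries up a word,
// vadd_vx_mu -> masked +1, repeated while any carry is outstanding.
static void wide_add_model(uint64_t* sum, const uint64_t* a, const uint64_t* b, size_t nwords) {
    bool carry[64] = {false};  // carry-in flag per word (nwords <= 64 assumed)
    bool any = false;
    for(size_t i = 0; i < nwords; i++) {
        sum[i] = a[i] + b[i];                    // vadd_vv
        if(sum[i] < a[i] && i + 1 < nwords) {    // vmadc_vv: did this word carry out?
            carry[i + 1] = true;                 // mask_lshift<1>
            any = true;
        }
    }
    while(any) {                                 // while(vcpop(carry) != 0)
        any = false;
        bool next[64] = {false};
        for(size_t i = 0; i < nwords; i++) {
            if(!carry[i]) continue;
            if(sum[i] == UINT64_MAX && i + 1 < nwords) { // vmadc_vx: will the +1 overflow?
                next[i + 1] = true;              // only words actually incremented can carry out
                any = true;
            }
            sum[i] += 1;                         // vadd_vx_mu: masked increment
        }
        for(size_t i = 0; i < nwords; i++) carry[i] = next[i];
    }
}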