Skip to content

Commit aaf0bb5

Browse files
committed
gpu_unai: asm part 5
1 parent 8177857 commit aaf0bb5

File tree

6 files changed

+321
-52
lines changed

6 files changed

+321
-52
lines changed

plugins/gpu_unai/gpu_arm.S

+220-8
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,65 @@
4040
@ msb of input p0 is assumed to be set
4141
.macro semitrans0 p0 p1 t @ p0 = halved sum of p0 and p1 in each halfword (uhadd16). Also sets bit 15 of p1. t is scratch
4242
eor \t, \p0, \p1 @ bits in which p0 and p1 differ
43-
and \t, \t, #0x0420
43+
and \t, \t, #0x0420 @ keep bits 5 and 10 only, the lowest bit of the two upper 5-bit fields
4444
sub \p0, \p0, \t @ drop the odd low bit of those two field sums so the halving cannot push it into the field below
4545
orr \p1, \p1, #0x8000 @ p0 is expected to have bit 15 set as well, so the halved sum keeps bit 15 set
4646
uhadd16 \p0, \p0, \p1 @ (p0 + p1) >> 1, computed separately in each halfword
4747
.endm
4848

49+
.macro semitrans0p p0 p1 m421 t @ p0 = (p0 + p1) / 2 per 5-bit field on two packed pixels. m421 = mask of the lowest bit of each field. t is scratch
50+
eor \t, \p0, \p1 @ bits in which p0 and p1 differ
51+
and \t, \t, \m421 @ odd low bit of each field sum
52+
add \p0, \p0, \p1 @ plain 32-bit sum of both pixels. The callers in this file clear bit 15 of p0 beforehand
53+
uhsub16 \p0, \p0, \t @ per halfword (sum - low bits) >> 1. sub because of borrow into hi16
54+
.endm
55+
56+
@ p0 = p0 - p1, saturating per channel; p1r, p1g, p1b are the r, g, b channels of p1, premasked by the caller
57+
.macro semitrans2p p0 p1r p1g p1b m1f t0 t1 @ p0 = p0 minus the three p1 fields, each clamped at 0. m1f = mask of the lowest field in both halfwords. t0 and t1 are scratch
58+
and \t0, \p0, \m1f @ lowest field of p0 (bits 0-4 of each halfword with the 0x001f001f mask the callers pass)
59+
and \t1, \p0, \m1f, lsl #5 @ middle field of p0
60+
and \p0, \p0, \m1f, lsl #10 @ upper field of p0. Bit 15 of each halfword is dropped here
61+
uqsub16 \t0, \t0, \p1r @ saturating subtract per halfword, a field that would go negative becomes 0
62+
uqsub16 \t1, \t1, \p1g
63+
uqsub16 \p0, \p0, \p1b
64+
orr \t0, \t0, \t1 @ merge the three fields back into one pixel pair
65+
orr \p0, \p0, \t0
66+
.endm
67+
68+
#else
69+
70+
@ msb of input p0 is assumed to be set
71+
.macro semitrans0 p0 p1 t @ variant without uhadd16. p0 = (p0 + p1) >> 1 with bit 15 forced on. Also sets bit 15 of p1. t is scratch
72+
eor \t, \p0, \p1 @ bits in which p0 and p1 differ
73+
and \t, \t, #0x0420 @ keep bits 5 and 10 only, the lowest bit of the two upper 5-bit fields
74+
orr \p1, \p1, #0x8000 @ set bit 15 of p1
75+
sub \p0, \p0, \t @ drop the odd low bit of those two field sums so the shift cannot push it into the field below
76+
add \p0, \p0, \p1 @ plain 32-bit add
77+
orr \p0, \p0, #0x10000 @ force bit 16 so that bit 15 is set after the shift. Only the low halfword can carry a pixel here
78+
mov \p0, \p0, lsr #1 @ halve
79+
.endm
80+
81+
.macro semitrans0p p0 p1 m421 t @ variant without uhsub16. p0 = (p0 + p1) / 2 per 5-bit field. m421 = mask of the lowest bit of each field. t is scratch
82+
eor \t, \p0, \p1 @ bits in which p0 and p1 differ
83+
and \t, \t, \m421 @ odd low bit of each field sum
84+
add \p0, \p0, \p1 @ plain 32-bit sum
85+
sub \p0, \p0, \t @ every field sum is even now, so the shift below moves no bit into the field underneath
86+
mov \p0, \p0, lsr #1 @ halve all fields at once. NOTE(review): assumes no carry out of bit 15 into the upper halfword, the callers clear bit 15 of p0, confirm p1 has it clear too
87+
.endm
88+
4989
#endif // HAVE_ARMV6
5090

91+
.macro semitrans13p p0 p1 m421 t0 @ p0 = p0 + p1 per 5-bit field, clamped to the field maximum. m421 = mask of the lowest bit of each field. t0 is scratch
92+
add \t0, \p0, \p1 @ raw sum, fields may overflow into the next field up
93+
eor \p0, \p0, \p1 @ bits in which p0 and p1 differ
94+
and \p0, \p0, \m421 @ low_bits: odd low bit of each field sum
95+
sub \p0, \t0, \p0 @ sum without those low bits, so the bit above each field now holds only the carry out of that field
96+
and \p0, \p0, \m421, lsl #5 @ carries: one bit directly above each field that overflowed
97+
sub \t0, \t0, \p0 @ modulo: sum with the carry bits removed, every field wrapped to 5 bits
98+
sub \p0, \p0, \p0, lsr #5 @ clamp: each carry bit becomes five set bits covering the field below it
99+
orr \p0, \t0, \p0 @ overflowed fields become all ones, the others keep their sum
100+
.endm
101+
51102

52103
@ in: r0=dst, r2=pal, r12=0x1e
53104
@ trashes r6-r8,lr,flags
@@ -95,6 +146,91 @@
95146
strhne \rs,[r0, #6]
96147
.endm
97148

149+
150+
@ (void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn)
151+
@ see also poly_untex_st_m
152+
.macro tile_driver_st_m name semit @ emits function name, which blends color c into h rows of cnt pixels at d using blend mode semit (0 to 3)
153+
FUNCTION(\name):
154+
.cfi_startproc
155+
stmfd sp!, {r4-r9,lr} @ on entry r0 = d, r1 = c, r2 = cnt, r3 = inn, as in the prototype comment above this macro
156+
.cfi_def_cfa_offset 4*7 @ seven registers were pushed
157+
.cfi_rel_offset lr, 4*6
158+
ldr r7, [r3, #0x18] @ y0, read from inn + 0x18
159+
ldr r8, [r3, #0x1c] @ y1, read from inn + 0x1c
160+
.if \semit != 2
161+
mov r4, #0x8000
162+
orr r4, r4, r4, lsl #16 @ r4 = 0x80008000, bit 15 of both packed pixels
163+
mov r6, #0x420
164+
orr r6, r6, #1
165+
orr r6, r6, r6, lsl #16 @ r6 = 0x04210421, lowest bit of each 5-bit field
166+
.endif
167+
.if \semit == 2
168+
and r4, r1, #0x03e0 @ bits 5-9 of c
169+
and r5, r1, #0x7c00 @ bits 10-14 of c
170+
and r1, r1, #0x001f @ bits 0-4 of c stay in r1
171+
orr r4, r4, r4, lsl #16 @ premasked g, copied into both halfwords
172+
orr r5, r5, r5, lsl #16 @ premasked b, copied into both halfwords
173+
mov r6, #0x00001f
174+
orr r6, #0x1f0000 @ r6 = 0x001f001f, field mask handed to semitrans2p
175+
.elseif \semit == 3
176+
mov r1, r1, lsr #2 @ c >> 2
177+
bic r1, r1, #(0x0c60>>2) @ clear bits 3-4 and 8-9, which the shift moved across field borders. NOTE(review): bit 15 of c would land in bit 13, presumably c has it clear
178+
.endif
179+
orr r1, r1, r1, lsl #16 @ duplicate the prepared color into both halfwords
180+
sub r3, r8, r7 @ h = y1 - y0, used as the row counter
181+
mov r7, r2 @ save w (cnt), r2 is consumed while a row is processed
182+
0: @ start of one row
183+
ldrh r8, [r0] @ first pixel, only used when d is not 4-byte aligned
184+
pld_ r0, #2048 @ pld_ is defined outside this view, presumably a preload hint
185+
tst r0, #2 @ is d 4-byte aligned?
186+
beq 1f @ yes, go straight to the two-pixel loop
187+
sub r2, #1 @ one pixel is handled here. NOTE(review): this path writes a pixel even if w is 0, confirm callers never pass 0
188+
.if \semit == 0
189+
bic r8, r8, r4 @ clear bit 15 of the destination pixel before blending
190+
semitrans0p r8, r1, r6, lr
191+
.elseif \semit == 1 || \semit == 3
192+
bic r8, r8, r4 @ clear bit 15 of the destination pixel before blending
193+
semitrans13p r8, r1, r6, lr
194+
.elseif \semit == 2
195+
semitrans2p r8, r1, r4, r5, r6, r9, lr
196+
.endif
197+
strh r8, [r0], #2 @ store the blended pixel and advance d by one pixel
198+
1: @ two pixels per iteration
199+
ldr r8, [r0] @ two destination pixels
200+
pld_ r0, #32
201+
subs r2, r2, #2 @ flags are consumed by strpl and bpl below, the blend macros in between do not set flags
202+
.if \semit == 0
203+
bic r8, r8, r4 @ clear bit 15 of both destination pixels
204+
semitrans0p r8, r1, r6, lr
205+
.elseif \semit == 1 || \semit == 3
206+
bic r8, r8, r4 @ clear bit 15 of both destination pixels
207+
semitrans13p r8, r1, r6, lr
208+
.elseif \semit == 2
209+
semitrans2p r8, r1, r4, r5, r6, r9, lr
210+
.endif
211+
strpl r8, [r0], #4 @ store both pixels unless the count went negative
212+
bpl 1b
213+
2: @ row tail, no branch to this label is visible in this macro
214+
tst r2, #1 @ r2 is odd here when exactly one pixel of the row is still unwritten
215+
strhne r8, [r0], #2 @ store the low halfword of the last blended word
216+
mov r2, r7 @ reload w for the next row
217+
add r0, r0, #2048 @ same position 2048 bytes further on
218+
sub r0, r0, r7, lsl #1 @ minus the w * 2 bytes just written, which gives the start of the next row
219+
subs r3, r3, #1
220+
bgt 0b @ next row while rows remain. NOTE(review): one row is processed even if h is 0 or negative, confirm callers guarantee h > 0
221+
222+
ldmfd sp!, {r4-r9,pc} @ restore the saved registers and return
223+
.cfi_endproc
224+
.endm
225+
226+
227+
tile_driver_st_m tile_driver_st0_asm, 0 @ blend through semitrans0p
228+
tile_driver_st_m tile_driver_st1_asm, 1 @ blend through semitrans13p
229+
tile_driver_st_m tile_driver_st3_asm, 3 @ blend through semitrans13p with the color shifted right by 2 first
230+
#ifdef HAVE_ARMV6
231+
tile_driver_st_m tile_driver_st2_asm, 2 @ blend through semitrans2p, which is only defined in the branch that uses uqsub16
232+
#endif
233+
98234
@ (u16 *d, void *s, u16 *pal, int lines)
99235
sprite_4bpp_x16_asm_:
100236
ldr r12,[r3, #0x18] @ y0
@@ -106,7 +242,7 @@ FUNCTION(sprite_4bpp_x16_asm):
106242
stmfd sp!, {r4-r8,lr}
107243
.cfi_def_cfa_offset 4*6
108244
.cfi_rel_offset lr, 4*5
109-
mov r12, #0x1e @ empty pixel
245+
mov r12, #0x1e
110246

111247
0:
112248
ldmia r1, {r4,r5}
@@ -343,15 +479,15 @@ FUNCTION(\name):
343479
.endm
344480

345481
sprite_driver_l_st sprite_driver_4bpp_l0_std_asm, 4, 0, -1
482+
sprite_driver_l_st sprite_driver_4bpp_l0_st0_asm, 4, 0, 0
346483
sprite_driver_l_st sprite_driver_8bpp_l0_std_asm, 8, 0, -1
484+
sprite_driver_l_st sprite_driver_8bpp_l0_st0_asm, 8, 0, 0
347485

348486
#ifdef HAVE_ARMV6
349487

350-
sprite_driver_l_st sprite_driver_4bpp_l0_st0_asm, 4, 0, 0
351488
sprite_driver_l_st sprite_driver_4bpp_l1_std_asm, 4, 1, -1
352489
sprite_driver_l_st sprite_driver_4bpp_l1_st0_asm, 4, 1, 0
353490
sprite_driver_l_st sprite_driver_4bpp_l1_st1_asm, 4, 1, 1
354-
sprite_driver_l_st sprite_driver_8bpp_l0_st0_asm, 8, 0, 0
355491
sprite_driver_l_st sprite_driver_8bpp_l1_std_asm, 8, 1, -1
356492
sprite_driver_l_st sprite_driver_8bpp_l1_st0_asm, 8, 1, 0
357493
sprite_driver_l_st sprite_driver_8bpp_l1_st1_asm, 8, 1, 1
@@ -414,6 +550,82 @@ FUNCTION(sprite_driver_16bpp_asm):
414550
.cfi_endproc
415551

416552

553+
@ (void *d, const gpu_unai_inner_t *inn, int count)
554+
@ see also tile_driver_st_m
555+
.macro poly_untex_st_m name semit @ emits function name, which blends one color into count pixels at d using blend mode semit (0 to 3)
556+
FUNCTION(\name):
557+
.cfi_startproc
558+
ldrh r1, [r1, #0x38] @ rgb, halfword at inn + 0x38, replaces the inn pointer in r1 (r0 = d, r2 = count per the prototype comment above this macro)
559+
stmfd sp!, {r4-r7,lr}
560+
.cfi_def_cfa_offset 4*5 @ five registers were pushed
561+
.cfi_rel_offset lr, 4*4
562+
.if \semit != 2
563+
mov r4, #0x8000
564+
orr r4, r4, r4, lsl #16 @ r4 = 0x80008000, bit 15 of both packed pixels
565+
mov r6, #0x420
566+
orr r6, r6, #1
567+
orr r6, r6, r6, lsl #16 @ r6 = 0x04210421, lowest bit of each 5-bit field
568+
.endif
569+
.if \semit == 2
570+
and r4, r1, #0x03e0 @ bits 5-9 of rgb
571+
and r5, r1, #0x7c00 @ bits 10-14 of rgb
572+
and r1, r1, #0x001f @ bits 0-4 of rgb stay in r1
573+
orr r4, r4, r4, lsl #16 @ premasked g, copied into both halfwords
574+
orr r5, r5, r5, lsl #16 @ premasked b, copied into both halfwords
575+
mov r6, #0x00001f
576+
orr r6, #0x1f0000 @ r6 = 0x001f001f, field mask handed to semitrans2p
577+
.elseif \semit == 3
578+
mov r1, r1, lsr #2 @ rgb >> 2
579+
bic r1, r1, #(0x0c60>>2) @ clear bits 3-4 and 8-9, which the shift moved across field borders. NOTE(review): bit 15 of rgb would land in bit 13, presumably it is clear
580+
.endif
581+
orr r1, r1, r1, lsl #16 @ duplicate the prepared color into both halfwords
582+
0: @ no branch to this label is visible in this macro, only one run of pixels is processed
583+
ldrh r3, [r0] @ first pixel, only used when d is not 4-byte aligned
584+
pld_ r0, #2048 @ pld_ is defined outside this view, presumably a preload hint
585+
tst r0, #2 @ is d 4-byte aligned?
586+
beq 1f @ yes, go straight to the two-pixel loop
587+
sub r2, #1 @ one pixel is handled here. NOTE(review): this path writes a pixel even if count is 0, confirm callers never pass 0
588+
.if \semit == 0
589+
bic r3, r3, r4 @ clear bit 15 of the destination pixel before blending
590+
semitrans0p r3, r1, r6, lr
591+
.elseif \semit == 1 || \semit == 3
592+
bic r3, r3, r4 @ clear bit 15 of the destination pixel before blending
593+
semitrans13p r3, r1, r6, lr
594+
.elseif \semit == 2
595+
semitrans2p r3, r1, r4, r5, r6, r7, lr
596+
.endif
597+
strh r3, [r0], #2 @ store the blended pixel and advance d by one pixel
598+
1: @ two pixels per iteration
599+
ldr r3, [r0] @ two destination pixels
600+
pld_ r0, #32
601+
subs r2, r2, #2 @ flags are consumed by strpl and bpl below, the blend macros in between do not set flags
602+
.if \semit == 0
603+
bic r3, r3, r4 @ clear bit 15 of both destination pixels
604+
semitrans0p r3, r1, r6, lr
605+
.elseif \semit == 1 || \semit == 3
606+
bic r3, r3, r4 @ clear bit 15 of both destination pixels
607+
semitrans13p r3, r1, r6, lr
608+
.elseif \semit == 2
609+
semitrans2p r3, r1, r4, r5, r6, r7, lr
610+
.endif
611+
strpl r3, [r0], #4 @ store both pixels unless the count went negative
612+
bpl 1b
613+
2: @ tail, no branch to this label is visible in this macro
614+
tst r2, #1 @ r2 is odd here when exactly one pixel is still unwritten
615+
strhne r3, [r0], #2 @ store the low halfword of the last blended word
616+
617+
ldmfd sp!, {r4-r7,pc} @ restore the saved registers and return
618+
.cfi_endproc
619+
.endm
620+
621+
poly_untex_st_m poly_untex_st0_asm, 0 @ blend through semitrans0p
622+
poly_untex_st_m poly_untex_st1_asm, 1 @ blend through semitrans13p
623+
poly_untex_st_m poly_untex_st3_asm, 3 @ blend through semitrans13p with the color shifted right by 2 first
624+
#ifdef HAVE_ARMV6
625+
poly_untex_st_m poly_untex_st2_asm, 2 @ blend through semitrans2p, which is only defined in the branch that uses uqsub16
626+
#endif
627+
628+
417629
.macro poly_4_8bpp_asm_m name bpp light semit
418630
FUNCTION(\name): @ (void *d, const gpu_unai_inner_t *inn, int count)
419631
.cfi_startproc
@@ -568,15 +780,15 @@ v_\name: @ r3=clut, r4=u, r5=u_inc, r6=u_msk, r7=v, lr=v_masked
568780
.cfi_endproc
569781
.endm
570782

571-
poly_4_8bpp_asm_m poly_4bpp_asm, 4, 0, -1
572-
poly_4_8bpp_asm_m poly_8bpp_asm, 8, 0, -1
783+
poly_4_8bpp_asm_m poly_4bpp_asm, 4, 0, -1
784+
poly_4_8bpp_asm_m poly_4bpp_l0_st0_asm, 4, 0, 0
785+
poly_4_8bpp_asm_m poly_8bpp_asm, 8, 0, -1
786+
poly_4_8bpp_asm_m poly_8bpp_l0_st0_asm, 8, 0, 0
573787

574788
#ifdef HAVE_ARMV6
575789

576-
poly_4_8bpp_asm_m poly_4bpp_l0_st0_asm, 4, 0, 0
577790
poly_4_8bpp_asm_m poly_4bpp_l1_std_asm, 4, 1, -1
578791
poly_4_8bpp_asm_m poly_4bpp_l1_st0_asm, 4, 1, 0
579-
poly_4_8bpp_asm_m poly_8bpp_l0_st0_asm, 8, 0, 0
580792
poly_4_8bpp_asm_m poly_8bpp_l1_std_asm, 8, 1, -1
581793
poly_4_8bpp_asm_m poly_8bpp_l1_st0_asm, 8, 1, 0
582794

plugins/gpu_unai/gpu_arm.h

+20-9
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ extern "C" {
77

88
struct gpu_unai_inner_t;
99

10+
void tile_driver_st0_asm(void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn);
11+
void tile_driver_st1_asm(void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn);
12+
void tile_driver_st3_asm(void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn);
13+
1014
void sprite_driver_4bpp_asm(void *pPixel, const u8 *pTxt_base,
1115
u32 count, const struct gpu_unai_inner_t *inn);
1216
void sprite_driver_8bpp_asm(void *pPixel, const u8 *pTxt_base,
@@ -15,36 +19,43 @@ void sprite_driver_16bpp_asm(void *pPixel, const void *pTxt_base,
1519
u32 count, const struct gpu_unai_inner_t *inn);
1620
void sprite_4bpp_x16_asm(void *d, const void *s, void *pal, int lines);
1721

18-
void poly_4bpp_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
19-
void poly_8bpp_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
20-
2122
void sprite_driver_4bpp_l0_std_asm(void *pPixel, const u8 *pTxt_base,
2223
u32 count, const struct gpu_unai_inner_t *inn);
24+
void sprite_driver_4bpp_l0_st0_asm(void *pPixel, const u8 *pTxt_base,
25+
u32 count, const struct gpu_unai_inner_t *inn);
2326
void sprite_driver_8bpp_l0_std_asm(void *pPixel, const u8 *pTxt_base,
2427
u32 count, const struct gpu_unai_inner_t *inn);
28+
void sprite_driver_8bpp_l0_st0_asm(void *pPixel, const u8 *pTxt_base,
29+
u32 count, const struct gpu_unai_inner_t *inn);
30+
31+
void poly_untex_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
32+
void poly_untex_st1_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
33+
void poly_untex_st3_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
34+
void poly_4bpp_asm (void *d, const struct gpu_unai_inner_t *inn, int count);
35+
void poly_4bpp_l0_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
36+
void poly_8bpp_asm (void *d, const struct gpu_unai_inner_t *inn, int count);
37+
void poly_8bpp_l0_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
2538

2639
#ifdef HAVE_ARMV6
2740

28-
void sprite_driver_4bpp_l0_st0_asm(void *pPixel, const u8 *pTxt_base,
29-
u32 count, const struct gpu_unai_inner_t *inn);
41+
void tile_driver_st2_asm(void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn);
42+
3043
void sprite_driver_4bpp_l1_std_asm(void *pPixel, const u8 *pTxt_base,
3144
u32 count, const struct gpu_unai_inner_t *inn);
3245
void sprite_driver_4bpp_l1_st0_asm(void *pPixel, const u8 *pTxt_base,
3346
u32 count, const struct gpu_unai_inner_t *inn);
3447
void sprite_driver_4bpp_l1_st1_asm(void *pPixel, const u8 *pTxt_base,
3548
u32 count, const struct gpu_unai_inner_t *inn);
36-
void sprite_driver_8bpp_l0_st0_asm(void *pPixel, const u8 *pTxt_base,
37-
u32 count, const struct gpu_unai_inner_t *inn);
3849
void sprite_driver_8bpp_l1_std_asm(void *pPixel, const u8 *pTxt_base,
3950
u32 count, const struct gpu_unai_inner_t *inn);
4051
void sprite_driver_8bpp_l1_st0_asm(void *pPixel, const u8 *pTxt_base,
4152
u32 count, const struct gpu_unai_inner_t *inn);
4253
void sprite_driver_8bpp_l1_st1_asm(void *pPixel, const u8 *pTxt_base,
4354
u32 count, const struct gpu_unai_inner_t *inn);
44-
void poly_4bpp_l0_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
55+
56+
void poly_untex_st2_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
4557
void poly_4bpp_l1_std_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
4658
void poly_4bpp_l1_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
47-
void poly_8bpp_l0_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
4859
void poly_8bpp_l1_std_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
4960
void poly_8bpp_l1_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
5061

0 commit comments

Comments
 (0)