40
40
@ msb of input p0 is assumed to be set
41
41
@ semitrans0 p0, p1, t  -- averaging blend built on uhadd16
@   p0  in/out: first operand, overwritten with the blended result
@   p1  in/out: second operand; bit 15 gets set as a side effect
@   t   scratch register
@ Low halfword of the result:
@   ((p0 - ((p0 ^ p1) & 0x0420)) + (p1 | 0x8000)) >> 1
@ Both the 0x0420 mask and the 0x8000 immediate only touch the low halfword.
@ No instruction here updates the condition flags.
.macro semitrans0 p0 p1 t
42
42
@ t = bits 5 and 10 in which the operands differ; dropping them from p0
@ keeps the halving below from moving an odd bit into the field underneath
@ (presumably the low bits of the g/b fields -- confirm the pixel layout)
eor \t , \p0 , \p1
43
- and \t , \t , # 0x0420
43
+ and \t , \t , # 0x0420
44
44
sub \p0 , \p0 , \t
45
45
@ with bit 15 set in both operands the halved sum keeps bit 15 set
orr \p1 , \p1 , # 0x8000
46
46
@ unsigned halving add, performed independently on each 16-bit half
uhadd16 \p0 , \p0 , \p1
47
47
.endm
48
48
49
@ semitrans0p p0, p1, m421, t  -- averaging blend of two packed halfwords
@   p0    in/out: operand, overwritten with the result
@   p1    in:     second operand, left unchanged
@   m421  in:     register holding the low-bit mask (the drivers in this file
@                 pass 0x04210421)
@   t     scratch register
@ Per 16-bit halfword: p0 = ((p0 + p1) - ((p0 ^ p1) & m421)) >> 1
@ The drivers in this file clear bit 15 of each halfword of p0 before invoking
@ this macro. No condition flags are modified.
+ .macro semitrans0p p0 p1 m421 t
50
@ t = masked bits in which the operands differ
+ eor \t , \p0 , \p1
51
+ and \t , \t , \m421
52
@ plain 32-bit add of both halfwords at once
+ add \p0 , \p0 , \p1
53
@ halving subtract, performed independently on each 16-bit half
+ uhsub16 \p0 , \p0 , \t @ sub because of borrow into hi16
54
+ .endm
55
+
56
+ @ p0 - {p1|r , g , b} // p1 * - premasked rgb
57
@ semitrans2p p0, p1r, p1g, p1b, m1f, t0, t1  -- saturating per-field subtract
@   p0             in/out: operand, overwritten with the result
@   p1r/p1g/p1b    in:     subtrahend already split into its three fields, each
@                          left at its in-pixel bit position
@   m1f            in:     register holding the 5-bit field mask (the drivers in
@                          this file pass 0x001f001f)
@   t0, t1         scratch registers
@ Each field of p0 is isolated, reduced with uqsub16 (which saturates at 0 per
@ halfword) and the three results are OR-ed back together. Bit 15 of each
@ halfword of p0 is not covered by any of the three masks, so it is 0 in the
@ result. No condition flags are modified.
+ .macro semitrans2p p0 p1r p1g p1b m1f t0 t1
58
@ split p0 into the fields at bits 0-4, 5-9 and 10-14
+ and \t0 , \p0 , \m1f
59
+ and \t1 , \p0 , \m1f , lsl # 5
60
+ and \p0 , \p0 , \m1f , lsl # 10
61
@ unsigned saturating subtract on each 16-bit half
+ uqsub16 \t0 , \t0 , \p1r
62
+ uqsub16 \t1 , \t1 , \p1g
63
+ uqsub16 \p0 , \p0 , \p1b
64
@ merge the three fields back into one value
+ orr \t0 , \t0 , \t1
65
+ orr \p0 , \p0 , \t0
66
+ .endm
67
+
68
+ #else
69
+
70
+ @ msb of input p0 is assumed to be set
71
@ semitrans0 p0, p1, t  -- averaging blend using only plain integer ops
@   p0  in/out: first operand, overwritten with the blended result
@   p1  in/out: second operand; bit 15 gets set as a side effect
@   t   scratch register
@ Computes p0 = (((p0 - ((p0 ^ p1) & 0x0420)) + (p1 | 0x8000)) | 0x10000) >> 1
@ as one 32-bit value, so only a single pixel in the low halfword is handled.
@ No condition flags are modified.
+ .macro semitrans0 p0 p1 t
72
@ t = bits 5 and 10 in which the operands differ
+ eor \t , \p0 , \p1
73
+ and \t , \t , # 0x0420
74
+ orr \p1 , \p1 , # 0x8000
75
+ sub \p0 , \p0 , \t
76
+ add \p0 , \p0 , \p1
77
@ force bit 16 so that bit 15 is set once the sum is shifted right
+ orr \p0 , \p0 , # 0x10000
78
+ mov \p0 , \p0 , lsr # 1
79
+ .endm
80
+
81
@ semitrans0p p0, p1, m421, t  -- averaging blend using only plain integer ops
@   p0    in/out: operand, overwritten with the result
@   p1    in:     second operand, left unchanged
@   m421  in:     register holding the low-bit mask (the drivers in this file
@                 pass 0x04210421)
@   t     scratch register
@ Computes p0 = ((p0 + p1) - ((p0 ^ p1) & m421)) >> 1 on the whole 32-bit word.
@ The drivers in this file clear bit 15 of each halfword of p0 before invoking
@ this macro. No condition flags are modified.
+ .macro semitrans0p p0 p1 m421 t
82
@ t = masked bits in which the operands differ
+ eor \t , \p0 , \p1
83
+ and \t , \t , \m421
84
+ add \p0 , \p0 , \p1
85
@ after this subtract every bit selected by the mask is even, so the shift
@ below does not move an odd bit down into the neighbouring field
+ sub \p0 , \p0 , \t
86
+ mov \p0 , \p0 , lsr # 1
87
+ .endm
88
+
49
89
#endif // HAVE_ARMV6
50
90
91
@ semitrans13p p0, p1, m421, t0  -- per-field add with clamping
@   p0    in/out: operand, overwritten with the result
@   p1    in:     second operand, left unchanged
@   m421  in:     register holding the low-bit mask (the drivers in this file
@                 pass 0x04210421)
@   t0    scratch register
@ The fields are added with one 32-bit add; a field that overflowed into the
@ bit above it (bits selected by m421 << 5) is replaced by all ones.
@ The drivers in this file clear bit 15 of each halfword of p0 first.
@ No condition flags are modified.
+ .macro semitrans13p p0 p1 m421 t0
92
@ t0 = raw sum, carries between fields included
+ add \t0 , \p0 , \p1
93
+ eor \p0 , \p0 , \p1
94
+ and \p0 , \p0 , \m421 @ low_bits
95
@ sum minus the carry-less low bits leaves each field's carry-in at its low bit
+ sub \p0 , \t0 , \p0
96
+ and \p0 , \p0 , \m421 , lsl # 5 @ carries
97
+ sub \t0 , \t0 , \p0 @ modulo
98
@ carry - (carry >> 5) turns each carry bit into five ones just below it
+ sub \p0 , \p0 , \p0 , lsr # 5 @ clamp
99
+ orr \p0 , \t0 , \p0
100
+ .endm
101
+
51
102
52
103
@ in : r0=dst , r2=pal , r12 = 0x1e
53
104
@ trashes r6 - r8 , lr , flags
95
146
strhne \rs ,[ r0 , # 6 ]
96
147
.endm
97
148
149
+
150
+ @ (void * d , u16 c , u32 cnt , const struct gpu_unai_inner_t * inn)
151
+ @ see also poly_untex_st_m
152
@ tile_driver_st_m name, semit
@ Emits function `name`, which blends one constant colour into a rectangle of
@ 16-bit pixels using blend mode `semit` (0, 1, 2 or 3).
@ Register use, following the C prototype quoted above the macro (assuming
@ the standard r0-r3 argument order):
@   r0 = d    destination pointer, advanced as pixels are written
@   r1 = c    colour; rebuilt below as a value replicated in both halfwords
@   r2 = cnt  pixels per row; a copy is kept in r7
@   r3 = inn  read at +0x18 (y0) and +0x1c (y1), then reused as the row counter
@   r4-r6     mode dependent masks / premasked colour fields
@   r8        pixel data; r9 and lr are scratch
@ r4-r9 and lr are saved on entry and restored on exit.
@ NOTE(review): the blend macros add r1 to the pixel fields directly, so bit 15
@ of c presumably has to be clear -- confirm against the callers.
+ .macro tile_driver_st_m name semit
153
+ FUNCTION(\name):
154
+ .cfi_startproc
155
+ stmfd sp ! , {r4 - r9 , lr}
156
+ .cfi_def_cfa_offset 4 * 7
157
+ .cfi_rel_offset lr , 4 * 6
158
+ ldr r7 , [ r3 , # 0x18 ] @ y0
159
+ ldr r8 , [ r3 , # 0x1c ] @ y1
160
@ modes 0, 1 and 3: r4 = 0x80008000, r6 = 0x04210421
+ .if \semit != 2
161
+ mov r4 , # 0x8000
162
+ orr r4 , r4 , r4 , lsl # 16 @ mask 8000
163
+ mov r6 , # 0x420
164
+ orr r6 , r6 , # 1
165
+ orr r6 , r6 , r6 , lsl # 16 @ mask 0421
166
+ .endif
167
@ mode 2: split the colour into three fields kept at their bit positions
@ (r1 = bits 0-4, r4 = bits 5-9, r5 = bits 10-14) and set r6 = 0x001f001f
+ .if \semit == 2
168
+ and r4 , r1 , # 0x03e0
169
+ and r5 , r1 , # 0x7c00
170
+ and r1 , r1 , # 0x001f
171
+ orr r4 , r4 , r4 , lsl # 16 @ premasked g
172
+ orr r5 , r5 , r5 , lsl # 16 @ premasked b
173
+ mov r6 , # 0x00001f
174
+ orr r6 , # 0x1f0000 @ mask
175
@ mode 3: divide every colour field by 4; the bic removes bits 3-4 and 8-9,
@ i.e. the bits that the shift moved across a field boundary
@ NOTE(review): bit 15 of c, if set, ends up in bit 13 and is not cleared
+ .elseif \semit == 3
176
+ mov r1 , r1 , lsr # 2
177
+ bic r1 , r1 , #( 0x0c60 >> 2 )
178
+ .endif
179
@ replicate the colour (mode 2: its lowest field) into both halfwords
+ orr r1 , r1 , r1 , lsl # 16
180
+ sub r3 , r8 , r7 @ h
181
+ mov r7 , r2 @ save w
182
@ ---- row loop; the body runs once before h is tested ----
+ 0 :
183
+ ldrh r8 , [ r0 ]
184
@ pld_ is not defined in this part of the file; presumably a prefetch helper
+ pld_ r0 , # 2048
185
@ when bit 1 of the address is set, blend a single leading pixel so that the
@ loop below can use word accesses on a 4-byte boundary
+ tst r0 , # 2
186
+ beq 1f
187
+ sub r2 , # 1
188
+ .if \semit == 0
189
+ bic r8 , r8 , r4
190
+ semitrans0p r8 , r1 , r6 , lr
191
+ .elseif \semit == 1 || \semit == 3
192
+ bic r8 , r8 , r4
193
+ semitrans13p r8 , r1 , r6 , lr
194
+ .elseif \semit == 2
195
+ semitrans2p r8 , r1 , r4 , r5 , r6 , r9 , lr
196
+ .endif
197
+ strh r8 , [ r0 ], # 2
198
@ ---- two pixels per iteration ----
+ 1 :
199
+ ldr r8 , [ r0 ]
200
+ pld_ r0 , # 32
201
@ the flags set here must survive the blend macros below (none of them sets
@ flags); they decide both the conditional store and the loop branch
+ subs r2 , r2 , # 2
202
+ .if \semit == 0
203
+ bic r8 , r8 , r4
204
+ semitrans0p r8 , r1 , r6 , lr
205
+ .elseif \semit == 1 || \semit == 3
206
+ bic r8 , r8 , r4
207
+ semitrans13p r8 , r1 , r6 , lr
208
+ .elseif \semit == 2
209
+ semitrans2p r8 , r1 , r4 , r5 , r6 , r9 , lr
210
+ .endif
211
+ strpl r8 , [ r0 ], # 4
212
+ bpl 1b
213
@ the loop exits with r2 = -1 when one pixel is left, -2 otherwise; the left
@ over pixel is the already blended low halfword of r8
+ 2 :
214
+ tst r2 , # 1
215
+ strhne r8 , [ r0 ], # 2
216
+ mov r2 , r7 @ w
217
@ r0 moved forward by 2 * w bytes in this row; step to the same column
@ 2048 bytes further on for the next row
+ add r0 , r0 , # 2048
218
+ sub r0 , r0 , r7 , lsl # 1
219
+ subs r3 , r3 , # 1
220
+ bgt 0b
221
+
222
+ ldmfd sp ! , {r4 - r9 , pc}
223
+ .cfi_endproc
224
+ .endm
225
+
226
+
227
@ Instantiate one tile driver per blend mode. Mode 2 expands to semitrans2p,
@ which uses uqsub16, so it is only emitted when HAVE_ARMV6 is defined.
+ tile_driver_st_m tile_driver_st0_asm , 0
228
+ tile_driver_st_m tile_driver_st1_asm , 1
229
+ tile_driver_st_m tile_driver_st3_asm , 3
230
+ #ifdef HAVE_ARMV6
231
+ tile_driver_st_m tile_driver_st2_asm , 2
232
+ #endif
233
+
98
234
@ (u16 * d , void * s , u16 * pal , int lines)
99
235
sprite_4bpp_x16_asm_:
100
236
ldr r12 ,[ r3 , # 0x18 ] @ y0
@@ -106,7 +242,7 @@ FUNCTION(sprite_4bpp_x16_asm):
106
242
stmfd sp ! , {r4 - r8 , lr}
107
243
.cfi_def_cfa_offset 4 * 6
108
244
.cfi_rel_offset lr , 4 * 5
109
- mov r12 , # 0x1e @ empty pixel
245
+ mov r12 , # 0x1e
110
246
111
247
0 :
112
248
ldmia r1 , {r4 , r5}
@@ -343,15 +479,15 @@ FUNCTION(\name):
343
479
.endm
344
480
345
481
sprite_driver_l_st sprite_driver_4bpp_l0_std_asm , 4 , 0 , - 1
482
+ sprite_driver_l_st sprite_driver_4bpp_l0_st0_asm , 4 , 0 , 0
346
483
sprite_driver_l_st sprite_driver_8bpp_l0_std_asm , 8 , 0 , - 1
484
+ sprite_driver_l_st sprite_driver_8bpp_l0_st0_asm , 8 , 0 , 0
347
485
348
486
#ifdef HAVE_ARMV6
349
487
350
- sprite_driver_l_st sprite_driver_4bpp_l0_st0_asm , 4 , 0 , 0
351
488
sprite_driver_l_st sprite_driver_4bpp_l1_std_asm , 4 , 1 , - 1
352
489
sprite_driver_l_st sprite_driver_4bpp_l1_st0_asm , 4 , 1 , 0
353
490
sprite_driver_l_st sprite_driver_4bpp_l1_st1_asm , 4 , 1 , 1
354
- sprite_driver_l_st sprite_driver_8bpp_l0_st0_asm , 8 , 0 , 0
355
491
sprite_driver_l_st sprite_driver_8bpp_l1_std_asm , 8 , 1 , - 1
356
492
sprite_driver_l_st sprite_driver_8bpp_l1_st0_asm , 8 , 1 , 0
357
493
sprite_driver_l_st sprite_driver_8bpp_l1_st1_asm , 8 , 1 , 1
@@ -414,6 +550,82 @@ FUNCTION(sprite_driver_16bpp_asm):
414
550
.cfi_endproc
415
551
416
552
553
+ @ (void * d , const gpu_unai_inner_t * inn , int count)
554
+ @ see also tile_driver_st_m
555
@ poly_untex_st_m name, semit
@ Emits function `name`, which blends one constant colour into a single run of
@ 16-bit pixels using blend mode `semit` (0, 1, 2 or 3).
@ Register use, following the C prototype quoted above the macro (assuming
@ the standard r0-r3 argument order):
@   r0 = d      destination pointer, advanced as pixels are written
@   r1 = inn    only used to fetch the 16-bit colour at +0x38, which then
@               replaces the pointer in r1
@   r2 = count  number of pixels
@   r4-r6       mode dependent masks / premasked colour fields
@   r3          pixel data; r7 and lr are scratch
@ r4-r7 and lr are saved on entry and restored on exit.
@ NOTE(review): the blend macros add r1 to the pixel fields directly, so bit 15
@ of the loaded colour presumably has to be clear -- confirm.
+ .macro poly_untex_st_m name semit
556
+ FUNCTION(\name):
557
+ .cfi_startproc
558
+ ldrh r1 , [ r1 , # 0x38 ] @ rgb
559
+ stmfd sp ! , {r4 - r7 , lr}
560
+ .cfi_def_cfa_offset 4 * 5
561
+ .cfi_rel_offset lr , 4 * 4
562
@ modes 0, 1 and 3: r4 = 0x80008000, r6 = 0x04210421
+ .if \semit != 2
563
+ mov r4 , # 0x8000
564
+ orr r4 , r4 , r4 , lsl # 16 @ mask 8000
565
+ mov r6 , # 0x420
566
+ orr r6 , r6 , # 1
567
+ orr r6 , r6 , r6 , lsl # 16 @ mask 0421
568
+ .endif
569
@ mode 2: split the colour into three fields kept at their bit positions
@ (r1 = bits 0-4, r4 = bits 5-9, r5 = bits 10-14) and set r6 = 0x001f001f
+ .if \semit == 2
570
+ and r4 , r1 , # 0x03e0
571
+ and r5 , r1 , # 0x7c00
572
+ and r1 , r1 , # 0x001f
573
+ orr r4 , r4 , r4 , lsl # 16 @ premasked g
574
+ orr r5 , r5 , r5 , lsl # 16 @ premasked b
575
+ mov r6 , # 0x00001f
576
+ orr r6 , # 0x1f0000 @ mask
577
@ mode 3: divide every colour field by 4; the bic removes bits 3-4 and 8-9,
@ i.e. the bits that the shift moved across a field boundary
@ NOTE(review): bit 15 of the colour, if set, ends up in bit 13 uncleared
+ .elseif \semit == 3
578
+ mov r1 , r1 , lsr # 2
579
+ bic r1 , r1 , #( 0x0c60 >> 2 )
580
+ .endif
581
@ replicate the colour (mode 2: its lowest field) into both halfwords
+ orr r1 , r1 , r1 , lsl # 16
582
@ label 0 is not branched to anywhere inside this macro
+ 0 :
583
+ ldrh r3 , [ r0 ]
584
@ pld_ is not defined in this part of the file; presumably a prefetch helper
+ pld_ r0 , # 2048
585
@ when bit 1 of the address is set, blend a single leading pixel so that the
@ loop below can use word accesses on a 4-byte boundary
+ tst r0 , # 2
586
+ beq 1f
587
+ sub r2 , # 1
588
+ .if \semit == 0
589
+ bic r3 , r3 , r4
590
+ semitrans0p r3 , r1 , r6 , lr
591
+ .elseif \semit == 1 || \semit == 3
592
+ bic r3 , r3 , r4
593
+ semitrans13p r3 , r1 , r6 , lr
594
+ .elseif \semit == 2
595
+ semitrans2p r3 , r1 , r4 , r5 , r6 , r7 , lr
596
+ .endif
597
+ strh r3 , [ r0 ], # 2
598
@ ---- two pixels per iteration ----
+ 1 :
599
+ ldr r3 , [ r0 ]
600
+ pld_ r0 , # 32
601
@ the flags set here must survive the blend macros below (none of them sets
@ flags); they decide both the conditional store and the loop branch
+ subs r2 , r2 , # 2
602
+ .if \semit == 0
603
+ bic r3 , r3 , r4
604
+ semitrans0p r3 , r1 , r6 , lr
605
+ .elseif \semit == 1 || \semit == 3
606
+ bic r3 , r3 , r4
607
+ semitrans13p r3 , r1 , r6 , lr
608
+ .elseif \semit == 2
609
+ semitrans2p r3 , r1 , r4 , r5 , r6 , r7 , lr
610
+ .endif
611
+ strpl r3 , [ r0 ], # 4
612
+ bpl 1b
613
@ the loop exits with r2 = -1 when one pixel is left, -2 otherwise; the left
@ over pixel is the already blended low halfword of r3
+ 2 :
614
+ tst r2 , # 1
615
+ strhne r3 , [ r0 ], # 2
616
+
617
+ ldmfd sp ! , {r4 - r7 , pc}
618
+ .cfi_endproc
619
+ .endm
620
+
621
@ Instantiate one untextured span routine per blend mode. Mode 2 expands to
@ semitrans2p, which uses uqsub16, so it is only emitted when HAVE_ARMV6 is
@ defined.
+ poly_untex_st_m poly_untex_st0_asm , 0
622
+ poly_untex_st_m poly_untex_st1_asm , 1
623
+ poly_untex_st_m poly_untex_st3_asm , 3
624
+ #ifdef HAVE_ARMV6
625
+ poly_untex_st_m poly_untex_st2_asm , 2
626
+ #endif
627
+
628
+
417
629
.macro poly_4_8bpp_asm_m name bpp light semit
418
630
FUNCTION(\name): @ (void * d , const gpu_unai_inner_t * inn , int count)
419
631
.cfi_startproc
@@ -568,15 +780,15 @@ v_\name: @ r3=clut, r4=u, r5=u_inc, r6=u_msk, r7=v, lr=v_masked
568
780
.cfi_endproc
569
781
.endm
570
782
571
- poly_4_8bpp_asm_m poly_4bpp_asm , 4 , 0 , - 1
572
- poly_4_8bpp_asm_m poly_8bpp_asm , 8 , 0 , - 1
783
+ poly_4_8bpp_asm_m poly_4bpp_asm , 4 , 0 , - 1
784
+ poly_4_8bpp_asm_m poly_4bpp_l0_st0_asm , 4 , 0 , 0
785
+ poly_4_8bpp_asm_m poly_8bpp_asm , 8 , 0 , - 1
786
+ poly_4_8bpp_asm_m poly_8bpp_l0_st0_asm , 8 , 0 , 0
573
787
574
788
#ifdef HAVE_ARMV6
575
789
576
- poly_4_8bpp_asm_m poly_4bpp_l0_st0_asm , 4 , 0 , 0
577
790
poly_4_8bpp_asm_m poly_4bpp_l1_std_asm , 4 , 1 , - 1
578
791
poly_4_8bpp_asm_m poly_4bpp_l1_st0_asm , 4 , 1 , 0
579
- poly_4_8bpp_asm_m poly_8bpp_l0_st0_asm , 8 , 0 , 0
580
792
poly_4_8bpp_asm_m poly_8bpp_l1_std_asm , 8 , 1 , - 1
581
793
poly_4_8bpp_asm_m poly_8bpp_l1_st0_asm , 8 , 1 , 0
582
794
0 commit comments