Skip to content

Commit f060f4b

Browse files
committed
rect quad optimizations
1 parent 8e6b629 commit f060f4b

File tree

8 files changed

+423
-172
lines changed

8 files changed

+423
-172
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ plugins/dfsound/out.o: CFLAGS += -DHAVE_LIBRETRO
229229
endif
230230

231231
# builtin gpu
232-
OBJS += plugins/gpulib/gpu.o plugins/gpulib/vout_pl.o
232+
OBJS += plugins/gpulib/gpu.o plugins/gpulib/vout_pl.o plugins/gpulib/prim.o
233233
ifeq "$(BUILTIN_GPU)" "neon"
234234
CFLAGS += -DGPU_NEON
235235
OBJS += plugins/gpu_neon/psx_gpu_if.o

include/compiler_features.h

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#ifdef __GNUC__
33
# define likely(x) __builtin_expect((x),1)
44
# define unlikely(x) __builtin_expect((x),0)
5+
# define preload __builtin_prefetch
56
# ifdef __clang__
67
# define noinline __attribute__((noinline))
78
# else
@@ -11,6 +12,7 @@
1112
#else
1213
# define likely(x) (x)
1314
# define unlikely(x) (x)
15+
/* No prefetch builtin on this compiler: accept the same call shapes as the
 * __GNUC__ variant (address plus optional extra arguments) and do nothing.
 * Must be a function-like macro; an object-like "preload (x)" would expand
 * preload(p) into "(x)(p)" and fail to compile. */
# define preload(...) ((void)0)
1416
# define noinline
1517
# define attr_unused
1618
#endif

plugins/gpu_neon/psx_gpu/psx_gpu_parse.c

+104-106
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "common.h"
1818
#include "../../gpulib/gpu_timing.h"
19+
#include "../../gpulib/gpu.h"
1920

2021
#ifndef command_lengths
2122
const u8 command_lengths[256] =
@@ -245,12 +246,27 @@ static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y,
245246
#define SET_Ex(r, v)
246247
#endif
247248

249+
/*
 * Draw one textured sprite described by a command packet.
 *
 * list[0]: bits 24..31 are forwarded to render_sprite as the command, and
 *          the full word is forwarded as its last argument.
 * list[1]: low bits feed x, bits 16.. feed y; each has the drawing offset
 *          from psx_gpu added and is then passed through sign_extend_11bit.
 * list[2]: byte 0 is u, byte 1 is v, bits 16.. are handed to set_clut.
 *
 * width/height are supplied by the caller. render_sprite receives pointers
 * to them, and whatever values they hold afterwards are used for the cycle
 * accounting (presumably the clipped size -- confirm in render_sprite).
 */
static void textured_sprite(psx_gpu_struct *psx_gpu, const u32 *list,
  s32 width, s32 height, u32 *cpu_cycles_sum, u32 *cpu_cycles)
{
  const u32 cmd_word = list[0];
  const u32 xy_word = list[1];
  const u32 uv_clut_word = list[2];

  s32 pos_x = sign_extend_11bit(xy_word + psx_gpu->offset_x);
  s32 pos_y = sign_extend_11bit((xy_word >> 16) + psx_gpu->offset_y);
  u8 tex_u = uv_clut_word & 0xff;
  u8 tex_v = (uv_clut_word >> 8) & 0xff;

  set_clut(psx_gpu, uv_clut_word >> 16);

  render_sprite(psx_gpu, pos_x, pos_y, tex_u, tex_v, &width, &height,
    cmd_word >> 24, cmd_word);

  /* width/height are read after render_sprite had the chance to update them */
  gput_sum(*cpu_cycles_sum, *cpu_cycles, gput_sprite(width, height));
}
262+
248263
u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
249264
s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command)
250265
{
251266
vertex_struct vertexes[4] __attribute__((aligned(16))) = {};
252267
u32 current_command = 0, command_length;
253268
u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last;
269+
u32 siplified_prim[4*4];
254270

255271
u32 *list_start = list;
256272
u32 *list_end = list + (size / 4);
@@ -328,8 +344,19 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
328344

329345
case 0x2C ... 0x2F:
330346
{
331-
set_clut(psx_gpu, list_s16[5]);
332-
set_texture(psx_gpu, list_s16[9]);
347+
u32 i, simplified_count;
348+
set_texture(psx_gpu, list[4] >> 16);
349+
if ((simplified_count = prim_try_simplify_quad_t(siplified_prim, list)))
350+
{
351+
for (i = 0; i < simplified_count; i++) {
352+
const u32 *list_ = &siplified_prim[i * 4];
353+
textured_sprite(psx_gpu, list_, list_[3] & 0x3FF,
354+
(list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles);
355+
}
356+
break;
357+
}
358+
359+
set_clut(psx_gpu, list[2] >> 16);
333360
set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
334361

335362
get_vertex_data_xy_uv(0, 2);
@@ -383,8 +410,19 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
383410

384411
case 0x3C ... 0x3F:
385412
{
386-
set_clut(psx_gpu, list_s16[5]);
387-
set_texture(psx_gpu, list_s16[11]);
413+
u32 i, simplified_count;
414+
set_texture(psx_gpu, list[5] >> 16);
415+
if ((simplified_count = prim_try_simplify_quad_gt(siplified_prim, list)))
416+
{
417+
for (i = 0; i < simplified_count; i++) {
418+
const u32 *list_ = &siplified_prim[i * 4];
419+
textured_sprite(psx_gpu, list_, list_[3] & 0x3FF,
420+
(list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles);
421+
}
422+
break;
423+
}
424+
425+
set_clut(psx_gpu, list[2] >> 16);
388426

389427
get_vertex_data_xy_uv_rgb(0, 0);
390428
get_vertex_data_xy_uv_rgb(1, 6);
@@ -525,23 +563,12 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
525563
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
526564
break;
527565
}
528-
529-
case 0x64 ... 0x67:
530-
{
531-
u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
532-
u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
533-
u32 uv = list_s16[4];
534-
s32 width = list_s16[6] & 0x3FF;
535-
s32 height = list_s16[7] & 0x1FF;
536-
537-
set_clut(psx_gpu, list_s16[5]);
538566

539-
render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF,
540-
&width, &height, current_command, list[0]);
541-
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
567+
case 0x64 ... 0x67:
568+
textured_sprite(psx_gpu, list, list[3] & 0x3FF, (list[3] >> 16) & 0x1FF,
569+
&cpu_cycles_sum, &cpu_cycles);
542570
break;
543-
}
544-
571+
545572
case 0x68 ... 0x6B:
546573
{
547574
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
@@ -565,22 +592,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
565592
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
566593
break;
567594
}
568-
569-
case 0x74 ... 0x77:
570-
{
571-
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
572-
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
573-
u32 uv = list_s16[4];
574-
s32 width = 8, height = 8;
575595

576-
set_clut(psx_gpu, list_s16[5]);
577-
578-
render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF,
579-
&width, &height, current_command, list[0]);
580-
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
596+
case 0x74 ... 0x77:
597+
textured_sprite(psx_gpu, list, 8, 8, &cpu_cycles_sum, &cpu_cycles);
581598
break;
582-
}
583-
599+
584600
case 0x78 ... 0x7B:
585601
{
586602
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
@@ -594,19 +610,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
594610
}
595611

596612
case 0x7C ... 0x7F:
597-
{
598-
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
599-
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
600-
u32 uv = list_s16[4];
601-
s32 width = 16, height = 16;
602-
603-
set_clut(psx_gpu, list_s16[5]);
604-
605-
render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF,
606-
&width, &height, current_command, list[0]);
607-
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
613+
textured_sprite(psx_gpu, list, 16, 16, &cpu_cycles_sum, &cpu_cycles);
608614
break;
609-
}
610615

611616
#ifdef PCSX
612617
case 0x1F: // irq?
@@ -1155,12 +1160,31 @@ static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y,
11551160
}
11561161
#endif
11571162

1163+
/*
 * Enhanced-path counterpart of the textured sprite helper: performs the
 * regular render_sprite call and cycle accounting, then additionally calls
 * do_sprite_enhanced when check_enhanced_range accepts the x span.
 *
 * Packet layout used here:
 * list[0]: bits 24..31 go to render_sprite as the command; the full word is
 *          passed on to both render_sprite and do_sprite_enhanced.
 * list[1]: low bits feed x, bits 16.. feed y (drawing offset added, then
 *          sign_extend_11bit).
 * list[2]: byte 0 is u, byte 1 is v, bits 16.. are handed to set_clut.
 */
static void textured_sprite_enh(psx_gpu_struct *psx_gpu, const u32 *list,
  s32 width, s32 height, u32 *cpu_cycles_sum, u32 *cpu_cycles)
{
  const u32 cmd_word = list[0];
  const u32 xy_word = list[1];
  const u32 uv_clut_word = list[2];

  /* Caller-supplied size, kept aside because render_sprite gets pointers to
   * width/height and may change them; do_sprite_enhanced gets these copies. */
  const s32 requested_w = width;
  const s32 requested_h = height;

  s32 pos_x = sign_extend_11bit(xy_word + psx_gpu->offset_x);
  s32 pos_y = sign_extend_11bit((xy_word >> 16) + psx_gpu->offset_y);
  u8 tex_u = uv_clut_word & 0xff;
  u8 tex_v = (uv_clut_word >> 8) & 0xff;

  set_clut(psx_gpu, uv_clut_word >> 16);

  render_sprite(psx_gpu, pos_x, pos_y, tex_u, tex_v, &width, &height,
    cmd_word >> 24, cmd_word);
  gput_sum(*cpu_cycles_sum, *cpu_cycles, gput_sprite(width, height));

  /* The range check uses width as left by render_sprite, not requested_w. */
  if (check_enhanced_range(psx_gpu, pos_x, pos_x + width))
  {
    do_sprite_enhanced(psx_gpu, pos_x, pos_y, tex_u, tex_v,
      requested_w, requested_h, cmd_word);
  }
}
1180+
11581181
u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
11591182
s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command)
11601183
{
11611184
vertex_struct vertexes[4] __attribute__((aligned(16))) = {};
11621185
u32 current_command = 0, command_length;
11631186
u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last;
1187+
u32 siplified_prim[4*4];
11641188

11651189
u32 *list_start = list;
11661190
u32 *list_end = list + (size / 4);
@@ -1265,8 +1289,19 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
12651289

12661290
case 0x2C ... 0x2F:
12671291
{
1268-
set_clut(psx_gpu, list_s16[5]);
1269-
set_texture(psx_gpu, list_s16[9]);
1292+
u32 i, simplified_count;
1293+
set_texture(psx_gpu, list[4] >> 16);
1294+
if ((simplified_count = prim_try_simplify_quad_t(siplified_prim, list)))
1295+
{
1296+
for (i = 0; i < simplified_count; i++) {
1297+
const u32 *list_ = &siplified_prim[i * 4];
1298+
textured_sprite_enh(psx_gpu, list_, list_[3] & 0x3FF,
1299+
(list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles);
1300+
}
1301+
break;
1302+
}
1303+
1304+
set_clut(psx_gpu, list[2] >> 16);
12701305
set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
12711306

12721307
get_vertex_data_xy_uv(0, 2);
@@ -1318,8 +1353,19 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
13181353

13191354
case 0x3C ... 0x3F:
13201355
{
1321-
set_clut(psx_gpu, list_s16[5]);
1322-
set_texture(psx_gpu, list_s16[11]);
1356+
u32 i, simplified_count;
1357+
set_texture(psx_gpu, list[5] >> 16);
1358+
if ((simplified_count = prim_try_simplify_quad_gt(siplified_prim, list)))
1359+
{
1360+
for (i = 0; i < simplified_count; i++) {
1361+
const u32 *list_ = &siplified_prim[i * 4];
1362+
textured_sprite_enh(psx_gpu, list_, list_[3] & 0x3FF,
1363+
(list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles);
1364+
}
1365+
break;
1366+
}
1367+
1368+
set_clut(psx_gpu, list[2] >> 16);
13231369

13241370
get_vertex_data_xy_uv_rgb(0, 0);
13251371
get_vertex_data_xy_uv_rgb(1, 6);
@@ -1475,30 +1521,12 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
14751521
}
14761522
break;
14771523
}
1478-
1479-
case 0x64 ... 0x67:
1480-
{
1481-
u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
1482-
u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
1483-
u8 u = list_s16[4];
1484-
u8 v = list_s16[4] >> 8;
1485-
s32 width = list_s16[6] & 0x3FF;
1486-
s32 height = list_s16[7] & 0x1FF;
1487-
1488-
set_clut(psx_gpu, list_s16[5]);
14891524

1490-
render_sprite(psx_gpu, x, y, u, v,
1491-
&width, &height, current_command, list[0]);
1492-
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
1493-
1494-
if (check_enhanced_range(psx_gpu, x, x + width)) {
1495-
width = list_s16[6] & 0x3FF;
1496-
height = list_s16[7] & 0x1FF;
1497-
do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]);
1498-
}
1525+
case 0x64 ... 0x67:
1526+
textured_sprite_enh(psx_gpu, list, list[3] & 0x3FF, (list[3] >> 16) & 0x1FF,
1527+
&cpu_cycles_sum, &cpu_cycles);
14991528
break;
1500-
}
1501-
1529+
15021530
case 0x68 ... 0x6B:
15031531
{
15041532
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
@@ -1528,26 +1556,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
15281556
do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]);
15291557
break;
15301558
}
1531-
1532-
case 0x74 ... 0x77:
1533-
{
1534-
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
1535-
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
1536-
u8 u = list_s16[4];
1537-
u8 v = list_s16[4] >> 8;
1538-
s32 width = 8, height = 8;
15391559

1540-
set_clut(psx_gpu, list_s16[5]);
1541-
1542-
render_sprite(psx_gpu, x, y, u, v,
1543-
&width, &height, current_command, list[0]);
1544-
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
1545-
1546-
if (check_enhanced_range(psx_gpu, x, x + 8))
1547-
do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]);
1560+
case 0x74 ... 0x77:
1561+
textured_sprite_enh(psx_gpu, list, 8, 8, &cpu_cycles_sum, &cpu_cycles);
15481562
break;
1549-
}
1550-
1563+
15511564
case 0x78 ... 0x7B:
15521565
{
15531566
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
@@ -1562,25 +1575,10 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
15621575
do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]);
15631576
break;
15641577
}
1565-
1566-
case 0x7C ... 0x7F:
1567-
{
1568-
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
1569-
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
1570-
u8 u = list_s16[4];
1571-
u8 v = list_s16[4] >> 8;
1572-
s32 width = 16, height = 16;
15731578

1574-
set_clut(psx_gpu, list_s16[5]);
1575-
1576-
render_sprite(psx_gpu, x, y, u, v,
1577-
&width, &height, current_command, list[0]);
1578-
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
1579-
1580-
if (check_enhanced_range(psx_gpu, x, x + 16))
1581-
do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]);
1579+
case 0x7C ... 0x7F:
1580+
textured_sprite_enh(psx_gpu, list, 16, 16, &cpu_cycles_sum, &cpu_cycles);
15821581
break;
1583-
}
15841582

15851583
case 0x80 ... 0x9F: // vid -> vid
15861584
case 0xA0 ... 0xBF: // sys -> vid

0 commit comments

Comments
 (0)