Skip to content

Commit 6aa72fa

Browse files
committed
gpu_unai: avoid unneeded gouraud calculations
1 parent f060f4b commit 6aa72fa

File tree

3 files changed

+71
-15
lines changed

3 files changed

+71
-15
lines changed

Makefile

+2
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,10 @@ else
273273
CFLAGS += -DGPU_UNAI_NO_OLD
274274
endif
275275
plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -DUSE_GPULIB=1
276+
ifneq ($(DEBUG), 1)
276277
plugins/gpu_unai/gpulib_if.o \
277278
plugins/gpu_unai/old/if.o: CFLAGS += -O3
279+
endif
278280
CC_LINK = $(CXX)
279281
endif
280282

plugins/gpu_unai/gpu_raster_polygon.h

+6-4
Original file line numberDiff line numberDiff line change
@@ -223,13 +223,14 @@ static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVerte
223223
/*----------------------------------------------------------------------
224224
gpuDrawPolyF - Flat-shaded, untextured poly
225225
----------------------------------------------------------------------*/
226-
void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
226+
void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
227+
PolyType ptype = POLYTYPE_F)
227228
{
228229
// Set up bgr555 color to be used across calls in inner driver
229230
gpu_unai.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0]));
230231

231232
PolyVertex vbuf[4];
232-
polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad);
233+
polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
233234

234235
int total_passes = is_quad ? 2 : 1;
235236
int cur_pass = 0;
@@ -374,7 +375,8 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad
374375
/*----------------------------------------------------------------------
375376
gpuDrawPolyFT - Flat-shaded, textured poly
376377
----------------------------------------------------------------------*/
377-
void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
378+
void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
379+
PolyType ptype = POLYTYPE_FT)
378380
{
379381
// r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
380382
gpu_unai.r8 = packet.U1[0];
@@ -386,7 +388,7 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua
386388
gpu_unai.b5 = packet.U1[2] >> 3;
387389

388390
PolyVertex vbuf[4];
389-
polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad);
391+
polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
390392

391393
int total_passes = is_quad ? 2 : 1;
392394
int cur_pass = 0;

plugins/gpu_unai/gpulib_if.cpp

+63-11
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,12 @@ static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word)
376376

377377
#include "../gpulib/gpu_timing.h"
378378

379+
// Decide whether lighting should be used for the raw color word rgb_raw.
// The lower 3 bits of each color byte selected by the 0xF8F8F8 mask are
// stripped; the result is true unless every remaining byte equals 0x80.
// NOTE(review): callers pass le32_raw() values and both constants go through
// HTOLE32, so the compare is presumably done in stored byte order - confirm.
380+
static inline bool need_lighting(u32 rgb_raw)
381+
{
382+
return (rgb_raw & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080);
383+
}
384+
379385
static inline void textured_sprite(int &cpu_cycles_sum, int &cpu_cycles)
380386
{
381387
u32 PRIM = le32_to_u32(gpu_unai.PacketBuffer.U4[0]) >> 24;
@@ -395,9 +401,7 @@ static inline void textured_sprite(int &cpu_cycles_sum, int &cpu_cycles)
395401
// NOTE: I've changed all textured sprite draw commands here and
396402
// elsewhere to use proper behavior, but left poly commands
397403
// alone, I don't want to slow rendering down too much. (TODO)
398-
//if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
399-
// Strip lower 3 bits of each color and determine if lighting should be used:
400-
if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
404+
if (need_lighting(le32_raw(gpu_unai.PacketBuffer.U4[0])))
401405
driver_idx |= Lighting;
402406
PS driver = gpuSpriteDrivers[driver_idx];
403407
PtrUnion packet = { .ptr = (void*)&gpu_unai.PacketBuffer };
@@ -539,13 +543,22 @@ int do_cmd_list(u32 *list_, int list_len,
539543
// this is an untextured poly, so CF_LIGHT (texture blend)
540544
// shouldn't apply. Until the original array of template
541545
// instantiation ptrs is fixed, we're stuck with this. (TODO)
546+
u8 gouraud = 129;
547+
u32 xor_ = 0, rgb0 = le32_raw(gpu_unai.PacketBuffer.U4[0]);
548+
for (i = 1; i < 3; i++)
549+
xor_ |= rgb0 ^ le32_raw(gpu_unai.PacketBuffer.U4[i * 2]);
550+
if ((xor_ & HTOLE32(0xf8f8f8)) == 0)
551+
gouraud = 0;
542552
PP driver = gpuPolySpanDrivers[
543553
//(gpu_unai.blit_mask?1024:0) |
544554
Dithering |
545555
Blending_Mode |
546-
gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
556+
gpu_unai.Masking | Blending | gouraud | gpu_unai.PixelMSB
547557
];
548-
gpuDrawPolyG(packet, driver, false);
558+
if (gouraud)
559+
gpuDrawPolyG(packet, driver, false);
560+
else
561+
gpuDrawPolyF(packet, driver, false, POLYTYPE_G);
549562
gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
550563
} break;
551564

@@ -555,13 +568,28 @@ int do_cmd_list(u32 *list_, int list_len,
555568
case 0x37: { // Gouraud-shaded, textured 3-pt poly
556569
gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
557570
gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
571+
u8 lighting = Lighting;
572+
u8 gouraud = lighting ? (1<<7) : 0;
573+
if (lighting) {
574+
u32 xor_ = 0, rgb0 = le32_raw(gpu_unai.PacketBuffer.U4[0]);
575+
for (i = 1; i < 3; i++)
576+
xor_ |= rgb0 ^ le32_raw(gpu_unai.PacketBuffer.U4[i * 3]);
577+
if ((xor_ & HTOLE32(0xf8f8f8)) == 0) {
578+
gouraud = 0;
579+
if (!need_lighting(rgb0))
580+
lighting = 0;
581+
}
582+
}
558583
PP driver = gpuPolySpanDrivers[
559584
//(gpu_unai.blit_mask?1024:0) |
560585
Dithering |
561586
Blending_Mode | gpu_unai.TEXT_MODE |
562-
gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
587+
gpu_unai.Masking | Blending | gouraud | lighting | gpu_unai.PixelMSB
563588
];
564-
gpuDrawPolyGT(packet, driver, false);
589+
if (gouraud)
590+
gpuDrawPolyGT(packet, driver, false); // is_quad = false
591+
else
592+
gpuDrawPolyFT(packet, driver, false, POLYTYPE_GT);
565593
gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
566594
} break;
567595

@@ -570,13 +598,22 @@ int do_cmd_list(u32 *list_, int list_len,
570598
case 0x3A:
571599
case 0x3B: { // Gouraud-shaded 4-pt poly
572600
// See notes regarding '129' for 0x30..0x33 further above -senquack
601+
u8 gouraud = 129;
602+
u32 xor_ = 0, rgb0 = le32_raw(gpu_unai.PacketBuffer.U4[0]);
603+
for (i = 1; i < 4; i++)
604+
xor_ |= rgb0 ^ le32_raw(gpu_unai.PacketBuffer.U4[i * 2]);
605+
if ((xor_ & HTOLE32(0xf8f8f8)) == 0)
606+
gouraud = 0;
573607
PP driver = gpuPolySpanDrivers[
574608
//(gpu_unai.blit_mask?1024:0) |
575609
Dithering |
576610
Blending_Mode |
577-
gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
611+
gpu_unai.Masking | Blending | gouraud | gpu_unai.PixelMSB
578612
];
579-
gpuDrawPolyG(packet, driver, true); // is_quad = true
613+
if (gouraud)
614+
gpuDrawPolyG(packet, driver, true); // is_quad = true
615+
else
616+
gpuDrawPolyF(packet, driver, true, POLYTYPE_G);
580617
gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
581618
} break;
582619

@@ -598,13 +635,28 @@ int do_cmd_list(u32 *list_, int list_len,
598635
break;
599636
}
600637
gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
638+
u8 lighting = Lighting;
639+
u8 gouraud = lighting ? (1<<7) : 0;
640+
if (lighting) {
641+
u32 xor_ = 0, rgb0 = le32_raw(gpu_unai.PacketBuffer.U4[0]);
642+
for (i = 1; i < 4; i++)
643+
xor_ |= rgb0 ^ le32_raw(gpu_unai.PacketBuffer.U4[i * 3]);
644+
if ((xor_ & HTOLE32(0xf8f8f8)) == 0) {
645+
gouraud = 0;
646+
if (!need_lighting(rgb0))
647+
lighting = 0;
648+
}
649+
}
601650
PP driver = gpuPolySpanDrivers[
602651
//(gpu_unai.blit_mask?1024:0) |
603652
Dithering |
604653
Blending_Mode | gpu_unai.TEXT_MODE |
605-
gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
654+
gpu_unai.Masking | Blending | gouraud | lighting | gpu_unai.PixelMSB
606655
];
607-
gpuDrawPolyGT(packet, driver, true); // is_quad = true
656+
if (gouraud)
657+
gpuDrawPolyGT(packet, driver, true); // is_quad = true
658+
else
659+
gpuDrawPolyFT(packet, driver, true, POLYTYPE_GT);
608660
gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
609661
} break;
610662

0 commit comments

Comments
 (0)