Skip to content

Commit b3bce60

Browse files
pcercuei authored and notaz committed
unai: Use 8.8 RGB triplet format for gouraud shading
Use a new 'gcol_t' type which fits in 64 bits, and encode each color component into an 8.8 fixed-point format. This boosts the precision of the gouraud shading algorithm, at almost zero cost. Fixes #320. Signed-off-by: Paul Cercueil <paul@crapouillou.net>
1 parent 846344d commit b3bce60

File tree

3 files changed

+65
-62
lines changed

3 files changed

+65
-62
lines changed

plugins/gpu_unai/gpu_inner.h

+6-9
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,10 @@
5959
#include "gpu_inner_blend_arm.h"
6060
#include "gpu_inner_light_arm.h"
6161
#define gpuBlending gpuBlendingARM
62-
#define gpuLightingRGB gpuLightingRGBARM
6362
#define gpuLightingTXT gpuLightingTXTARM
64-
#define gpuLightingTXTGouraud gpuLightingTXTGouraudARM
6563
#else
6664
#define gpuBlending gpuBlendingGeneric
67-
#define gpuLightingRGB gpuLightingRGBGeneric
6865
#define gpuLightingTXT gpuLightingTXTGeneric
69-
#define gpuLightingTXTGouraud gpuLightingTXTGouraudGeneric
7066
#endif
7167

7268
// Non-dithering lighting and blending functions preserve uSrc
@@ -537,8 +533,8 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
537533
else
538534
{
539535
// UNTEXTURED, GOURAUD
540-
u32 l_gCol = gpu_unai.gCol;
541-
u32 l_gInc = gpu_unai.gInc;
536+
gcol_t l_gCol = gpu_unai.gCol;
537+
gcol_t l_gInc = gpu_unai.gInc;
542538

543539
do {
544540
uint_fast16_t uDst, uSrc;
@@ -570,7 +566,7 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
570566

571567
endpolynotextgou:
572568
pDst++;
573-
l_gCol += l_gInc;
569+
l_gCol.raw += l_gInc.raw;
574570
}
575571
while (--count);
576572
}
@@ -594,7 +590,7 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
594590
u8 r5, g5, b5;
595591
u8 r8, g8, b8;
596592

597-
u32 l_gInc, l_gCol;
593+
gcol_t l_gInc, l_gCol;
598594

599595
if (CF_LIGHT) {
600596
if (CF_GOURAUD) {
@@ -678,7 +674,8 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
678674
pDst++;
679675
l_u = (l_u + l_u_inc) & l_u_msk;
680676
l_v = (l_v + l_v_inc) & l_v_msk;
681-
if (CF_LIGHT && CF_GOURAUD) l_gCol += l_gInc;
677+
if (CF_LIGHT && CF_GOURAUD)
678+
l_gCol.raw += l_gInc.raw;
682679
}
683680
while (--count);
684681
}

plugins/gpu_unai/gpu_inner_light.h

+46-49
Original file line numberDiff line numberDiff line change
@@ -72,90 +72,89 @@ static void SetupLightLUT()
7272

7373

7474
////////////////////////////////////////////////////////////////////////////////
75-
// Create packed Gouraud fixed-pt 8.3:8.3:8.2 rgb triplet
75+
// Create packed Gouraud fixed-pt 8.8 rgb triplet
7676
//
7777
// INPUT:
7878
// 'r','g','b' are 8.10 fixed-pt color components (r shown here)
7979
// 'r' input: --------------rrrrrrrrXXXXXXXXXX
8080
// ^ bit 31
8181
// RETURNS:
82-
// u32 output: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
83-
// ^ bit 31
82+
// gcol_t output: ccccccccXXXXXXXX for c in [r, g, b]
83+
// ^ bit 16
8484
// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '-' don't care
8585
////////////////////////////////////////////////////////////////////////////////
86-
GPU_INLINE u32 gpuPackGouraudCol(u32 r, u32 g, u32 b)
86+
GPU_INLINE gcol_t gpuPackGouraudCol(u32 r, u32 g, u32 b)
8787
{
88-
return ((u32)(b>> 8)&(0x03ff ))
89-
| ((u32)(g<< 3)&(0x07ff<<10))
90-
| ((u32)(r<<14)&(0x07ff<<21));
88+
return (gcol_t){
89+
(u16)(r >> 2),
90+
(u16)(g >> 2),
91+
(u16)(b >> 2),
92+
};
9193
}
9294

93-
9495
////////////////////////////////////////////////////////////////////////////////
95-
// Create packed increment for Gouraud fixed-pt 8.3:8.3:8.2 rgb triplet
96+
// Create packed increment for Gouraud fixed-pt 8.8 rgb triplet
9697
//
9798
// INPUT:
9899
// Sign-extended 8.10 fixed-pt r,g,b color increment values (only dr is shown)
99100
// 'dr' input: ssssssssssssssrrrrrrrrXXXXXXXXXX
100101
// ^ bit 31
101102
// RETURNS:
102-
// u32 output: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
103-
// ^ bit 31
103+
// gcol_t output: ccccccccXXXXXXXX for c in [r, g, b]
104+
// ^ bit 16
104105
// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and 's' sign bits
105106
//
106107
// NOTE: The correctness of this code/method has not been fully verified,
107108
// having been merely factored out from original code in
108109
// poly-drawing functions. Feel free to check/improve it -senquack
109110
////////////////////////////////////////////////////////////////////////////////
110-
GPU_INLINE u32 gpuPackGouraudColInc(s32 dr, s32 dg, s32 db)
111+
GPU_INLINE gcol_t gpuPackGouraudColInc(s32 dr, s32 dg, s32 db)
111112
{
112-
u32 dr_tmp = (u32)(dr << 14)&(0xffffffff<<21); if (dr < 0) dr_tmp += 1<<21;
113-
u32 dg_tmp = (u32)(dg << 3)&(0xffffffff<<10); if (dg < 0) dg_tmp += 1<<10;
114-
u32 db_tmp = (u32)(db >> 8)&(0xffffffff ); if (db < 0) db_tmp += 1<< 0;
115-
return db_tmp + dg_tmp + dr_tmp;
113+
return (gcol_t){
114+
(u16)((dr >> 2) + (dr < 0)),
115+
(u16)((dg >> 2) + (dg < 0)),
116+
(u16)((db >> 2) + (db < 0)),
117+
};
116118
}
117119

118-
119120
////////////////////////////////////////////////////////////////////////////////
120-
// Extract bgr555 color from Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet
121+
// Extract bgr555 color from Gouraud gcol_t fixed-pt 8.8 rgb triplet
121122
//
122123
// INPUT:
123-
// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
124-
// ^ bit 31
124+
// 'gCol' input: ccccccccXXXXXXXX for c in [r, g, b]
125+
// ^ bit 16
125126
// RETURNS:
126127
// u16 output: 0bbbbbgggggrrrrr
127128
// ^ bit 16
128129
// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '0' zero
129130
////////////////////////////////////////////////////////////////////////////////
130-
GPU_INLINE uint_fast16_t gpuLightingRGBGeneric(u32 gCol)
131+
GPU_INLINE uint_fast16_t gpuLightingRGB(gcol_t gCol)
131132
{
132-
return ((gCol<< 5)&0x7C00) |
133-
((gCol>>11)&0x03E0) |
134-
(gCol>>27);
133+
return (gCol.c.r >> 11) |
134+
((gCol.c.g >> 6) & 0x3e0) |
135+
((gCol.c.b >> 1) & 0x7c00);
135136
}
136137

137-
138138
////////////////////////////////////////////////////////////////////////////////
139-
// Convert packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet in 'gCol'
140-
// to padded u32 5.4:5.4:5.4 bgr fixed-pt triplet, suitable for use
139+
// Convert Gouraud gcol_t fixed-pt 8.8 rgb triplet in 'gCol'
140+
// to padded u32 5.4 bgr fixed-pt triplet, suitable for use
141141
// with HQ 24-bit lighting/quantization.
142142
//
143143
// INPUT:
144-
// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
145-
// ^ bit 31
144+
// 'gCol' input: ccccccccXXXXXXXX for c in [r, g, b]
145+
// ^ bit 16
146146
// RETURNS:
147147
// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
148148
// ^ bit 31
149149
// Where 'X' are fixed-pt bits, '0' zero-padding, and '-' is don't care
150150
////////////////////////////////////////////////////////////////////////////////
151-
GPU_INLINE u32 gpuLightingRGB24(u32 gCol)
151+
GPU_INLINE u32 gpuLightingRGB24(gcol_t gCol)
152152
{
153-
return ((gCol<<19) & (0x1FF<<20)) |
154-
((gCol>> 2) & (0x1FF<<10)) |
155-
(gCol>>23);
153+
return (gCol.c.r >> 7)
154+
| ((gCol.c.g >> 7) << 10)
155+
| ((gCol.c.b >> 7) << 20);
156156
}
157157

158-
159158
////////////////////////////////////////////////////////////////////////////////
160159
// Apply fast (low-precision) 5-bit lighting to bgr555 texture color:
161160
//
@@ -181,25 +180,23 @@ GPU_INLINE uint_fast16_t gpuLightingTXTGeneric(uint_fast16_t uSrc, u8 r5, u8 g5,
181180
// Apply fast (low-precision) 5-bit Gouraud lighting to bgr555 texture color:
182181
//
183182
// INPUT:
184-
// 'gCol' is a packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet, value of
185-
// 15.0 is midpoint that does not modify color of texture
186-
// gCol input : rrrrrXXXXXXgggggXXXXXXbbbbbXXXXX
187-
// ^ bit 31
183+
// 'gCol' is a Gouraud fixed-pt 8.8 rgb triplet
184+
// 'gCol' input: ccccccccXXXXXXXX for c in [r, g, b]
185+
// ^ bit 16
188186
// 'uSrc' input: -bbbbbgggggrrrrr
189187
// ^ bit 16
190188
// RETURNS:
191189
// u16 output: 0bbbbbgggggrrrrr
192190
// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
193191
////////////////////////////////////////////////////////////////////////////////
194-
GPU_INLINE uint_fast16_t gpuLightingTXTGouraudGeneric(uint_fast16_t uSrc, u32 gCol)
192+
GPU_INLINE uint_fast16_t gpuLightingTXTGouraud(uint_fast16_t uSrc, gcol_t gCol)
195193
{
196-
return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | ((gCol>> 5)&0x1F)]<<10) |
197-
(gpu_unai.LightLUT[ (uSrc&0x03E0) | ((gCol>>16)&0x1F)]<< 5) |
198-
(gpu_unai.LightLUT[((uSrc&0x001F)<<5) | (gCol>>27) ]) |
194+
return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | (gCol.c.b >> 11)] << 10) |
195+
(gpu_unai.LightLUT[ (uSrc&0x03E0) | (gCol.c.g >> 11)] << 5) |
196+
(gpu_unai.LightLUT[((uSrc&0x001F)<<5) | (gCol.c.r >> 11)]) |
199197
(uSrc & 0x8000);
200198
}
201199

202-
203200
////////////////////////////////////////////////////////////////////////////////
204201
// Apply high-precision 8-bit lighting to bgr555 texture color,
205202
// returning a padded u32 5.4:5.4:5.4 bgr fixed-pt triplet
@@ -244,22 +241,22 @@ GPU_INLINE u32 gpuLightingTXT24(uint_fast16_t uSrc, u8 r8, u8 g8, u8 b8)
244241
// INPUT:
245242
// 'uSrc' input: -bbbbbgggggrrrrr
246243
// ^ bit 16
247-
// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
248-
// ^ bit 31
244+
// 'gCol' input: ccccccccXXXXXXXX for c in [r, g, b]
245+
// ^ bit 16
249246
// RETURNS:
250247
// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
251248
// ^ bit 31
252249
// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
253250
////////////////////////////////////////////////////////////////////////////////
254-
GPU_INLINE u32 gpuLightingTXT24Gouraud(uint_fast16_t uSrc, u32 gCol)
251+
GPU_INLINE u32 gpuLightingTXT24Gouraud(uint_fast16_t uSrc, gcol_t gCol)
255252
{
256253
uint_fast16_t r1 = uSrc&0x001F;
257254
uint_fast16_t g1 = uSrc&0x03E0;
258255
uint_fast16_t b1 = uSrc&0x7C00;
259256

260-
uint_fast16_t r2 = (gCol>>24) & 0xFF;
261-
uint_fast16_t g2 = (gCol>>13) & 0xFF;
262-
uint_fast16_t b2 = (gCol>> 2) & 0xFF;
257+
uint_fast16_t r2 = gCol.c.r >> 8;
258+
uint_fast16_t g2 = gCol.c.g >> 8;
259+
uint_fast16_t b2 = gCol.c.b >> 8;
263260

264261
u32 r3 = r1 * r2; if (r3 & 0xFFFFF000) r3 = ~0xFFFFF000;
265262
u32 g3 = g1 * g2; if (g3 & 0xFFFE0000) g3 = ~0xFFFE0000;

plugins/gpu_unai/gpu_unai.h

+13-4
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,14 @@
5353
#define u32 uint32_t
5454
#define s32 int32_t
5555
#define s64 int64_t
56+
#define u64 uint64_t
57+
58+
typedef union {
59+
struct {
60+
u16 r, g, b;
61+
} c;
62+
u64 raw;
63+
} gcol_t;
5664

5765
typedef struct {
5866
u32 v;
@@ -253,11 +261,12 @@ struct gpu_unai_t {
253261
s32 u_inc, v_inc;
254262

255263
// Color for Gouraud-shaded prims
264+
// Fixed-pt 8.8 rgb triplet
256265
// Packed fixed-pt 8.3:8.3:8.2 rgb triplet
257-
// layout: rrrrrrrrXXXggggggggXXXbbbbbbbbXX
258-
// ^ bit 31 ^ bit 0
259-
u32 gCol;
260-
u32 gInc; // Increment along scanline for gCol
266+
// layout: ccccccccXXXXXXXX for c in [r, g, b]
267+
// ^ bit 16
268+
gcol_t gCol;
269+
gcol_t gInc; // Increment along scanline for gCol
261270

262271
// Color for flat-shaded, texture-blended prims
263272
u8 r5, g5, b5; // 5-bit light for undithered prims

0 commit comments

Comments
 (0)