Skip to content

Commit 7ea9ff0

Browse files
authored
Feature/var byte encoding (shader-slang#665)
* * Remove the need for IRHighLevelDecoration in Emit * Use the IRLayoutDecoration for GeometryShaderPrimitiveTypeModifier * Initial look at variable byte encoding, and simple unit test. * Fixing problems with comparison due to naming differences with slang/fxc. * * More tests and perf improvements for byte encoding. * Mechanism to detect processor and processor features in main slang header. * Split out cpu based defines into slang-cpu-defines.h so as not to pollute slang.h * Support for variable byte encoding on serialization. * Removed unused flag. * Fix warning. * Fix calcMsByte32 for 0 values without using intrinsic. * Fix a mistake in calculating maximum instruction size.
1 parent 4cb2a19 commit 7ea9ff0

11 files changed

+1393
-57
lines changed

source/core/core.vcxproj

+3-1
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@
182182
<ClInclude Include="list.h" />
183183
<ClInclude Include="platform.h" />
184184
<ClInclude Include="secure-crt.h" />
185+
<ClInclude Include="slang-byte-encode-util.h" />
185186
<ClInclude Include="slang-free-list.h" />
186187
<ClInclude Include="slang-io.h" />
187188
<ClInclude Include="slang-math.h" />
@@ -197,6 +198,7 @@
197198
</ItemGroup>
198199
<ItemGroup>
199200
<ClCompile Include="platform.cpp" />
201+
<ClCompile Include="slang-byte-encode-util.cpp" />
200202
<ClCompile Include="slang-free-list.cpp" />
201203
<ClCompile Include="slang-io.cpp" />
202204
<ClCompile Include="slang-memory-arena.cpp" />
@@ -213,4 +215,4 @@
213215
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
214216
<ImportGroup Label="ExtensionTargets">
215217
</ImportGroup>
216-
</Project>
218+
</Project>

source/core/core.vcxproj.filters

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<?xml version="1.0" encoding="utf-8"?>
1+
<?xml version="1.0" encoding="utf-8"?>
22
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
33
<ItemGroup>
44
<Filter Include="Header Files">
@@ -81,6 +81,9 @@
8181
<ClInclude Include="type-traits.h">
8282
<Filter>Header Files</Filter>
8383
</ClInclude>
84+
<ClInclude Include="slang-byte-encode-util.h">
85+
<Filter>Header Files</Filter>
86+
</ClInclude>
8487
</ItemGroup>
8588
<ItemGroup>
8689
<ClCompile Include="platform.cpp">
@@ -113,6 +116,9 @@
113116
<ClCompile Include="token-reader.cpp">
114117
<Filter>Source Files</Filter>
115118
</ClCompile>
119+
<ClCompile Include="slang-byte-encode-util.cpp">
120+
<Filter>Source Files</Filter>
121+
</ClCompile>
116122
</ItemGroup>
117123
<ItemGroup>
118124
<None Include="core.natvis">

source/core/list.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ namespace Slang
472472
if (bufferSize > _count && _count > 0)
473473
{
474474
T * newBuffer = Allocate(_count);
475-
for (int i = 0; i < _count; i++)
475+
for (UInt i = 0; i < _count; i++)
476476
newBuffer[i] = static_cast<T&&>(buffer[i]);
477477
FreeBuffer();
478478
buffer = newBuffer;
+283
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
#include "slang-byte-encode-util.h"
2+
3+
4+
5+
namespace Slang {
6+
7+
// Descriptions of algorithms here...
8+
// https://github.com/stoklund/varint
9+
10+
// Selects how the decoders below fetch multi-byte payloads:
//   1 - a single (possibly unaligned) 32 bit load followed by a mask. Chosen when the
//       target is little endian and permits unaligned access. The original author
//       measured this as around 40% faster on an i7.
//   0 - byte-by-byte assembly, which works on every cpu.
// A value already supplied for the macro is kept when the fast path is not selected.
#if SLANG_LITTLE_ENDIAN && SLANG_UNALIGNED_ACCESS
#   define SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS 1
#elif !defined(SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS)
#   define SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS 0
#endif
18+
19+
#define SLANG_REPEAT_2(n) n, n
20+
#define SLANG_REPEAT_4(n) SLANG_REPEAT_2(n), SLANG_REPEAT_2(n)
21+
#define SLANG_REPEAT_8(n) SLANG_REPEAT_4(n), SLANG_REPEAT_4(n)
22+
#define SLANG_REPEAT_16(n) SLANG_REPEAT_8(n), SLANG_REPEAT_8(n)
23+
#define SLANG_REPEAT_32(n) SLANG_REPEAT_16(n), SLANG_REPEAT_16(n)
24+
#define SLANG_REPEAT_64(n) SLANG_REPEAT_32(n), SLANG_REPEAT_32(n)
25+
#define SLANG_REPEAT_128(n) SLANG_REPEAT_64(n), SLANG_REPEAT_64(n)
26+
27+
/* static */const int8_t ByteEncodeUtil::s_msb8[256] =
28+
{
29+
- 1,
30+
0,
31+
SLANG_REPEAT_2(1),
32+
SLANG_REPEAT_4(2),
33+
SLANG_REPEAT_8(3),
34+
SLANG_REPEAT_16(4),
35+
SLANG_REPEAT_32(5),
36+
SLANG_REPEAT_64(6),
37+
SLANG_REPEAT_128(7),
38+
};
39+
40+
/* static */size_t ByteEncodeUtil::calcEncodeLiteSizeUInt32(const uint32_t* in, size_t num)
41+
{
42+
size_t totalNumEncodeBytes = 0;
43+
44+
for (size_t i = 0; i < num; i++)
45+
{
46+
const uint32_t v = in[i];
47+
48+
if (v < kLiteCut1)
49+
{
50+
totalNumEncodeBytes += 1;
51+
}
52+
else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1))
53+
{
54+
totalNumEncodeBytes += 2;
55+
}
56+
else
57+
{
58+
totalNumEncodeBytes += calcNonZeroMsByte32(v) + 2;
59+
}
60+
}
61+
return totalNumEncodeBytes;
62+
}
63+
64+
/* static */size_t ByteEncodeUtil::encodeLiteUInt32(const uint32_t* in, size_t num, uint8_t* encodeOut)
65+
{
66+
uint8_t* encodeStart = encodeOut;
67+
68+
for (size_t i = 0; i < num; ++i)
69+
{
70+
uint32_t v = in[i];
71+
72+
if(v < kLiteCut1)
73+
{
74+
*encodeOut++ = uint8_t(v);
75+
}
76+
else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1))
77+
{
78+
v -= kLiteCut1;
79+
80+
encodeOut[0] = uint8_t(kLiteCut1 + (v >> 8));
81+
encodeOut[1] = uint8_t(v);
82+
encodeOut += 2;
83+
}
84+
else
85+
{
86+
uint8_t* encodeOutStart = encodeOut++;
87+
while (v)
88+
{
89+
*encodeOut++ = uint8_t(v);
90+
v >>= 8;
91+
}
92+
// Finally write the size to the start
93+
const int numBytes = int(encodeOut - encodeOutStart);
94+
encodeOutStart[0] = uint8_t(kLiteCut2 + (numBytes - 2));
95+
}
96+
}
97+
return size_t(encodeOut - encodeStart);
98+
}
99+
100+
/* static */void ByteEncodeUtil::encodeLiteUInt32(const uint32_t* in, size_t num, List<uint8_t>& encodeArrayOut)
101+
{
102+
// Make sure there is at least enough space for all bytes
103+
encodeArrayOut.SetSize(num);
104+
105+
uint8_t* encodeOut = encodeArrayOut.begin();
106+
uint8_t* encodeOutEnd = encodeArrayOut.end();
107+
108+
for (size_t i = 0; i < num; ++i)
109+
{
110+
// Check if we need some more space
111+
if (encodeOut + kMaxLiteEncodeUInt32 > encodeOutEnd)
112+
{
113+
const size_t offset = size_t(encodeOut - encodeArrayOut.begin());
114+
115+
const UInt oldCapacity = encodeArrayOut.Capacity();
116+
117+
// Make some more space
118+
encodeArrayOut.Reserve(oldCapacity + (oldCapacity >> 1) + kMaxLiteEncodeUInt32);
119+
// Make the size the capacity
120+
const UInt capacity = encodeArrayOut.Capacity();
121+
encodeArrayOut.SetSize(capacity);
122+
123+
encodeOut = encodeArrayOut.begin() + offset;
124+
encodeOutEnd = encodeArrayOut.end();
125+
}
126+
127+
uint32_t v = in[i];
128+
129+
if (v < kLiteCut1)
130+
{
131+
*encodeOut++ = uint8_t(v);
132+
}
133+
else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1))
134+
{
135+
v -= kLiteCut1;
136+
137+
encodeOut[0] = uint8_t(kLiteCut1 + (v >> 8));
138+
encodeOut[1] = uint8_t(v);
139+
encodeOut += 2;
140+
}
141+
else
142+
{
143+
uint8_t* encodeOutStart = encodeOut++;
144+
while (v)
145+
{
146+
*encodeOut++ = uint8_t(v);
147+
v >>= 8;
148+
}
149+
// Finally write the size to the start
150+
const int numBytes = int(encodeOut - encodeOutStart);
151+
encodeOutStart[0] = uint8_t(kLiteCut2 + (numBytes - 2));
152+
}
153+
}
154+
155+
encodeArrayOut.SetSize(UInt(encodeOut - encodeArrayOut.begin()));
156+
encodeArrayOut.Compress();
157+
}
158+
159+
/* static */int ByteEncodeUtil::encodeLiteUInt32(uint32_t in, uint8_t out[kMaxLiteEncodeUInt32])
160+
{
161+
// 0-184 1 byte value = B0
162+
// 185 - 248 2 bytes value = 185 + 256 * (B0 - 185) + B1
163+
// 249 - 255 3 - 9 bytes value = (B0 - 249 + 2) little - endian bytes following B0.
164+
165+
if (in < kLiteCut1)
166+
{
167+
out[0] = uint8_t(in);
168+
return 1;
169+
}
170+
else if (in <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1))
171+
{
172+
in -= kLiteCut1;
173+
174+
out[0] = uint8_t(kLiteCut1 + (in >> 8));
175+
out[1] = uint8_t(in);
176+
return 2;
177+
}
178+
else
179+
{
180+
int numBytes = 1;
181+
while (in)
182+
{
183+
out[numBytes++] = uint8_t(in);
184+
in >>= 8;
185+
}
186+
// Finally write the size
187+
out[0] = uint8_t(kLiteCut2 + (numBytes - 2));
188+
return numBytes;
189+
}
190+
}
191+
192+
// Masks indexed by a byte count (0 - 4). Entry `n` keeps the low `n` bytes of a 32 bit
// word and clears the rest.
static const uint32_t s_unalignedUInt32Mask[5] =
{
    0x00000000u,    // 0 bytes
    0x000000ffu,    // 1 byte
    0x0000ffffu,    // 2 bytes
    0x00ffffffu,    // 3 bytes
    0xffffffffu,    // 4 bytes
};
200+
201+
/* static */int ByteEncodeUtil::decodeLiteUInt32(const uint8_t* in, uint32_t* out)
202+
{
203+
uint8_t b0 = *in++;
204+
if (b0 < kLiteCut1)
205+
{
206+
*out = uint32_t(b0);
207+
return 1;
208+
}
209+
else if (b0 < kLiteCut2)
210+
{
211+
uint8_t b1 = *in++;
212+
*out = kLiteCut1 + b1 + (uint32_t(b0 - kLiteCut1) << 8);
213+
return 2;
214+
}
215+
else
216+
{
217+
int numBytesRemaining = b0 - kLiteCut2 + 2 - 1;
218+
219+
#if SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS
220+
//const uint32_t mask = s_unalignedUInt32Mask[numBytesRemaining];
221+
const uint32_t mask = ~(uint32_t(0xffffff00) << ((numBytesRemaining - 1) * 8));
222+
const uint32_t value = (*(const uint32_t*)in) & mask;
223+
#else
224+
// This works on all cpus although slower
225+
uint32_t value = in[0];
226+
227+
switch (numBytesRemaining)
228+
{
229+
case 4: value |= uint32_t(in[3]) << 24; /* fall thru */
230+
case 3: value |= uint32_t(in[2]) << 16; /* fall thru */
231+
case 2: value |= uint32_t(in[1]) << 8; /* fall thru */
232+
case 1: break;
233+
}
234+
#endif
235+
*out = value;
236+
return numBytesRemaining + 1;
237+
}
238+
}
239+
240+
/* static */size_t ByteEncodeUtil::decodeLiteUInt32(const uint8_t* encodeIn, size_t numValues, uint32_t* valuesOut)
241+
{
242+
const uint8_t* encodeStart = encodeIn;
243+
244+
for (size_t i = 0; i < numValues; ++i)
245+
{
246+
uint8_t b0 = *encodeIn++;
247+
if (b0 < kLiteCut1)
248+
{
249+
valuesOut[i] = uint32_t(b0);
250+
}
251+
else if (b0 < kLiteCut2)
252+
{
253+
uint8_t b1 = *encodeIn++;
254+
valuesOut[i] = kLiteCut1 + b1 + (uint32_t(b0 - kLiteCut1) << 8);
255+
}
256+
else
257+
{
258+
int numBytesRemaining = b0 - kLiteCut2 + 2 - 1;
259+
260+
#if SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS
261+
const uint32_t mask = s_unalignedUInt32Mask[numBytesRemaining];
262+
//const uint32_t mask = ~(uint32_t(0xffffff00) << ((numBytesRemaining - 1) * 8));
263+
const uint32_t value = (*(const uint32_t*)encodeIn) & mask;
264+
#else
265+
// This works on all cpus although slower
266+
uint32_t value = encodeIn[0];
267+
switch (numBytesRemaining)
268+
{
269+
case 4: value |= uint32_t(encodeIn[3]) << 24; /* fall thru */
270+
case 3: value |= uint32_t(encodeIn[2]) << 16; /* fall thru */
271+
case 2: value |= uint32_t(encodeIn[1]) << 8; /* fall thru */
272+
case 1: break;
273+
}
274+
#endif
275+
valuesOut[i] = value;
276+
encodeIn += numBytesRemaining;
277+
}
278+
}
279+
280+
return size_t(encodeIn - encodeStart);
281+
}
282+
283+
} // namespace Slang

0 commit comments

Comments
 (0)