Half texture support (shader-slang#1836)

jsmall-zzz · web-flow · commit e510a287deb2 · 2021-05-06T12:45:00.000-04:00
* #include an absolute path didn't work - because paths were taken to always be relative.

* Split out StringEscapeUtil.

* Added StringEscapeUtil.

* Fix typo in unix quoting type.

* Small comment improvements.

* Try to fix linux linking issue.

* Fix typo.

* Attempt to fix linux link issue.

* Update VS proj even though nothing really changed.

* Fix another typo issue.

* Fix for windows issue.
Fixed bug.

* Make separate Utils for escaping.

* Fix typo.

* Split out into StringEscapeHandler.

* Windows shell does handle removing quotes (so remove code to remove them).

* Handle unescaping if not initiating using the shell.

* Slight improvement around shell like decoding.

* Simplify command extraction.

* Add shared-library category type.

* Fix bug in command extraction.

* Typo in transcendental category.

* Enable unit-test in smoke test category.

* Make parsing failing output as a failing test.

* Fixes for transcendental tests. Disable tests that do not work.

* Changed category parsing.

* Removed the TestResult parameter from _gatherTestsForFile.
Made testsList only output.

* Remove testing if all tests were disabled.

* Make args of CommandLine always unescaped.

* Add category.

* Don't need escaping on unix/linux.

* Remove some no longer used functions.

* Add requireSMVersion to CUDAExtensionTracker.

* half-calc.slang now works for CUDA.

* bit-cast-16-bit works on CUDA.

* WIP handling of CUDA vector<half> types.

* Half swizzle CUDA.

* Half vector test.

* Fix swizzle half bug.

* Fix compilation issue with narrowing to Index.

* Add unary ops.

* Add some vector scalar maths ops.

* Add half vector conversions for CUDA.

* Fix erroneous comment.

* Support for half comparisons.

* First pass test for half compare.

* Fix bug in CUDA specialized emit control.
Updated tests to have pre and post inc/dec.

* Removed unneeded parts of the cuda prelude.

* Half structured buffer works on CUDA.

* Added name lookup for Gfx::Format

* Support half texture type in test system.

* Test for half reading on CUDA.

* Add half formats to Vk and D3D utils.

* Fix getAt for CUDA - where there might not be a .x member in a vector.
diff --git a/slang-gfx.h b/slang-gfx.h
@@ -123,8 +123,30 @@ class IShaderProgram: public ISlangUnknown
         0x9d32d0ad, 0x915c, 0x4ffd, { 0x91, 0xe2, 0x50, 0x85, 0x54, 0xa0, 0x4a, 0x76 } \
     }
 
+// Don't change without keeping in sync with Format
+#define GFX_FORMAT(x) \
+    x( Unknown, 0) \
+    \
+    x(RGBA_Float32, sizeof(float) * 4) \
+    x(RGB_Float32, sizeof(float) * 3) \
+    x(RG_Float32, sizeof(float) * 2) \
+    x(R_Float32, sizeof(float)) \
+    \
+    x(RGBA_Float16, sizeof(uint16_t) * 4) \
+    x(RG_Float16, sizeof(uint16_t) * 2) \
+    x(R_Float16, sizeof(uint16_t)) \
+    \
+    x(RGBA_Unorm_UInt8, sizeof(uint32_t)) \
+    x(BGRA_Unorm_UInt8, sizeof(uint32_t)) \
+    \
+    x(R_UInt16, sizeof(uint16_t)) \
+    x(R_UInt32, sizeof(uint32_t)) \
+    \
+    x(D_Float32, sizeof(float)) \
+    x(D_Unorm24_S8, sizeof(uint32_t))
+
 /// Different formats of things like pixels or elements of vertices
-/// NOTE! Any change to this type (adding, removing, changing order) - must also be reflected in changes to RendererUtil
+/// NOTE! Any change to this type (adding, removing, changing order) - must also be reflected in changes to GFX_FORMAT
 enum class Format
 {
     Unknown,
@@ -134,6 +156,10 @@ enum class Format
     RG_Float32,
     R_Float32,
 
+    RGBA_Float16,
+    RG_Float16,
+    R_Float16,
+
     RGBA_Unorm_UInt8,
     BGRA_Unorm_UInt8,
 
@@ -146,6 +172,12 @@ enum class Format
     CountOf,
 };
 
+struct FormatInfo
+{
+    uint8_t channelCount;       ///< The number of channels in the format. Only meaningful if channelType is not SLANG_SCALAR_TYPE_NONE (0 otherwise)
+    uint8_t channelType;        ///< A SlangScalarType value. SLANG_SCALAR_TYPE_NONE if the format has no per-channel scalar type assigned.
+};
+
 struct InputElementDesc
 {
     char const* semanticName;
@@ -1342,6 +1374,9 @@ extern "C"
     /// Gets the size in bytes of a Format type. Returns 0 if a size is not defined/invalid
     SLANG_GFX_API size_t SLANG_MCALL gfxGetFormatSize(Format format);
 
+    /// Gets the channel count and channel scalar type of a format (see FormatInfo)
+    SLANG_GFX_API FormatInfo gfxGetFormatInfo(Format format);
+
     /// Given a type returns a function that can construct it, or nullptr if there isn't one
     SLANG_GFX_API SlangResult SLANG_MCALL
         gfxCreateDevice(const IDevice::Desc* desc, IDevice** outDevice);
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
@@ -1028,10 +1028,15 @@ void CPPSourceEmitter::_emitGetAtDefinition(const UnownedStringSlice& funcName,
             writer->emit("SLANG_PRELUDE_ASSERT(b >= 0 && b < ");
             writer->emit(vecSize);
             writer->emit(");\n");
+
+            writer->emit("return ((");
+            emitType(specOp->returnType);
+            writer->emit("*)");
+
             if (lValue)
-                writer->emit("return (&a->x) + b;\n");
+                writer->emit("a) + b;\n");
             else
-                writer->emit("return (&a.x)[b];\n");
+                writer->emit("&a)[b];\n");
         }
         else if (auto matrixType = as<IRMatrixType>(srcType))
         {
diff --git a/tests/compute/half-texture-simple.slang b/tests/compute/half-texture-simple.slang
@@ -0,0 +1,53 @@
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute  -shaderobj
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12  -shaderobj
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj
+// TODO(JS): Doesn't work on vk currently, because createTextureView not implemented on vk renderer
+//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute  -shaderobj
+
+// Doesn't work on CUDA, not clear why yet
+//DISABLE_TEST_INPUT: Texture1D(format=R_Float16, size=4, content = one, mipMaps=1):name tLoad1D
+//Texture1D<float> tLoad1D;
+
+//TEST_INPUT: Texture1D(format=R_Float16, size=4, content = one):name t1D
+Texture1D<float> t1D;
+//TEST_INPUT: Texture2D(format=R_Float16, size=4, content = one):name t2D
+Texture2D<float> t2D;
+//TEST_INPUT: Texture3D(format=R_Float16, size=4, content = one):name t3D
+Texture3D<float> t3D;
+//TEST_INPUT: TextureCube(format=R_Float16, size=4, content = one):name tCube
+TextureCube<float> tCube;
+
+//TEST_INPUT: Texture1D(format=R_Float16, size=4, content = one, arrayLength=2):name t1DArray
+Texture1DArray<float> t1DArray;
+//TEST_INPUT: Texture2D(format=R_Float16, size=4, content = one, arrayLength=2):name t2DArray
+Texture2DArray<float> t2DArray;
+//TEST_INPUT: TextureCube(format=R_Float16, size=4, content = one, arrayLength=2):name tCubeArray
+TextureCubeArray<float> tCubeArray;
+
+//TEST_INPUT: Sampler:name samplerState
+SamplerState samplerState;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    int idx = dispatchThreadID.x;
+    float u = idx * (1.0f / 4);
+    
+    float val = 0.0f;
+   
+    val += t1D.SampleLevel(samplerState, u, 0); 
+    val += t2D.SampleLevel(samplerState, float2(u, u), 0);
+    val += t3D.SampleLevel(samplerState, float3(u, u, u), 0);
+    val += tCube.SampleLevel(samplerState, normalize(float3(u, 1 - u, u)), 0);
+ 
+    val += t1DArray.SampleLevel(samplerState, float2(u, 0), 0);
+    val += t2DArray.SampleLevel(samplerState, float3(u, u, 0), 0);
+    val += tCubeArray.SampleLevel(samplerState, float4(u, u, u, 0), 0);
+ 
+    outputBuffer[idx] = val;
+}
diff --git a/tests/compute/half-texture-simple.slang.expected.txt b/tests/compute/half-texture-simple.slang.expected.txt
@@ -0,0 +1,4 @@
+40E00000
+40E00000
+40E00000
+40E00000
diff --git a/tools/gfx/cpu/render-cpu.cpp b/tools/gfx/cpu/render-cpu.cpp
@@ -89,6 +89,18 @@ void _unpackFloatTexel(void const* texelData, void* outData, size_t outSize)
     memcpy(outData, temp, outSize);
 }
 
+template<int N>
+void _unpackFloat16Texel(void const* texelData, void* outData, size_t outSize)
+{
+    auto input = (int16_t const*)texelData;
+
+    float temp[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
+    for (int i = 0; i < N; ++i)
+        temp[i] = HalfToFloat(input[i]);
+
+    memcpy(outData, temp, outSize);
+}
+
 static inline float _unpackUnorm8Value(uint8_t value)
 {
     return value / 255.0f;
@@ -143,42 +155,45 @@ void _unpackUInt32Texel(void const* texelData, void* outData, size_t outSize)
     memcpy(outData, temp, outSize);
 }
 
-#define TEXTURE_FORMAT_INFO(FORMAT) static const CPUTextureFormatInfo kCPUTextureFormatInfo_##FORMAT
+struct CPUFormatInfoMap
+{
+    CPUFormatInfoMap()
+    {
+        memset(m_infos, 0, sizeof(m_infos));
 
-TEXTURE_FORMAT_INFO(RGBA_Float32)      = { &_unpackFloatTexel<4> };
-TEXTURE_FORMAT_INFO(RGB_Float32)       = { &_unpackFloatTexel<3> };
-TEXTURE_FORMAT_INFO(RG_Float32)        = { &_unpackFloatTexel<2> };
-TEXTURE_FORMAT_INFO(R_Float32)         = { &_unpackFloatTexel<1> };
-TEXTURE_FORMAT_INFO(RGBA_Unorm_UInt8)  = { &_unpackUnorm8Texel<4> };
-TEXTURE_FORMAT_INFO(BGRA_Unorm_UInt8)  = { &_unpackUnormBGRA8Texel };
-TEXTURE_FORMAT_INFO(R_UInt16)          = { &_unpackUInt16Texel<1> };
-TEXTURE_FORMAT_INFO(R_UInt32)          = { &_unpackUInt32Texel<1> };
-TEXTURE_FORMAT_INFO(D_Float32)         = { &_unpackFloatTexel<1> };
+        set(Format::RGBA_Float32, &_unpackFloatTexel<4>);
+        set(Format::RGB_Float32, &_unpackFloatTexel<3>);
 
-#undef TEXTURE_FORMAT_INFO
+        set(Format::RG_Float32, &_unpackFloatTexel<2>);
+        set(Format::R_Float32, &_unpackFloatTexel<1>);
 
-static CPUTextureFormatInfo const* _getFormatInfo(Format format)
-{
-    switch(format)
+        set(Format::RGBA_Float16, &_unpackFloat16Texel<4>);
+        set(Format::RG_Float16, &_unpackFloat16Texel<2>);
+        set(Format::R_Float16, &_unpackFloat16Texel<1>);
+
+        set(Format::RGBA_Unorm_UInt8, &_unpackUnorm8Texel<4>);
+        set(Format::BGRA_Unorm_UInt8, &_unpackUnormBGRA8Texel);
+        set(Format::R_UInt16, &_unpackUInt16Texel<1>);
+        set(Format::R_UInt32, &_unpackUInt32Texel<1>);
+        set(Format::D_Float32, &_unpackFloatTexel<1>);
+    }
+
+    void set(Format format, CPUTextureUnpackFunc func)
     {
-    case Format::D_Unorm24_S8:
-    default:
-        return nullptr;
+        auto& info = m_infos[Index(format)];
+        info.unpackFunc = func;
+    }
+    SLANG_FORCE_INLINE const CPUTextureFormatInfo& get(Format format) const { return m_infos[Index(format)]; }
 
+    CPUTextureFormatInfo m_infos[Index(Format::CountOf)];
+};
 
-#define CASE(FORMAT) case Format::FORMAT: return &kCPUTextureFormatInfo_##FORMAT;
-    CASE(RGBA_Float32)
-    CASE(RGB_Float32)
-    CASE(RG_Float32)
-    CASE(R_Float32)
-    CASE(RGBA_Unorm_UInt8)
-    CASE(BGRA_Unorm_UInt8)
-    CASE(R_UInt16)
-    CASE(R_UInt32)
-    CASE(D_Float32)
+static const CPUFormatInfoMap g_formatInfoMap;
 
-#undef CASE
-    }
+static CPUTextureFormatInfo const* _getFormatInfo(Format format)
+{
+    const CPUTextureFormatInfo& info = g_formatInfoMap.get(format);
+    return info.unpackFunc ? &info : nullptr;
 }
 
 class CPUTextureResource : public TextureResource
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp
@@ -1432,14 +1432,28 @@ class CUDADevice : public RendererBase
 
             switch (desc.format)
             {
+            case Format::RGBA_Float32:
+            case Format::RGB_Float32:
+            case Format::RG_Float32:
             case Format::R_Float32:
             case Format::D_Float32:
                 {
+                    const FormatInfo info = gfxGetFormatInfo(desc.format);
                     format = CU_AD_FORMAT_FLOAT;
-                    numChannels = 1;
+                    numChannels = info.channelCount;
                     elementSize = sizeof(float);
                     break;
                 }
+            case Format::RGBA_Float16:
+            case Format::RG_Float16:
+            case Format::R_Float16:
+                {
+                    const FormatInfo info = gfxGetFormatInfo(desc.format);
+                    format = CU_AD_FORMAT_HALF;
+                    numChannels = info.channelCount;
+                    elementSize = sizeof(uint16_t);
+                    break;
+                }
             case Format::RGBA_Unorm_UInt8:
                 {
                     format = CU_AD_FORMAT_UNSIGNED_INT8;
diff --git a/tools/gfx/d3d/d3d-util.cpp b/tools/gfx/d3d/d3d-util.cpp
@@ -115,6 +115,10 @@ D3D12_DEPTH_STENCILOP_DESC D3DUtil::translateStencilOpDesc(DepthStencilOpDesc de
         case Format::RGBA_Unorm_UInt8:      return DXGI_FORMAT_R8G8B8A8_UNORM;
         case Format::BGRA_Unorm_UInt8:      return DXGI_FORMAT_B8G8R8A8_UNORM;
 
+        case Format::RGBA_Float16:          return DXGI_FORMAT_R16G16B16A16_FLOAT;
+        case Format::RG_Float16:            return DXGI_FORMAT_R16G16_FLOAT;
+        case Format::R_Float16:             return DXGI_FORMAT_R16_FLOAT;
+
         case Format::R_UInt16:              return DXGI_FORMAT_R16_UINT;
         case Format::R_UInt32:              return DXGI_FORMAT_R32_UINT;
 
diff --git a/tools/gfx/render.cpp b/tools/gfx/render.cpp
@@ -19,24 +19,70 @@ static bool debugLayerEnabled = false;
 
 /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Global Renderer Functions !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
 
-static const uint8_t s_formatSize[] = {
-    0, // Unknown,
+#define GFX_FORMAT_SIZE(name, size) uint8_t(size),
 
-    uint8_t(sizeof(float) * 4), // RGBA_Float32,
-    uint8_t(sizeof(float) * 3), // RGB_Float32,
-    uint8_t(sizeof(float) * 2), // RG_Float32,
-    uint8_t(sizeof(float) * 1), // R_Float32,
+static const uint8_t s_formatSize[] =
+{
+    GFX_FORMAT(GFX_FORMAT_SIZE)
+};
+
+static bool _checkFormat()
+{
+    Index value = 0;
+    Index count = 0;
+
+    // Check the values are in the same order
+#define GFX_FORMAT_CHECK(name, size) count += Index(Index(Format::name) == value++);
+    GFX_FORMAT(GFX_FORMAT_CHECK)
+
+    const bool r = (count == Index(Format::CountOf));
+    SLANG_ASSERT(r);
+    return r;
+}
+
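+// Evaluated during static initialization so _checkFormat runs at startup. NOTE(review): this is declared static, so an unused-variable warning may still be possible - confirm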
+static const bool _checkFormatResult = _checkFormat();
+
+struct FormatInfoMap
+{
+    FormatInfoMap()
+    {
+        // Set all to nothing initially
+        for (auto& info : m_infos)
+        {
+            info.channelCount = 0;
+            info.channelType = SLANG_SCALAR_TYPE_NONE;
+        }
+
+        set(Format::RGBA_Float16, SLANG_SCALAR_TYPE_FLOAT16, 4);
+        set(Format::RG_Float16, SLANG_SCALAR_TYPE_FLOAT16, 2);
+        set(Format::R_Float16, SLANG_SCALAR_TYPE_FLOAT16, 1);
+
+        set(Format::RGBA_Float32, SLANG_SCALAR_TYPE_FLOAT32, 4);
+        set(Format::RGB_Float32, SLANG_SCALAR_TYPE_FLOAT32, 3);
+        set(Format::RG_Float32, SLANG_SCALAR_TYPE_FLOAT32, 2);
+        set(Format::R_Float32, SLANG_SCALAR_TYPE_FLOAT32, 1);
+
+        set(Format::R_UInt16, SLANG_SCALAR_TYPE_UINT16, 1);
+        set(Format::R_UInt32, SLANG_SCALAR_TYPE_UINT32, 1);
+
+        set(Format::D_Float32, SLANG_SCALAR_TYPE_FLOAT32, 1);
+    }
 
-    uint8_t(sizeof(uint32_t)), // RGBA_Unorm_UInt8,
-    uint8_t(sizeof(uint32_t)), // BGRA_Unorm_UInt8,
+    void set(Format format, SlangScalarType type, Index channelCount)
+    {
+        FormatInfo& info = m_infos[Index(format)];
+        info.channelCount = uint8_t(channelCount);
+        info.channelType = uint8_t(type);
+    }
 
-    uint8_t(sizeof(uint16_t)), // R_UInt16,
-    uint8_t(sizeof(uint32_t)), // R_UInt32,
+    const FormatInfo& get(Format format) const { return m_infos[Index(format)]; }
 
-    uint8_t(sizeof(float)), // D_Float32,
-    uint8_t(sizeof(uint32_t)), // D_Unorm24_S8,
+    FormatInfo m_infos[Index(Format::CountOf)];
 };
 
+static const FormatInfoMap s_formatInfoMap;
+
 static void _compileTimeAsserts()
 {
     SLANG_COMPILE_TIME_ASSERT(SLANG_COUNT_OF(s_formatSize) == int(Format::CountOf));
@@ -49,6 +95,11 @@ extern "C"
         return s_formatSize[int(format)];
     }
 
+    SLANG_GFX_API FormatInfo gfxGetFormatInfo(Format format)
+    {
+        return s_formatInfoMap.get(format);
+    }
+
     SlangResult _createDevice(const IDevice::Desc* desc, IDevice** outDevice)
     {
         switch (desc->deviceType)
diff --git a/tools/gfx/vulkan/vk-util.cpp b/tools/gfx/vulkan/vk-util.cpp
@@ -14,6 +14,11 @@ namespace gfx {
         case Format::RGB_Float32:       return VK_FORMAT_R32G32B32_SFLOAT;
         case Format::RG_Float32:        return VK_FORMAT_R32G32_SFLOAT;
         case Format::R_Float32:         return VK_FORMAT_R32_SFLOAT;
+
+        case Format::RGBA_Float16:      return VK_FORMAT_R16G16B16A16_SFLOAT;
+        case Format::RG_Float16:        return VK_FORMAT_R16G16_SFLOAT;
+        case Format::R_Float16:         return VK_FORMAT_R16_SFLOAT;
+
         case Format::RGBA_Unorm_UInt8:  return VK_FORMAT_R8G8B8A8_UNORM;
         case Format::BGRA_Unorm_UInt8:  return VK_FORMAT_B8G8R8A8_UNORM;
         case Format::R_UInt32:          return VK_FORMAT_R32_UINT;
diff --git a/tools/render-test/shader-input-layout.cpp b/tools/render-test/shader-input-layout.cpp
diff --git a/tools/render-test/shader-input-layout.h b/tools/render-test/shader-input-layout.h
diff --git a/tools/render-test/shader-renderer-util.cpp b/tools/render-test/shader-renderer-util.cpp