Merge remote-tracking branch 'official/master' into perf

csyonghe · csyonghe · commit 112bca6781a8 · 2025-02-23T00:20:47.000-08:00
diff --git a/source/slang/diff.meta.slang b/source/slang/diff.meta.slang
@@ -2074,7 +2074,7 @@ DifferentialPair<T> __d_max(DifferentialPair<T> dpx, DifferentialPair<T> dpy)
 {
     return DifferentialPair<T>(
         max(dpx.p, dpy.p),
-        dpx.p > dpy.p ? dpx.d : dpy.d
+        dpx.p > dpy.p ? dpx.d : (dpx.p < dpy.p ? dpy.d : __mul_p_d(T(0.5), T.dadd(dpx.d, dpy.d)))
     );
 }
 
@@ -2084,8 +2084,8 @@ __generic<T : __BuiltinFloatingPointType>
 [BackwardDerivativeOf(max)]
 void __d_max(inout DifferentialPair<T> dpx, inout DifferentialPair<T> dpy, T.Differential dOut)
 {
-    dpx = diffPair(dpx.p, dpx.p > dpy.p ? dOut : T.dzero());
-    dpy = diffPair(dpy.p, dpy.p > dpx.p ? dOut : T.dzero());
+    dpx = diffPair(dpx.p, dpx.p > dpy.p ? dOut : (dpx.p < dpy.p ? T.dzero() : __mul_p_d(T(0.5), dOut)));
+    dpy = diffPair(dpy.p, dpy.p > dpx.p ? dOut : (dpy.p < dpx.p ? T.dzero() : __mul_p_d(T(0.5), dOut)));
 }
 
 VECTOR_MATRIX_BINARY_DIFF_IMPL(max)
@@ -2099,7 +2099,7 @@ DifferentialPair<T> __d_min(DifferentialPair<T> dpx, DifferentialPair<T> dpy)
 {
     return DifferentialPair<T>(
         min(dpx.p, dpy.p),
-        dpx.p < dpy.p ? dpx.d : dpy.d
+        dpx.p < dpy.p ? dpx.d : (dpx.p > dpy.p ? dpy.d : __mul_p_d(T(0.5), T.dadd(dpx.d, dpy.d)))
     );
 }
 
@@ -2109,8 +2109,8 @@ __generic<T : __BuiltinFloatingPointType>
 [BackwardDerivativeOf(min)]
 void __d_min(inout DifferentialPair<T> dpx, inout DifferentialPair<T> dpy, T.Differential dOut)
 {
-    dpx = diffPair(dpx.p, dpx.p < dpy.p ? dOut : T.dzero());
-    dpy = diffPair(dpy.p, dpy.p < dpx.p ? dOut : T.dzero());
+    dpx = diffPair(dpx.p, dpx.p < dpy.p ? dOut : (dpx.p > dpy.p ? T.dzero() : __mul_p_d(T(0.5), dOut)));
+    dpy = diffPair(dpy.p, dpy.p < dpx.p ? dOut : (dpy.p > dpx.p ? T.dzero() : __mul_p_d(T(0.5), dOut)));
 }
 
 VECTOR_MATRIX_BINARY_DIFF_IMPL(min)
diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp
@@ -136,8 +136,15 @@ void MetalSourceEmitter::_emitHLSLTextureType(IRTextureTypeBase* texType)
     switch (texType->getAccess())
     {
     case SLANG_RESOURCE_ACCESS_READ:
-        m_writer->emit("access::sample");
-        break;
+        {
+            // Metal does not support access::sample for texture buffers, so we need to emit
+            // access::read instead.
+            if (texType->GetBaseShape() == SLANG_TEXTURE_BUFFER)
+                m_writer->emit("access::read");
+            else
+                m_writer->emit("access::sample");
+            break;
+        }
 
     case SLANG_RESOURCE_ACCESS_WRITE:
         m_writer->emit("access::write");
diff --git a/tests/autodiff-dstdlib/dstdlib-max-min.slang b/tests/autodiff-dstdlib/dstdlib-max-min.slang
@@ -0,0 +1,112 @@
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+typedef DifferentialPair<float> dpfloat;
+typedef DifferentialPair<float2> dpfloat2;
+
+[BackwardDifferentiable]
+float diffMax(float x, float y)
+{
+    return max(x, y);
+}
+
+[BackwardDifferentiable]
+float2 diffMax(float2 x, float2 y)
+{
+    return max(x, y);
+}
+
+[BackwardDifferentiable]
+float diffMin(float x, float y)
+{
+    return min(x, y);
+}
+
+[BackwardDifferentiable]
+float2 diffMin(float2 x, float2 y)
+{
+    return min(x, y);
+}
+
+[numthreads(1, 1, 1)]
+void computeMain(uint3 dispatchThreadID: SV_DispatchThreadID)
+{
+    // Forward-mode max() with x < y: the result follows y.
+    {
+        dpfloat dpx = dpfloat(2.0, 1.0);
+        dpfloat dpy = dpfloat(5.0, -2.0);
+        dpfloat res = __fwd_diff(diffMax)(dpx, dpy);
+        outputBuffer[0] = res.p;        // Expect: 5.000000
+        outputBuffer[1] = res.d;        // Expect: -2.000000 (differential of y)
+    }
+
+    // Forward-mode max() with x == y: the tie averages both differentials.
+    {
+        dpfloat dpx = dpfloat(3.0, 1.0);
+        dpfloat dpy = dpfloat(3.0, -2.0);
+        dpfloat res = __fwd_diff(diffMax)(dpx, dpy);
+        outputBuffer[2] = res.p;        // Expect: 3.000000
+        outputBuffer[3] = res.d;        // Expect: -0.500000 (average of 1.0 and -2.0)
+    }
+
+    // Forward-mode min() with x > y: the result follows y.
+    {
+        dpfloat dpx = dpfloat(5.0, 1.0);
+        dpfloat dpy = dpfloat(2.0, -2.0);
+        dpfloat res = __fwd_diff(diffMin)(dpx, dpy);
+        outputBuffer[4] = res.p;        // Expect: 2.000000
+        outputBuffer[5] = res.d;        // Expect: -2.000000 (differential of y)
+    }
+
+    // Forward-mode min() with x == y: the tie averages both differentials.
+    {
+        dpfloat dpx = dpfloat(3.0, 1.0);
+        dpfloat dpy = dpfloat(3.0, -2.0);
+        dpfloat res = __fwd_diff(diffMin)(dpx, dpy);
+        outputBuffer[6] = res.p;        // Expect: 3.000000
+        outputBuffer[7] = res.d;        // Expect: -0.500000 (average of 1.0 and -2.0)
+    }
+
+    // Backward-mode max() with x == y: dOut = 1.0 is split evenly between x and y.
+    {
+        dpfloat dpx = dpfloat(3.0, 0.0);
+        dpfloat dpy = dpfloat(3.0, 0.0);
+        __bwd_diff(diffMax)(dpx, dpy, 1.0);
+        outputBuffer[8] = dpx.d;        // Expect: 0.500000 (half of gradient)
+        outputBuffer[9] = dpy.d;        // Expect: 0.500000 (half of gradient)
+    }
+
+    // Backward-mode min() with x == y: dOut = 1.0 is split evenly between x and y.
+    {
+        dpfloat dpx = dpfloat(3.0, 0.0);
+        dpfloat dpy = dpfloat(3.0, 0.0);
+        __bwd_diff(diffMin)(dpx, dpy, 1.0);
+        outputBuffer[10] = dpx.d;       // Expect: 0.500000 (half of gradient)
+        outputBuffer[11] = dpy.d;       // Expect: 0.500000 (half of gradient)
+    }
+
+    // Forward-mode vector max(): component 0 has x == y, component 1 has x > y.
+    {
+        dpfloat2 dpx = dpfloat2(float2(3.0, 4.0), float2(1.0, 2.0));
+        dpfloat2 dpy = dpfloat2(float2(3.0, 2.0), float2(-2.0, -3.0));
+        dpfloat2 res = __fwd_diff(diffMax)(dpx, dpy);
+        outputBuffer[12] = res.p[0];    // Expect: 3.000000
+        outputBuffer[13] = res.d[0];    // Expect: -0.500000 (average of 1.0 and -2.0)
+        outputBuffer[14] = res.p[1];    // Expect: 4.000000
+        outputBuffer[15] = res.d[1];    // Expect: 2.000000 (differential of x)
+    }
+
+    // Forward-mode vector min(): component 0 has x == y, component 1 has x > y.
+    {
+        dpfloat2 dpx = dpfloat2(float2(3.0, 4.0), float2(1.0, 2.0));
+        dpfloat2 dpy = dpfloat2(float2(3.0, 2.0), float2(-2.0, -3.0));
+        dpfloat2 res = __fwd_diff(diffMin)(dpx, dpy);
+        outputBuffer[16] = res.p[0];    // Expect: 3.000000
+        outputBuffer[17] = res.d[0];    // Expect: -0.500000 (average of 1.0 and -2.0)
+        outputBuffer[18] = res.p[1];    // Expect: 2.000000
+        outputBuffer[19] = res.d[1];    // Expect: -3.000000 (differential of y)
+    }
+}
diff --git a/tests/autodiff-dstdlib/dstdlib-max-min.slang.expected.txt b/tests/autodiff-dstdlib/dstdlib-max-min.slang.expected.txt
@@ -0,0 +1,21 @@
+type: float
+5.000000
+-2.000000
+3.000000
+-0.500000
+2.000000
+-2.000000
+3.000000
+-0.500000
+0.500000
+0.500000
+0.500000
+0.500000
+3.000000
+-0.500000
+4.000000
+2.000000
+3.000000
+-0.500000
+2.000000
+-3.000000
diff --git a/tests/autodiff-dstdlib/dstdlib-max.slang b/tests/autodiff-dstdlib/dstdlib-max.slang
diff --git a/tests/autodiff-dstdlib/dstdlib-max.slang.expected.txt b/tests/autodiff-dstdlib/dstdlib-max.slang.expected.txt
diff --git a/tests/metal/test_buffer.slang b/tests/metal/test_buffer.slang
@@ -0,0 +1,17 @@
+// Test that Buffer<uint> maps to texture_buffer<uint, access::read> in Metal
+
+//TEST:SIMPLE(filecheck=METAL): -stage compute -entry computeMain -target metal
+
+
+// METAL: texture_buffer<uint, access::read> inputBuffer_{{.*}}
+Buffer<uint> inputBuffer;
+
+RWStructuredBuffer<uint> outputBuffer;
+
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dtid : SV_DispatchThreadID)
+{
+    uint idx = dtid.x;
+    // Load values from the buffer to verify correct access
+    outputBuffer[idx] = inputBuffer.Load(idx);
+}
diff --git a/tools/gfx/metal/metal-device.cpp b/tools/gfx/metal/metal-device.cpp
@@ -653,8 +653,8 @@ Result DeviceImpl::createTextureView(
     MTL::PixelFormat pixelFormat = desc.format == Format::Unknown
                                        ? textureImpl->m_pixelFormat
                                        : MetalUtil::translatePixelFormat(desc.format);
-    NS::Range levelRange(sr.baseArrayLayer, sr.layerCount);
-    NS::Range sliceRange(sr.mipLevel, sr.mipLevelCount);
+    NS::Range sliceRange(sr.baseArrayLayer, sr.layerCount);
+    NS::Range levelRange(sr.mipLevel, sr.mipLevelCount);
 
     viewImpl->m_textureView = NS::TransferPtr(textureImpl->m_texture->newTextureView(
         pixelFormat,
diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp
@@ -131,6 +131,7 @@ class RenderTestApp
     ComPtr<IBuffer> m_vertexBuffer;
     ComPtr<IShaderProgram> m_shaderProgram;
     ComPtr<IPipeline> m_pipeline;
+    ComPtr<IShaderTable> m_shaderTable;
     ComPtr<ITexture> m_depthBuffer;
     ComPtr<ITextureView> m_depthBufferView;
     ComPtr<ITexture> m_colorBuffer;
@@ -648,6 +649,7 @@ SlangResult RenderTestApp::initialize(
                 m_pipeline = device->createRenderPipeline(desc);
             }
             break;
+
         case Options::ShaderProgramType::GraphicsMeshCompute:
         case Options::ShaderProgramType::GraphicsTaskMeshCompute:
             {
@@ -660,6 +662,33 @@ SlangResult RenderTestApp::initialize(
                 desc.depthStencil.format = Format::D32_FLOAT;
                 m_pipeline = device->createRenderPipeline(desc);
             }
+            break;
+
+        case Options::ShaderProgramType::RayTracing:
+            {
+                RayTracingPipelineDesc desc;
+                desc.program = m_shaderProgram;
+
+                m_pipeline = device->createRayTracingPipeline(desc);
+
+                const char* raygenNames[] = {"raygenMain"};
+
+                // We don't define a miss shader for this test. OptiX allows
+                // passing nullptr to indicate no miss shader, but something in
+                // slang-rhi assumes that the miss shader always has a name. To
+                // work around that, use a dummy name.
+                const char* missNames[] = {"missNull"};
+
+                ShaderTableDesc shaderTableDesc = {};
+                shaderTableDesc.program = m_shaderProgram;
+                shaderTableDesc.rayGenShaderCount = 1;
+                shaderTableDesc.rayGenShaderEntryPointNames = raygenNames;
+                shaderTableDesc.missShaderCount = 1;
+                shaderTableDesc.missShaderEntryPointNames = missNames;
+                SLANG_RETURN_ON_FAIL(
+                    device->createShaderTable(shaderTableDesc, m_shaderTable.writeRef()));
+            }
+            break;
         }
     }
     // If success must have a pipeline state
@@ -972,6 +1001,25 @@ Result RenderTestApp::update()
             m_options.computeDispatchSize[2]);
         passEncoder->end();
     }
+    else if (m_options.shaderType == Options::ShaderProgramType::RayTracing)
+    {
+        auto rootObject = m_device->createRootShaderObject(m_pipeline);
+        applyBinding(rootObject);
+        rootObject->finalize();
+
+        auto passEncoder = encoder->beginRayTracingPass();
+        RayTracingState state;
+        state.pipeline = static_cast<IRayTracingPipeline*>(m_pipeline.get());
+        state.rootObject = rootObject;
+        state.shaderTable = m_shaderTable;
+        passEncoder->setRayTracingState(state);
+        passEncoder->dispatchRays(
+            0,
+            m_options.computeDispatchSize[0],
+            m_options.computeDispatchSize[1],
+            m_options.computeDispatchSize[2]);
+        passEncoder->end();
+    }
     else
     {
         auto rootObject = m_device->createRootShaderObject(m_pipeline);
@@ -1072,7 +1120,8 @@ Result RenderTestApp::update()
             if (m_options.shaderType == Options::ShaderProgramType::Compute ||
                 m_options.shaderType == Options::ShaderProgramType::GraphicsCompute ||
                 m_options.shaderType == Options::ShaderProgramType::GraphicsMeshCompute ||
-                m_options.shaderType == Options::ShaderProgramType::GraphicsTaskMeshCompute)
+                m_options.shaderType == Options::ShaderProgramType::GraphicsTaskMeshCompute ||
+                m_options.shaderType == Options::ShaderProgramType::RayTracing)
             {
                 SLANG_RETURN_ON_FAIL(writeBindingOutput(m_options.outputPath));
             }

Original file line number	Diff line number	Diff line change
`@@ -2074,7 +2074,7 @@ DifferentialPair<T> __d_max(DifferentialPair<T> dpx, DifferentialPair<T> dpy)`
`2074`	`2074`	`{`
`2075`	`2075`	`return DifferentialPair<T>(`
`2076`	`2076`	`max(dpx.p, dpy.p),`
`2077`		`- dpx.p > dpy.p ? dpx.d : dpy.d`
	`2077`	`+ dpx.p > dpy.p ? dpx.d : (dpx.p < dpy.p ? dpy.d : __mul_p_d(T(0.5), T.dadd(dpx.d, dpy.d)))`
`2078`	`2078`	`);`
`2079`	`2079`	`}`
`2080`	`2080`
`@@ -2084,8 +2084,8 @@ __generic<T : __BuiltinFloatingPointType>`
`2084`	`2084`	`[BackwardDerivativeOf(max)]`
`2085`	`2085`	`void __d_max(inout DifferentialPair<T> dpx, inout DifferentialPair<T> dpy, T.Differential dOut)`
`2086`	`2086`	`{`
`2087`		`- dpx = diffPair(dpx.p, dpx.p > dpy.p ? dOut : T.dzero());`
`2088`		`- dpy = diffPair(dpy.p, dpy.p > dpx.p ? dOut : T.dzero());`
	`2087`	`+ dpx = diffPair(dpx.p, dpx.p > dpy.p ? dOut : (dpx.p < dpy.p ? T.dzero() : __mul_p_d(T(0.5), dOut)));`
	`2088`	`+ dpy = diffPair(dpy.p, dpy.p > dpx.p ? dOut : (dpy.p < dpx.p ? T.dzero() : __mul_p_d(T(0.5), dOut)));`
`2089`	`2089`	`}`
`2090`	`2090`
`2091`	`2091`	`VECTOR_MATRIX_BINARY_DIFF_IMPL(max)`
`@@ -2099,7 +2099,7 @@ DifferentialPair<T> __d_min(DifferentialPair<T> dpx, DifferentialPair<T> dpy)`
`2099`	`2099`	`{`
`2100`	`2100`	`return DifferentialPair<T>(`
`2101`	`2101`	`min(dpx.p, dpy.p),`
`2102`		`- dpx.p < dpy.p ? dpx.d : dpy.d`
	`2102`	`+ dpx.p < dpy.p ? dpx.d : (dpx.p > dpy.p ? dpy.d : __mul_p_d(T(0.5), T.dadd(dpx.d, dpy.d)))`
`2103`	`2103`	`);`
`2104`	`2104`	`}`
`2105`	`2105`
`@@ -2109,8 +2109,8 @@ __generic<T : __BuiltinFloatingPointType>`
`2109`	`2109`	`[BackwardDerivativeOf(min)]`
`2110`	`2110`	`void __d_min(inout DifferentialPair<T> dpx, inout DifferentialPair<T> dpy, T.Differential dOut)`
`2111`	`2111`	`{`
`2112`		`- dpx = diffPair(dpx.p, dpx.p < dpy.p ? dOut : T.dzero());`
`2113`		`- dpy = diffPair(dpy.p, dpy.p < dpx.p ? dOut : T.dzero());`
	`2112`	`+ dpx = diffPair(dpx.p, dpx.p < dpy.p ? dOut : (dpx.p > dpy.p ? T.dzero() : __mul_p_d(T(0.5), dOut)));`
	`2113`	`+ dpy = diffPair(dpy.p, dpy.p < dpx.p ? dOut : (dpy.p > dpx.p ? T.dzero() : __mul_p_d(T(0.5), dOut)));`
`2114`	`2114`	`}`
`2115`	`2115`
`2116`	`2116`	`VECTOR_MATRIX_BINARY_DIFF_IMPL(min)`