Fix for KernelContext threading issue for C++ targets (shader-slang#1843)

jsmall-zzz · web-flow · commit 79d106fac18f · 2021-05-14T17:32:52.000-04:00
* `#include` of an absolute path didn't work, because paths were always taken to be relative.

* Fix for issue where threading KernelContext was not working on C++ test when there were multiple invocations.

* Improve test for context threading.
diff --git a/source/slang/slang-ir-explicit-global-context.cpp b/source/slang/slang-ir-explicit-global-context.cpp
@@ -467,7 +467,7 @@ struct IntroduceExplicitGlobalContextPass
         addKernelContextNameHint(contextParam);
         contextParam->insertBefore(firstBlock->getFirstOrdinaryInst());
 
-        // The new parameter can be registerd as the context value
+        // The new parameter can be registered as the context value
         // to be used for `func` right away.
         //
         // Note: we register the value *before* modifying locations
@@ -482,8 +482,12 @@ struct IntroduceExplicitGlobalContextPass
         // TODO: There is an issue here if `func` might be called
         // dynamically, through something like a witness table.
         //
-        List<IRUse*> uses;
-        for( auto use = func->firstUse; use; use = use->nextUse )
+        // We first collect all of the uses of `func` that are calls.
+        // NOTE: we collect all of the calls up front and only then process them
+        // (rather than fixing them up while iterating the use linked list), because
+        // once a replacement is made that list no longer holds all of the use sites.
+        List<IRCall*> callUses;
+        for (auto use = func->firstUse; use; use = use->nextUse)
         {
             // We will only fix up calls to `func`, and ignore
             // other operations that might refer to it.
@@ -495,9 +499,15 @@ struct IntroduceExplicitGlobalContextPass
             // to a higher-order function.
             //
             auto call = as<IRCall>(use->getUser());
-            if(!call)
-                continue;
+            if (call)
+            {
+                callUses.add(call);
+            }
+        }
 
+        // Fix up all of the call uses
+        for( auto call : callUses)
+        {
             // We are going to construct a new call to `func`
             // that has all of the arguments of the original call...
             //
diff --git a/tests/compute/kernel-context-threading.slang b/tests/compute/kernel-context-threading.slang
@@ -0,0 +1,49 @@
+// kernel-context-threading.slang
+
+// This test exercises the slang-ir-explicit-global-context functionality for C++-like targets.
+// In particular, these targets require that the KernelContext is threaded through functions that access globals.
+// Currently this is only really applicable to C++, but for completeness all targets are tested.
+
+//TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type -compile-arg -O3 -xslang -matrix-layout-row-major -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -compile-arg -O3 -xslang -matrix-layout-row-major -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -compile-arg -O3 -xslang -matrix-layout-row-major -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -xslang -matrix-layout-row-major -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -dx12 -xslang -matrix-layout-row-major -shaderobj
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -xslang -matrix-layout-row-major -shaderobj
+
+//TEST_INPUT:cbuffer(data=[1.0 0.0 0.0 0.0  0.0 1.0 0.0 0.0  0.0 0.0 1.0 0.0 10.0 20.0 30.0 1.0]):name matrixBuffer
+ConstantBuffer<float4x4> matrixBuffer;
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0  0 0 0 0  0 0 0 0  0 0 0 0], stride=4):out,name rowOrderMatrixOutput
+RWStructuredBuffer<float> rowOrderMatrixOutput;
+
+void writeRow2(float4 v, int rowIndex)
+{
+    int baseIndex = rowIndex * 4;
+    // Store the four components of v as consecutive floats starting at baseIndex.
+    rowOrderMatrixOutput[baseIndex + 0] = v.x;
+    rowOrderMatrixOutput[baseIndex + 1] = v.y;
+    rowOrderMatrixOutput[baseIndex + 2] = v.z;
+    rowOrderMatrixOutput[baseIndex + 3] = v.w;
+}
+
+// Forwards to writeRow2 purely so that threading is tested through multiple levels of function calls.
+void writeRow(float4 v, int rowIndex)
+{
+    writeRow2(v, rowIndex);
+}
+
+[numthreads(1, 1, 1)]
+void computeMain(uint3 tid : SV_DispatchThreadID)
+{
+    float4 v = float4(1, 2, 3, 1);
+
+    float4x4 M = matrixBuffer;
+    
+    float4 r = mul(v, M); // NOTE(review): r is never read below - confirm this is intentional
+    // Write each of the four rows of M via writeRow, which forwards to writeRow2.
+    writeRow(M[0], 0);
+    writeRow(M[1], 1);
+    writeRow(M[2], 2);
+    writeRow(M[3], 3);
+}
diff --git a/tests/compute/kernel-context-threading.slang.expected.txt b/tests/compute/kernel-context-threading.slang.expected.txt
@@ -0,0 +1,17 @@
+type: float
+1.000000
+0.000000
+0.000000
+0.000000
+0.000000
+1.000000
+0.000000
+0.000000
+0.000000
+0.000000
+1.000000
+0.000000
+10.000000
+20.000000
+30.000000
+1.000000
diff --git a/tests/current-bugs/cpp-resource-issue.slang b/tests/current-bugs/cpp-resource-issue.slang