From e0499cb0e617a5dc2918902359a283e21d964838 Mon Sep 17 00:00:00 2001
From: Anders Leino <aleino@nvidia.com>
Date: Tue, 4 Mar 2025 10:40:20 +0200
Subject: [PATCH 1/3] Add cuda codegen bug repro

This just compiles tests/compute/simlpe.slang for PTX with the new compilation API, in
order to reproduce a code generation bug.
---
 .../unit-test-find-check-entrypoint.cpp       | 65 +++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/tools/slang-unit-test/unit-test-find-check-entrypoint.cpp b/tools/slang-unit-test/unit-test-find-check-entrypoint.cpp
index 8ecab9671c..75da9aaf0f 100644
--- a/tools/slang-unit-test/unit-test-find-check-entrypoint.cpp
+++ b/tools/slang-unit-test/unit-test-find-check-entrypoint.cpp
@@ -71,3 +71,68 @@ SLANG_UNIT_TEST(findAndCheckEntryPoint)
     SLANG_CHECK(code != nullptr);
     SLANG_CHECK(code->getBufferSize() != 0);
 }
+
+// This test reproduces issue #6507, where it was noticed that compilation of
+// tests/compute/simple.slang for PTX target generates invalid code.
+// TODO: Remove this when issue #4760 is resolved, because at that point
+// tests/compute/simple.slang should cover the same issue.
+SLANG_UNIT_TEST(cudaCodeGenBug)
+{
+    // Source for a module that contains an undecorated entrypoint.
+    const char* userSourceBody = R"(
+        RWStructuredBuffer<float> outputBuffer;
+
+        [numthreads(4, 1, 1)]
+        void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+        {
+            outputBuffer[dispatchThreadID.x] = float(dispatchThreadID.x);
+        }
+        )";
+
+    auto moduleName = "moduleG" + String(Process::getId());
+    String userSource = "import " + moduleName + ";\n" + userSourceBody;
+    ComPtr<slang::IGlobalSession> globalSession;
+    SLANG_CHECK(slang_createGlobalSession(SLANG_API_VERSION, globalSession.writeRef()) == SLANG_OK);
+    slang::TargetDesc targetDesc = {};
+    targetDesc.format = SLANG_PTX;
+    slang::SessionDesc sessionDesc = {};
+    sessionDesc.targetCount = 1;
+    sessionDesc.targets = &targetDesc;
+    ComPtr<slang::ISession> session;
+    SLANG_CHECK(globalSession->createSession(sessionDesc, session.writeRef()) == SLANG_OK);
+
+    ComPtr<slang::IBlob> diagnosticBlob;
+    auto module = session->loadModuleFromSourceString(
+        "m",
+        "m.slang",
+        userSourceBody,
+        diagnosticBlob.writeRef());
+    SLANG_CHECK(module != nullptr);
+
+    ComPtr<slang::IEntryPoint> entryPoint;
+    module->findAndCheckEntryPoint(
+        "computeMain",
+        SLANG_STAGE_COMPUTE,
+        entryPoint.writeRef(),
+        diagnosticBlob.writeRef());
+    SLANG_CHECK(entryPoint != nullptr);
+
+    ComPtr<slang::IComponentType> compositeProgram;
+    slang::IComponentType* components[] = {module, entryPoint.get()};
+    session->createCompositeComponentType(
+        components,
+        2,
+        compositeProgram.writeRef(),
+        diagnosticBlob.writeRef());
+    SLANG_CHECK(compositeProgram != nullptr);
+
+    ComPtr<slang::IComponentType> linkedProgram;
+    compositeProgram->link(linkedProgram.writeRef(), diagnosticBlob.writeRef());
+    SLANG_CHECK(linkedProgram != nullptr);
+
+    ComPtr<slang::IBlob> code;
+    auto res = linkedProgram->getEntryPointCode(0, 0, code.writeRef(), diagnosticBlob.writeRef());
+    SLANG_CHECK(res == SLANG_OK);
+    SLANG_CHECK(code != nullptr);
+    SLANG_CHECK(code->getBufferSize() != 0);
+}

From a370965a6fdbd695736c88c56642feb9130cde43 Mon Sep 17 00:00:00 2001
From: Anders Leino <aleino@nvidia.com>
Date: Tue, 4 Mar 2025 10:33:59 +0200
Subject: [PATCH 2/3] Detect entrypoint more robustly when applying ConstRef
 hack during lowring

For shaders like tests/compute/simple.slang, which have a 'numthreads' attribute but no
'shader' attribute, the old compile request API would add an EntryPointAttribute to the
AST node of the entry point. However, the new API doesn't, and so a certain ConstRef hack
doesn't get applied when using the new API, leading to subsequent code generation issues.

This patch also checks for a 'numthreads' attribute when deciding whether to apply the
ConstRef hack.

This closes issue #6507 and helps to resolve issue #4760.
---
 source/slang/slang-lower-to-ir.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index e5ca77634c..4d692b7276 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -3214,7 +3214,8 @@ void collectParameterLists(
         // For now we will rely on a follow up pass to remove unnecessary temporary variables if
         // we can determine that they are never actually writtten to by the user.
         //
-        bool lowerVaryingInputAsConstRef = declRef.getDecl()->hasModifier<EntryPointAttribute>();
+        bool lowerVaryingInputAsConstRef = declRef.getDecl()->hasModifier<EntryPointAttribute>() ||
+                                           declRef.getDecl()->hasModifier<NumThreadsAttribute>();
 
         // Don't collect parameters from the outer scope if
         // we are in a `static` context.

From fb67a820c502217d87ea3a7bd081b63c767a8a8f Mon Sep 17 00:00:00 2001
From: Anders Leino <aleino@nvidia.com>
Date: Tue, 4 Mar 2025 11:31:52 +0200
Subject: [PATCH 3/3] Add expected failure list for GitHub runners

Our GitHub runners don't have the CUDA toolkits installed, so they can't run all tests.
---
 .github/workflows/ci.yml                 | 6 ++++--
 tests/expected-failure-github-runner.txt | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)
 create mode 100644 tests/expected-failure-github-runner.txt

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 66d33bc087..27e255372f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -176,14 +176,16 @@ jobs:
               -category ${{ matrix.test-category }} \
               -api all-dx12 \
               -expected-failure-list tests/expected-failure-github.txt \
-              -expected-failure-list tests/expected-failure-record-replay-tests.txt
+              -expected-failure-list tests/expected-failure-record-replay-tests.txt \
+              -expected-failure-list tests/expected-failure-github-runner.txt
           else
             "$bin_dir/slang-test" \
               -use-test-server \
               -category ${{ matrix.test-category }} \
               -api all-dx12 \
               -expected-failure-list tests/expected-failure-github.txt \
-              -expected-failure-list tests/expected-failure-record-replay-tests.txt
+              -expected-failure-list tests/expected-failure-record-replay-tests.txt \
+              -expected-failure-list tests/expected-failure-github-runner.txt
           fi
       - name: Run Slang examples
         if: steps.filter.outputs.should-run == 'true' && matrix.platform != 'wasm' && matrix.full-gpu-tests
diff --git a/tests/expected-failure-github-runner.txt b/tests/expected-failure-github-runner.txt
new file mode 100644
index 0000000000..1da3a96695
--- /dev/null
+++ b/tests/expected-failure-github-runner.txt
@@ -0,0 +1 @@
+slang-unit-test-tool/cudaCodeGenBug.internal