48
48
#include " slang-ir-lower-bit-cast.h"
49
49
#include " slang-ir-lower-combined-texture-sampler.h"
50
50
#include " slang-ir-lower-l-value-cast.h"
51
- #include " slang-ir-lower-size-of.h"
52
51
#include " slang-ir-lower-reinterpret.h"
53
52
#include " slang-ir-loop-unroll.h"
54
53
#include " slang-ir-legalize-vector-types.h"
@@ -214,6 +213,99 @@ struct LinkingAndOptimizationOptions
214
213
CLikeSourceEmitter* sourceEmitter = nullptr ;
215
214
};
216
215
216
/// Records which optional lowering/legalization passes a module actually needs.
///
/// Each flag is set by `calcRequiredLoweringPassSet` when it encounters an
/// instruction that the corresponding pass is responsible for, and is then
/// consulted by `linkAndOptimizeIR` to skip passes that would have nothing to do.
///
/// Every flag defaults to `false`, so a default-constructed set means
/// "no pass required" even when the caller does not write `= {}`.
struct RequiredLoweringPassSet
{
    /// A `Result<T,E>` type is present; gates `lowerResultType`.
    bool resultType = false;

    /// An optional type is present; gates `lowerOptionalType`.
    bool optionalType = false;

    /// A texture type that is (or may be) a combined texture-sampler is present
    /// on a non-Khronos target; gates `lowerCombinedTextureSamplers`.
    bool combinedTextureSamplers = false;

    /// A reinterpret instruction is present; gates `lowerReinterpret`.
    bool reinterpret = false;

    /// An existential / witness-table instruction, or a `specialize` whose
    /// resolved target has no target-intrinsic decoration, is present;
    /// selects `lowerGenerics` over `cleanupGenerics`.
    bool generics = false;

    /// A forward or backward differentiate instruction is present; gates
    /// `processAutodiffCalls` and `finalizeAutoDiffPass`.
    bool autodiff = false;

    /// An auto-PyBind-CUDA decoration is present; gates `generateDerivativeWrappers`.
    bool derivativePyBindWrapper = false;

    /// A bit-cast instruction is present; gates `lowerBitCast`.
    bool bitcast = false;

    /// A pseudo-pointer type is present; gates `legalizeExistentialTypeLayout`.
    bool existentialTypeLayout = false;

    /// A `getRegisterIndex` / `getRegisterSpace` instruction is present;
    /// gates `lowerBindingQueries`.
    bool bindingQuery = false;

    /// A mesh-shader vertices / indices / primitives output type is present;
    /// gates `legalizeMeshOutputTypes`.
    bool meshOutput = false;
};
230
+
231
+ void calcRequiredLoweringPassSet (RequiredLoweringPassSet& result, CodeGenContext* codeGenContext, IRInst* inst)
232
+ {
233
+ switch (inst->getOp ())
234
+ {
235
+ case kIROp_ResultType :
236
+ result.resultType = true ;
237
+ break ;
238
+ case kIROp_OptionalType :
239
+ result.optionalType = true ;
240
+ break ;
241
+ case kIROp_TextureType :
242
+ if (!isKhronosTarget (codeGenContext->getTargetReq ()))
243
+ {
244
+ if (auto texType = as<IRTextureType>(inst))
245
+ {
246
+ auto isCombined = texType->getIsCombinedInst ();
247
+ if (auto isCombinedVal = as<IRIntLit>(isCombined))
248
+ {
249
+ if (isCombinedVal->getValue () != 0 )
250
+ {
251
+ result.combinedTextureSamplers = true ;
252
+ }
253
+ }
254
+ else
255
+ {
256
+ result.combinedTextureSamplers = true ;
257
+ }
258
+ }
259
+ }
260
+ break ;
261
+ case kIROp_PseudoPtrType :
262
+ result.existentialTypeLayout = true ;
263
+ break ;
264
+ case kIROp_GetRegisterIndex :
265
+ case kIROp_GetRegisterSpace :
266
+ result.bindingQuery = true ;
267
+ break ;
268
+ case kIROp_BackwardDifferentiate :
269
+ case kIROp_ForwardDifferentiate :
270
+ result.autodiff = true ;
271
+ break ;
272
+ case kIROp_VerticesType :
273
+ case kIROp_IndicesType :
274
+ case kIROp_PrimitivesType :
275
+ result.meshOutput = true ;
276
+ break ;
277
+ case kIROp_CreateExistentialObject :
278
+ case kIROp_MakeExistential :
279
+ case kIROp_ExtractExistentialType :
280
+ case kIROp_ExtractExistentialValue :
281
+ case kIROp_ExtractExistentialWitnessTable :
282
+ case kIROp_WrapExistential :
283
+ case kIROp_LookupWitness :
284
+ result.generics = true ;
285
+ break ;
286
+ case kIROp_Specialize :
287
+ {
288
+ auto specInst = as<IRSpecialize>(inst);
289
+ if (!findAnyTargetIntrinsicDecoration (getResolvedInstForDecorations (specInst)))
290
+ result.generics = true ;
291
+ }
292
+ break ;
293
+ case kIROp_Reinterpret :
294
+ result.reinterpret = true ;
295
+ break ;
296
+ case kIROp_BitCast :
297
+ result.bitcast = true ;
298
+ break ;
299
+ case kIROp_AutoPyBindCudaDecoration :
300
+ result.derivativePyBindWrapper = true ;
301
+ break ;
302
+ }
303
+ for (auto child : inst->getDecorationsAndChildren ())
304
+ {
305
+ calcRequiredLoweringPassSet (result, codeGenContext, child);
306
+ }
307
+ }
308
+
217
309
Result linkAndOptimizeIR (
218
310
CodeGenContext* codeGenContext,
219
311
LinkingAndOptimizationOptions const & options,
@@ -354,7 +446,11 @@ Result linkAndOptimizeIR(
354
446
break ;
355
447
}
356
448
357
- lowerOptionalType (irModule, sink);
449
+ RequiredLoweringPassSet requiredLoweringPassSet = {};
450
+ calcRequiredLoweringPassSet (requiredLoweringPassSet, codeGenContext, irModule->getModuleInst ());
451
+
452
+ if (requiredLoweringPassSet.optionalType )
453
+ lowerOptionalType (irModule, sink);
358
454
359
455
switch (target)
360
456
{
@@ -370,7 +466,8 @@ Result linkAndOptimizeIR(
370
466
}
371
467
372
468
// Lower `Result<T,E>` types into ordinary struct types.
373
- lowerResultType (irModule, sink);
469
+ if (requiredLoweringPassSet.resultType )
470
+ lowerResultType (irModule, sink);
374
471
375
472
#if 0
376
473
dumpIRIfEnabled(codeGenContext, irModule, "UNIONS DESUGARED");
@@ -403,7 +500,8 @@ Result linkAndOptimizeIR(
403
500
fuseCallsToSaturatedCooperation (irModule);
404
501
405
502
// Generate any requested derivative wrappers
406
- generateDerivativeWrappers (irModule, sink);
503
+ if (requiredLoweringPassSet.derivativePyBindWrapper )
504
+ generateDerivativeWrappers (irModule, sink);
407
505
408
506
// Next, we need to ensure that the code we emit for
409
507
// the target doesn't contain any operations that would
@@ -448,8 +546,11 @@ Result linkAndOptimizeIR(
448
546
// Unroll loops.
449
547
if (codeGenContext->getSink ()->getErrorCount () == 0 )
450
548
{
451
- if (!unrollLoopsInModule (targetProgram, irModule, codeGenContext->getSink ()))
452
- return SLANG_FAIL;
549
+ if (!fastIRSimplificationOptions.minimalOptimization )
550
+ {
551
+ if (!unrollLoopsInModule (targetProgram, irModule, codeGenContext->getSink ()))
552
+ return SLANG_FAIL;
553
+ }
453
554
}
454
555
455
556
// Few of our targets support higher order functions, and
@@ -462,15 +563,19 @@ Result linkAndOptimizeIR(
462
563
463
564
dumpIRIfEnabled (codeGenContext, irModule, " BEFORE-AUTODIFF" );
464
565
enableIRValidationAtInsert ();
465
- changed |= processAutodiffCalls (targetProgram, irModule, sink);
566
+ if (requiredLoweringPassSet.autodiff )
567
+ {
568
+ changed |= processAutodiffCalls (targetProgram, irModule, sink);
569
+ }
466
570
disableIRValidationAtInsert ();
467
571
dumpIRIfEnabled (codeGenContext, irModule, " AFTER-AUTODIFF" );
468
572
469
573
if (!changed)
470
574
break ;
471
575
}
472
576
473
- finalizeAutoDiffPass (targetProgram, irModule);
577
+ if (requiredLoweringPassSet.autodiff )
578
+ finalizeAutoDiffPass (targetProgram, irModule);
474
579
475
580
finalizeSpecialization (irModule);
476
581
@@ -507,7 +612,9 @@ Result linkAndOptimizeIR(
507
612
SLANG_RETURN_ON_FAIL (performTypeInlining (irModule, sink));
508
613
}
509
614
510
- lowerReinterpret (targetProgram, irModule, sink);
615
+ if (requiredLoweringPassSet.reinterpret )
616
+ lowerReinterpret (targetProgram, irModule, sink);
617
+
511
618
if (sink->getErrorCount () != 0 )
512
619
return SLANG_FAIL;
513
620
@@ -517,20 +624,33 @@ Result linkAndOptimizeIR(
517
624
// but are not used for dynamic dispatch, unpin them so we don't
518
625
// do unnecessary work to lower them.
519
626
unpinWitnessTables (irModule);
520
-
521
- simplifyIR (targetProgram, irModule, fastIRSimplificationOptions, sink);
627
+
628
+ if (fastIRSimplificationOptions.minimalOptimization )
629
+ {
630
+ eliminateDeadCode (irModule);
631
+ }
632
+ else
633
+ {
634
+ simplifyIR (targetProgram, irModule, fastIRSimplificationOptions, sink);
635
+ }
522
636
523
637
if (!ArtifactDescUtil::isCpuLikeTarget (artifactDesc))
524
638
{
525
639
// We could fail because we (perhaps, somehow) end up with a getStringHash whose operand is not a string literal
526
640
SLANG_RETURN_ON_FAIL (checkGetStringHashInsts (irModule, sink));
527
641
}
528
642
643
+ requiredLoweringPassSet = {};
644
+ calcRequiredLoweringPassSet (requiredLoweringPassSet, codeGenContext, irModule->getModuleInst ());
645
+
529
646
// For targets that support dynamic dispatch, we need to lower the
530
647
// generics / interface types to ordinary functions and types using
531
648
// function pointers.
532
649
dumpIRIfEnabled (codeGenContext, irModule, " BEFORE-LOWER-GENERICS" );
533
- lowerGenerics (targetProgram, irModule, sink);
650
+ if (requiredLoweringPassSet.generics )
651
+ lowerGenerics (targetProgram, irModule, sink);
652
+ else
653
+ cleanupGenerics (targetProgram, irModule, sink);
534
654
dumpIRIfEnabled (codeGenContext, irModule, " AFTER-LOWER-GENERICS" );
535
655
536
656
if (sink->getErrorCount () != 0 )
@@ -547,7 +667,10 @@ Result linkAndOptimizeIR(
547
667
// up downstream passes like type legalization, so we
548
668
// will run a DCE pass to clean up after the specialization.
549
669
//
550
- simplifyIR (targetProgram, irModule, defaultIRSimplificationOptions, sink);
670
+ if (!fastIRSimplificationOptions.minimalOptimization )
671
+ {
672
+ simplifyIR (targetProgram, irModule, defaultIRSimplificationOptions, sink);
673
+ }
551
674
552
675
validateIRModuleIfEnabled (codeGenContext, irModule);
553
676
@@ -569,11 +692,15 @@ Result linkAndOptimizeIR(
569
692
case CodeGenTarget::Metal:
570
693
case CodeGenTarget::MetalLib:
571
694
case CodeGenTarget::MetalLibAssembly:
572
- lowerCombinedTextureSamplers (irModule, sink);
695
+ if (requiredLoweringPassSet.combinedTextureSamplers )
696
+ lowerCombinedTextureSamplers (irModule, sink);
573
697
break ;
574
698
}
575
699
576
- addUserTypeHintDecorations (irModule);
700
+ if (codeGenContext->getTargetProgram ()->getOptionSet ().getBoolOption (CompilerOptionName::VulkanEmitReflection))
701
+ {
702
+ addUserTypeHintDecorations (irModule);
703
+ }
577
704
578
705
// We don't need the legalize pass for C/C++ based types
579
706
if (options.shouldLegalizeExistentialAndResourceTypes )
@@ -603,10 +730,13 @@ Result linkAndOptimizeIR(
603
730
// we need to replace it with just an `X`, after which we
604
731
// will have (more) legal shader code.
605
732
//
606
- legalizeExistentialTypeLayout (
607
- targetProgram,
608
- irModule,
609
- sink);
733
+ if (requiredLoweringPassSet.existentialTypeLayout )
734
+ {
735
+ legalizeExistentialTypeLayout (
736
+ targetProgram,
737
+ irModule,
738
+ sink);
739
+ }
610
740
611
741
#if 0
612
742
dumpIRIfEnabled(codeGenContext, irModule, "EXISTENTIALS LEGALIZED");
@@ -652,7 +782,8 @@ Result linkAndOptimizeIR(
652
782
// to see if we can clean up any temporaries created by legalization.
653
783
// (e.g., things that used to be aggregated might now be split up,
654
784
// so that we can work with the individual fields).
655
- simplifyIR (targetProgram, irModule, fastIRSimplificationOptions, sink);
785
+ if (!fastIRSimplificationOptions.minimalOptimization )
786
+ simplifyIR (targetProgram, irModule, fastIRSimplificationOptions, sink);
656
787
657
788
#if 0
658
789
dumpIRIfEnabled(codeGenContext, irModule, "AFTER SSA");
@@ -678,7 +809,6 @@ Result linkAndOptimizeIR(
678
809
{
679
810
specializeArrayParameters (codeGenContext, irModule);
680
811
}
681
- eliminateDeadCode (irModule);
682
812
683
813
#if 0
684
814
dumpIRIfEnabled(codeGenContext, irModule, "AFTER RESOURCE SPECIALIZATION");
@@ -965,14 +1095,16 @@ Result linkAndOptimizeIR(
965
1095
966
1096
// Lower the `getRegisterIndex` and `getRegisterSpace` intrinsics.
967
1097
//
968
- lowerBindingQueries (irModule, sink);
1098
+ if (requiredLoweringPassSet.bindingQuery )
1099
+ lowerBindingQueries (irModule, sink);
969
1100
970
1101
// For some small improvement in type safety we represent these as opaque
971
1102
// structs instead of regular arrays.
972
1103
//
973
1104
// If any have survived this far, change them back to regular (decorated)
974
1105
// arrays that the emitters can deal with.
975
- legalizeMeshOutputTypes (irModule);
1106
+ if (requiredLoweringPassSet.meshOutput )
1107
+ legalizeMeshOutputTypes (irModule);
976
1108
977
1109
if (options.shouldLegalizeExistentialAndResourceTypes )
978
1110
{
@@ -997,13 +1129,10 @@ Result linkAndOptimizeIR(
997
1129
rcpWOfPositionInput (irModule);
998
1130
}
999
1131
1000
- // Lower sizeof/alignof
1001
-
1002
- lowerSizeOfLike (targetProgram, irModule, sink);
1003
-
1004
1132
// Lower all bit_cast operations on complex types into leaf-level
1005
1133
// bit_cast on basic types.
1006
- lowerBitCast (targetProgram, irModule, sink);
1134
+ if (requiredLoweringPassSet.bitcast )
1135
+ lowerBitCast (targetProgram, irModule, sink);
1007
1136
1008
1137
bool emitSpirvDirectly = targetProgram->shouldEmitSPIRVDirectly ();
1009
1138
@@ -1076,7 +1205,7 @@ Result linkAndOptimizeIR(
1076
1205
// For now we are avoiding that problem by simply *not* emitting live-range
1077
1206
// information when we fix variable scoping later on.
1078
1207
1079
- // Depending on the target, certain things that were represented as
1208
+ // Depending on the target, certain things that were represented as
1080
1209
// single IR instructions will need to be emitted with the help of
1081
1210
// function declarations in output high-level code.
1082
1211
//
0 commit comments