Skip to content

Commit 5672ad0

Browse files
committed
More optimizations.
1 parent 137daba commit 5672ad0

6 files changed

+124
-71
lines changed

source/slang/slang-emit.cpp

+48-10
Original file line number | Diff line number | Diff line change
@@ -220,12 +220,17 @@ struct RequiredLoweringPassSet
220220
bool combinedTextureSamplers;
221221
bool reinterpret;
222222
bool generics;
223+
bool bindExistential;
223224
bool autodiff;
224225
bool derivativePyBindWrapper;
225226
bool bitcast;
226227
bool existentialTypeLayout;
227228
bool bindingQuery;
228229
bool meshOutput;
230+
bool higherOrderFunc;
231+
bool glslGlobalVar;
232+
bool glslSSBO;
233+
bool byteAddressBuffer;
229234
};
230235

231236
void calcRequiredLoweringPassSet(RequiredLoweringPassSet& result, CodeGenContext* codeGenContext, IRInst* inst)
@@ -303,6 +308,27 @@ void calcRequiredLoweringPassSet(RequiredLoweringPassSet& result, CodeGenContext
303308
case kIROp_AutoPyBindCudaDecoration:
304309
result.derivativePyBindWrapper = true;
305310
break;
311+
case kIROp_Param:
312+
if (as<IRFuncType>(inst->getDataType()))
313+
result.higherOrderFunc = true;
314+
break;
315+
case kIROp_GlobalInputDecoration:
316+
case kIROp_GlobalOutputDecoration:
317+
case kIROp_GetWorkGroupSize:
318+
result.glslGlobalVar = true;
319+
break;
320+
case kIROp_BindExistentialSlotsDecoration:
321+
result.bindExistential = true;
322+
result.generics = true;
323+
result.existentialTypeLayout = true;
324+
break;
325+
case kIROp_GLSLShaderStorageBufferType:
326+
result.glslSSBO = true;
327+
break;
328+
case kIROp_ByteAddressBufferLoad:
329+
case kIROp_ByteAddressBufferStore:
330+
result.byteAddressBuffer = true;
331+
break;
306332
}
307333
for (auto child : inst->getDecorationsAndChildren())
308334
{
@@ -348,10 +374,14 @@ Result linkAndOptimizeIR(
348374
// un-specialized IR.
349375
dumpIRIfEnabled(codeGenContext, irModule, "POST IR VALIDATION");
350376

351-
if(!isKhronosTarget(targetRequest))
377+
// Scan the IR module and determine which lowering/legalization passes are needed.
378+
RequiredLoweringPassSet requiredLoweringPassSet = {};
379+
calcRequiredLoweringPassSet(requiredLoweringPassSet, codeGenContext, irModule->getModuleInst());
380+
381+
if(!isKhronosTarget(targetRequest) && requiredLoweringPassSet.glslSSBO)
352382
lowerGLSLShaderStorageBufferObjectsToStructuredBuffers(irModule, sink);
353383

354-
if (!targetProgram->getOptionSet().shouldPerformMinimumOptimizations())
384+
if (requiredLoweringPassSet.glslGlobalVar)
355385
translateGLSLGlobalVar(codeGenContext, irModule);
356386

357387
// Replace any global constants with their values.
@@ -370,7 +400,8 @@ Result linkAndOptimizeIR(
370400
// shader parameters for those slots, to be wired up to
371401
// use sites.
372402
//
373-
bindExistentialSlots(irModule, sink);
403+
if (requiredLoweringPassSet.bindExistential)
404+
bindExistentialSlots(irModule, sink);
374405
#if 0
375406
dumpIRIfEnabled(codeGenContext, irModule, "EXISTENTIALS BOUND");
376407
#endif
@@ -450,9 +481,6 @@ Result linkAndOptimizeIR(
450481
break;
451482
}
452483

453-
RequiredLoweringPassSet requiredLoweringPassSet = {};
454-
calcRequiredLoweringPassSet(requiredLoweringPassSet, codeGenContext, irModule->getModuleInst());
455-
456484
if (requiredLoweringPassSet.optionalType)
457485
lowerOptionalType(irModule, sink);
458486

@@ -540,7 +568,10 @@ Result linkAndOptimizeIR(
540568
return SLANG_FAIL;
541569
dumpIRIfEnabled(codeGenContext, irModule, "AFTER-SPECIALIZE");
542570

543-
applySparseConditionalConstantPropagation(irModule, codeGenContext->getSink());
571+
if (changed)
572+
{
573+
applySparseConditionalConstantPropagation(irModule, codeGenContext->getSink());
574+
}
544575
eliminateDeadCode(irModule, deadCodeEliminationOptions);
545576

546577
validateIRModuleIfEnabled(codeGenContext, irModule);
@@ -564,7 +595,7 @@ Result linkAndOptimizeIR(
564595
// which do.
565596
// Specialize away these parameters
566597
// TODO: We should implement a proper defunctionalization pass
567-
if (!targetProgram->getOptionSet().shouldPerformMinimumOptimizations())
598+
if (requiredLoweringPassSet.higherOrderFunc)
568599
changed |= specializeHigherOrderParameters(codeGenContext, irModule);
569600

570601
dumpIRIfEnabled(codeGenContext, irModule, "BEFORE-AUTODIFF");
@@ -673,7 +704,11 @@ Result linkAndOptimizeIR(
673704
// up downstream passes like type legalization, so we
674705
// will run a DCE pass to clean up after the specialization.
675706
//
676-
if (!fastIRSimplificationOptions.minimalOptimization)
707+
if (fastIRSimplificationOptions.minimalOptimization)
708+
{
709+
eliminateDeadCode(irModule, deadCodeEliminationOptions);
710+
}
711+
else
677712
{
678713
simplifyIR(targetProgram, irModule, defaultIRSimplificationOptions, sink);
679714
}
@@ -788,7 +823,9 @@ Result linkAndOptimizeIR(
788823
// to see if we can clean up any temporaries created by legalization.
789824
// (e.g., things that used to be aggregated might now be split up,
790825
// so that we can work with the individual fields).
791-
if (!fastIRSimplificationOptions.minimalOptimization)
826+
if (fastIRSimplificationOptions.minimalOptimization)
827+
eliminateDeadCode(irModule, deadCodeEliminationOptions);
828+
else
792829
simplifyIR(targetProgram, irModule, fastIRSimplificationOptions, sink);
793830

794831
#if 0
@@ -849,6 +886,7 @@ Result linkAndOptimizeIR(
849886
// of aggregate types from/to byte-address buffers into
850887
// stores of individual scalar or vector values.
851888
//
889+
if (requiredLoweringPassSet.byteAddressBuffer)
852890
{
853891
ByteAddressBufferLegalizationOptions byteAddressBufferOptions;
854892

source/slang/slang-ir-insts.h

+7
Original file line number | Diff line number | Diff line change
@@ -3681,6 +3681,13 @@ struct IRBuilder
36813681
{
36823682
return emitCallInst(type, func, args.getCount(), args.getBuffer());
36833683
}
3684+
IRCall* emitCallInst(
3685+
IRType* type,
3686+
IRInst* func,
3687+
ArrayView<IRInst*> args)
3688+
{
3689+
return emitCallInst(type, func, args.getCount(), args.getBuffer());
3690+
}
36843691

36853692
IRInst* emitTryCallInst(
36863693
IRType* type,

source/slang/slang-ir-legalize-types.cpp

+9-9
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,7 @@ struct LegalCallBuilder
212212
IRCall* m_call = nullptr;
213213

214214
/// The legalized arguments for the call
215-
List<IRInst*> m_args;
215+
ShortList<IRInst*> m_args;
216216

217217
/// Add a logical argument to the call (which may map to zero or more actual arguments)
218218
void addArg(
@@ -463,7 +463,7 @@ struct LegalCallBuilder
463463
resultType,
464464
m_call->getCallee(),
465465
m_args.getCount(),
466-
m_args.getBuffer());
466+
m_args.getArrayView().getBuffer());
467467
}
468468
};
469469

@@ -706,7 +706,7 @@ static LegalVal legalizeRetVal(
706706
return LegalVal();
707707
}
708708

709-
static void _addVal(List<IRInst*>& rs, const LegalVal& legalVal)
709+
static void _addVal(ShortList<IRInst*>& rs, const LegalVal& legalVal)
710710
{
711711
switch (legalVal.flavor)
712712
{
@@ -733,7 +733,7 @@ static LegalVal legalizeUnconditionalBranch(
733733
ArrayView<LegalVal> args,
734734
IRUnconditionalBranch* branchInst)
735735
{
736-
List<IRInst*> newArgs;
736+
ShortList<IRInst*> newArgs;
737737
for (auto arg : args)
738738
{
739739
switch (arg.flavor)
@@ -757,7 +757,7 @@ static LegalVal legalizeUnconditionalBranch(
757757
SLANG_UNIMPLEMENTED_X("Unknown legalized val flavor.");
758758
}
759759
}
760-
context->builder->emitIntrinsicInst(nullptr, branchInst->getOp(), newArgs.getCount(), newArgs.getBuffer());
760+
context->builder->emitIntrinsicInst(nullptr, branchInst->getOp(), newArgs.getCount(), newArgs.getArrayView().getBuffer());
761761
return LegalVal();
762762
}
763763

@@ -861,7 +861,7 @@ static LegalVal legalizeDebugVar(IRTypeLegalizationContext* context, LegalType t
861861
static LegalVal legalizeDebugValue(IRTypeLegalizationContext* context, LegalVal debugVar, LegalVal debugValue, IRDebugValue* originalInst)
862862
{
863863
// For now we just discard any special part and keep the ordinary part.
864-
List<IRInst*> accessChain;
864+
ShortList<IRInst*> accessChain;
865865
for (UInt i = 0; i < originalInst->getAccessChainCount(); i++)
866866
{
867867
accessChain.add(originalInst->getAccessChain(i));
@@ -873,7 +873,7 @@ static LegalVal legalizeDebugValue(IRTypeLegalizationContext* context, LegalVal
873873
context->builder->emitDebugValue(
874874
debugVar.getSimple(),
875875
debugValue.getSimple(),
876-
accessChain.getArrayView()));
876+
accessChain.getArrayView().arrayView));
877877
case LegalType::Flavor::none:
878878
return LegalVal();
879879
case LegalType::Flavor::pair:
@@ -2205,7 +2205,7 @@ static LegalVal legalizeInst(
22052205
// value of each, and collect them in an array for subsequent use.
22062206
//
22072207
auto argCount = inst->getOperandCount();
2208-
List<LegalVal> legalArgs;
2208+
ShortList<LegalVal> legalArgs;
22092209
//
22102210
// Along the way we will also note whether there were any operands
22112211
// with non-simple legalized values.
@@ -2277,7 +2277,7 @@ static LegalVal legalizeInst(
22772277
context,
22782278
inst,
22792279
legalType,
2280-
legalArgs.getArrayView());
2280+
legalArgs.getArrayView().arrayView);
22812281

22822282
if (legalVal.flavor == LegalVal::Flavor::simple)
22832283
{

source/slang/slang-ir-specialize-resources.cpp

+5-2
Original file line number | Diff line number | Diff line change
@@ -1189,8 +1189,11 @@ bool specializeResourceUsage(
11891189
// and turned into SSA temporaries. Such optimization may enable
11901190
// the following passes to "see" and specialize more cases.
11911191
//
1192-
simplifyIR(codeGenContext->getTargetProgram(), irModule,
1193-
IRSimplificationOptions::getFast(codeGenContext->getTargetProgram()));
1192+
if (changed)
1193+
{
1194+
simplifyIR(codeGenContext->getTargetProgram(), irModule,
1195+
IRSimplificationOptions::getFast(codeGenContext->getTargetProgram()));
1196+
}
11941197
result |= changed;
11951198
}
11961199
if (unspecializableFuncs.getCount() == 0)

0 commit comments

Comments
 (0)