Merge branch 'cherry-pick-14709d7b' into 'master'
Refactor MPI execution branching selection and allow unused branches to be optimized away

See merge request exastencils/exastencils!153
RichardAngersbach committed Dec 20, 2024
2 parents 76bf47a + 26f6f1a commit 75066fe
Showing 4 changed files with 40 additions and 15 deletions.
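
The gist of the change: host/device branches for MPI transfers are no longer emitted with their condition inlined. Each branch is created with a dummy placeholder condition and an annotation carrying a shared NoDuplicateWrapper; a new, dedicated strategy installs the real condition afterwards, and once that condition is a constant, simplification can discard the untaken branch. Below is a minimal, self-contained Scala sketch of this pattern; every type and helper in it is a simplified stand-in for the ExaStencils IR, not the project's actual API, with only the wrapper-plus-placeholder idea taken from the diff.

import scala.collection.mutable.ListBuffer

object BranchingSketch {
  sealed trait Expr
  case class BoolConst(value : Boolean) extends Expr
  case class VarAccess(name : String) extends Expr

  sealed trait Stmt
  case class Print(msg : String) extends Stmt
  case class IfCondition(var condition : Expr,
      trueBody : ListBuffer[Stmt], falseBody : ListBuffer[Stmt]) extends Stmt

  // one shared, mutable cell: every branch built from it sees the final condition
  class NoDuplicateWrapper[T](var value : T)

  // phase 1: emit the branch with a dummy variable access as condition, so a
  // simplification pass cannot fold the branch away before the choice is final
  def emitBranch(host : ListBuffer[Stmt], device : ListBuffer[Stmt]) : IfCondition =
    IfCondition(VarAccess("replaceIn_SetExecutionBranching"), host, device)

  // phase 2: once the execution choice is final, install it from the wrapper
  def setFinalCondition(branch : IfCondition, wrapper : NoDuplicateWrapper[Expr]) : Unit =
    branch.condition = wrapper.value

  // phase 3: with a constant boolean condition, the unused branch can be dropped
  def simplify(branch : IfCondition) : ListBuffer[Stmt] = branch.condition match {
    case BoolConst(true)  => branch.trueBody
    case BoolConst(false) => branch.falseBody
    case _                => ListBuffer[Stmt](branch)
  }

  def main(args : Array[String]) : Unit = {
    val wrapper = new NoDuplicateWrapper[Expr](BoolConst(true)) // e.g. "Host" preferred
    val branch  = emitBranch(ListBuffer(Print("host path")), ListBuffer(Print("device path")))
    setFinalCondition(branch, wrapper)
    println(simplify(branch)) // ListBuffer(Print(host path)): the device branch is gone
  }
}

Storing the condition once in a shared wrapper means a late decision reaches every branch built from it without duplicating the condition expression, which is presumably what the NoDuplicateWrapper name refers to.
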
Compiler/src/exastencils/app/ir/IR_LayerHandler.scala (3 changes: 2 additions & 1 deletion)

@@ -212,7 +212,8 @@ object IR_DefaultLayerHandler extends IR_LayerHandler {
       CUDA_AdaptKernelDimensionality,
       CUDA_HandleFragmentLoops,
       CUDA_HandleReductions,
-      CUDA_ReplaceStdFunctionCallsWrapper))
+      CUDA_ReplaceStdFunctionCallsWrapper,
+      CUDA_SetExecutionBranching))
 
     scheduler.register(IR_LayoutTansformation)
 
@@ -171,12 +171,6 @@ object CUDA_AnnotateLoop extends DefaultStrategy("Calculate the annotations for
     }
   }, false)
 
-  this += new Transformation("Set final condition for host/device selection", {
-    case c : IR_IfCondition if c.hasAnnotation(CUDA_Util.CUDA_BRANCH_CONDITION) =>
-      c.condition = c.removeAnnotation(CUDA_Util.CUDA_BRANCH_CONDITION).get.asInstanceOf[NoDuplicateWrapper[IR_Expression]].value
-      c
-  }, false)
-
 /// CUDA_GatherLoopIteratorUsage
 object CUDA_GatherLoopIteratorUsage extends QuietDefaultStrategy("Gather surrounding loop iterator accesses") {
   var loopIterators : Set[String] = Set[String]()

@@ -6,18 +6,38 @@ import exastencils.base.ir._
 import exastencils.base.ir.IR_ImplicitConversion._
 import exastencils.config.Knowledge
 import exastencils.config.Platform
+import exastencils.datastructures.DefaultStrategy
+import exastencils.datastructures.Transformation
+import exastencils.util.NoDuplicateWrapper
 
 // compile switch for cpu/gpu exec
 trait CUDA_ExecutionBranching {
 
-  def getHostDeviceBranchingMPI(hostStmts : ListBuffer[IR_Statement], deviceStmts : ListBuffer[IR_Statement]) : ListBuffer[IR_Statement] = {
-    val defaultChoice : IR_Expression = Knowledge.cuda_preferredExecution match {
-      case _ if !Platform.hw_gpu_gpuDirectAvailable => 1 // if GPUDirect is not available default to CPU
-      case "Host" => 1 // CPU by default
-      case "Device" => 0 // GPU by default
-      case "Performance" => 1 // FIXME: Knowledge flag
+  private def getDefaultChoiceMPI() : IR_Expression = {
+    Knowledge.cuda_preferredExecution match {
+      case _ if !Platform.hw_gpu_gpuDirectAvailable => true // if GPUDirect is not available default to CPU
+      case "Host" => true // CPU by default
+      case "Device" => false // GPU by default
+      case "Performance" => true // FIXME: Knowledge flag
       case "Condition" => Knowledge.cuda_executionCondition
     }
+  }
 
+  def getHostDeviceBranchingMPICondWrapper(condWrapper : NoDuplicateWrapper[IR_Expression],
+      hostStmts : ListBuffer[IR_Statement], deviceStmts : ListBuffer[IR_Statement]) : ListBuffer[IR_Statement] = {
+
+    // get execution choice
+    condWrapper.value = getDefaultChoiceMPI()
+
+    // set dummy first to prevent IR_GeneralSimplify from removing the branch statement until the condition is final
+    val branch = IR_IfCondition(IR_VariableAccess("replaceIn_CUDA_SetExecutionBranching", IR_BooleanDatatype), hostStmts, deviceStmts)
+    branch.annotate(CUDA_Util.CUDA_BRANCH_CONDITION, condWrapper)
+    ListBuffer[IR_Statement](branch)
+  }
+
+  def getHostDeviceBranchingMPI(hostStmts : ListBuffer[IR_Statement], deviceStmts : ListBuffer[IR_Statement]) : ListBuffer[IR_Statement] = {
+    // get execution choice
+    val defaultChoice = getDefaultChoiceMPI()
 
     ListBuffer[IR_Statement](IR_IfCondition(defaultChoice, hostStmts, deviceStmts))
   }
@@ -45,8 +65,16 @@ trait CUDA_ExecutionBranching {
     condWrapper.value = getDefaultChoice(estimatedFasterHostExec)
 
     // set dummy first to prevent IR_GeneralSimplify from removing the branch statement until the condition is final
-    val branch = IR_IfCondition(IR_VariableAccess("replaceIn_CUDA_AnnotateLoops", IR_BooleanDatatype), hostStmts, deviceStmts)
+    val branch = IR_IfCondition(IR_VariableAccess("replaceIn_CUDA_SetExecutionBranching", IR_BooleanDatatype), hostStmts, deviceStmts)
     branch.annotate(CUDA_Util.CUDA_BRANCH_CONDITION, condWrapper)
     ListBuffer[IR_Statement](branch)
   }
 }
+
+object CUDA_SetExecutionBranching extends DefaultStrategy("Set final condition for host/device selection") {
+  this += new Transformation("..", {
+    case c : IR_IfCondition if c.hasAnnotation(CUDA_Util.CUDA_BRANCH_CONDITION) =>
+      c.condition = c.removeAnnotation(CUDA_Util.CUDA_BRANCH_CONDITION).get.asInstanceOf[NoDuplicateWrapper[IR_Expression]].value
+      c
+  }, false)
+}
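
A plausible reading of the 1/0-to-true/false change in getDefaultChoiceMPI above: with real boolean literals as branch conditions, a constant-folding pass in the spirit of IR_GeneralSimplify can drop the untaken branch entirely, which is what the commit title promises. A toy folding pass, again with stand-in types rather than the real strategy:

object FoldingSketch {
  sealed trait Expr
  case class IntConst(v : Int) extends Expr
  case class BoolConst(v : Boolean) extends Expr

  sealed trait Stmt
  case class Stub(name : String) extends Stmt
  case class IfCondition(condition : Expr,
      trueBody : List[Stmt], falseBody : List[Stmt]) extends Stmt

  // folds only boolean literals; an integer condition like 1 or 0 would need an
  // extra truthiness rule, so such a branch survives simplification unchanged
  def fold(stmt : Stmt) : List[Stmt] = stmt match {
    case IfCondition(BoolConst(true), t, _)  => t
    case IfCondition(BoolConst(false), _, f) => f
    case other                               => List(other)
  }

  def main(args : Array[String]) : Unit = {
    val host   = List(Stub("host"))
    val device = List(Stub("device"))
    println(fold(IfCondition(BoolConst(true), host, device))) // device code dropped
    println(fold(IfCondition(IntConst(1), host, device)))     // both branches kept
  }
}

This also explains the dummy IR_VariableAccess condition: until CUDA_SetExecutionBranching has run, the branch must not carry a constant condition, or it would be folded before the choice is final, matching the in-code comment about IR_GeneralSimplify.
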
@@ -33,6 +33,7 @@ import exastencils.logger.Logger
 import exastencils.parallelization.api.mpi._
 import exastencils.parallelization.ir.IR_HasParallelizationInfo
 import exastencils.timing.ir.IR_TimerFunctions
+import exastencils.util.NoDuplicateWrapper
 import exastencils.util.ir._
 
 /// CUDA_PrepareMPICode

@@ -251,7 +252,8 @@ object CUDA_PrepareMPICode extends DefaultStrategy("Prepare CUDA relevant code b
       deviceStmts ++= afterDevice
 
       /// compile final switch
-      getHostDeviceBranchingMPI(hostStmts, deviceStmts)
+      val condWrapper = NoDuplicateWrapper[IR_Expression](null)
+      getHostDeviceBranchingMPICondWrapper(condWrapper, hostStmts, deviceStmts)
     }
   }, false)
 }
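
Taken together, the ordering is: CUDA_PrepareMPICode emits the placeholder branch and attaches the shared wrapper as an annotation; CUDA_SetExecutionBranching, newly registered at the end of the CUDA strategy list in IR_LayerHandler, pops the annotation and installs the final condition; later simplification can then fold branches with constant conditions. A compact end-to-end sketch of the two passes, under the same stand-in types as before (the annotation key mirrors CUDA_Util.CUDA_BRANCH_CONDITION, while its string value here is illustrative):

import scala.collection.mutable

object PipelineSketch {
  sealed trait Expr
  case class BoolConst(v : Boolean) extends Expr
  case class Placeholder(name : String) extends Expr

  class NoDuplicateWrapper[T](var value : T)

  class IfCondition(var condition : Expr) {
    val annotations = mutable.Map[String, Any]()
  }

  val BranchConditionKey = "CUDA_BRANCH_CONDITION"

  def main(args : Array[String]) : Unit = {
    // pass 1 (cf. CUDA_PrepareMPICode): dummy condition plus annotated wrapper
    val wrapper = new NoDuplicateWrapper[Expr](BoolConst(false)) // e.g. "Device" preferred
    val branch  = new IfCondition(Placeholder("replaceIn_CUDA_SetExecutionBranching"))
    branch.annotations(BranchConditionKey) = wrapper

    // pass 2 (cf. CUDA_SetExecutionBranching): pop the annotation and install
    // the final condition carried by the shared wrapper
    val wrapperBack = branch.annotations.remove(BranchConditionKey).get.asInstanceOf[NoDuplicateWrapper[Expr]]
    branch.condition = wrapperBack.value

    println(branch.condition) // BoolConst(false): now a constant, hence foldable
  }
}
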
