Skip to content

Commit 64c77e9

Browse files
committed
Be more consistent and conservative with GPU syncing on D3D12
* In some cases we were only syncing one queue when we needed to flush and sync the whole GPU. Rename functions to be more clear about what is being synced, and only sync one queue/our internal queue when we know that's the only work we need to wait on.
1 parent 74a6e28 commit 64c77e9

11 files changed

+97
-91
lines changed

renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp

+7-7
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,7 @@ bool WrappedID3D12CommandQueue::Serialise_ExecuteCommandLists(SerialiserType &se
474474
ToStr(GetResourceManager()->GetOriginalID(m_PrevQueueId)).c_str(),
475475
ToStr(GetResourceManager()->GetOriginalID(GetResID(pQueue))).c_str());
476476
if(m_PrevQueueId != ResourceId())
477-
m_pDevice->GPUSync(GetResourceManager()->GetCurrentAs<ID3D12CommandQueue>(m_PrevQueueId));
477+
m_pDevice->DeviceWaitForIdle();
478478

479479
m_PrevQueueId = GetResID(pQueue);
480480
}
@@ -493,7 +493,7 @@ bool WrappedID3D12CommandQueue::Serialise_ExecuteCommandLists(SerialiserType &se
493493
ID3D12CommandList *list = Unwrap(ppCommandLists[i]);
494494
real->ExecuteCommandLists(1, &list);
495495
if(D3D12_Debug_SingleSubmitFlushing() || D3D12_Debug_RT_Auditing())
496-
m_pDevice->GPUSync();
496+
m_pDevice->DeviceWaitForIdle();
497497

498498
BakedCmdListInfo &info = m_Cmd.m_BakedCmdListInfo[cmd];
499499

@@ -582,7 +582,7 @@ bool WrappedID3D12CommandQueue::Serialise_ExecuteCommandLists(SerialiserType &se
582582
if(!info.executeEvents.empty())
583583
{
584584
// ensure all GPU work has finished for readback of arguments
585-
m_pDevice->GPUSync();
585+
m_pDevice->DeviceWaitForIdle();
586586

587587
if(m_pDevice->HasFatalError())
588588
return false;
@@ -778,7 +778,7 @@ bool WrappedID3D12CommandQueue::Serialise_ExecuteCommandLists(SerialiserType &se
778778
for(size_t i = 0; i < rerecordedCmds.size(); i++)
779779
{
780780
real->ExecuteCommandLists(1, &rerecordedCmds[i]);
781-
m_pDevice->GPUSync();
781+
m_pDevice->DeviceWaitForIdle();
782782
}
783783
}
784784
else
@@ -1092,7 +1092,7 @@ void WrappedID3D12CommandQueue::ExecuteCommandListsInternal(UINT NumCommandLists
10921092
queueReadback.list->Close();
10931093
ID3D12CommandList *listptr = Unwrap(queueReadback.list);
10941094
queueReadback.unwrappedQueue->ExecuteCommandLists(1, &listptr);
1095-
m_pDevice->GPUSync(queueReadback.unwrappedQueue, Unwrap(queueReadback.fence));
1095+
m_pDevice->QueueWaitForIdle(queueReadback.unwrappedQueue, Unwrap(queueReadback.fence));
10961096

10971097
data = queueReadback.readbackMapped;
10981098
}
@@ -1397,7 +1397,7 @@ bool WrappedID3D12CommandQueue::Serialise_Signal(SerialiserType &ser, ID3D12Fenc
13971397
if(IsReplayingAndReading() && pFence)
13981398
{
13991399
m_pReal->Signal(Unwrap(pFence), Value);
1400-
m_pDevice->GPUSync(pQueue);
1400+
m_pDevice->DeviceWaitForIdle();
14011401
}
14021402

14031403
return true;
@@ -1435,7 +1435,7 @@ bool WrappedID3D12CommandQueue::Serialise_Wait(SerialiserType &ser, ID3D12Fence
14351435

14361436
if(IsReplayingAndReading() && pFence)
14371437
{
1438-
m_pDevice->GPUSync(pQueue);
1438+
m_pDevice->DeviceWaitForIdle();
14391439
}
14401440

14411441
return true;

renderdoc/driver/d3d12/d3d12_counters.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,7 @@ rdcarray<CounterResult> D3D12Replay::FetchCounters(const rdcarray<GPUCounter> &c
706706

707707
m_pDevice->ExecuteLists();
708708
m_pDevice->FlushLists();
709-
m_pDevice->GPUSyncAllQueues();
709+
m_pDevice->DeviceWaitForIdle();
710710

711711
D3D12_RANGE range;
712712
range.Begin = 0;

renderdoc/driver/d3d12/d3d12_debug.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -2116,7 +2116,7 @@ void D3D12DebugManager::GetBufferData(ID3D12Resource *buffer, uint64_t offset, u
21162116
if(buffer == NULL)
21172117
return;
21182118

2119-
m_pDevice->GPUSyncAllQueues();
2119+
m_pDevice->ReplayWorkWaitForIdle();
21202120

21212121
D3D12_RESOURCE_DESC desc = buffer->GetDesc();
21222122
D3D12_HEAP_PROPERTIES heapProps = {};
@@ -2207,7 +2207,7 @@ void D3D12DebugManager::GetBufferData(ID3D12Resource *buffer, uint64_t offset, u
22072207

22082208
ID3D12CommandList *l = m_DebugList;
22092209
m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
2210-
m_pDevice->GPUSync();
2210+
m_pDevice->InternalQueueWaitForIdle();
22112211
m_DebugAlloc->Reset();
22122212

22132213
D3D12_RANGE range = {0, (size_t)chunkSize};
@@ -2247,7 +2247,7 @@ void D3D12DebugManager::GetBufferData(ID3D12Resource *buffer, uint64_t offset, u
22472247

22482248
ID3D12CommandList *l = m_DebugList;
22492249
m_pDevice->GetQueue()->ExecuteCommandLists(1, &l);
2250-
m_pDevice->GPUSync();
2250+
m_pDevice->InternalQueueWaitForIdle();
22512251
m_DebugAlloc->Reset();
22522252
}
22532253

renderdoc/driver/d3d12/d3d12_device.cpp

+46-45
Original file line numberDiff line numberDiff line change
@@ -678,9 +678,9 @@ WrappedID3D12Device::WrappedID3D12Device(ID3D12Device *realDevice, D3D12InitPara
678678
m_HeaderChunk = NULL;
679679

680680
m_Alloc = m_DataUploadAlloc = NULL;
681-
m_GPUSyncFence = NULL;
682-
m_GPUSyncHandle = NULL;
683-
m_GPUSyncCounter = 0;
681+
m_WFIFence = NULL;
682+
m_WFIHandle = NULL;
683+
m_WFICounter = 0;
684684
m_OverlaySyncHandle = NULL;
685685

686686
initStateCurBatch = 0;
@@ -900,12 +900,9 @@ WrappedID3D12Device::~WrappedID3D12Device()
900900
for(size_t i = 0; i < m_InternalCmds.freecmds.size(); i++)
901901
SAFE_RELEASE(m_InternalCmds.freecmds[i]);
902902

903+
DeviceWaitForIdle();
903904
for(size_t i = 0; i < m_QueueFences.size(); i++)
904-
{
905-
GPUSync(m_Queues[i], m_QueueFences[i]);
906-
907905
SAFE_RELEASE(m_QueueFences[i]);
908-
}
909906

910907
for(auto it = m_UploadBuffers.begin(); it != m_UploadBuffers.end(); ++it)
911908
{
@@ -2180,7 +2177,7 @@ bool WrappedID3D12Device::Serialise_MapDataWrite(SerialiserType &ser, ID3D12Reso
21802177
m_CurDataUpload++;
21812178
if(m_CurDataUpload == ARRAY_COUNT(m_DataUploadList))
21822179
{
2183-
GPUSync();
2180+
InternalQueueWaitForIdle();
21842181
m_CurDataUpload = 0;
21852182
}
21862183
}
@@ -2331,7 +2328,7 @@ bool WrappedID3D12Device::Serialise_WriteToSubresource(SerialiserType &ser, ID3D
23312328
m_CurDataUpload++;
23322329
if(m_CurDataUpload == ARRAY_COUNT(m_DataUploadList))
23332330
{
2334-
GPUSync();
2331+
InternalQueueWaitForIdle();
23352332
m_CurDataUpload = 0;
23362333
}
23372334
}
@@ -2728,7 +2725,7 @@ void WrappedID3D12Device::StartFrameCapture(DeviceOwnedWindow devWnd)
27282725
initStateCurBatch = 0;
27292726
initStateCurList = NULL;
27302727

2731-
GPUSyncAllQueues();
2728+
DeviceWaitForIdle();
27322729

27332730
// wait until we've synced all queues to check for these
27342731
GetResourceManager()->GetRTManager()->TickASManagement();
@@ -2863,7 +2860,7 @@ bool WrappedID3D12Device::EndFrameCapture(DeviceOwnedWindow devWnd)
28632860

28642861
m_State = CaptureState::BackgroundCapturing;
28652862

2866-
GPUSync();
2863+
DeviceWaitForIdle();
28672864
}
28682865

28692866
rdcarray<MapState> maps = GetMaps();
@@ -3217,7 +3214,7 @@ bool WrappedID3D12Device::DiscardFrameCapture(DeviceOwnedWindow devWnd)
32173214

32183215
m_State = CaptureState::BackgroundCapturing;
32193216

3220-
GPUSync();
3217+
DeviceWaitForIdle();
32213218

32223219
queues = m_Queues;
32233220
}
@@ -4468,10 +4465,10 @@ void WrappedID3D12Device::CreateInternalResources()
44684465
CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, __uuidof(ID3D12CommandAllocator),
44694466
(void **)&m_Alloc);
44704467
InternalRef();
4471-
CreateFence(0, D3D12_FENCE_FLAG_NONE, __uuidof(ID3D12Fence), (void **)&m_GPUSyncFence);
4472-
m_GPUSyncFence->SetName(L"m_GPUSyncFence");
4468+
CreateFence(0, D3D12_FENCE_FLAG_NONE, __uuidof(ID3D12Fence), (void **)&m_WFIFence);
4469+
m_WFIFence->SetName(L"m_WFIFence");
44734470
InternalRef();
4474-
m_GPUSyncHandle = ::CreateEvent(NULL, FALSE, FALSE, NULL);
4471+
m_WFIHandle = ::CreateEvent(NULL, FALSE, FALSE, NULL);
44754472

44764473
CreateFence(0, D3D12_FENCE_FLAG_NONE, __uuidof(ID3D12Fence), (void **)&m_OverlayFence);
44774474
m_OverlayFence->SetName(L"m_OverlayFence");
@@ -4493,7 +4490,7 @@ void WrappedID3D12Device::CreateInternalResources()
44934490
}
44944491

44954492
GetResourceManager()->SetInternalResource(m_Alloc);
4496-
GetResourceManager()->SetInternalResource(m_GPUSyncFence);
4493+
GetResourceManager()->SetInternalResource(m_WFIFence);
44974494

44984495
CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, __uuidof(ID3D12CommandAllocator),
44994496
(void **)&m_DataUploadAlloc);
@@ -4542,7 +4539,7 @@ void WrappedID3D12Device::CreateInternalResources()
45424539
RDCERR("Failed to create RTV heap");
45434540
}
45444541

4545-
m_GPUSyncCounter = 0;
4542+
m_WFICounter = 0;
45464543

45474544
if(m_TextRenderer == NULL)
45484545
m_TextRenderer = new D3D12TextRenderer(this);
@@ -4555,7 +4552,7 @@ void WrappedID3D12Device::CreateInternalResources()
45554552

45564553
void WrappedID3D12Device::DestroyInternalResources()
45574554
{
4558-
if(m_GPUSyncHandle == NULL)
4555+
if(m_WFIHandle == NULL)
45594556
return;
45604557

45614558
SAFE_RELEASE(m_pAMDExtObject);
@@ -4585,67 +4582,71 @@ void WrappedID3D12Device::DestroyInternalResources()
45854582
}
45864583

45874584
SAFE_RELEASE(m_Alloc);
4588-
SAFE_RELEASE(m_GPUSyncFence);
4585+
SAFE_RELEASE(m_WFIFence);
45894586
SAFE_RELEASE(m_OverlayFence);
4590-
CloseHandle(m_GPUSyncHandle);
4587+
CloseHandle(m_WFIHandle);
45914588
CloseHandle(m_OverlaySyncHandle);
45924589
}
45934590

45944591
void WrappedID3D12Device::DataUploadSync()
45954592
{
45964593
if(m_CurDataUpload >= 0)
45974594
{
4598-
GPUSync();
4595+
InternalQueueWaitForIdle();
45994596
m_CurDataUpload = 0;
46004597
}
46014598
}
46024599

4603-
void WrappedID3D12Device::GPUSync(ID3D12CommandQueue *queue, ID3D12Fence *fence)
4600+
void WrappedID3D12Device::InternalQueueWaitForIdle()
4601+
{
4602+
QueueWaitForIdle(GetQueue(), m_WFIFence);
4603+
}
4604+
4605+
void WrappedID3D12Device::QueueWaitForIdle(ID3D12CommandQueue *queue, ID3D12Fence *fence)
46044606
{
4605-
m_GPUSyncCounter++;
4607+
m_WFICounter++;
46064608

46074609
if(HasFatalError())
46084610
return;
46094611

4610-
if(queue == NULL)
4611-
queue = GetQueue();
4612-
4613-
if(fence == NULL)
4614-
fence = m_GPUSyncFence;
4615-
4616-
HRESULT hr = queue->Signal(fence, m_GPUSyncCounter);
4612+
HRESULT hr = queue->Signal(fence, m_WFICounter);
46174613
CHECK_HR(this, hr);
46184614
RDCASSERTEQUAL(hr, S_OK);
46194615

4620-
fence->SetEventOnCompletion(m_GPUSyncCounter, m_GPUSyncHandle);
4616+
fence->SetEventOnCompletion(m_WFICounter, m_WFIHandle);
46214617

46224618
// wait 10s for hardware GPUs, 100s for CPU
46234619
if(m_Replay && m_Replay->GetDriverInfo().vendor == GPUVendor::Software)
4624-
WaitForSingleObject(m_GPUSyncHandle, 100000);
4620+
WaitForSingleObject(m_WFIHandle, 100000);
46254621
else
4626-
WaitForSingleObject(m_GPUSyncHandle, 10000);
4622+
WaitForSingleObject(m_WFIHandle, 10000);
46274623

46284624
hr = m_pDevice->GetDeviceRemovedReason();
46294625
CHECK_HR(this, hr);
46304626
RDCASSERTEQUAL(hr, S_OK);
46314627
}
46324628

4633-
void WrappedID3D12Device::GPUSyncAllQueues()
4629+
void WrappedID3D12Device::ReplayWorkWaitForIdle()
46344630
{
4635-
if(m_GPUSynced)
4631+
if(m_WaitedForIdleAfterReplay)
46364632
return;
46374633

4638-
for(size_t i = 0; i < m_QueueFences.size(); i++)
4639-
GPUSync(m_Queues[i], m_QueueFences[i]);
4634+
DeviceWaitForIdle();
46404635

4641-
m_GPUSynced = true;
4636+
m_WaitedForIdleAfterReplay = true;
4637+
}
4638+
4639+
void WrappedID3D12Device::DeviceWaitForIdle()
4640+
{
4641+
for(size_t i = 0; i < m_QueueFences.size(); i++)
4642+
QueueWaitForIdle(m_Queues[i], m_QueueFences[i]);
46424643
}
46434644

46444645
ID3D12GraphicsCommandListX *WrappedID3D12Device::GetNewList()
46454646
{
46464647
ID3D12GraphicsCommandListX *ret = NULL;
46474648

4648-
m_GPUSynced = false;
4649+
m_WaitedForIdleAfterReplay = false;
46494650

46504651
if(!m_InternalCmds.freecmds.empty())
46514652
{
@@ -4783,7 +4784,7 @@ void WrappedID3D12Device::FlushLists(bool forceSync, ID3D12CommandQueue *queue)
47834784

47844785
if(!m_InternalCmds.submittedcmds.empty() || forceSync)
47854786
{
4786-
GPUSync(queue);
4787+
QueueWaitForIdle(queue, m_WFIFence);
47874788

47884789
if(!m_InternalCmds.submittedcmds.empty())
47894790
m_InternalCmds.freecmds.append(m_InternalCmds.submittedcmds);
@@ -5382,22 +5383,22 @@ void WrappedID3D12Device::ReplayLog(uint32_t startEventID, uint32_t endEventID,
53825383
{
53835384
bool partial = true;
53845385

5385-
m_GPUSynced = false;
5386+
m_WaitedForIdleAfterReplay = false;
53865387

53875388
if(startEventID == 0 && (replayType == eReplay_WithoutDraw || replayType == eReplay_Full))
53885389
{
53895390
startEventID = 1;
53905391
partial = false;
53915392

5392-
m_GPUSyncCounter++;
5393+
m_WFICounter++;
53935394

5394-
GPUSyncAllQueues();
5395+
DeviceWaitForIdle();
53955396

53965397
// I'm not sure the reason for this, but the debug layer warns about being unable to resubmit
53975398
// command lists due to the 'previous queue fence' not being ready yet, even if no fences are
53985399
// signalled or waited. So instead we just signal a dummy fence each new 'frame'
53995400
for(size_t i = 0; i < m_Queues.size(); i++)
5400-
CHECK_HR(this, m_Queues[i]->Signal(m_QueueFences[i], m_GPUSyncCounter));
5401+
CHECK_HR(this, m_Queues[i]->Signal(m_QueueFences[i], m_WFICounter));
54015402

54025403
FlushLists(true);
54035404
m_CurDataUpload = 0;
@@ -5423,7 +5424,7 @@ void WrappedID3D12Device::ReplayLog(uint32_t startEventID, uint32_t endEventID,
54235424
ExecuteLists();
54245425
FlushLists(true);
54255426

5426-
GPUSyncAllQueues();
5427+
DeviceWaitForIdle();
54275428

54285429
// clear any previous ray dispatch references
54295430
D3D12CommandData &cmd = *m_Queue->GetCommandData();

renderdoc/driver/d3d12/d3d12_device.h

+15-7
Original file line numberDiff line numberDiff line change
@@ -595,8 +595,8 @@ class WrappedID3D12Device : public IFrameCapturer, public ID3DDevice, public ID3
595595
rdcarray<WrappedID3D12CommandQueue *> m_Queues;
596596
rdcarray<ID3D12Fence *> m_QueueFences;
597597

598-
// if we've called GPUSyncAllQueues since the last replay
599-
bool m_GPUSynced = false;
598+
// if we've called ReplayWorkWaitForIdle since the last replay or internal work
599+
bool m_WaitedForIdleAfterReplay = false;
600600

601601
// list of queues and buffers kept alive during capture artificially even if the user destroys
602602
// them, so we can use them in the capture. Storing this separately prevents races where a
@@ -634,9 +634,9 @@ class WrappedID3D12Device : public IFrameCapturer, public ID3DDevice, public ID3
634634
ID3D12GraphicsCommandList *m_DataUploadList[64] = {};
635635
size_t m_CurDataUpload = 0;
636636
ID3D12DescriptorHeap *m_RTVHeap = NULL;
637-
ID3D12Fence *m_GPUSyncFence;
638-
HANDLE m_GPUSyncHandle;
639-
UINT64 m_GPUSyncCounter;
637+
ID3D12Fence *m_WFIFence;
638+
HANDLE m_WFIHandle;
639+
UINT64 m_WFICounter;
640640

641641
ID3D12Fence *m_OverlayFence = NULL;
642642
UINT64 m_CurOverlay = 0;
@@ -1073,8 +1073,16 @@ class WrappedID3D12Device : public IFrameCapturer, public ID3DDevice, public ID3
10731073

10741074
void DataUploadSync();
10751075

1076-
void GPUSync(ID3D12CommandQueue *queue = NULL, ID3D12Fence *fence = NULL);
1077-
void GPUSyncAllQueues();
1076+
// Sync a single queue, by submitting the fence then waiting on it
1077+
void QueueWaitForIdle(ID3D12CommandQueue *queue, ID3D12Fence *fence);
1078+
// Sync to the internal queue - used to ensure any internal work has finished (e.g. FlushLists() above)
1079+
// or generally any internal command buffers submitted to the GetQueue() main internal queue.
1080+
void InternalQueueWaitForIdle();
1081+
// Sync all queues - this always flushes the entire GPU
1082+
void DeviceWaitForIdle();
1083+
// Sync all queues but only once after each replay or internal work submit. used when fetching data
1084+
// or after a replay to ensure work completes on all captured queues before doing any analysis work
1085+
void ReplayWorkWaitForIdle();
10781086

10791087
RDCDriver GetFrameCaptureDriver() { return RDCDriver::D3D12; }
10801088
void StartFrameCapture(DeviceOwnedWindow devWnd);

0 commit comments

Comments
 (0)