Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

performance: use reserve() to reduce the cost of inserts #793

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ void ZebinDecoder<numBits>::dumpSymtab(ElfT &elf, ArrayRef<const uint8_t> symtab
template <Elf::ElfIdentifierClass numBits>
std::vector<SectionInfo> ZebinDecoder<numBits>::dumpElfSections(ElfT &elf) {
std::vector<SectionInfo> sectionInfos;
sectionInfos.reserve(elf.sectionHeaders.size() - 1U);
for (size_t secId = 1U; secId < elf.sectionHeaders.size(); secId++) {
auto &[header, data] = elf.sectionHeaders[secId];
auto sectionName = elf.getSectionName(static_cast<uint32_t>(secId));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,18 @@ SupportedDevicesHelper::SupportedDevicesData SupportedDevicesHelper::collectSupp
SupportedDevicesData data;

// Populate IP Versions, Device Infos, Acronyms
data.deviceIpVersions.reserve(enabledDevices.size());
data.deviceInfos.reserve(enabledDevices.size());
data.acronyms.reserve(enabledDevices.size());
for (const auto &device : enabledDevices) {
data.deviceIpVersions.push_back(device.aotConfig.value);

data.deviceInfos.reserve((*device.deviceIds).size());
for (const auto &deviceId : *device.deviceIds) {
data.deviceInfos.push_back({deviceId, device.aotConfig.revision, device.aotConfig.value});
}

data.acronyms.reserve(device.deviceAcronyms.size());
for (const auto &acronym : device.deviceAcronyms) {
data.acronyms.push_back({acronym.data(), device.aotConfig.value});
}
Expand All @@ -44,6 +49,7 @@ SupportedDevicesHelper::SupportedDevicesData SupportedDevicesHelper::collectSupp
for (const auto &device : enabledDevices) {
groupedDevices[device.family].push_back(device.aotConfig.value);
}
data.familyGroups.reserve(groupedDevices.size());
for (const auto &entry : groupedDevices) {
data.familyGroups.push_back({productConfigHelper->getAcronymFromAFamily(entry.first).data(), entry.second});
}
Expand Down Expand Up @@ -271,13 +277,15 @@ SupportedDevicesHelper::SupportedDevicesData SupportedDevicesHelper::mergeOclocD
[](const auto &a, const auto &b) { return a.second < b.second; });

// Sort FamilyGroups (alphabetically by group name)
mergedData.familyGroups.reserve(uniqueFamilyGroups.size());
for (const auto &[family, ipVersions] : uniqueFamilyGroups) {
mergedData.familyGroups.push_back({family, std::vector<uint32_t>(ipVersions.begin(), ipVersions.end())});
}
std::sort(mergedData.familyGroups.begin(), mergedData.familyGroups.end(),
[](const auto &a, const auto &b) { return a.first < b.first; });

// Sort ReleaseGroups (alphabetically by group name)
mergedData.releaseGroups.reserve(uniqueReleaseGroups.size());
for (const auto &[release, ipVersions] : uniqueReleaseGroups) {
mergedData.releaseGroups.push_back({release, std::vector<uint32_t>(ipVersions.begin(), ipVersions.end())});
}
Expand Down
1 change: 1 addition & 0 deletions shared/source/aub_mem_dump/aub_mem_dump_pvc_and_later.inl
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ static const MMIOList mmioListVECS = {
static MMIOList mmioListCCSInstance(uint32_t mmioBase) {
MMIOList mmioList;

mmioList.reserve(17);
mmioList.push_back(MMIOPair(0x0000ce90, 0x00030003)); // GFX_MULT_CTXT_CTL - enable multi-context with 4CCS
mmioList.push_back(MMIOPair(0x0000b170, 0x00030003)); // MULT_CTXT_CTL - enable multi-context with 4CCS
mmioList.push_back(MMIOPair(0x00014800, 0xFFFF0001)); // RCU_MODE
Expand Down
1 change: 1 addition & 0 deletions shared/source/aub_mem_dump/aub_mem_dump_xehp_and_later.inl
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ static const MMIOList mmioListVECS = {
static MMIOList mmioListCCSInstance(uint32_t mmioBase) {
MMIOList mmioList;

mmioList.reserve(17);
mmioList.push_back(MMIOPair(0x0000ce90, 0x00030003)); // GFX_MULT_CTXT_CTL - enable multi-context with 4CCS
mmioList.push_back(MMIOPair(0x0000b170, 0x00030003)); // MULT_CTXT_CTL - enable multi-context with 4CCS
mmioList.push_back(MMIOPair(0x00014800, 0xFFFF0001)); // RCU_MODE
Expand Down
1 change: 1 addition & 0 deletions shared/source/command_container/cmdcontainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,7 @@ void CommandContainer::fillReusableAllocationLists() {
return;
}

this->getResidencyContainer().reserve(amountToFill);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

auto reserveSize = this->useSecondaryCommandStream ? amountToFill * 2 : amountToFill;

for (auto i = 0u; i < amountToFill; i++) {
auto allocToReuse = obtainNextCommandBufferAllocation();
this->immediateReusableAllocationList->pushTailOne(*allocToReuse);
Expand Down
1 change: 1 addition & 0 deletions shared/source/command_stream/submissions_aggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ void NEO::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourc
totalUsedSize += nextCommandBufferNewResourcesSize;
currentNode->inspectionId = currentInspection;

resourcePackage.reserve(newResources.size());
for (auto &newResource : newResources) {
resourcePackage.push_back(newResource);
}
Expand Down
8 changes: 8 additions & 0 deletions shared/source/compiler_interface/oclc_extensions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ void getOpenclCFeaturesList(const HardwareInfo &hwInfo, OpenClCFeaturesContainer
openclCFeatures.push_back(openClCFeature);

if (hwInfo.capabilityTable.supportsImages) {
openclCFeatures.reserve(3);

strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_3d_image_writes");
openclCFeatures.push_back(openClCFeature);

Expand All @@ -37,6 +39,8 @@ void getOpenclCFeaturesList(const HardwareInfo &hwInfo, OpenClCFeaturesContainer
}

if (hwInfo.capabilityTable.supportsOcl21Features) {
openclCFeatures.reserve(8);

strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_atomic_order_acq_rel");
openclCFeatures.push_back(openClCFeature);

Expand All @@ -62,6 +66,8 @@ void getOpenclCFeaturesList(const HardwareInfo &hwInfo, OpenClCFeaturesContainer
openclCFeatures.push_back(openClCFeature);

if (hwInfo.capabilityTable.supportsFloatAtomics) {
openclCFeatures.reserve(8);

strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp32_global_atomic_add");
openclCFeatures.push_back(openClCFeature);

Expand Down Expand Up @@ -102,6 +108,8 @@ void getOpenclCFeaturesList(const HardwareInfo &hwInfo, OpenClCFeaturesContainer
openclCFeatures.push_back(openClCFeature);

if (hwInfo.capabilityTable.supportsOcl21Features && hwInfo.capabilityTable.supportsFloatAtomics) {
openclCFeatures.reserve(4);

strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp64_global_atomic_add");
openclCFeatures.push_back(openClCFeature);

Expand Down
2 changes: 2 additions & 0 deletions shared/source/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -894,6 +894,7 @@ void Device::initializeRayTracing(uint32_t maxBvhLevels) {
rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
}

rtDispatchGlobalsInfos.reserve(maxBvhLevels - rtDispatchGlobalsInfos.size() + 1);
while (rtDispatchGlobalsInfos.size() <= maxBvhLevels) {
rtDispatchGlobalsInfos.push_back(nullptr);
}
Expand Down Expand Up @@ -1103,6 +1104,7 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
return;
}

dispatchGlobalsInfo->rtStacks.reserve(tileCount);
for (unsigned int tile = 0; tile < tileCount; tile++) {
DeviceBitfield deviceBitfield =
(tileCount == 1)
Expand Down
6 changes: 4 additions & 2 deletions shared/source/device_binary_format/elf/elf_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ Elf<numBits> decodeElf(const ArrayRef<const uint8_t> binary, std::string &outErr
}

const ElfProgramHeader<numBits> *programHeader = reinterpret_cast<const ElfProgramHeader<numBits> *>(binary.begin() + ret.elfFileHeader->phOff);
ret.programHeaders.reserve(ret.elfFileHeader->phNum);
for (decltype(ret.elfFileHeader->phNum) i = 0; i < ret.elfFileHeader->phNum; ++i) {
if (programHeader->offset + programHeader->fileSz > binary.size()) {
outErrReason = "Out of bounds program header offset/filesz, program header idx : " + std::to_string(i);
Expand All @@ -91,6 +92,7 @@ Elf<numBits> decodeElf(const ArrayRef<const uint8_t> binary, std::string &outErr
}

const ElfSectionHeader<numBits> *sectionHeader = reinterpret_cast<const ElfSectionHeader<numBits> *>(binary.begin() + ret.elfFileHeader->shOff);
ret.sectionHeaders.reserve(ret.elfFileHeader->shNum);
for (decltype(ret.elfFileHeader->shNum) i = 0; i < ret.elfFileHeader->shNum; ++i) {
ArrayRef<const uint8_t> data;
if (SHT_NOBITS != sectionHeader->type) {
Expand Down Expand Up @@ -151,7 +153,7 @@ bool Elf<numBits>::decodeRelocations(SectionHeaderAndData<numBits> &sectionHeade
// there may be multiple rela sections, reserve additional size
auto previousEntries = relocations.size();
auto allEntries = previousEntries + numberOfEntries;
relocs.reserve(allEntries);
relocs.reserve(allEntries - previousEntries);

for (auto i = previousEntries; i < allEntries; i++) {

Expand Down Expand Up @@ -186,7 +188,7 @@ bool Elf<numBits>::decodeRelocations(SectionHeaderAndData<numBits> &sectionHeade
// there may be multiple rel sections, reserve additional size
auto previousEntries = relocations.size();
auto allEntries = previousEntries + numberOfEntries;
relocs.reserve(allEntries);
relocs.reserve(allEntries - previousEntries);

for (auto i = previousEntries; i < allEntries; i++) {
int symbolIndex = extractSymbolIndex<ElfRel<numBits>>(*reloc);
Expand Down
2 changes: 2 additions & 0 deletions shared/source/device_binary_format/elf/elf_rewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,11 @@ struct ElfRewriter {

ElfRewriter(NEO::Elf::Elf<numBits> &src) {
elfFileHeader = *src.elfFileHeader;
this->sectionHeaders.reserve(src.sectionHeaders.size());
for (const auto &sh : src.sectionHeaders) {
this->sectionHeaders.push_back(std::make_unique<MutableSectionHeader<numBits>>(src.getName(sh.header->name), *sh.header, std::vector<uint8_t>{sh.data.begin(), sh.data.end()}));
}
this->programHeaders.reserve(src.programHeaders.size());
for (const auto &ph : src.programHeaders) {
this->programHeaders.push_back(std::make_unique<MutableProgramHeader<numBits>>(*ph.header, std::vector<uint8_t>{ph.data.begin(), ph.data.end()}));
for (const auto &sh : this->sectionHeaders) {
Expand Down
1 change: 1 addition & 0 deletions shared/source/gen12lp/gfx_core_helper_gen12lp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ const EngineInstancesContainer GfxCoreHelperHw<Family>::getGpgpuEngineInstances(
engines.push_back({aub_stream::ENGINE_CCS, EngineUsage::regular});
}

engines.reserve(3);
engines.push_back({aub_stream::ENGINE_RCS, EngineUsage::regular});
engines.push_back({aub_stream::ENGINE_RCS, EngineUsage::lowPriority}); // low priority
engines.push_back({defaultEngine, EngineUsage::internal}); // internal usage
Expand Down
1 change: 1 addition & 0 deletions shared/source/helpers/bindless_heaps_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ BindlessHeapsHelper::BindlessHeapsHelper(Device *rootDevice, bool isMultiOsConte
rootDeviceIndex(rootDevice->getRootDeviceIndex()),
deviceBitfield(rootDevice->getDeviceBitfield()) {

ssHeapsAllocations.reserve(BindlesHeapType::numHeapTypes);
for (auto heapType = 0; heapType < BindlesHeapType::numHeapTypes; heapType++) {
auto size = MemoryConstants::pageSize64k;

Expand Down
1 change: 1 addition & 0 deletions shared/source/helpers/gfx_core_helper_xehp_and_later.inl
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ aub_stream::MMIOList GfxCoreHelperHw<GfxFamily>::getExtraMmioList(const Hardware
uint32_t value = 1; // [0] enable
value |= (format << 3); // [3:7] compression_format

mmioList.reserve(3);
mmioList.push_back({0x519C, value});
mmioList.push_back({0xB0F0, value});
mmioList.push_back({0xE4C0, value});
Expand Down
25 changes: 22 additions & 3 deletions shared/source/helpers/l3_range.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,12 @@ inline bool operator!=(const L3Range &lhs, const L3Range &rhs) {
return (false == (lhs == rhs));
}

// Overload set reporting whether a container can be pre-sized with reserve()
// ahead of a run of insertions. The argument is used only for overload
// selection; its value is never read, so the parameters are left unnamed.
//
// NOTE(review): the visible caller spells this name in snake_case
// (is_can_prealloc_stl_container) and passes a function reference parameter
// inside `if constexpr`; before C++23 (P2280) that is presumably not a constant
// expression - confirm the call site compiles with the project's toolchain.

/// Fallback for every container type without a dedicated overload below.
/// Returns false so that generic code instantiated with another container
/// still compiles and simply skips the pre-allocation step.
template <class ContainerT>
constexpr bool isCanPreallocStlContainer(const ContainerT &) { return false; }

/// std::vector provides reserve(), so it can be pre-allocated.
template <class T>
constexpr bool isCanPreallocStlContainer(const std::vector<T> &) { return true; }

/// std::unordered_map provides reserve(), so it can be pre-allocated.
template <class T1, class T2>
constexpr bool isCanPreallocStlContainer(const std::unordered_map<T1, T2> &) { return true; }

template <typename ContainerT>
inline void coverRangeExactImpl(uint64_t address, uint64_t size, ContainerT &ret, uint64_t policy) {
UNRECOVERABLE_IF(false == L3Range::meetsMinimumAlignment(address));
Expand All @@ -114,10 +120,23 @@ inline void coverRangeExactImpl(uint64_t address, uint64_t size, ContainerT &ret
const uint64_t end = address + size;

uint64_t offset = address;

uint64_t maxRangeSizeBySize;
uint64_t maxRangeSizeByOffset;
uint64_t rangeSize;

if constexpr (is_can_prealloc_stl_container(ret)) {
maxRangeSizeBySize = Math::prevPowerOfTwo(end - offset);
maxRangeSizeByOffset = offset ? (1ULL << Math::ffs(offset)) : L3Range::maxSingleRange;
rangeSize = std::min(maxRangeSizeBySize, maxRangeSizeByOffset);
rangeSize = std::min(rangeSize, +L3Range::maxSingleRange);
ret.reserve((end - offset) / rangeSize);
}

while (offset < end) {
uint64_t maxRangeSizeBySize = Math::prevPowerOfTwo(end - offset);
uint64_t maxRangeSizeByOffset = offset ? (1ULL << Math::ffs(offset)) : L3Range::maxSingleRange;
uint64_t rangeSize = std::min(maxRangeSizeBySize, maxRangeSizeByOffset);
maxRangeSizeBySize = Math::prevPowerOfTwo(end - offset);
maxRangeSizeByOffset = offset ? (1ULL << Math::ffs(offset)) : L3Range::maxSingleRange;
rangeSize = std::min(maxRangeSizeBySize, maxRangeSizeByOffset);
rangeSize = std::min(rangeSize, +L3Range::maxSingleRange);
ret.push_back(L3Range::fromAddressSizeWithPolicy(offset, rangeSize, policy));
offset += rangeSize;
Expand Down
1 change: 1 addition & 0 deletions shared/source/xe_hpc_core/gfx_core_helper_xe_hpc_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ const EngineInstancesContainer GfxCoreHelperHw<Family>::getGpgpuEngineInstances(
EngineInstancesContainer engines;

if (hwInfo.featureTable.flags.ftrCCSNode) {
engines.reserve(hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);
for (uint32_t i = 0; i < hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; i++) {
engines.push_back({static_cast<aub_stream::EngineType>(i + aub_stream::ENGINE_CCS), EngineUsage::regular});
if (productHelper.isCooperativeEngineSupported(hwInfo)) {
Expand Down
1 change: 1 addition & 0 deletions shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ const EngineInstancesContainer GfxCoreHelperHw<Family>::getGpgpuEngineInstances(
auto ailHelper = rootDeviceEnvironment.getAILConfigurationHelper();
auto forceRcs = ailHelper && ailHelper->forceRcs();
if (hwInfo.featureTable.flags.ftrCCSNode && !forceRcs) {
engines.reserve(hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);
for (uint32_t i = 0; i < hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; i++) {
engines.push_back({static_cast<aub_stream::EngineType>(i + aub_stream::ENGINE_CCS), EngineUsage::regular});
}
Expand Down