-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First rough benchmarking implementation
- Loading branch information
Showing
6 changed files
with
509 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Benchmarks | ||
|
||
What are interesting questions we would like to answer? | ||
|
||
## Build example program | ||
Measure: | ||
* Source code size: | ||
* Header | ||
* Source | ||
* Total | ||
* Binary size: | ||
* libassets | ||
* libcrl | ||
* executable | ||
* Time to build: | ||
* Each step separately? (compile crl, compile assets, compile main, linking separate?) | ||
|
||
Dependent on: | ||
* Build status: | ||
* Clean | ||
* Nothing changed | ||
* One file added | ||
* All files changed | ||
* cmake/ninja | ||
* testsets | ||
* features | ||
|
||
## Build benchmark program | ||
* all features enabled | ||
|
||
Measure: | ||
* Time to run different get functions | ||
|
||
Dependent on: | ||
* testsets (file size should not be relevant) | ||
* search file: | ||
* exists/does not exist | ||
* order in sorted file list | ||
|
||
## Testset characteristics | ||
Testset characteristics: | ||
* 1 KB - 10 MB | ||
* 1 file - 10'000 files | ||
* What about path length/directory depth? | ||
|
||
## Features | ||
* str comparison | ||
* directory tree based | ||
* simple hashing | ||
* gperf perfect hashing | ||
* static access |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,81 +1,70 @@ | ||
#include <benchmark/benchmark.h>

#include <assets.h>

#include <array>
#include <cassert>
#include <cstdio>
#include <string>
#include <string_view>
|
||
static void bench_get_file_found(benchmark::State &state) { | ||
for (auto _ : state) { | ||
auto r = assets::get_file("logs/source/t.txt"); | ||
assert(r.has_value()); | ||
} | ||
} | ||
// Benchmark inputs, filled in by main() from the command line before the
// benchmark runner takes over.
static std::string g_PATH;                       // path handed to each lookup function
static std::array<std::string_view, 4> g_PATHV;  // g_PATH split into four consecutive views
static bool g_EXISTS;                            // whether the lookup is expected to succeed
||
static void bench_get_file_not_found(benchmark::State &state) { | ||
static void bench_get_file(benchmark::State &state) { | ||
for (auto _ : state) { | ||
auto r = assets::get_file("logs/source/t.txd"); | ||
assert(!r.has_value()); | ||
auto r = assets::get_file(g_PATH); | ||
assert(r.has_value() == g_EXISTS); | ||
} | ||
} | ||
|
||
static void bench_get_filev_found(benchmark::State &state) { | ||
std::array<std::string_view, 4> b{"logs", "/", "source/t", ".txt"}; | ||
static void bench_get_filev(benchmark::State &state) { | ||
for (auto _ : state) { | ||
auto r = assets::get_filev(b); | ||
assert(r.has_value()); | ||
auto r = assets::get_filev(g_PATHV); | ||
assert(r.has_value() == g_EXISTS); | ||
} | ||
} | ||
|
||
static void bench_get_filev_not_found(benchmark::State &state) { | ||
std::array<std::string_view, 4> b{"logs", "/", "source/t", ".txd"}; | ||
static void bench_get_file_ph(benchmark::State &state) { | ||
for (auto _ : state) { | ||
auto r = assets::get_filev(b); | ||
assert(!r.has_value()); | ||
auto r = assets::get_file_ph(g_PATH); | ||
assert(r.has_value() == g_EXISTS); | ||
} | ||
} | ||
|
||
static void bench_get_file_static(benchmark::State &state) { | ||
static void bench_get_file_strcmp(benchmark::State &state) { | ||
for (auto _ : state) { | ||
auto r = assets::get<"logs/source/t.txt">(); | ||
assert(r.size() > 1); | ||
auto r = assets::get_file_strcmp(g_PATH); | ||
assert(r.has_value() == g_EXISTS); | ||
} | ||
} | ||
|
||
static void bench_get_file_ph_found(benchmark::State &state) { | ||
for (auto _ : state) { | ||
auto r = assets::get_file_ph("logs/source/t.txt"); | ||
assert(r.has_value()); | ||
} | ||
} | ||
BENCHMARK(bench_get_file); | ||
BENCHMARK(bench_get_filev); | ||
BENCHMARK(bench_get_file_ph); | ||
BENCHMARK(bench_get_file_strcmp); | ||
|
||
// Prints command-line help. main() requires exactly two program arguments
// (the old "[gperf args...]" hint was wrong: extra args are rejected).
void usage() { printf("Usage: <path> <exists:true|false>\n"); }
|
||
static void bench_get_file_strcmp_found(benchmark::State &state) { | ||
for (auto _ : state) { | ||
auto r = assets::get_file_strcmp("logs/source/t.txt"); | ||
assert(r.has_value()); | ||
int main(int argc, char **argv) { | ||
if (argc != 3) { | ||
usage(); | ||
return 1; | ||
} | ||
} | ||
|
||
static void bench_get_file_strcmp_not_found(benchmark::State &state) { | ||
for (auto _ : state) { | ||
auto r = assets::get_file_strcmp("logs/source/t.txd"); | ||
assert(!r.has_value()); | ||
std::string exists{argv[2]}; | ||
if (exists == "true") { | ||
g_EXISTS = true; | ||
} else if (exists == "false") { | ||
g_EXISTS = false; | ||
} else { | ||
usage(); | ||
return 1; | ||
} | ||
} | ||
|
||
BENCHMARK(bench_get_file_static); | ||
g_PATH = argv[1]; | ||
int l = g_PATHV.size() / 4; | ||
std::get<0>(g_PATHV) = std::string_view(g_PATH).substr(0, l); | ||
std::get<1>(g_PATHV) = std::string_view(g_PATH).substr(l, l); | ||
std::get<2>(g_PATHV) = std::string_view(g_PATH).substr(l + l, l); | ||
std::get<3>(g_PATHV) = std::string_view(g_PATH).substr(l + l + l); | ||
|
||
BENCHMARK(bench_get_file_found); | ||
BENCHMARK(bench_get_file_not_found); | ||
BENCHMARK(bench_get_filev_found); | ||
BENCHMARK(bench_get_filev_not_found); | ||
BENCHMARK(bench_get_file_ph_found); | ||
BENCHMARK(bench_get_file_ph_not_found); | ||
BENCHMARK(bench_get_file_strcmp_found); | ||
BENCHMARK(bench_get_file_strcmp_not_found); | ||
|
||
BENCHMARK_MAIN(); | ||
benchmark::Initialize(&argc, argv); | ||
benchmark::RunSpecifiedBenchmarks(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
build |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import os | ||
import argparse | ||
import random | ||
import string | ||
import math | ||
import sys | ||
|
||
def parse_size(size_str):
    """Parse a size such as '10', '4KB' or '2MB' into a byte count.

    Plain integers are accepted unchanged (backward compatible); the optional
    suffixes B/KB/MB/GB (powers of 1024, case-insensitive) match what the
    --content-size error message in main() promises.

    Raises ValueError for anything that is not an integer with an optional
    recognized suffix.
    """
    s = str(size_str).strip().upper()
    # Check multi-letter suffixes before the bare 'B' so '4KB' is not read as '4K' + 'B'.
    for suffix, factor in (('KB', 1024), ('MB', 1024 ** 2), ('GB', 1024 ** 3), ('B', 1)):
        if s.endswith(suffix):
            return int(s[:-len(suffix)]) * factor
    return int(s)
|
||
def random_string(min_length=5, max_length=15):
    """Return a random alphanumeric string whose length is drawn uniformly
    from [min_length, max_length]."""
    alphabet = string.ascii_letters + string.digits
    size = random.randint(min_length, max_length)
    return ''.join(random.choice(alphabet) for _ in range(size))
|
||
def create_file(path, size):
    """Fill *path* with *size* random bytes (overwrites any existing file)."""
    payload = os.urandom(size)
    with open(path, 'wb') as out:
        out.write(payload)
|
||
def distribute_files_balanced(num_files, files_per_dir):
    """Split num_files across directories as evenly as possible.

    Returns a list whose i-th entry is the file count of directory i; the
    entries sum to exactly num_files and never exceed files_per_dir.

    BUG FIX: the old remainder `num_files - files_per_dir * num_dirs` was
    always <= 0 (num_dirs is a ceiling), so the adjustment loop never ran
    and the distribution summed to MORE than num_files.
    """
    num_dirs = math.ceil(num_files / files_per_dir)
    if num_dirs == 0:  # num_files == 0 -> no directories (matches old output)
        return []
    base, extra = divmod(num_files, num_dirs)
    # The first `extra` directories take one file more than the rest.
    return [base + 1 if i < extra else base for i in range(num_dirs)]
|
||
def distribute_files_unbalanced(num_files, files_per_dir):
    """Randomly split num_files across directories.

    Each directory receives between 1 and files_per_dir files; the returned
    counts sum to exactly num_files.
    """
    num_dirs = math.ceil(num_files / files_per_dir)
    distribution = []
    remaining = num_files
    for _ in range(num_dirs):
        if remaining <= 0:
            break
        # Assign between 1 and files_per_dir files to each directory
        files_in_dir = random.randint(1, min(files_per_dir, remaining))
        distribution.append(files_in_dir)
        remaining -= files_in_dir
    # Spread any leftover files over directories that still have capacity.
    # BUG FIX: the old loop retried random indices until it hit a non-full
    # directory, which could spin for a very long time when most directories
    # were already at files_per_dir; choosing from the open set is bounded.
    while remaining > 0:
        open_dirs = [i for i, count in enumerate(distribution) if count < files_per_dir]
        distribution[random.choice(open_dirs)] += 1
        remaining -= 1
    return distribution
|
||
def adjust_name_lengths(target_path_length, current_depth, max_depth, current_path_length, path_parts):
    """Heuristically pick a name length that keeps the final path close to
    target_path_length. Not exact; never returns less than 5.

    `path_parts` is accepted for interface compatibility but unused.
    """
    budget = target_path_length - current_path_length
    levels_left = max_depth - current_depth
    if levels_left <= 0:
        # Deepest level: whatever budget is left, minus room for the file name.
        return max(5, budget - 10)
    # Share the remaining budget over the directories still to come plus the file.
    return max(5, budget // (levels_left + 1))
|
||
def main():
    """Command-line driver: generate a random test dataset on disk.

    Builds a directory tree of configurable depth and path length, then
    fills it with randomly named files of fixed random content.
    """
    parser = argparse.ArgumentParser(description="Generate a test dataset with configurable directory and file characteristics.")
    parser.add_argument('--output-dir', type=str, required=True, help='Root directory to generate the dataset.')
    parser.add_argument('--num-files', type=int, required=True, help='Total number of files to generate.')
    parser.add_argument('--files-per-dir', type=int, default=10, help='Number of files per directory.')
    parser.add_argument('--distribution', type=str, choices=['balanced', 'unbalanced'], default='balanced', help='Distribution of files across directories.')
    parser.add_argument('--content-size', type=str, default='10', help='Size of each file\'s content in bytes.')
    parser.add_argument('--dir-depth', type=int, default=3, help='Maximum depth of the directory tree.')
    parser.add_argument('--path-length', type=int, default=100, help='Desired average path length in characters.')

    args = parser.parse_args()

    try:
        content_size = parse_size(args.content_size)
    except ValueError:
        print("Invalid content size format. Use numbers with optional suffixes like KB, MB.")
        sys.exit(1)

    root = args.output_dir
    os.makedirs(root, exist_ok=True)

    # Decide how many files each directory gets.
    if args.distribution == 'balanced':
        per_dir_counts = distribute_files_balanced(args.num_files, args.files_per_dir)
    else:
        per_dir_counts = distribute_files_unbalanced(args.num_files, args.files_per_dir)

    print(f"Generating {args.num_files} files across {len(per_dir_counts)} directories with a maximum depth of {args.dir_depth}.")

    for dir_count in per_dir_counts:
        # Build one random directory path, stopping early once the path
        # budget (minus room for a file name) is used up.
        dir_path = root
        depth = 0
        path_len = len(os.path.abspath(dir_path))
        parts = []
        while depth < args.dir_depth:
            name_budget = adjust_name_lengths(args.path_length, depth, args.dir_depth, path_len, parts)
            dir_name = random_string(min_length=3, max_length=name_budget)
            parts.append(dir_name)
            dir_path = os.path.join(dir_path, dir_name)
            path_len += len(dir_name) + 1  # +1 for the os.sep
            depth += 1
            if path_len >= args.path_length - 50:  # reserve space for the file name
                break
        os.makedirs(dir_path, exist_ok=True)

        # Populate the directory.
        for _ in range(dir_count):
            name_len = max(5, min(15, args.path_length // 10))  # heuristic name length
            file_name = f"file_{random_string(min_length=3, max_length=name_len)}.bin"
            file_path = os.path.join(dir_path, file_name)
            # Shorten the name if the absolute path overshoots the target length.
            excess = len(os.path.abspath(file_path)) - args.path_length
            if excess > 0 and len(file_name) > excess + 4:  # 4 for 'file_' and '.bin'
                file_name = f"file_{random_string(min_length=3, max_length=len(file_name) - excess)}.bin"
                file_path = os.path.join(dir_path, file_name)
            create_file(file_path, content_size)

    print(f"Dataset generation complete. Files are located in '{root}'.")


if __name__ == "__main__":
    main()
Oops, something went wrong.