1. Add threading to README.md 2. set Torch thread nums when device is cpu
wgzintel committed Sep 27, 2024
1 parent 9c2f800 commit 59eddc3
Showing 2 changed files with 15 additions and 13 deletions.
1 change: 1 addition & 0 deletions llm_bench/python/README.md
```diff
@@ -138,3 +138,4 @@ For example, --load_config config.json as following in OpenVINO 2024.0.0 will re
 > If you encounter any errors, please check **[NOTES.md](./doc/NOTES.md)** which provides solutions to the known errors.
 ### 2. Image generation
 > To configure more parameters for image generation models, reference to **[IMAGE_GEN.md](./doc/IMAGE_GEN.md)**
+### 3. Threading
```
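The README hunk above is truncated to the new section heading; its guidance pairs with the OMP_WAIT_POLICY check added to benchmark.py below. Since the OpenMP runtime reads OMP_WAIT_POLICY when it initializes, benchmark.py can only warn about it at runtime; the value has to be set before the process starts. A minimal launch sketch under that assumption (the -m/-d flags follow llm_bench's usual CLI, and the model path is a placeholder):

```python
import os
import subprocess

# Run benchmark.py with OMP_WAIT_POLICY=PASSIVE so idle OpenMP worker threads
# yield instead of busy-waiting, leaving CPU cores free for OpenVINO inference.
env = dict(os.environ, OMP_WAIT_POLICY='PASSIVE')
subprocess.run(
    ['python', 'benchmark.py', '-m', '<model_path>', '-d', 'cpu'],  # illustrative arguments
    env=env,
    check=True,
)
```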
27 changes: 14 additions & 13 deletions llm_bench/python/benchmark.py
```diff
@@ -337,8 +337,8 @@ def run_text_generation_benchmark(model_path, framework, device, args, num_iters
             prompt_idx_list.append(i)
     if len(input_text_list) == 0:
         raise RuntimeError('==Failure prompts is empty ==')
-    log.info(f"Benchmarking iter nums(exclude warm-up): {num_iters}, "
-             f'prompt nums: {len(text_list)}, prompt idx: {prompt_idx_list}')
+    log.info(f'Benchmarking iter nums(exclude warm-up): {num_iters}, prompt nums: {len(text_list)}, '
+             f"prompt idx: {prompt_idx_list}, num_beams: {args['num_beams']}")

     # if num_iters == 0, just output warm-up data
     text_gen_fn = run_text_generation if not use_genai else run_text_generation_genai
@@ -725,9 +725,6 @@ def get_argprser():
 def main():
     logging_kwargs = {"encoding": "utf-8"} if sys.version_info[1] > 8 else {}
     log.basicConfig(format='[ %(levelname)s ] %(message)s', level=os.environ.get("LOGLEVEL", log.INFO), stream=sys.stdout, **logging_kwargs)
-    env_omp = os.getenv('OMP_WAIT_POLICY')
-    if env_omp is None or env_omp != 'PASSIVE':
-        log.warning('It is recommended to set the environment variable OMP_WAIT_POLICY to PASSIVE, so that OpenVINO inference can use all CPU resources without waiting.')
     args = get_argprser()
     model_path, framework, model_args, model_name = llm_bench_utils.model_utils.analyze_args(args)

@@ -742,14 +739,18 @@ def main():
         out_str += ', openvino runtime version: {}'.format(get_version())
         if model_args['config'].get('PREC_BF16') and model_args['config']['PREC_BF16'] is True:
             log.warning('[Warning] Param bf16/prec_bf16 only work for framework pt. It will be disabled.')
-    original_torch_thread_nums = torch.get_num_threads()
-    num_beams = model_args['num_beams']
-    if num_beams > 1:
-        torch.set_num_threads(int(original_torch_thread_nums / 2))
-    else:
-        torch.set_num_threads(1)
-    log.info(f'The num_beams is {num_beams}, update Torch thread num from {original_torch_thread_nums} to {torch.get_num_threads()}, '
-             f'avoid to use the CPU cores for OpenVINO inference.')
+    if 'cpu' in args.device.lower():
+        env_omp = os.getenv('OMP_WAIT_POLICY')
+        if env_omp is None or env_omp != 'PASSIVE':
+            log.warning(f'It is recommended to set the environment variable OMP_WAIT_POLICY to PASSIVE, '
+                        f'so that OpenVINO inference can use all CPU resources without waiting.')
+        original_torch_thread_nums = torch.get_num_threads()
+        if model_args['num_beams'] > 1:
+            torch.set_num_threads(int(original_torch_thread_nums / 2))
+        else:
+            torch.set_num_threads(1)
+        log.info(f"The num_beams is {model_args['num_beams']}, update Torch thread num from "
+                 f'{original_torch_thread_nums} to {torch.get_num_threads()}, avoid to use the CPU cores for OpenVINO inference.')
     log.info(out_str)
     if args.memory_consumption:
         mem_consumption.start_collect_mem_consumption_thread()
```
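Pulled out of the diff, the CPU branch's thread policy is: with beam search (num_beams > 1) PyTorch keeps half of its default threads, otherwise it is pinned to a single thread so OpenVINO inference gets the remaining cores. A standalone sketch of that policy (the helper name is mine, not part of benchmark.py):

```python
import torch

def split_torch_threads(num_beams: int) -> None:
    # Standalone sketch of the commit's CPU-only thread policy.
    original = torch.get_num_threads()
    # Beam search keeps half the default Torch threads; greedy decoding gets
    # one, so Torch does not compete with OpenVINO inference for CPU cores.
    # (As in the diff, a single-thread default would need a max(1, ...) guard.)
    torch.set_num_threads(original // 2 if num_beams > 1 else 1)
```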
