Skip to content

Commit 03142e8

Browse files
[DOCS] aipc model table update 24.4 (#27587)
port: #27586
1 parent 86e6512 commit 03142e8

File tree

4 files changed

+363
-9
lines changed

4 files changed

+363
-9
lines changed

docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst

+25-9
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ This page is regularly updated to help you identify the best-performing LLMs on
55
Intel® Core™ Ultra processor family and AI PCs.
66
The current data is as of OpenVINO 2024.4, 24 Oct. 2024.
77

8-
The tables below list the key performance indicators for a selection of Large Language Models,
9-
running on an Intel® Core™ Ultra 7-165H, Intel® Core™ Ultra 7-265V, and Intel® Core™ Ultra
10-
7-288V based system, on built-in GPUs.
8+
The tables below list the key performance indicators for inference on built-in GPUs.
119

1210

1311

@@ -16,14 +14,32 @@ running on an Intel® Core™ Ultra 7-165H, Intel® Core™ Ultra 7-265V, and In
1614
<label><link rel="stylesheet" type="text/css" href="../../_static/css/openVinoDataTables.css"></label>
1715

1816

17+
.. tab-set::
1918

20-
.. csv-table::
21-
:class: modeldata stripe
22-
:name: supportedModelsTableOv
23-
:header-rows: 1
24-
:file: ../../_static/benchmarks_files/llm_models.csv
19+
.. tab-item:: 9-288V
20+
21+
.. csv-table::
22+
:class: modeldata stripe
23+
:name: supportedModelsTableOv
24+
:header-rows: 1
25+
:file: ../../_static/benchmarks_files/llm_models_9-288V.csv
26+
27+
.. tab-item:: 7-258V
28+
29+
.. csv-table::
30+
:class: modeldata stripe
31+
:name: supportedModelsTableOv_7-258V
32+
:header-rows: 1
33+
:file: ../../_static/benchmarks_files/llm_models_7-258V.csv
34+
35+
.. tab-item:: 7-155H
36+
37+
.. csv-table::
38+
:class: modeldata stripe
39+
:name: supportedModelsTableOv_7-155H
40+
:header-rows: 1
41+
:file: ../../_static/benchmarks_files/llm_models_7-155H.csv
2542

26-
|
2743

2844
.. grid:: 1 1 2 2
2945
:gutter: 4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd tok/sec,,,
2+
opt-125m-gptq,INT4-MIXED,32,965.9,29,7.7,129.87,,,
3+
opt-125m-gptq,INT4-MIXED,1024,1507.9,113.1,7.8,128.21,,,
4+
tiny-llama-1.1b-chat,INT4-MIXED,32,1831.8,46.5,16.7,59.88,,,
5+
tiny-llama-1.1b-chat,INT4-MIXED,1024,1806.3,635,17.8,56.18,,,
6+
qwen2-0.5b,INT4-MIXED,32,2551.7,61.4,18.3,54.64,,,
7+
qwen2-0.5b,INT4-MIXED,1024,2976.6,356.1,19.2,52.08,,,
8+
tiny-llama-1.1b-chat,INT8-CW,32,1987.4,56,21.6,46.30,,,
9+
tiny-llama-1.1b-chat,INT8-CW,1024,2209.1,772.7,22.6,44.25,,,
10+
qwen2-0.5b,INT8-CW,32,2484.9,57.3,22.8,43.86,,,
11+
qwen2-0.5b,INT8-CW,1024,3102.5,407.1,23.9,41.84,,,
12+
qwen2-1.5b,INT4-MIXED,32,4265.2,71.7,25.5,39.22,,,
13+
qwen2-1.5b,INT4-MIXED,1024,4884.5,862.4,26.8,37.31,,,
14+
dolly-v2-3b,INT4-MIXED,32,2401.3,89.6,27.5,36.36,,,
15+
red-pajama-incite-chat-3b-v1,INT4-MIXED,32,2511.5,78.6,28.2,35.46,,,
16+
phi-2,INT4-MIXED,32,2279.5,95.7,29.1,34.36,,,
17+
minicpm-1b-sft,INT4-MIXED,31,2759.9,104.4,30.9,32.36,,,
18+
phi-2,INT4-MIXED,32,2620.1,100.8,31,32.26,,,
19+
stable-zephyr-3b-dpo,INT4-MIXED,30,2636.5,86.8,31.7,31.55,,,
20+
dolly-v2-3b,INT4-MIXED,1024,3137.1,1782.9,32.2,31.06,,,
21+
red-pajama-incite-chat-3b-v1,INT4-MIXED,1020,3118.5,1831.7,33.3,30.03,,,
22+
red-pajama-incite-chat-3b-v1,INT4-MIXED,1024,2862.7,1821.1,33.5,29.85,,,
23+
qwen2-1.5b,INT8-CW,32,4831.2,87,33.8,29.59,,,
24+
opt-2.7b,INT4-MIXED,31,2898.3,73.2,33.9,29.50,,,
25+
phi-2,INT4-MIXED,1024,2797.4,1887,34,29.41,,,
26+
orca-mini-3b,INT4-MIXED,32,2877.8,100.3,35,28.57,,,
27+
stablelm-3b-4e1t,INT4-MIXED,32,2669.4,94.7,35.3,28.33,,,
28+
qwen2-1.5b,INT8-CW,1024,5455.8,1047.6,35.3,28.33,,,
29+
minicpm-1b-sft,INT8-CW,31,3104.1,103.5,35.3,28.33,,,
30+
phi-2,INT4-MIXED,1024,3039.8,1917.4,35.9,27.86,,,
31+
stable-zephyr-3b-dpo,INT4-MIXED,946,3411.4,1695,37,27.03,,,
32+
gemma-2b-it,INT4-MIXED,32,3991.7,116.1,37.9,26.39,,,
33+
opt-2.7b,INT4-MIXED,937,3617.5,1764.9,38.2,26.18,,,
34+
phi-3-mini-4k-instruct,INT4-MIXED,31,2935.3,111.6,38.2,26.18,,,
35+
phi-3-mini-4k-instruct,INT4-MIXED,38,3102.4,134,38.4,26.04,,,
36+
phi-3-mini-4k-instruct,INT4-MIXED,31,2986.1,114.1,38.9,25.71,,,
37+
phi-3-mini-4k-instruct,INT4-MIXED,38,2977.4,131.1,39,25.64,,,
38+
gemma-2b-it,INT4-MIXED,1024,4973.3,1249.2,39.7,25.19,,,
39+
stablelm-3b-4e1t,INT4-MIXED,1024,3196.9,2045.4,39.9,25.06,,,
40+
dolly-v2-3b,INT8-CW,32,3490.2,107.4,41.5,24.10,,,
41+
red-pajama-incite-chat-3b-v1,INT8-CW,32,3457.9,105,42.5,23.53,,,
42+
opt-2.7b,INT8-CW,31,3686.8,107.5,44.1,22.68,,,
43+
phi-2,INT8-CW,32,3554.9,116.6,44.1,22.68,,,
44+
phi-3-mini-4k-instruct,INT4-MIXED,1023,3390.7,2277.1,44.2,22.62,,,
45+
phi-3-mini-4k-instruct,INT4-MIXED,1061,3643.6,2485,44.4,22.52,,,
46+
phi-3-mini-4k-instruct,INT4-MIXED,1023,3516.4,2280.9,44.5,22.47,,,
47+
phi-3-mini-4k-instruct,INT4-MIXED,1061,3537.2,2522.4,44.7,22.37,,,
48+
orca-mini-3b,INT4-MIXED,1024,3557.3,1898.9,45,22.22,,,
49+
minicpm-1b-sft,FP16,31,3814.4,97.9,45.4,22.03,,,
50+
stablelm-3b-4e1t,INT8-CW,32,3486.9,100.5,46.1,21.69,,,
51+
stable-zephyr-3b-dpo,INT8-CW,30,3516.7,101.9,46.1,21.69,,,
52+
dolly-v2-3b,INT8-CW,1024,4265.9,2178.6,46.2,21.65,,,
53+
red-pajama-incite-chat-3b-v1,INT8-CW,1020,3979.1,2219.7,47.2,21.19,,,
54+
red-pajama-incite-chat-3b-v1,INT8-CW,1024,3975.5,2199.7,47.3,21.14,,,
55+
opt-2.7b,INT8-CW,937,4358.6,1981.8,48.4,20.66,,,
56+
phi-2,INT8-CW,1024,4058.1,2280.1,48.9,20.45,,,
57+
gemma-2b-it,INT8-CW,32,4786.8,119.8,49.4,20.24,,,
58+
chatglm3-6b,INT4-MIXED,32,4141.5,166.6,49.7,20.12,,,
59+
stablelm-3b-4e1t,INT8-CW,1024,4054.8,2243.5,50.7,19.72,,,
60+
stable-zephyr-3b-dpo,INT8-CW,946,4521.8,1816.4,51.3,19.49,,,
61+
gemma-2b-it,INT8-CW,1024,5810.7,1580,51.3,19.49,,,
62+
chatglm3-6b,INT4-MIXED,32,4651.4,164.7,51.6,19.38,,,
63+
chatglm3-6b,INT4-MIXED,1024,4235.1,2818.7,52.3,19.12,,,
64+
orca-mini-3b,INT8-CW,32,4162,109.2,53.3,18.76,,,
65+
chatglm3-6b,INT4-MIXED,1024,4783.8,2869,54.4,18.38,,,
66+
gpt-j-6b,INT4-MIXED,32,4667.3,176.7,56.3,17.76,,,
67+
chatglm3-6b-gptq,INT4-MIXED,32,5369.4,173.9,58.9,16.98,,,
68+
llama-2-7b-chat-hf,INT4-MIXED,32,4280,173.2,60.1,16.64,,,
69+
phi-3-mini-4k-instruct,INT8-CW,31,4585.1,123,60.5,16.53,,,
70+
phi-3-mini-4k-instruct,INT8-CW,38,4597,152,60.5,16.53,,,
71+
chatglm2-6b,INT4-MIXED,32,4847.8,158.7,60.6,16.50,,,
72+
vicuna-7b-v1.5,INT4-MIXED,32,4476.9,178.2,61.2,16.34,,,
73+
chatglm3-6b-gptq,INT4-MIXED,1024,5217.6,2863.7,61.3,16.31,,,
74+
mistral-7b-v0.1,INT4-MIXED,31,4413.6,194,61.7,16.21,,,
75+
qwen2-7b,INT4-MIXED,32,7044.7,184.4,61.7,16.21,,,
76+
mistral-7b-v0.1,INT4-MIXED,32,4427.6,193.3,61.8,16.18,,,
77+
orca-mini-3b,INT8-CW,1024,4821.6,2239.1,62,16.13,,,
78+
codegen25-7b,INT4-MIXED,32,4687.2,176.2,62.7,15.95,,,
79+
chatglm2-6b,INT4-MIXED,1024,5165.9,3148,63,15.87,,,
80+
llama-2-7b-gptq,INT4-MIXED,32,4632.8,175.2,63.4,15.77,,,
81+
stablelm-7b,INT4-MIXED,32,5219.5,206.3,63.4,15.77,,,
82+
qwen-7b-chat,INT4-MIXED,32,7805.6,193.8,63.6,15.72,,,
83+
gpt-j-6b,INT4-MIXED,1024,5314.9,3111.8,63.6,15.72,,,
84+
qwen2-7b,INT4-MIXED,1024,7716.2,3548.3,64.1,15.60,,,
85+
llama-3-8b,INT4-MIXED,32,4910.9,204.8,64.7,15.46,,,
86+
mistral-7b-v0.1,INT4-MIXED,1024,4720.8,3667.1,64.8,15.43,,,
87+
mistral-7b-v0.1,INT4-MIXED,1007,4704.7,3685.4,64.9,15.41,,,
88+
llama-3.1-8b,INT4-MIXED,31,4850.3,211.5,64.9,15.41,,,
89+
phi-3-mini-4k-instruct,INT8-CW,1023,5128.6,2815.2,65.7,15.22,,,
90+
phi-3-mini-4k-instruct,INT8-CW,1061,5155,3407.9,65.9,15.17,,,
91+
mistral-7b-v0.1,INT4-MIXED,32,4939.3,192,66.5,15.04,,,
92+
llama-3-8b,INT4-MIXED,33,4919.4,261.9,67.2,14.88,,,
93+
llama-2-7b-chat-hf,INT4-MIXED,1024,4948.2,3811,67.3,14.86,,,
94+
qwen1.5-7b-chat,INT4-MIXED,32,5943.1,180.5,67.7,14.77,,,
95+
qwen-7b-chat-gptq,INT4-MIXED,32,8057,187,68.1,14.68,,,
96+
llama-3-8b,INT4-MIXED,32,5503.5,198.4,68.1,14.68,,,
97+
qwen-7b-chat,INT4-MIXED,32,8091.6,185.9,68.1,14.68,,,
98+
llama-3-8b,INT4-MIXED,1024,5569.1,3920.5,68.2,14.66,,,
99+
llama-3.1-8b,INT4-MIXED,31,5358.6,201,68.2,14.66,,,
100+
stablelm-7b,INT4-MIXED,1020,5804.4,3726.6,68.8,14.53,,,
101+
llama-3.1-8b,INT4-MIXED,31,5452.6,202.9,68.8,14.53,,,
102+
llama-2-7b-chat-hf,INT4-MIXED,32,5023,165.7,69,14.49,,,
103+
llama-3-8b,INT4-MIXED,32,5413.6,202,69.1,14.47,,,
104+
llama-3-8b,INT4-MIXED,33,5440.4,262.1,69.2,14.45,,,
105+
codegen25-7b,INT4-MIXED,1024,5434.6,3513.2,69.9,14.31,,,
106+
mistral-7b-v0.1,INT4-MIXED,1024,5614.9,3819.1,70,14.29,,,
107+
mistral-7b-v0.1,INT4-MIXED,31,4927.8,205,70.5,14.18,,,
108+
llama-3-8b,INT4-MIXED,33,5498.9,270.7,70.6,14.16,,,
109+
llama-3-8b,INT4-MIXED,1025,5577.4,4271.2,70.6,14.16,,,
110+
llama-2-7b-gptq,INT4-MIXED,1024,5302.2,3529.4,70.7,14.14,,,
111+
zephyr-7b-beta,INT4-MIXED,32,5212.4,190.6,71.2,14.04,,,
112+
llama-3-8b,INT4-MIXED,1024,6161.1,3918,71.5,13.99,,,
113+
llama-3-8b,INT4-MIXED,1025,6098,4441.8,72.3,13.83,,,
114+
llama-3-8b,INT4-MIXED,1024,6071.7,3972.2,72.4,13.81,,,
115+
mistral-7b-v0.1,INT4-MIXED,1007,5224.1,4153.4,73.8,13.55,,,
116+
llama-3-8b,INT4-MIXED,1025,6156.9,4357,73.9,13.53,,,
117+
zephyr-7b-beta,INT4-MIXED,1024,5511.6,3978,74.4,13.44,,,
118+
opt-2.7b,FP16,31,9220.3,107.8,74.7,13.39,,,
119+
dolly-v2-3b,FP16,32,6058.9,109.9,74.7,13.39,,,
120+
qwen1.5-7b-chat,INT4-MIXED,1024,7063.2,3791.7,75,13.33,,,
121+
qwen-7b-chat,INT4-MIXED,1024,8919.5,3763.9,75,13.33,,,
122+
red-pajama-incite-chat-3b-v1,FP16,32,6036.5,107.5,75.9,13.18,,,
123+
llama-2-7b-chat-hf,INT4-MIXED,1024,5716.8,4231.7,76.2,13.12,,,
124+
phi-2,FP16,32,6090.1,115.2,77.1,12.97,,,
125+
stable-zephyr-3b-dpo,FP16,30,6113.1,112.1,78.6,12.72,,,
126+
qwen-7b-chat,INT4-MIXED,1024,9212.9,3857.4,78.6,12.72,,,
127+
stablelm-3b-4e1t,FP16,32,6065.4,110.2,78.7,12.71,,,
128+
opt-2.7b,FP16,937,9733.8,3750.8,78.8,12.69,,,
129+
dolly-v2-3b,FP16,1024,6615.2,2230.9,79.1,12.64,,,
130+
red-pajama-incite-chat-3b-v1,FP16,1020,6588.3,2259.4,80.2,12.47,,,
131+
glm-4-9b,INT4-MIXED,33,6386.2,328,80.4,12.44,,,
132+
red-pajama-incite-chat-3b-v1,FP16,1024,6570.3,2268.7,80.4,12.44,,,
133+
baichuan2-7b-chat,INT4-MIXED,32,5977.9,201.7,81,12.35,,,
134+
glm-4-9b,INT4-MIXED,32,6389.7,248.1,81,12.35,,,
135+
phi-2,FP16,1024,6646.2,2406.7,81.4,12.29,,,
136+
stable-zephyr-3b-dpo,FP16,946,6875.7,1868.2,82.9,12.06,,,
137+
stablelm-3b-4e1t,FP16,1024,6636.1,2036.9,83,12.05,,,
138+
chatglm2-6b,INT8-CW,32,6731.8,159.2,84.4,11.85,,,
139+
glm-4-9b,INT4-MIXED,1025,7061.4,4939.2,85.2,11.74,,,
140+
qwen-7b-chat-gptq,INT4-MIXED,1024,9175.3,3898,85.3,11.72,,,
141+
gemma-7b-it,INT4-MIXED,32,7883.9,230.5,86,11.63,,,
142+
gemma-7b-it,INT4-MIXED,32,8002.6,235,86.1,11.61,,,
143+
glm-4-9b,INT4-MIXED,1024,7064.9,4411.2,86.2,11.60,,,
144+
gpt-j-6b,INT8-CW,32,7009.2,176.8,86.4,11.57,,,
145+
chatglm2-6b,INT8-CW,1024,7050.5,3871.6,86.8,11.52,,,
146+
chatglm3-6b,INT8-CW,32,6755.9,159,86.8,11.52,,,
147+
baichuan2-7b-chat,INT4-MIXED,1024,7033.3,4049,88.8,11.26,,,
148+
chatglm3-6b,INT8-CW,1024,7076.5,3865.9,89.2,11.21,,,
149+
qwen-7b-chat,INT4-MIXED,32,9245.7,176.3,90,11.11,,,
150+
gemma-7b-it,INT4-MIXED,1024,9449.4,4305.8,93.2,10.73,,,
151+
gpt-j-6b,INT8-CW,1024,7672.3,4181.1,93.5,10.70,,,
152+
gemma-7b-it,INT4-MIXED,1024,9330.5,4222.5,93.7,10.67,,,
153+
orca-mini-3b,FP16,32,7416.5,122.3,94.7,10.56,,,
154+
codegen25-7b,INT8-CW,32,7557.6,170.7,98.4,10.16,,,
155+
qwen-7b-chat,INT4-MIXED,1024,10371.1,4271.7,98.9,10.11,,,
156+
llama-2-7b-chat-hf,INT8-CW,32,7390.6,171.6,99.9,10.01,,,

0 commit comments

Comments
 (0)