Skip to content

Commit c305a74

Browse files
committed
tests: benchdnn: add gqa v2 case
1 parent ddca382 commit c305a74

File tree

3 files changed

+375
-0
lines changed

3 files changed

+375
-0
lines changed

tests/benchdnn/inputs/graph/complex_fusion/harness_mha_all

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/sdpa-plain-simplified-f16.json
1313
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/sdpa-plain-wo-scale-f16-bs1.json
1414
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/GQA-fp16.json
15+
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/GQA-fp16-v2.json
1516
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/sdpa-plain-wo-mask-f16.json
1617
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/sdpa-plain-scale-by-mul-f16.json
1718
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/sdpa-plain-implicit-causal-mask-fp32-bs1.json

tests/benchdnn/inputs/graph/complex_fusion/harness_mha_ci

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/sdpa-plain-simplified-f16.json
1111
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/sdpa-plain-wo-scale-f16-bs1.json
1212
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/GQA-fp16.json
13+
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/GQA-fp16-v2.json
1314
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/sdpa-plain-wo-mask-f16.json
1415
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/sdpa-plain-scale-by-mul-f16.json
1516
--reset --dt=f32,bf16,f16 --case=complex_fusion/mha/sdpa-plain-implicit-causal-mask-fp32-bs1.json

tests/benchdnn/inputs/graph/complex_fusion/mha/GQA-fp16-v2.json

+373
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,373 @@
1+
{
2+
"version": "3.8.0",
3+
"engine_kind": "gpu",
4+
"fpmath_mode": "strict",
5+
"fpmath_mode_apply_to_int": "false",
6+
"input_ports": [
7+
1,
8+
3,
9+
8,
10+
11,
11+
19
12+
],
13+
"output_ports": [
14+
20
15+
],
16+
"graph": [
17+
{
18+
"id": 7,
19+
"name": "bmm1",
20+
"kind": "MatMul",
21+
"attrs": {
22+
"transpose_a": {
23+
"type": "bool",
24+
"value": 0
25+
},
26+
"transpose_b": {
27+
"type": "bool",
28+
"value": 1
29+
}
30+
},
31+
"inputs": [
32+
{
33+
"id": 1,
34+
"dtype": "f16",
35+
"shape": [
36+
32,
37+
2,
38+
8,
39+
384,
40+
64
41+
],
42+
"stride": [
43+
393216,
44+
196608,
45+
24576,
46+
64,
47+
1
48+
],
49+
"layout_type": "strided",
50+
"property_type": "undef"
51+
},
52+
{
53+
"id": 3,
54+
"dtype": "f16",
55+
"shape": [
56+
32,
57+
2,
58+
1,
59+
384,
60+
64
61+
],
62+
"stride": [
63+
49152,
64+
24576,
65+
24576,
66+
64,
67+
1
68+
],
69+
"layout_type": "strided",
70+
"property_type": "undef"
71+
}
72+
],
73+
"outputs": [
74+
{
75+
"id": 4,
76+
"dtype": "f16",
77+
"shape": [
78+
32,
79+
2,
80+
8,
81+
384,
82+
384
83+
],
84+
"stride": [
85+
2359296,
86+
1179648,
87+
147456,
88+
384,
89+
1
90+
],
91+
"layout_type": "strided",
92+
"property_type": "undef"
93+
}
94+
]
95+
},
96+
{
97+
"id": 10,
98+
"name": "scale_div",
99+
"kind": "Divide",
100+
"attrs": {
101+
"auto_broadcast": {
102+
"type": "string",
103+
"value": "numpy"
104+
}
105+
},
106+
"inputs": [
107+
{
108+
"id": 4,
109+
"dtype": "f16",
110+
"shape": [
111+
32,
112+
2,
113+
8,
114+
384,
115+
384
116+
],
117+
"stride": [
118+
2359296,
119+
1179648,
120+
147456,
121+
384,
122+
1
123+
],
124+
"layout_type": "strided",
125+
"property_type": "undef"
126+
},
127+
{
128+
"id": 8,
129+
"dtype": "f16",
130+
"shape": [
131+
1
132+
],
133+
"stride": [
134+
1
135+
],
136+
"layout_type": "strided",
137+
"property_type": "undef"
138+
}
139+
],
140+
"outputs": [
141+
{
142+
"id": 9,
143+
"dtype": "f16",
144+
"shape": [
145+
32,
146+
2,
147+
8,
148+
384,
149+
384
150+
],
151+
"stride": [
152+
2359296,
153+
1179648,
154+
147456,
155+
384,
156+
1
157+
],
158+
"layout_type": "strided",
159+
"property_type": "undef"
160+
}
161+
]
162+
},
163+
{
164+
"id": 15,
165+
"name": "mask_add",
166+
"kind": "Add",
167+
"attrs": {
168+
"auto_broadcast": {
169+
"type": "string",
170+
"value": "numpy"
171+
}
172+
},
173+
"inputs": [
174+
{
175+
"id": 9,
176+
"dtype": "f16",
177+
"shape": [
178+
32,
179+
2,
180+
8,
181+
384,
182+
384
183+
],
184+
"stride": [
185+
2359296,
186+
1179648,
187+
147456,
188+
384,
189+
1
190+
],
191+
"layout_type": "strided",
192+
"property_type": "undef"
193+
},
194+
{
195+
"id": 11,
196+
"dtype": "f16",
197+
"shape": [
198+
32,
199+
1,
200+
1,
201+
1,
202+
384
203+
],
204+
"stride": [
205+
384,
206+
384,
207+
384,
208+
384,
209+
1
210+
],
211+
"layout_type": "strided",
212+
"property_type": "undef"
213+
}
214+
],
215+
"outputs": [
216+
{
217+
"id": 14,
218+
"dtype": "f16",
219+
"shape": [
220+
32,
221+
2,
222+
8,
223+
384,
224+
384
225+
],
226+
"stride": [
227+
2359296,
228+
1179648,
229+
147456,
230+
384,
231+
1
232+
],
233+
"layout_type": "strided",
234+
"property_type": "undef"
235+
}
236+
]
237+
},
238+
{
239+
"id": 17,
240+
"name": "softmax",
241+
"kind": "SoftMax",
242+
"attrs": {
243+
"axis": {
244+
"type": "s64",
245+
"value": -1
246+
}
247+
},
248+
"inputs": [
249+
{
250+
"id": 14,
251+
"dtype": "f16",
252+
"shape": [
253+
32,
254+
2,
255+
8,
256+
384,
257+
384
258+
],
259+
"stride": [
260+
2359296,
261+
1179648,
262+
147456,
263+
384,
264+
1
265+
],
266+
"layout_type": "strided",
267+
"property_type": "undef"
268+
}
269+
],
270+
"outputs": [
271+
{
272+
"id": 16,
273+
"dtype": "f16",
274+
"shape": [
275+
32,
276+
2,
277+
8,
278+
384,
279+
384
280+
],
281+
"stride": [
282+
2359296,
283+
1179648,
284+
147456,
285+
384,
286+
1
287+
],
288+
"layout_type": "strided",
289+
"property_type": "undef"
290+
}
291+
]
292+
},
293+
{
294+
"id": 22,
295+
"name": "bmm2",
296+
"kind": "MatMul",
297+
"attrs": {
298+
"transpose_a": {
299+
"type": "bool",
300+
"value": 0
301+
},
302+
"transpose_b": {
303+
"type": "bool",
304+
"value": 0
305+
}
306+
},
307+
"inputs": [
308+
{
309+
"id": 16,
310+
"dtype": "f16",
311+
"shape": [
312+
32,
313+
2,
314+
8,
315+
384,
316+
384
317+
],
318+
"stride": [
319+
2359296,
320+
1179648,
321+
147456,
322+
384,
323+
1
324+
],
325+
"layout_type": "strided",
326+
"property_type": "undef"
327+
},
328+
{
329+
"id": 19,
330+
"dtype": "f16",
331+
"shape": [
332+
32,
333+
2,
334+
1,
335+
384,
336+
64
337+
],
338+
"stride": [
339+
49152,
340+
24576,
341+
24576,
342+
64,
343+
1
344+
],
345+
"layout_type": "strided",
346+
"property_type": "undef"
347+
}
348+
],
349+
"outputs": [
350+
{
351+
"id": 20,
352+
"dtype": "f16",
353+
"shape": [
354+
32,
355+
2,
356+
8,
357+
384,
358+
64
359+
],
360+
"stride": [
361+
393216,
362+
196608,
363+
24576,
364+
64,
365+
1
366+
],
367+
"layout_type": "strided",
368+
"property_type": "undef"
369+
}
370+
]
371+
}
372+
]
373+
}

0 commit comments

Comments
 (0)