Skip to content

Commit d655009

Browse files
xiang1guo authored and vpirogov committed
benchdnn: inputs: graph: add sdpa case w/o trailing transpose/reshape
1 parent 8898a6f commit d655009

File tree

4 files changed

+558
-0
lines changed

4 files changed

+558
-0
lines changed

tests/benchdnn/inputs/graph/complex_fusion/harness_mha_all

+4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
--reset --case=complex_fusion/mha/MHA-distill_bert-inf-bf16-bs1.json
1616
--reset --case=complex_fusion/mha/MHA-distill_bert-inf-fp32-bs1.json
1717
--reset --case=complex_fusion/mha/MHA-distill_bert-inf-int8-bs1.json
18+
--reset --case=complex_fusion/mha/MHA-stable_diffusion-inf-bf16-bs1.json
19+
--reset --case=complex_fusion/mha/MHA-stable_diffusion-inf-fp32-bs1.json
1820
--reset --case=complex_fusion/mha/MHA-starcoder-inf-bf16-bs1.json
1921
--reset --case=complex_fusion/mha/MHA-starcoder-inf-fp32-bs1.json
2022
--reset --case=complex_fusion/mha/MHA-starcoder-inf-int8-bs1.json
@@ -35,6 +37,8 @@
3537
--reset --in-shapes=4:56x12x128x64+5:56x12x64x128+0:56x12x128x64+1:56x1x1x128 --case=complex_fusion/mha/MHA-distill_bert-inf-fp32-bs1.json
3638
--reset --in-shapes=4:56x12x128x64+5:56x12x64x128+0:56x12x128x64+1:56x1x1x128 --case=complex_fusion/mha/MHA-distill_bert-inf-bf16-bs1.json
3739
--reset --in-shapes=5:56x12x128x64+4:56x12x64x128+0:56x12x128x64+1:56x1x1x128 --case=complex_fusion/mha/MHA-distill_bert-inf-int8-bs1.json
40+
--reset --in-shapes=0:56x8x1024x80+1:56x8x77x80+2:56x8x77x80 --case=complex_fusion/mha/MHA-stable_diffusion-inf-bf16-bs1.json
41+
--reset --in-shapes=0:56x8x1024x80+1:56x8x77x80+2:56x8x77x80 --case=complex_fusion/mha/MHA-stable_diffusion-inf-fp32-bs1.json
3842
--reset --in-shapes=5:20x117x48x128+6:20x1x128x117+19:20x1x117x128 --case=complex_fusion/mha/MHA-starcoder-inf-fp32-bs1.json
3943
--reset --in-shapes=5:20x117x48x128+6:20x1x128x117+19:20x1x117x128 --case=complex_fusion/mha/MHA-starcoder-inf-bf16-bs1.json
4044
--reset --in-shapes=4:20x117x48x128+3:20x1x128x117+0:20x1x117x128 --case=complex_fusion/mha/MHA-starcoder-inf-int8-bs1.json

tests/benchdnn/inputs/graph/complex_fusion/harness_mha_ci

+2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
--reset --case=complex_fusion/mha/MHA-bert_large-inf-bf16-bs1.json
88
--reset --case=complex_fusion/mha/MHA-bert_large-inf-fp32-bs1.json
99
--reset --case=complex_fusion/mha/MHA-bert_large-inf-int8-bs1.json
10+
--reset --case=complex_fusion/mha/MHA-stable_diffusion-inf-bf16-bs1.json
11+
--reset --case=complex_fusion/mha/MHA-stable_diffusion-inf-fp32-bs1.json
1012
--reset --case=complex_fusion/mha/MHA-distill_bert-inf-bf16-bs1.json
1113
--reset --case=complex_fusion/mha/MHA-distill_bert-inf-fp32-bs1.json
1214
--reset --case=complex_fusion/mha/MHA-distill_bert-inf-int8-bs1.json
tests/benchdnn/inputs/graph/complex_fusion/mha/MHA-stable_diffusion-inf-bf16-bs1.json

+276
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
{
2+
"version": "3.6.0",
3+
"engine_kind": "gpu",
4+
"fpmath_mode": "strict",
5+
"input_ports": [
6+
0,
7+
1,
8+
3,
9+
2
10+
],
11+
"output_ports": [
12+
8
13+
],
14+
"graph": [
15+
{
16+
"id": 0,
17+
"name": "matmul_qk",
18+
"kind": "MatMul",
19+
"attrs": {
20+
"transpose_a": {
21+
"type": "bool",
22+
"value": 0
23+
},
24+
"transpose_b": {
25+
"type": "bool",
26+
"value": 1
27+
}
28+
},
29+
"inputs": [
30+
{
31+
"id": 0,
32+
"dtype": "bf16",
33+
"shape": [
34+
2,
35+
8,
36+
1024,
37+
80
38+
],
39+
"stride": [
40+
655360,
41+
81920,
42+
80,
43+
1
44+
],
45+
"layout_type": "strided",
46+
"property_type": "undef"
47+
},
48+
{
49+
"id": 1,
50+
"dtype": "bf16",
51+
"shape": [
52+
2,
53+
8,
54+
77,
55+
80
56+
],
57+
"stride": [
58+
49280,
59+
6160,
60+
80,
61+
1
62+
],
63+
"layout_type": "strided",
64+
"property_type": "undef"
65+
}
66+
],
67+
"outputs": [
68+
{
69+
"id": 5,
70+
"dtype": "bf16",
71+
"shape": [
72+
2,
73+
8,
74+
1024,
75+
77
76+
],
77+
"stride": [
78+
630784,
79+
78848,
80+
77,
81+
1
82+
],
83+
"layout_type": "strided",
84+
"property_type": "undef"
85+
}
86+
]
87+
},
88+
{
89+
"id": 1,
90+
"name": "scale_div",
91+
"kind": "Multiply",
92+
"attrs": {
93+
"auto_broadcast": {
94+
"type": "string",
95+
"value": "numpy"
96+
}
97+
},
98+
"inputs": [
99+
{
100+
"id": 5,
101+
"dtype": "bf16",
102+
"shape": [
103+
2,
104+
8,
105+
1024,
106+
77
107+
],
108+
"stride": [
109+
630784,
110+
78848,
111+
77,
112+
1
113+
],
114+
"layout_type": "strided",
115+
"property_type": "undef"
116+
},
117+
{
118+
"id": 3,
119+
"dtype": "bf16",
120+
"shape": [
121+
1
122+
],
123+
"stride": [
124+
1
125+
],
126+
"layout_type": "strided",
127+
"property_type": "constant"
128+
}
129+
],
130+
"outputs": [
131+
{
132+
"id": 6,
133+
"dtype": "bf16",
134+
"shape": [
135+
2,
136+
8,
137+
1024,
138+
77
139+
],
140+
"stride": [
141+
630784,
142+
78848,
143+
77,
144+
1
145+
],
146+
"layout_type": "strided",
147+
"property_type": "undef"
148+
}
149+
]
150+
},
151+
{
152+
"id": 3,
153+
"name": "softmax",
154+
"kind": "SoftMax",
155+
"attrs": {
156+
"axis": {
157+
"type": "s64",
158+
"value": -1
159+
}
160+
},
161+
"inputs": [
162+
{
163+
"id": 6,
164+
"dtype": "bf16",
165+
"shape": [
166+
2,
167+
8,
168+
1024,
169+
77
170+
],
171+
"stride": [
172+
630784,
173+
78848,
174+
77,
175+
1
176+
],
177+
"layout_type": "strided",
178+
"property_type": "undef"
179+
}
180+
],
181+
"outputs": [
182+
{
183+
"id": 7,
184+
"dtype": "bf16",
185+
"shape": [
186+
2,
187+
8,
188+
1024,
189+
77
190+
],
191+
"stride": [
192+
630784,
193+
78848,
194+
77,
195+
1
196+
],
197+
"layout_type": "strided",
198+
"property_type": "undef"
199+
}
200+
]
201+
},
202+
{
203+
"id": 4,
204+
"name": "matmul_v",
205+
"kind": "MatMul",
206+
"attrs": {
207+
"transpose_a": {
208+
"type": "bool",
209+
"value": 0
210+
},
211+
"transpose_b": {
212+
"type": "bool",
213+
"value": 0
214+
}
215+
},
216+
"inputs": [
217+
{
218+
"id": 7,
219+
"dtype": "bf16",
220+
"shape": [
221+
2,
222+
8,
223+
1024,
224+
77
225+
],
226+
"stride": [
227+
630784,
228+
78848,
229+
77,
230+
1
231+
],
232+
"layout_type": "strided",
233+
"property_type": "undef"
234+
},
235+
{
236+
"id": 2,
237+
"dtype": "bf16",
238+
"shape": [
239+
2,
240+
8,
241+
77,
242+
80
243+
],
244+
"stride": [
245+
49280,
246+
6160,
247+
80,
248+
1
249+
],
250+
"layout_type": "strided",
251+
"property_type": "undef"
252+
}
253+
],
254+
"outputs": [
255+
{
256+
"id": 8,
257+
"dtype": "bf16",
258+
"shape": [
259+
2,
260+
8,
261+
1024,
262+
80
263+
],
264+
"stride": [
265+
655360,
266+
81920,
267+
80,
268+
1
269+
],
270+
"layout_type": "strided",
271+
"property_type": "undef"
272+
}
273+
]
274+
}
275+
]
276+
}

0 commit comments

Comments
 (0)