5
5
// are passed down.
6
6
7
7
#ifdef SLANG_CUDA_ENABLE_HALF
8
+ // We don't want the built-in half2 operators, because they implement comparison operators that return a bool(!). We want to generate
9
+ // those functions. Doing so means that we will have to define all the other half2 operators.
10
+ # define __CUDA_NO_HALF2_OPERATORS__
8
11
# include < cuda_fp16.h>
9
12
#endif
10
13
@@ -155,6 +158,7 @@ union Union64
155
158
struct __half3 { __half2 xy; __half z; }; // 3-component half vector: x/y held in a __half2, z held as a scalar __half
156
159
struct __half4 { __half2 xy; __half2 zw; }; // 4-component half vector: x/y and z/w each held in a __half2
157
160
161
+ // *** convert ***
158
162
159
163
// half -> other
160
164
@@ -196,7 +200,43 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 convert___half2(const double2& v) { r
196
200
// double3 -> half3. Components are narrowed to float explicitly before the _rn half conversions:
// an implicit double -> float inside the float2{...} braces is a narrowing conversion in list-initialization.
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 convert___half3(const double3& v) { return __half3{ __float22half2_rn(float2{float(v.x), float(v.y)}), __float2half_rn(float(v.z)) }; }
197
201
// double4 -> half4. Same explicit double -> float narrowing as above, applied to the x/y and z/w pairs.
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 convert___half4(const double4& v) { return __half4{ __float22half2_rn(float2{float(v.x), float(v.y)}), __float22half2_rn(float2{float(v.z), float(v.w)}) }; }
198
202
199
- // half2
203
+ // *** make ***
204
+
205
+ // Mechanism to make half vectors
206
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 make___half2 (__half x, __half y) { return __halves2half2 (x, y); } // builds a __half2 from x and y via __halves2half2
207
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 make___half3 (__half x, __half y, __half z) { return __half3{ __halves2half2 (x, y), z }; } // x,y go into .xy; z is stored as-is in .z
208
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 make___half4 (__half x, __half y, __half z, __half w) { return __half4{ __halves2half2 (x, y), __halves2half2 (z, w)}; } // x,y go into .xy; z,w go into .zw
209
+
210
+ // *** constructFromScalar ***
211
+
212
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 constructFromScalar___half2 (__half x) { return __half2half2 (x); } // replicates x into both components; parameter spelled __half (not the `half` alias) to match make___half2
213
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 constructFromScalar___half3 (__half x) { return __half3{__half2half2 (x), x}; } // replicates x into .xy and .z
214
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 constructFromScalar___half4 (__half x) { const __half2 v = __half2half2 (x); return __half4{v, v}; } // replicates x once into a __half2, reused for .xy and .zw
215
+
216
+ // *** half2 ***
217
+
218
+ // half2 maths ops
219
+
220
+ // NOTE! By default these are defined in cuda_fp16.hpp, but we disable them, because we need to define the comparison operators
221
+ // as we need versions that will return vector<bool>
222
+
223
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 operator +(const __half2& lh, const __half2& rh) { return __hadd2 (lh, rh); } // half2 + half2, forwarded to __hadd2
224
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 operator -(const __half2& lh, const __half2& rh) { return __hsub2 (lh, rh); } // half2 - half2, forwarded to __hsub2
225
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 operator *(const __half2& lh, const __half2& rh) { return __hmul2 (lh, rh); } // half2 * half2, forwarded to __hmul2
226
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 operator /(const __half2& lh, const __half2& rh) { return __h2div (lh, rh); } // half2 / half2, forwarded to __h2div
227
+
228
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2& operator +=(__half2& lh, const __half2& rh) { lh = __hadd2 (lh, rh); return lh; } // stores lh + rh into lh and returns lh
229
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2& operator -=(__half2& lh, const __half2& rh) { lh = __hsub2 (lh, rh); return lh; } // stores lh - rh into lh and returns lh
230
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2& operator *=(__half2& lh, const __half2& rh) { lh = __hmul2 (lh, rh); return lh; } // stores lh * rh into lh and returns lh
231
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2& operator /=(__half2& lh, const __half2& rh) { lh = __h2div (lh, rh); return lh; } // stores lh / rh into lh and returns lh
232
+
233
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 &operator ++(__half2 &h) { __half2_raw one; one.x = 0x3C00 ; one.y = 0x3C00 ; h = __hadd2 (h, one); return h; } // pre-increment: 0x3C00 is the IEEE binary16 bit pattern of 1.0, so `one` is (1, 1); returns the updated h
234
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 &operator --(__half2 &h) { __half2_raw one; one.x = 0x3C00 ; one.y = 0x3C00 ; h = __hsub2 (h, one); return h; } // pre-decrement: subtracts (1, 1) built from raw bits 0x3C00; returns the updated h
235
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 operator ++(__half2 &h, int ) { __half2 ret = h; __half2_raw one; one.x = 0x3C00 ; one.y = 0x3C00 ; h = __hadd2 (h, one); return ret; } // post-increment: adds (1, 1) to h but returns the value h had before
236
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 operator --(__half2 &h, int ) { __half2 ret = h; __half2_raw one; one.x = 0x3C00 ; one.y = 0x3C00 ; h = __hsub2 (h, one); return ret; } // post-decrement: subtracts (1, 1) from h but returns the value h had before
237
+
238
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 operator +(const __half2 &h) { return h; } // unary plus: returns h unchanged
239
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 operator -(const __half2 &h) { return __hneg2 (h); } // unary minus, forwarded to __hneg2
200
240
201
241
// vec op scalar
202
242
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 operator +(const __half2& lh, __half rh) { return __hadd2 (lh, __half2half2 (rh)); } // half2 + scalar: rh is replicated into a __half2 with __half2half2 before the add
@@ -210,16 +250,7 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 operator-(__half lh, const __half2& r
210
250
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 operator *(__half lh, const __half2& rh) { return __hmul2 (__half2half2 (lh), rh); } // scalar * half2: lh is replicated into a __half2 before the multiply
211
251
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 operator /(__half lh, const __half2& rh) { return __h2div (__half2half2 (lh), rh); } // scalar / half2: lh is replicated into a __half2 before the divide
212
252
213
- // Mechanism to make half vectors
214
- SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 make___half2 (__half x, __half y) { return __halves2half2 (x, y); }
215
- SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 make___half3 (__half x, __half y, __half z) { return __half3{ __halves2half2 (x, y), z }; }
216
- SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 make___half4 (__half x, __half y, __half z, __half w) { return __half4{ __halves2half2 (x, y), __halves2half2 (z, w)}; }
217
-
218
- SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 constructFromScalar___half2 (half x) { return __half2half2 (x); }
219
- SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 constructFromScalar___half3 (half x) { return __half3{__half2half2 (x), x}; }
220
- SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 constructFromScalar___half4 (half x) { const __half2 v = __half2half2 (x); return __half4{v, v}; }
221
-
222
- // Half3 maths ops
253
+ // *** half3 ***
223
254
224
255
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 operator +(const __half3& lh, const __half3& rh) { return __half3{__hadd2 (lh.xy , rh.xy ), __hadd (lh.z , rh.z )}; } // half3 + half3: .xy added with __hadd2, .z added with scalar __hadd
225
256
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 operator -(const __half3& lh, const __half3& rh) { return __half3{__hsub2 (lh.xy , rh.xy ), __hsub (lh.z , rh.z )}; } // half3 - half3: .xy subtracted with __hsub2, .z subtracted with scalar __hsub
@@ -241,18 +272,7 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 operator-(__half lh, const __half3& r
241
272
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 operator *(__half lh, const __half3& rh) { return __half3{__hmul2 (__half2half2 (lh), rh.xy ), __hmul (lh, rh.z )}; } // scalar * half3: lh replicated for the .xy multiply, used directly for .z
242
273
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 operator /(__half lh, const __half3& rh) { return __half3{__h2div (__half2half2 (lh), rh.xy ), __hdiv (lh, rh.z )}; } // scalar / half3: lh replicated for the .xy divide, used directly for .z
243
274
244
-
245
- #if 0
246
- // We need to return the vector<bool> type
247
- SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator==(const __half3& lh, const __half3& rh) { return __hbeq2(lh.xy, rh.xy) && __heq(lh.z, rh.z); }
248
- SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator!=(const __half3& lh, const __half3& rh) { return __hbneu2(lh.xy, rh.xy) && __hneu(lh.z, rh.z); }
249
- SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator>(const __half3& lh, const __half3& rh) { return __hbgt2(lh.xy, rh.xy) && __hgt(lh.z, rh.z); }
250
- SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator<(const __half3& lh, const __half3& rh) { return __hblt2(lh.xy, rh.xy) && __hlt(lh.z, rh.z); }
251
- SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator>=(const __half3& lh, const __half3& rh) { return __hbge2(lh.xy, rh.xy) && __hge(lh.z, rh.z); }
252
- SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator<=(const __half3& lh, const __half3& rh) { return __hble2(lh.xy, rh.xy) && __hle(lh.z, rh.z); }
253
- #endif
254
-
255
- // Half4 maths ops
275
+ // *** half4 ***
256
276
257
277
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator +(const __half4& lh, const __half4& rh) { return __half4{__hadd2 (lh.xy , rh.xy ), __hadd2 (lh.zw , rh.zw )}; } // half4 + half4: .xy and .zw each added with __hadd2
258
278
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator -(const __half4& lh, const __half4& rh) { return __half4{__hsub2 (lh.xy , rh.xy ), __hsub2 (lh.zw , rh.zw )}; } // half4 - half4: .xy and .zw each subtracted with __hsub2
@@ -274,28 +294,6 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator/(__half lh, const __half4& r
274
294
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator -(const __half4& h) { return __half4{__hneg2 (h.xy ), __hneg2 (h.zw )}; } // unary minus: .xy and .zw each negated with __hneg2
275
295
SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator +(const __half4& h) { return h; } // unary plus: returns h unchanged
276
296
277
- #if 0
278
- // We need to return vector<bool> type
279
- SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator==(const __half4& lh, const __half4& rh) { return __hbeq2(lh.xy, rh.xy) && __hbeq2(lh.zw, rh.zw); }
280
- SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator!=(const __half4& lh, const __half4& rh) { return __hbneu2(lh.xy, rh.xy) && __hbneu2(lh.zw, rh.zw); }
281
- SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator>(const __half4& lh, const __half4& rh) { return __hbgt2(lh.xy, rh.xy) && __hbgt2(lh.zw, rh.zw); }
282
- SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator<(const __half4& lh, const __half4& rh) { return __hblt2(lh.xy, rh.xy) && __hblt2(lh.zw, rh.zw); }
283
- SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator>=(const __half4& lh, const __half4& rh) { return __hbge2(lh.xy, rh.xy) && __hbge2(lh.zw, rh.zw); }
284
- SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator<=(const __half4& lh, const __half4& rh) { return __hble2(lh.xy, rh.xy) && __hble2(lh.zw, rh.zw); }
285
- #endif
286
-
287
- // Use the round nearest as the default - it is the only one defined
288
- SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 __float22half2 (const float2 a) { return __float22half2_rn (a); }
289
-
290
- // Implement the vector versions
291
- SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 __float2half (float2 a) { return __float22half2 (a); }
292
- SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 __float2half (float3 a) { __half3 o; o.xy = __float22half2 (make_float2 (a.x , a.y )); o.z = __float2half (a.z ); return o; }
293
- SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 __float2half (float4 a) { __half4 o; o.xy = __float22half2 (make_float2 (a.x , a.y )); o.zw = __float22half2 (make_float2 (a.z , a.w )); return o; }
294
-
295
- SLANG_FORCE_INLINE SLANG_CUDA_CALL float2 __half2float (__half2 a) { return __half22float2 (a); }
296
- SLANG_FORCE_INLINE SLANG_CUDA_CALL float3 __half2float (__half3 a) { float2 xy = __half22float2 (a.xy ); float z = __half2float (a.z ); return make_float3 (xy.x , xy.y , z); }
297
- SLANG_FORCE_INLINE SLANG_CUDA_CALL float4 __half2float (__half4 a) { float2 xy = __half22float2 (a.xy ); float2 zw = __half22float2 (a.zw ); return make_float4 (xy.x , xy.y , zw.x , zw.y ); }
298
-
299
297
#endif
300
298
301
299
// ----------------------------- F32 -----------------------------------------
0 commit comments