Add support for bfloat16 and float8 tensors

einarhov-arm · web-flow · commit 5ed4003a18a1 · 2026-04-23T22:52:34.000-04:00
Add the following extensions:

GL_ARM_tensors_bfloat16
GL_ARM_tensors_float_e5m2
GL_ARM_tensors_float_e4m3

Also add preprocessor macro definitions (`#define <name> 1`) for GL_ARM_tensors and the three new extensions to the preamble.
diff --git a/Test/baseResults/spv.tensorARM.all_accesses.comp.out b/Test/baseResults/spv.tensorARM.all_accesses.comp.out
@@ -1,7 +1,7 @@
 spv.tensorARM.all_accesses.comp
 // Module Version 10000
 // Generated by (magic number): 8000b
-// Id's are bound by 199
+// Id's are bound by 276
 
                               Capability Shader
                               Capability Float16
@@ -10,13 +10,23 @@ spv.tensorARM.all_accesses.comp
                               Capability Int16
                               Capability Int8
                               Capability TensorsARM
+                              Capability Float8EXT
+                              Capability BFloat16TypeKHR
                               Extension  "SPV_ARM_tensors"
+                              Extension  "SPV_EXT_float8"
+                              Extension  "SPV_KHR_bfloat16"
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
                               EntryPoint GLCompute 4  "main"
                               ExecutionMode 4 LocalSize 1 1 1
                               Source GLSL 460
                               SourceExtension  "GL_ARM_tensors"
+                              SourceExtension  "GL_ARM_tensors_bfloat16"
+                              SourceExtension  "GL_ARM_tensors_float_e4m3"
+                              SourceExtension  "GL_ARM_tensors_float_e5m2"
+                              SourceExtension  "GL_EXT_bfloat16"
+                              SourceExtension  "GL_EXT_float_e4m3"
+                              SourceExtension  "GL_EXT_float_e5m2"
                               SourceExtension  "GL_EXT_shader_explicit_arithmetic_types"
                               Name 4  "main"
                               Name 10  "coords"
@@ -59,6 +69,22 @@ spv.tensorARM.all_accesses.comp
                               Name 189  "fr"
                               Name 193  "fr64"
                               Name 197  "frv"
+                              Name 201  "bf16w"
+                              Name 205  "fe5m2w"
+                              Name 209  "fe4m3w"
+                              Name 213  "bft16"
+                              Name 219  "fte5m2"
+                              Name 225  "fte4m3"
+                              Name 231  "bf16wv"
+                              Name 238  "fe5m2wv"
+                              Name 243  "fe4m3one"
+                              Name 246  "fe4m3wv"
+                              Name 254  "bf16r"
+                              Name 258  "fe5m2r"
+                              Name 262  "fe4m3r"
+                              Name 266  "bf16rv"
+                              Name 270  "fe5m2rv"
+                              Name 274  "fe4m3rv"
                               Decorate 31(it8) Binding 0
                               Decorate 31(it8) DescriptorSet 0
                               Decorate 37(it16) Binding 1
@@ -81,6 +107,12 @@ spv.tensorARM.all_accesses.comp
                               Decorate 166(ft32) DescriptorSet 0
                               Decorate 172(ft64) Binding 10
                               Decorate 172(ft64) DescriptorSet 0
+                              Decorate 213(bft16) Binding 11
+                              Decorate 213(bft16) DescriptorSet 0
+                              Decorate 219(fte5m2) Binding 12
+                              Decorate 219(fte5m2) DescriptorSet 0
+                              Decorate 225(fte4m3) Binding 13
+                              Decorate 225(fte4m3) DescriptorSet 0
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeInt 32 0
@@ -163,6 +195,32 @@ spv.tensorARM.all_accesses.comp
              176:             TypeArray 146(float) 53
              177:             TypePointer Function 176
              179:         176 ConstantComposite 149 149 149 149
+             199:             TypeFloat 16 0
+             200:             TypePointer Function 199(bfloat16_t)
+             202:199(bfloat16_t) Constant 16256
+             203:             TypeFloat 8 4215
+             204:             TypePointer Function 203(floate5m2_t)
+             206:203(floate5m2_t) Constant 60
+             207:             TypeFloat 8 4214
+             208:             TypePointer Function 207(floate4m3_t)
+             210:207(floate4m3_t) Constant 56
+             211:             TypeTensorARM 199(bfloat16_t) 7
+             212:             TypePointer UniformConstant 211
+      213(bft16):    212(ptr) Variable UniformConstant
+             217:             TypeTensorARM 203(floate5m2_t) 7
+             218:             TypePointer UniformConstant 217
+     219(fte5m2):    218(ptr) Variable UniformConstant
+             223:             TypeTensorARM 207(floate4m3_t) 7
+             224:             TypePointer UniformConstant 223
+     225(fte4m3):    224(ptr) Variable UniformConstant
+             229:             TypeArray 199(bfloat16_t) 53
+             230:             TypePointer Function 229
+             232:         229 ConstantComposite 202 202 202 202
+             236:             TypeArray 203(floate5m2_t) 53
+             237:             TypePointer Function 236
+             239:         236 ConstantComposite 206 206 206 206
+             244:             TypeArray 207(floate4m3_t) 53
+             245:             TypePointer Function 244
          4(main):           2 Function None 3
                5:             Label
       10(coords):      9(ptr) Variable Function
@@ -194,6 +252,19 @@ spv.tensorARM.all_accesses.comp
          189(fr):    147(ptr) Variable Function
        193(fr64):    155(ptr) Variable Function
         197(frv):    177(ptr) Variable Function
+      201(bf16w):    200(ptr) Variable Function
+     205(fe5m2w):    204(ptr) Variable Function
+     209(fe4m3w):    208(ptr) Variable Function
+     231(bf16wv):    230(ptr) Variable Function
+    238(fe5m2wv):    237(ptr) Variable Function
+   243(fe4m3one):    208(ptr) Variable Function
+    246(fe4m3wv):    245(ptr) Variable Function
+      254(bf16r):    200(ptr) Variable Function
+     258(fe5m2r):    204(ptr) Variable Function
+     262(fe4m3r):    208(ptr) Variable Function
+     266(bf16rv):    230(ptr) Variable Function
+    270(fe5m2rv):    237(ptr) Variable Function
+    274(fe4m3rv):    245(ptr) Variable Function
                               Store 10(coords) 12
                               Store 15(iw8) 16
                               Store 19(iw16) 20
@@ -321,5 +392,62 @@ spv.tensorARM.all_accesses.comp
              196:           8 Load 10(coords)
              198:         176 TensorReadARM 195 196
                               Store 197(frv) 198
+                              Store 201(bf16w) 202
+                              Store 205(fe5m2w) 206
+                              Store 209(fe4m3w) 210
+             214:         211 Load 213(bft16)
+             215:           8 Load 10(coords)
+             216:199(bfloat16_t) Load 201(bf16w)
+                              TensorWriteARM 214 215 216
+             220:         217 Load 219(fte5m2)
+             221:           8 Load 10(coords)
+             222:203(floate5m2_t) Load 205(fe5m2w)
+                              TensorWriteARM 220 221 222
+             226:         223 Load 225(fte4m3)
+             227:           8 Load 10(coords)
+             228:207(floate4m3_t) Load 209(fe4m3w)
+                              TensorWriteARM 226 227 228
+                              Store 231(bf16wv) 232
+             233:         211 Load 213(bft16)
+             234:           8 Load 10(coords)
+             235:         229 Load 231(bf16wv)
+                              TensorWriteARM 233 234 235
+                              Store 238(fe5m2wv) 239
+             240:         217 Load 219(fte5m2)
+             241:           8 Load 10(coords)
+             242:         236 Load 238(fe5m2wv)
+                              TensorWriteARM 240 241 242
+                              Store 243(fe4m3one) 210
+             247:207(floate4m3_t) Load 243(fe4m3one)
+             248:         244 CompositeConstruct 247 247 247 247
+                              Store 246(fe4m3wv) 248
+             249:         223 Load 225(fte4m3)
+             250:           8 Load 10(coords)
+             251:         244 Load 246(fe4m3wv)
+                              TensorWriteARM 249 250 251
+             252:         211 Load 213(bft16)
+             253:           8 Load 10(coords)
+             255:199(bfloat16_t) TensorReadARM 252 253
+                              Store 254(bf16r) 255
+             256:         217 Load 219(fte5m2)
+             257:           8 Load 10(coords)
+             259:203(floate5m2_t) TensorReadARM 256 257
+                              Store 258(fe5m2r) 259
+             260:         223 Load 225(fte4m3)
+             261:           8 Load 10(coords)
+             263:207(floate4m3_t) TensorReadARM 260 261
+                              Store 262(fe4m3r) 263
+             264:         211 Load 213(bft16)
+             265:           8 Load 10(coords)
+             267:         229 TensorReadARM 264 265
+                              Store 266(bf16rv) 267
+             268:         217 Load 219(fte5m2)
+             269:           8 Load 10(coords)
+             271:         236 TensorReadARM 268 269
+                              Store 270(fe5m2rv) 271
+             272:         223 Load 225(fte4m3)
+             273:           8 Load 10(coords)
+             275:         244 TensorReadARM 272 273
+                              Store 274(fe4m3rv) 275
                               Return
                               FunctionEnd
diff --git a/Test/baseResults/spv.tensorARM.unrequested_extension_types.comp.out b/Test/baseResults/spv.tensorARM.unrequested_extension_types.comp.out
@@ -0,0 +1,8 @@
+spv.tensorARM.unrequested_extension_types.comp
+ERROR: 0:8: 'tensor with bfloat16_t type' : required extension not requested: GL_ARM_tensors_bfloat16
+ERROR: 0:9: 'tensor with floate5m2_t type' : required extension not requested: GL_ARM_tensors_float_e5m2
+ERROR: 0:10: 'tensor with floate4m3_t type' : required extension not requested: GL_ARM_tensors_float_e4m3
+ERROR: 3 compilation errors.  No code generated.
+
+
+SPIR-V is not generated for failed compile or link
diff --git a/Test/spv.tensorARM.all_accesses.comp b/Test/spv.tensorARM.all_accesses.comp
@@ -2,6 +2,12 @@
 
 #extension GL_ARM_tensors : enable
 #extension GL_EXT_shader_explicit_arithmetic_types : enable
+#extension GL_EXT_bfloat16 : enable
+#extension GL_EXT_float_e5m2 : enable
+#extension GL_EXT_float_e4m3 : enable
+#extension GL_ARM_tensors_bfloat16 : enable
+#extension GL_ARM_tensors_float_e5m2 : enable
+#extension GL_ARM_tensors_float_e4m3 : enable
 
 // Int types
 uniform tensorARM<int8_t, 1> it8;
@@ -20,6 +26,11 @@ uniform tensorARM<float16_t, 1> ft16;
 uniform tensorARM<float, 1> ft32;
 uniform tensorARM<float64_t, 1> ft64;
 
+// bfloat16, e5m2 and e4m3 types
+uniform tensorARM<bfloat16_t, 1> bft16;
+uniform tensorARM<floate5m2_t, 1> fte5m2;
+uniform tensorARM<floate4m3_t, 1> fte4m3;
+
 void main() {
     uint coords[] = {0};
 
@@ -87,4 +98,38 @@ void main() {
     tensorReadARM(ft64, coords, fr64);
     float frv[4];
     tensorReadARM(ft32, coords, frv);
+
+    // bfloat16, e5m2 and e4m3 types
+    bfloat16_t bf16w = bfloat16_t(1.0);
+    floate5m2_t fe5m2w = floate5m2_t(1.0);
+    floate4m3_t fe4m3w = floate4m3_t(1.0);
+    tensorWriteARM(bft16, coords, bf16w);
+    tensorWriteARM(fte5m2, coords, fe5m2w);
+    tensorWriteARM(fte4m3, coords, fe4m3w);
+
+    const bfloat16_t bf16one = bfloat16_t(1.0);
+    bfloat16_t bf16wv[4] = bfloat16_t[](bf16one, bf16one, bf16one, bf16one);
+    tensorWriteARM(bft16, coords, bf16wv);
+
+    const floate5m2_t fe5m2one = floate5m2_t(1.0);
+    floate5m2_t fe5m2wv[4] = floate5m2_t[](fe5m2one, fe5m2one, fe5m2one, fe5m2one);
+    tensorWriteARM(fte5m2, coords, fe5m2wv);
+
+    floate4m3_t fe4m3one = floate4m3_t(1.0);
+    floate4m3_t fe4m3wv[4] = floate4m3_t[](fe4m3one, fe4m3one, fe4m3one, fe4m3one);
+    tensorWriteARM(fte4m3, coords, fe4m3wv);
+
+    bfloat16_t bf16r;
+    floate5m2_t fe5m2r;
+    floate4m3_t fe4m3r;
+    tensorReadARM(bft16, coords, bf16r);
+    tensorReadARM(fte5m2, coords, fe5m2r);
+    tensorReadARM(fte4m3, coords, fe4m3r);
+
+    bfloat16_t bf16rv[4];
+    floate5m2_t fe5m2rv[4];
+    floate4m3_t fe4m3rv[4];
+    tensorReadARM(bft16, coords, bf16rv);
+    tensorReadARM(fte5m2, coords, fe5m2rv);
+    tensorReadARM(fte4m3, coords, fe4m3rv);
 }
diff --git a/Test/spv.tensorARM.declare.comp b/Test/spv.tensorARM.declare.comp
@@ -3,6 +3,30 @@
 #extension GL_ARM_tensors : enable
 #extension GL_EXT_shader_explicit_arithmetic_types : enable
 
+#if !defined GL_ARM_tensors
+#  error GL_ARM_tensors is not defined
+#elif GL_ARM_tensors != 1
+#  error GL_ARM_tensors is not equal to 1
+#endif
+
+#if !defined GL_ARM_tensors_bfloat16
+#  error GL_ARM_tensors_bfloat16 is not defined
+#elif GL_ARM_tensors_bfloat16 != 1
+#  error GL_ARM_tensors_bfloat16 is not equal to 1
+#endif
+
+#if !defined GL_ARM_tensors_float_e5m2
+#  error GL_ARM_tensors_float_e5m2 is not defined
+#elif GL_ARM_tensors_float_e5m2 != 1
+#  error GL_ARM_tensors_float_e5m2 is not equal to 1
+#endif
+
+#if !defined GL_ARM_tensors_float_e4m3
+#  error GL_ARM_tensors_float_e4m3 is not defined
+#elif GL_ARM_tensors_float_e4m3 != 1
+#  error GL_ARM_tensors_float_e4m3 is not equal to 1
+#endif
+
 layout(binding = 0) uniform tensorARM<int32_t, 4> t;
 layout(set = 0, binding = 1) uniform tensorARM<bool, 2> tb;
 
diff --git a/Test/spv.tensorARM.unrequested_extension_types.comp b/Test/spv.tensorARM.unrequested_extension_types.comp
@@ -0,0 +1,12 @@
+#version 460 core
+
+#extension GL_ARM_tensors : enable
+#extension GL_EXT_bfloat16 : enable
+#extension GL_EXT_float_e5m2 : enable
+#extension GL_EXT_float_e4m3 : enable
+
+uniform tensorARM<bfloat16_t, 1> bfloat16;
+uniform tensorARM<floate5m2_t, 1> floate5m2;
+uniform tensorARM<floate4m3_t, 1> floate4m3;
+
+void main() {}
diff --git a/glslang/MachineIndependent/Initialize.cpp b/glslang/MachineIndependent/Initialize.cpp
@@ -5074,6 +5074,7 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
             "int8_t", "int16_t", "int32_t", "int64_t",
             "uint8_t", "uint16_t", "uint32_t", "uint64_t",
             "float16_t", "float32_t", "float64_t",
+            "bfloat16_t", "floate5m2_t", "floate4m3_t",
         };
         std::ostringstream ostream;
         for (auto t : tensorDataTypesARM) {
diff --git a/glslang/MachineIndependent/ParseHelper.cpp b/glslang/MachineIndependent/ParseHelper.cpp
@@ -9262,10 +9262,24 @@ TIntermNode* TParseContext::declareVariable(const TSourceLoc& loc, TString& iden
                 publicType.typeParameters->basicType != EbtUint64 &&
                 publicType.typeParameters->basicType != EbtFloat16 &&
                 publicType.typeParameters->basicType != EbtFloat &&
-                publicType.typeParameters->basicType != EbtDouble) {
+                publicType.typeParameters->basicType != EbtDouble &&
+                publicType.typeParameters->basicType != EbtBFloat16 &&
+                publicType.typeParameters->basicType != EbtFloatE5M2 &&
+                publicType.typeParameters->basicType != EbtFloatE4M3) {
                 error(loc, "expected bool, integer or floating point type parameter", identifier.c_str(), "");
             }
 
+            if (publicType.typeParameters->basicType == EbtBFloat16) {
+                requireExtensions(loc, 1, &E_GL_ARM_tensors_bfloat16, "tensor with bfloat16_t type");
+            }
+
+            if (publicType.typeParameters->basicType == EbtFloatE5M2) {
+                requireExtensions(loc, 1, &E_GL_ARM_tensors_float_e5m2, "tensor with floate5m2_t type");
+            }
+
+            if (publicType.typeParameters->basicType == EbtFloatE4M3) {
+                requireExtensions(loc, 1, &E_GL_ARM_tensors_float_e4m3, "tensor with floate4m3_t type");
+            }
         }
     } else {
         if (publicType.typeParameters && publicType.typeParameters->arraySizes->getNumDims() != 0) {
diff --git a/glslang/MachineIndependent/Versions.cpp b/glslang/MachineIndependent/Versions.cpp
@@ -323,6 +323,9 @@ void TParseVersions::initializeExtensionBehavior()
     // ARM
     extensionBehavior[E_GL_ARM_shader_core_builtins]                 = EBhDisable;
     extensionBehavior[E_GL_ARM_tensors]                              = EBhDisable;
+    extensionBehavior[E_GL_ARM_tensors_bfloat16]                     = EBhDisable;
+    extensionBehavior[E_GL_ARM_tensors_float_e5m2]                   = EBhDisable;
+    extensionBehavior[E_GL_ARM_tensors_float_e4m3]                   = EBhDisable;
 
     // QCOM
     extensionBehavior[E_GL_QCOM_image_processing]                    = EBhDisable;
@@ -667,6 +670,15 @@ void TParseVersions::getPreamble(std::string& preamble)
         if (version >= 130) {
             preamble +="#define GL_FRAGMENT_PRECISION_HIGH 1\n";
         }
+
+        if (version >= 460) {
+            preamble +=
+                "#define GL_ARM_tensors 1\n"
+                "#define GL_ARM_tensors_bfloat16 1\n"
+                "#define GL_ARM_tensors_float_e5m2 1\n"
+                "#define GL_ARM_tensors_float_e4m3 1\n"
+                ;
+        }
     }
 
     if ((!isEsProfile() && version >= 460) ||
diff --git a/glslang/MachineIndependent/Versions.h b/glslang/MachineIndependent/Versions.h
@@ -301,6 +301,9 @@ const char* const E_GL_NV_explicit_typecast                     = "GL_NV_explici
 // ARM
 const char* const E_GL_ARM_shader_core_builtins                 = "GL_ARM_shader_core_builtins";
 const char* const E_GL_ARM_tensors                              = "GL_ARM_tensors";
+const char* const E_GL_ARM_tensors_bfloat16                     = "GL_ARM_tensors_bfloat16";
+const char* const E_GL_ARM_tensors_float_e5m2                   = "GL_ARM_tensors_float_e5m2";
+const char* const E_GL_ARM_tensors_float_e4m3                   = "GL_ARM_tensors_float_e4m3";
 
 // Arrays of extensions for the above viewportEXTs duplications
 
diff --git a/gtests/Spv.FromFile.cpp b/gtests/Spv.FromFile.cpp
@@ -604,6 +604,7 @@ INSTANTIATE_TEST_SUITE_P(
         "spv.tensorARM.params.comp",
         "spv.tensorARM.read.comp",
         "spv.tensorARM.size.comp",
+        "spv.tensorARM.unrequested_extension_types.comp",
     })),
     FileNameAsCustomTestSuffix
 );