[vulkan] register more NVIDIA extensions (#3546)

commit: 2093b644a7495104b0a6f86ef10b1a7b0b113d30 [log] [tgz]
author: Lei Zhang <antiagainst@google.com> Tue Oct 20 17:16:49 2020 -0400
committer: GitHub <noreply@github.com> Tue Oct 20 17:16:49 2020 -0400
tree: c30efeb8597486f0c3fe5d42038c5ceb2378b397
parent: 72d56d372c3fa0eacbc24fc181cfa66294ba9a31 [diff]
diff --git a/iree/compiler/Dialect/Vulkan/Utils/TargetEnvUtils.cpp b/iree/compiler/Dialect/Vulkan/Utils/TargetEnvUtils.cpp
index 48e0604..392adb5 100644
--- a/iree/compiler/Dialect/Vulkan/Utils/TargetEnvUtils.cpp
+++ b/iree/compiler/Dialect/Vulkan/Utils/TargetEnvUtils.cpp

@@ -237,8 +237,10 @@
 
   if (triple == "turing-t4-unknown-linux") {
     return R"(#vk.target_env<
-     v1.2, r(133), [VK_KHR_storage_buffer_storage_class,
-       VK_NV_cooperative_matrix], NVIDIA:DiscreteGPU, {
+     v1.2, r(133), [
+        VK_KHR_16bit_storage, VK_KHR_8bit_storage, VK_KHR_shader_float16_int8,
+        VK_KHR_spirv_1_4, VK_KHR_storage_buffer_storage_class,
+        VK_KHR_variable_pointers, VK_NV_cooperative_matrix], NVIDIA:DiscreteGPU, {
        maxComputeSharedMemorySize = 49152: i32,
        maxComputeWorkGroupInvocations = 1024: i32,
        maxComputeWorkGroupSize = dense<[2147483647, 65535, 65535]> : vector<3xi32>,

diff --git a/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir b/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir
index a09cbb6..7549884 100644
--- a/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir
+++ b/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir

@@ -10,7 +10,7 @@
 // DEFAULT: #spv.target_env<#spv.vce<v1.3, [Shader, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative], [SPV_KHR_storage_buffer_storage_class]>, SwiftShader:CPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 16384 : i32, max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>, subgroup_size = 4 : i32}>
 // ADRENO640: #spv.target_env<#spv.vce<v1.3, [Shader, Int16, GroupNonUniform, GroupNonUniformVote, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, Qualcomm:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 1024 : i32, max_compute_workgroup_size = dense<[1024, 1024, 64]> : vector<3xi32>, subgroup_size = 64 : i32}>
 // MALIG77: #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>
-// TURINGT4: #spv.target_env<#spv.vce<v1.5, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_storage_buffer_storage_class, SPV_NV_cooperative_matrix]>, NVIDIA:DiscreteGPU, {cooperative_matrix_properties_nv = [{a_type = i8, b_type = i8, c_type = i32, k_size = 32 : i32, m_size = 8 : i32, n_size = 8 : i32, result_type = i32, scope = 3 : i32}, {a_type = f16, b_type = f16, c_type = f16, k_size = 16 : i32, m_size = 8 : i32, n_size = 8 : i32, result_type = f16, scope = 3 : i32}, {a_type = f16, b_type = f16, c_type = f32, k_size = 16 : i32, m_size = 8 : i32, n_size = 8 : i32, result_type = f32, scope = 3 : i32}], max_compute_shared_memory_size = 49152 : i32, max_compute_workgroup_invocations = 1024 : i32, max_compute_workgroup_size = dense<[2147483647, 65535, 65535]> : vector<3xi32>, subgroup_size = 32 : i32}>
+// TURINGT4: #spv.target_env<#spv.vce<v1.5, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, NVIDIA:DiscreteGPU, {cooperative_matrix_properties_nv = [{a_type = i8, b_type = i8, c_type = i32, k_size = 32 : i32, m_size = 8 : i32, n_size = 8 : i32, result_type = i32, scope = 3 : i32}, {a_type = f16, b_type = f16, c_type = f16, k_size = 16 : i32, m_size = 8 : i32, n_size = 8 : i32, result_type = f16, scope = 3 : i32}, {a_type = f16, b_type = f16, c_type = f32, k_size = 16 : i32, m_size = 8 : i32, n_size = 8 : i32, result_type = f32, scope = 3 : i32}], max_compute_shared_memory_size = 49152 : i32, max_compute_workgroup_invocations = 1024 : i32, max_compute_workgroup_size = dense<[2147483647, 65535, 65535]> : vector<3xi32>, subgroup_size = 32 : i32}>
 flow.executable @simpleMath_ex_dispatch_0 {
   flow.dispatch.entry @simpleMath_rgn_dispatch_0 attributes {
       workload = 4 : index
commit	2093b644a7495104b0a6f86ef10b1a7b0b113d30	[log] [tgz]
author	Lei Zhang <antiagainst@google.com>	Tue Oct 20 17:16:49 2020 -0400
committer	GitHub <noreply@github.com>	Tue Oct 20 17:16:49 2020 -0400
tree	c30efeb8597486f0c3fe5d42038c5ceb2378b397
parent	72d56d372c3fa0eacbc24fc181cfa66294ba9a31 [diff]