[cuda] Add command-line option to drop legacy sync mode (#15582)
Legacy sync mode forces waiting on semaphores immediately, so
it effectively runs all async allocation/execution in sync mode.
Now that we have proper semaphore emulation in cuda2, we don't
need this anymore. Disable it for the end-to-end op tests of the
experimental cuda2 driver.
Progress towards https://github.com/openxla/iree/issues/13245
diff --git a/compiler/plugins/target/CUDA/CUDATarget.cpp b/compiler/plugins/target/CUDA/CUDATarget.cpp
index 2e976bb..59e581c 100644
--- a/compiler/plugins/target/CUDA/CUDATarget.cpp
+++ b/compiler/plugins/target/CUDA/CUDATarget.cpp
@@ -62,6 +62,7 @@
bool clUsePtxas = false;
std::string clUsePtxasFrom;
std::string clUsePtxasParams;
+ bool enableLegacySync = true;
void bindOptions(OptionsBinder &binder) {
static llvm::cl::OptionCategory category("CUDA HAL Target");
@@ -104,6 +105,12 @@
"iree-hal-cuda-use-ptxas-params", clUsePtxasParams,
llvm::cl::cat(category),
llvm::cl::desc("Passes the given additional parameters to ptxas."));
+
+ binder.opt<bool>(
+ "iree-hal-cuda-enable-legacy-sync", enableLegacySync,
+ llvm::cl::cat(category),
+ llvm::cl::desc(
+ "Enable legacy sync mode that handles semaphores synchronously."));
}
};
} // namespace
@@ -390,7 +397,9 @@
// Indicates that the runtime HAL driver operates only in the legacy
// synchronous mode.
- configItems.emplace_back(b.getStringAttr("legacy_sync"), b.getUnitAttr());
+ if (options.enableLegacySync) {
+ configItems.emplace_back(b.getStringAttr("legacy_sync"), b.getUnitAttr());
+ }
configItems.emplace_back(b.getStringAttr("executable_targets"),
getExecutableTargets(context));
diff --git a/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt b/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt
index 482f154..2b1772e 100644
--- a/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt
+++ b/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt
@@ -76,6 +76,7 @@
"--iree-input-type=stablehlo"
# TODO(#13984): We need memset emulation to workaround CUDA graph issues for now.
"--iree-stream-emulate-memset"
+ "--iree-hal-cuda-enable-legacy-sync=false"
RUNNER_ARGS
"--cuda2_use_streams=false"
LABELS
@@ -156,6 +157,7 @@
"cuda2"
COMPILER_FLAGS
"--iree-input-type=stablehlo"
+ "--iree-hal-cuda-enable-legacy-sync=false"
RUNNER_ARGS
"--cuda2_use_streams=true"
LABELS
diff --git a/experimental/cuda2/tests/tosa_ops/CMakeLists.txt b/experimental/cuda2/tests/tosa_ops/CMakeLists.txt
index 88752fe..e82be08 100644
--- a/experimental/cuda2/tests/tosa_ops/CMakeLists.txt
+++ b/experimental/cuda2/tests/tosa_ops/CMakeLists.txt
@@ -57,6 +57,7 @@
"--iree-input-type=tosa"
# TODO(#13984): We need memset emulation to workaround CUDA graph issues for now.
"--iree-stream-emulate-memset"
+ "--iree-hal-cuda-enable-legacy-sync=false"
RUNNER_ARGS
"--cuda2_use_streams=false"
LABELS
@@ -118,6 +119,7 @@
"cuda2"
COMPILER_FLAGS
"--iree-input-type=tosa"
+ "--iree-hal-cuda-enable-legacy-sync=false"
RUNNER_ARGS
"--cuda2_use_streams=true"
LABELS