// blob: ca992b977f93c0dabe3703102601fdd02be010dd
// Copyright 2026 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Pure HAL sample: buffer fill, copy, and readback via command buffer.
//
// Demonstrates the core HAL workflow without any VM, bytecode modules, or
// compiled executables. This is the minimal path through the HAL:
//
// 1. Create a device from the registry by URI.
// 2. Allocate two buffers with appropriate memory types and usage flags.
// 3. Record a command buffer that fills one buffer and copies to the other.
// 4. Submit the command buffer to the device queue with a signal semaphore.
// 5. Wait for the semaphore, then read back and verify results.
//
// No compiled shaders are needed -- fill and copy are built-in transfer
// operations supported by every HAL driver.
//
// Usage:
// hello --device=local-sync
// hello --device=local-task
// hello --device=vulkan
#include <inttypes.h>
#include <stdio.h>
#include "iree/base/api.h"
#include "iree/base/tooling/flags.h"
#include "iree/hal/api.h"
#include "iree/hal/drivers/init.h"
// --device command-line flag: URI of the HAL device the sample runs on.
// Defaults to "local-sync"; the parsed value is read through FLAG_device.
IREE_FLAG(string, device, "local-sync",
"HAL device URI to use for execution.\n"
"Examples: local-sync, local-task, vulkan, cuda, hip");
// The fill pattern and buffer size are chosen to be small enough to print
// but large enough to exercise the command buffer path (not inlined).
// Number of 32-bit elements held by each of the two buffers.
#define BUFFER_ELEMENT_COUNT 16
// Size in bytes of each buffer (element count times the element width).
#define BUFFER_SIZE (BUFFER_ELEMENT_COUNT * sizeof(uint32_t))
// 32-bit value written by the fill and expected in every element on readback.
#define FILL_PATTERN UINT32_C(0xCAFEF00D)
//===----------------------------------------------------------------------===//
// Steps 3-5: record the transfer commands, submit them, wait, and verify.
//
// Every object used here is borrowed from the caller, which is also the one
// that releases it. Because nothing is acquired locally, any failure can be
// propagated straight out with IREE_RETURN_IF_ERROR -- there is no cleanup
// path to maintain.
//
// Returns OK when all BUFFER_ELEMENT_COUNT elements read back from
// |destination_buffer| equal FILL_PATTERN, DATA_LOSS for the first element
// that differs, or the status of whichever HAL call failed.
//===----------------------------------------------------------------------===//
static iree_status_t record_and_verify(
    iree_hal_device_t* device, iree_hal_buffer_t* source_buffer,
    iree_hal_buffer_t* destination_buffer,
    iree_hal_command_buffer_t* command_buffer,
    iree_hal_semaphore_t* semaphore) {
  //--- Step 3: recording ------------------------------------------------------
  fputs("3. Recording command buffer...\n", stdout);
  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_begin(command_buffer));

  // Stamp the 32-bit pattern across the whole source buffer. This is a
  // transfer command, so no executable or dispatch is involved.
  uint32_t pattern = FILL_PATTERN;
  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_fill_buffer(
      command_buffer,
      iree_hal_make_buffer_ref(source_buffer, /*offset=*/0, BUFFER_SIZE),
      &pattern, sizeof(pattern), IREE_HAL_FILL_FLAG_NONE));

  // Order the two transfers: the fill's writes must be visible to the copy's
  // reads of the same buffer.
  iree_hal_memory_barrier_t fill_to_copy = {0};
  fill_to_copy.source_scope = IREE_HAL_ACCESS_SCOPE_TRANSFER_WRITE;
  fill_to_copy.target_scope = IREE_HAL_ACCESS_SCOPE_TRANSFER_READ;
  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_execution_barrier(
      command_buffer, IREE_HAL_EXECUTION_STAGE_TRANSFER,
      IREE_HAL_EXECUTION_STAGE_TRANSFER, IREE_HAL_EXECUTION_BARRIER_FLAG_NONE,
      /*memory_barrier_count=*/1, &fill_to_copy,
      /*buffer_barrier_count=*/0, /*buffer_barriers=*/NULL));

  // Duplicate the full source range into the destination buffer.
  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_copy_buffer(
      command_buffer,
      iree_hal_make_buffer_ref(source_buffer, /*offset=*/0, BUFFER_SIZE),
      iree_hal_make_buffer_ref(destination_buffer, /*offset=*/0, BUFFER_SIZE),
      IREE_HAL_COPY_FLAG_NONE));

  IREE_RETURN_IF_ERROR(iree_hal_command_buffer_end(command_buffer));

  //--- Step 4: submission -----------------------------------------------------
  fputs("4. Submitting to device queue...\n", stdout);

  // Nothing to wait on (empty wait list); the semaphore is advanced to
  // |retire_value| once the command buffer has finished executing.
  uint64_t retire_value = 1;
  iree_hal_semaphore_list_t signals = {0};
  signals.count = 1;
  signals.semaphores = &semaphore;
  signals.payload_values = &retire_value;
  IREE_RETURN_IF_ERROR(iree_hal_device_queue_execute(
      device, IREE_HAL_QUEUE_AFFINITY_ANY, iree_hal_semaphore_list_empty(),
      signals, command_buffer, iree_hal_buffer_binding_table_empty(),
      IREE_HAL_EXECUTE_FLAG_NONE));

  //--- Step 5: wait and verify ------------------------------------------------
  fputs("5. Waiting for completion...\n", stdout);

  // Block with no timeout until the semaphore reaches the signaled value.
  IREE_RETURN_IF_ERROR(iree_hal_semaphore_wait(semaphore, retire_value,
                                               iree_infinite_timeout(),
                                               IREE_HAL_WAIT_FLAG_DEFAULT));
  fputs(" Command buffer completed.\n", stdout);

  // Pull the destination contents into a host array via a blocking read.
  // (Production code would typically stage through a queue-ordered copy.)
  uint32_t results[BUFFER_ELEMENT_COUNT] = {0};
  IREE_RETURN_IF_ERROR(iree_hal_buffer_map_read(
      destination_buffer, /*source_offset=*/0, results, sizeof(results)));

  fprintf(stdout, " Verifying %d elements...\n", BUFFER_ELEMENT_COUNT);

  // Advance past every matching element; stopping early means |index| names
  // the first element that does not hold the pattern.
  int index = 0;
  while (index < BUFFER_ELEMENT_COUNT && results[index] == FILL_PATTERN) {
    ++index;
  }
  if (index != BUFFER_ELEMENT_COUNT) {
    return iree_make_status(IREE_STATUS_DATA_LOSS,
                            "mismatch at element %d: expected 0x%08" PRIX32
                            ", got 0x%08" PRIX32,
                            index, (uint32_t)FILL_PATTERN, results[index]);
  }

  fprintf(stdout, "\nAll %d elements verified: 0x%08" PRIX32 "\n",
          BUFFER_ELEMENT_COUNT, (uint32_t)FILL_PATTERN);
  return iree_ok_status();
}
//===----------------------------------------------------------------------===//
// Steps 1-2 and teardown: create the device and every HAL resource, run the
// workload, then release everything.
//
// Each handle starts out NULL and is released exactly once at the `cleanup`
// label, whether or not it was ever created -- every iree_hal_*_release call
// is safe to make with NULL. The first failing status skips the remaining
// steps and is returned to the caller after cleanup.
//===----------------------------------------------------------------------===//
static iree_status_t run_sample(void) {
  iree_allocator_t allocator = iree_allocator_system();

  // Handles owned by this function; NULL means "not created".
  iree_hal_device_t* device = NULL;
  iree_hal_buffer_t* device_buffer = NULL;
  iree_hal_buffer_t* host_buffer = NULL;
  iree_hal_command_buffer_t* transfer_commands = NULL;
  iree_hal_semaphore_t* completion = NULL;

  // Source of the copy. Only the command buffer touches it (written by the
  // fill, read by the copy), never the host, so it is requested as
  // device-local memory (VRAM on GPUs, plain host memory on CPU backends).
  iree_hal_buffer_params_t device_buffer_params = {
      .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
      .usage = IREE_HAL_BUFFER_USAGE_TRANSFER_SOURCE |
               IREE_HAL_BUFFER_USAGE_TRANSFER_TARGET,
  };

  // Target of the copy. The device writes it and the host then reads it back,
  // so it is requested as host-local memory that the device can also see and
  // that may be mapped.
  iree_hal_buffer_params_t host_buffer_params = {
      .type =
          IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
      .usage =
          IREE_HAL_BUFFER_USAGE_TRANSFER_TARGET | IREE_HAL_BUFFER_USAGE_MAPPING,
  };

  //--- Step 1: device ---------------------------------------------------------
  fprintf(stdout, "1. Creating device (--device=%s)...\n", FLAG_device);

  // Make every driver linked into the binary known to the default registry.
  iree_status_t status = iree_hal_register_all_available_drivers(
      iree_hal_driver_registry_default());
  if (!iree_status_is_ok(status)) goto cleanup;

  // Resolve the URI to a device in a single call (driver lookup, driver
  // creation, and default device selection).
  status = iree_hal_create_device(iree_hal_driver_registry_default(),
                                  iree_make_cstring_view(FLAG_device),
                                  allocator, &device);
  if (!iree_status_is_ok(status)) goto cleanup;

  {
    // Report the device identifier, substituting a placeholder when empty.
    iree_string_view_t reported_id = iree_hal_device_id(device);
    iree_string_view_t shown_id =
        reported_id.size ? reported_id : iree_make_cstring_view("(unnamed)");
    fprintf(stdout, " Device: '%.*s'\n", (int)shown_id.size, shown_id.data);
  }

  //--- Step 2: buffers, command buffer, semaphore -----------------------------
  fprintf(stdout, "2. Allocating buffers (%" PRIhsz " bytes each)...\n",
          (iree_host_size_t)BUFFER_SIZE);

  status = iree_hal_allocator_allocate_buffer(iree_hal_device_allocator(device),
                                              device_buffer_params, BUFFER_SIZE,
                                              &device_buffer);
  if (!iree_status_is_ok(status)) goto cleanup;

  status = iree_hal_allocator_allocate_buffer(iree_hal_device_allocator(device),
                                              host_buffer_params, BUFFER_SIZE,
                                              &host_buffer);
  if (!iree_status_is_ok(status)) goto cleanup;

  // Single-use command buffer restricted to transfer commands.
  status = iree_hal_command_buffer_create(
      device, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
      IREE_HAL_COMMAND_CATEGORY_TRANSFER, IREE_HAL_QUEUE_AFFINITY_ANY,
      /*binding_capacity=*/0, &transfer_commands);
  if (!iree_status_is_ok(status)) goto cleanup;

  // Timeline semaphore starting at 0; the submission advances it to 1.
  status = iree_hal_semaphore_create(device, IREE_HAL_QUEUE_AFFINITY_ANY,
                                     /*initial_value=*/0,
                                     IREE_HAL_SEMAPHORE_FLAG_NONE, &completion);
  if (!iree_status_is_ok(status)) goto cleanup;

  //--- Steps 3-5: record, submit, wait, verify --------------------------------
  status = record_and_verify(device, device_buffer, host_buffer,
                             transfer_commands, completion);
  if (!iree_status_is_ok(status)) goto cleanup;

  fprintf(stdout, "HAL hello sample completed successfully.\n");

cleanup:
  // Release in reverse order of creation; untouched handles are still NULL.
  iree_hal_semaphore_release(completion);
  iree_hal_command_buffer_release(transfer_commands);
  iree_hal_buffer_release(host_buffer);
  iree_hal_buffer_release(device_buffer);
  iree_hal_device_release(device);
  return status;
}
//===----------------------------------------------------------------------===//
// Entry point.
//
// Parses the command-line flags, runs the sample, and maps the outcome to the
// process exit code: 0 on success, otherwise the numeric status code of the
// failure (after the status has been printed to stderr and freed).
//===----------------------------------------------------------------------===//
int main(int argc, char** argv) {
  iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv);

  int exit_code = 0;
  iree_status_t status = run_sample();
  if (!iree_status_is_ok(status)) {
    // Print the failure, then capture its code before the status is freed.
    iree_status_fprint(stderr, status);
    fputc('\n', stderr);
    exit_code = (int)iree_status_code(status);
    iree_status_free(status);
  }
  return exit_code;
}