// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef IREE_HAL_COMMAND_BUFFER_H_
#define IREE_HAL_COMMAND_BUFFER_H_

#include <array>
#include <cstdint>
#include <string>

#include "absl/types/span.h"
#include "base/bitfield.h"
#include "base/shape.h"
#include "base/status.h"
#include "hal/allocator.h"
#include "hal/buffer.h"
#include "hal/buffer_view.h"
#include "hal/event.h"
#include "hal/executable.h"
#include "hal/resource.h"

namespace iree {
namespace hal {

// A bitfield specifying the mode of operation for a command buffer.
enum class CommandBufferMode : uint32_t {
// Command buffer will be submitted once and never used again.
  // This may enable in-place patching of command buffers, reducing overhead
  // when it's known that command buffers will not be reused.
kOneShot = 1 << 0,
};
IREE_BITFIELD(CommandBufferMode);
using CommandBufferModeBitfield = CommandBufferMode;
std::string CommandBufferModeString(CommandBufferModeBitfield mode);

// A bitfield specifying the category of commands in a command queue.
enum class CommandCategory : uint32_t {
// Command is considered a transfer operation (memcpy, etc).
kTransfer = 1 << 0,
// Command is considered a dispatch operation (dispatch/execute).
kDispatch = 1 << 1,
};
IREE_BITFIELD(CommandCategory);
using CommandCategoryBitfield = CommandCategory;
std::string CommandCategoryString(CommandCategoryBitfield categories);
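
// For illustration, a sketch of combining and testing these bitfield values
// (IREE_BITFIELD defines the usual bitwise operators on the enum class):
//   CommandCategoryBitfield categories =
//       CommandCategory::kTransfer | CommandCategory::kDispatch;
//   bool has_dispatch = (categories & CommandCategory::kDispatch) ==
//                       CommandCategory::kDispatch;
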
// Bitfield specifying which execution stage a barrier should start/end at.
//
// Maps to VkPipelineStageFlagBits.
enum class ExecutionStage : uint32_t {
// Top of the pipeline when commands are initially issued by the device.
kCommandIssue = 1 << 0,
// Stage of the pipeline when dispatch parameter data is consumed.
kCommandProcess = 1 << 1,
// Stage where dispatch commands execute.
kDispatch = 1 << 2,
// Stage where transfer (copy/clear/fill/etc) commands execute.
kTransfer = 1 << 3,
// Final stage in the pipeline when commands are retired on the device.
kCommandRetire = 1 << 4,
  // Pseudo-stage for reads/writes by the host. Not executed on device.
kHost = 1 << 5,
};
IREE_BITFIELD(ExecutionStage);
using ExecutionStageBitfield = ExecutionStage;

// Bitfield specifying which scopes will access memory and how.
//
// Maps to VkAccessFlagBits.
enum class AccessScope : uint32_t {
// Read access to indirect command data as part of an indirect dispatch.
kIndirectCommandRead = 1 << 0,
// Constant uniform buffer reads by the device.
kConstantRead = 1 << 1,
// Storage buffer reads by dispatch commands.
kDispatchRead = 1 << 2,
// Storage buffer writes by dispatch commands.
kDispatchWrite = 1 << 3,
// Source of a transfer operation.
kTransferRead = 1 << 4,
// Target of a transfer operation.
kTransferWrite = 1 << 5,
// Read operation by the host through mapped memory.
kHostRead = 1 << 6,
// Write operation by the host through mapped memory.
kHostWrite = 1 << 7,
// External/non-specific read.
kMemoryRead = 1 << 8,
// External/non-specific write.
kMemoryWrite = 1 << 9,
};
IREE_BITFIELD(AccessScope);
using AccessScopeBitfield = AccessScope;

// Defines a global memory barrier.
// These are cheaper to encode than buffer-specific barriers but may cause
// stalls and bubbles in device pipelines if applied too broadly. Prefer them
// over equivalently large sets of buffer-specific barriers (such as when
// completely changing execution contexts).
//
// Maps to VkMemoryBarrier.
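//
// For illustration, a sketch making dispatch writes visible to subsequent
// transfer reads:
//   MemoryBarrier barrier;
//   barrier.source_scope = AccessScope::kDispatchWrite;
//   barrier.target_scope = AccessScope::kTransferRead;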
struct MemoryBarrier {
  // All access scopes prior to the barrier (inclusive).
  AccessScopeBitfield source_scope;
  // All access scopes following the barrier (inclusive).
  AccessScopeBitfield target_scope;
};

// Defines a memory barrier that applies to a range of a specific buffer.
// Use of these (vs. global memory barriers) provides fine-grained execution
// ordering to device command processors and allows for more aggressive
// reordering.
//
// Maps to VkBufferMemoryBarrier.
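//
// For illustration, a sketch restricting the same dependency to a single
// buffer (|source_buffer| here is a hypothetical Buffer*):
//   BufferBarrier barrier;
//   barrier.source_scope = AccessScope::kDispatchWrite;
//   barrier.target_scope = AccessScope::kTransferRead;
//   barrier.buffer = source_buffer;
//   barrier.offset = 0;
//   barrier.length = kWholeBuffer;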
struct BufferBarrier {
  // All access scopes prior to the barrier (inclusive).
  AccessScopeBitfield source_scope;
  // All access scopes following the barrier (inclusive).
  AccessScopeBitfield target_scope;
// Buffer the barrier is restricted to.
// The barrier will apply to the entire physical device allocation.
Buffer* buffer = nullptr;
// Relative offset/length within |buffer| (which may itself be mapped into the
// device allocation at an offset).
device_size_t offset = 0;
device_size_t length = kWholeBuffer;
};

// Represents a binding to a buffer with a set of attributes.
// This may be used by drivers to validate alignment.
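//
// For illustration, a sketch of constructing bindings (|input_view| and
// |output_buffer| are hypothetical):
//   BufferBinding input(MemoryAccess::kRead, input_view);
//   BufferBinding output(MemoryAccess::kWrite, output_buffer);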
struct BufferBinding {
// Access rights of the buffer contents by the executable.
MemoryAccessBitfield access = MemoryAccess::kAll;
// The buffer this binding references.
// The buffer is not retained by the binding and must be kept alive externally
// for the duration it is in use by the queue.
Buffer* buffer = nullptr;
// Shape of the buffer contents.
Shape shape;
// Size of each element within the buffer, in bytes.
int8_t element_size = 0;
BufferBinding() = default;
BufferBinding(MemoryAccessBitfield access, Buffer* buffer)
: access(access), buffer(buffer) {}
BufferBinding(MemoryAccessBitfield access, Buffer* buffer, Shape shape,
int8_t element_size)
: access(access),
buffer(buffer),
shape(shape),
element_size(element_size) {}
BufferBinding(MemoryAccessBitfield access, const BufferView& buffer_view)
: access(access),
buffer(buffer_view.buffer.get()),
shape(buffer_view.shape),
element_size(buffer_view.element_size) {}
};

// Wraps parameters for a Dispatch request.
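//
// For illustration, a sketch of a filled request (|executable| and |bindings|
// are hypothetical):
//   DispatchRequest request;
//   request.executable = executable;
//   request.entry_point = 0;
//   request.workload = {{128, 1, 1}};
//   request.bindings = bindings;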
struct DispatchRequest {
// Executable prepared for use on the device.
// The executable must remain alive until all in-flight dispatch requests
// that use it have completed.
Executable* executable = nullptr;
// Executable entry point ordinal.
int entry_point = 0;
// TODO(benvanik): predication.
// Static workload parameters defining the X, Y, and Z workgroup counts.
std::array<int32_t, 3> workload;
// An optional buffer containing the dynamic workload to dispatch.
// The contents need not be available at the time of recording but must be
// made visible prior to execution of the dispatch command.
//
// Buffer contents are expected to be 3 int32 values defining the X, Y, and Z
// workgroup counts.
//
// The buffer must have been allocated with BufferUsage::kDispatch and be
// of MemoryType::kDeviceVisible.
Buffer* workload_buffer = nullptr;
// A list of buffers that contain the execution inputs/outputs.
// Order is dependent on executable arg layout.
//
// Buffers must have been allocated with BufferUsage::kDispatch and be
// of MemoryType::kDeviceVisible.
absl::Span<const BufferBinding> bindings;
// TODO(benvanik): push-constant equivalent (uniforms, etc).
};

// Asynchronous command buffer recording interface.
// Commands are recorded by the implementation for later submission to command
// queues.
//
// Buffers and synchronization objects referenced must remain valid and not be
// modified or read while there are commands in-flight. The usual flow is to
// populate input buffers, Dispatch using those buffers, wait on a Fence until
// the buffers are guaranteed to no longer be in use, and then reuse or release
// the buffers.
//
// Errors that can be recognized when operations are enqueued will be returned
// immediately, such as invalid argument errors. Errors that can only be
// determined at execution time will be returned on fences. Once a failure
// occurs the device queue will enter an error state that invalidates all
// operations on the device queue (as ordering is not strict and any may still
// be in-flight). In this case the user of the device queue should treat all
// in-flight operations as cancelled and fully reset themselves. Other device
// queues that may be waiting on events from the device queue will also enter
// error states. Only once a user has acknowledged and cleared the error state
// with a Reset will the queue become usable again; until then all operations
// will return errors.
//
// Command buffers are thread-compatible. Use multiple command buffers if trying
// to record commands from multiple threads. Command buffers must not be mutated
// between when they are submitted for execution on a queue and when the
// fence fires indicating the completion of their execution.
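//
// For illustration, a sketch of the usual recording flow (queue submission
// and fence APIs live elsewhere in the HAL; a RETURN_IF_ERROR-style status
// macro is assumed):
//   RETURN_IF_ERROR(command_buffer->Begin());
//   ... record transfer and dispatch commands ...
//   RETURN_IF_ERROR(command_buffer->End());
//   // Submit on a queue and wait on a fence before reusing the buffers.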
class CommandBuffer : public Resource {
public:
virtual CommandBuffer* impl() { return this; }
// Device allocator that commands encoded into the buffer share compatibility
// with.
Allocator* allocator() const { return allocator_; }
// Command buffer operation mode.
CommandBufferModeBitfield mode() const { return mode_; }
// Command categories that may be recorded into the buffer.
CommandCategoryBitfield command_categories() const {
return command_categories_;
}
// True if the command buffer is between a Begin/End recording block.
virtual bool is_recording() const = 0;
// Resets and begins recording into the command buffer, clearing all
// previously recorded contents.
// The command buffer must not be in-flight.
virtual Status Begin() = 0;
// Ends recording into the command buffer.
// This must be called prior to submitting the command buffer for execution.
virtual Status End() = 0;
// TODO(benvanik): annotations for debugging and tracing:
// enter/exit
// stack frame manipulation
// explicit timers? or profiling buffer?
// TODO(b/138719910): cross-queue and external acquire/release.
// virtual Status AcquireBuffer() = 0;
// virtual Status ReleaseBuffer() = 0;
// Defines a memory dependency between commands recorded before and after the
// barrier. One or more memory or buffer barriers can be specified to indicate
// between which stages or buffers the dependencies exist.
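  //
  // For illustration, a sketch ordering dispatch writes before subsequent
  // transfer reads with a single global memory barrier:
  //   MemoryBarrier barrier;
  //   barrier.source_scope = AccessScope::kDispatchWrite;
  //   barrier.target_scope = AccessScope::kTransferRead;
  //   RETURN_IF_ERROR(command_buffer->ExecutionBarrier(
  //       ExecutionStage::kDispatch, ExecutionStage::kTransfer,
  //       {&barrier, 1}, /*buffer_barriers=*/{}));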
virtual Status ExecutionBarrier(
ExecutionStageBitfield source_stage_mask,
ExecutionStageBitfield target_stage_mask,
absl::Span<const MemoryBarrier> memory_barriers,
absl::Span<const BufferBarrier> buffer_barriers) = 0;
// Sets an event to the signaled state.
// |source_stage_mask| specifies when the event is signaled.
//
// Events are only valid within a single command buffer. Events can only be
// used on non-transfer queues.
virtual Status SignalEvent(Event* event,
ExecutionStageBitfield source_stage_mask) = 0;
// Resets an event to the non-signaled state.
// |source_stage_mask| specifies when the event is unsignaled.
//
// Events are only valid within a single command buffer. Events can only be
// used on non-transfer queues.
virtual Status ResetEvent(Event* event,
ExecutionStageBitfield source_stage_mask) = 0;
// Waits for one or more events to be signaled and defines a memory dependency
// between the synchronization scope of the signal operations and the commands
// following the wait.
//
// |source_stage_mask| must include ExecutionStage::kHost for Event::Signal to
// be visible.
//
// Events are only valid within a single command buffer. Events remain
// signaled even after waiting and must be reset to be reused. Events can only
// be used on non-transfer queues.
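  //
  // For illustration, a sketch pairing SignalEvent with WaitEvents (|event|
  // is a hypothetical Event*):
  //   RETURN_IF_ERROR(command_buffer->SignalEvent(
  //       event, ExecutionStage::kDispatch));
  //   ... commands that need not wait on the event ...
  //   RETURN_IF_ERROR(command_buffer->WaitEvents(
  //       absl::MakeSpan(&event, 1), ExecutionStage::kDispatch,
  //       ExecutionStage::kDispatch, /*memory_barriers=*/{},
  //       /*buffer_barriers=*/{}));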
virtual Status WaitEvents(
absl::Span<Event*> events, ExecutionStageBitfield source_stage_mask,
ExecutionStageBitfield target_stage_mask,
absl::Span<const MemoryBarrier> memory_barriers,
absl::Span<const BufferBarrier> buffer_barriers) = 0;
// Fills the target buffer with the given repeating value.
  // Expects that |pattern_length| is one of 1, 2, or 4 and that the offset and
  // length are aligned to the natural alignment of the value.
// The target buffer must be compatible with the devices owned by this
// device queue and be allocated with BufferUsage::kTransfer.
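  //
  // For illustration, a sketch zero-filling an entire buffer (assuming a
  // Buffer::byte_length accessor from hal/buffer.h):
  //   uint32_t zero = 0;
  //   RETURN_IF_ERROR(command_buffer->FillBuffer(
  //       target_buffer, /*target_offset=*/0, target_buffer->byte_length(),
  //       &zero, sizeof(zero)));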
virtual Status FillBuffer(Buffer* target_buffer, device_size_t target_offset,
device_size_t length, const void* pattern,
size_t pattern_length) = 0;
// Hints to the device queue that the given buffer will not be used again.
  // After encoding a discard the buffer contents will be considered undefined,
  // as the discard may be used to elide write-backs to host memory or to
  // aggressively reuse the allocation for other purposes.
//
// For buffers allocated with MemoryType::kTransient this may allow
// the device queue to reclaim the memory used by the buffer earlier than
// otherwise possible.
virtual Status DiscardBuffer(Buffer* buffer) = 0;
// Updates a range of the given target buffer from the source host memory.
// The source host memory is copied immediately into the command buffer and
// occupies command buffer space. It is strongly recommended that large buffer
// updates are performed via CopyBuffer where there is the possibility of a
// zero-copy path.
  // The |source_buffer| may be released by the caller immediately after this
// call returns.
// The |target_buffer| must be compatible with the devices owned by this
// device queue and be allocated with BufferUsage::kTransfer.
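  //
  // For illustration, a sketch uploading a small host constant (|host_data|
  // is a hypothetical host array):
  //   RETURN_IF_ERROR(command_buffer->UpdateBuffer(
  //       host_data, /*source_offset=*/0, target_buffer, /*target_offset=*/0,
  //       sizeof(host_data)));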
virtual Status UpdateBuffer(const void* source_buffer,
device_size_t source_offset,
Buffer* target_buffer,
device_size_t target_offset,
device_size_t length) = 0;
// Copies a range of one buffer to another.
// Both buffers must be compatible with the devices owned by this device
  // queue and be allocated with BufferUsage::kTransfer. Though the source and
  // target buffer may be the same, the ranges must not overlap (as with
  // memcpy).
//
// This can be used to perform device->host, host->device, and device->device
// copies.
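  //
  // For illustration, a sketch staging host-visible data into device-local
  // memory (buffer names and the byte_length accessor are assumptions):
  //   RETURN_IF_ERROR(command_buffer->CopyBuffer(
  //       staging_buffer, /*source_offset=*/0, device_buffer,
  //       /*target_offset=*/0, staging_buffer->byte_length()));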
virtual Status CopyBuffer(Buffer* source_buffer, device_size_t source_offset,
Buffer* target_buffer, device_size_t target_offset,
device_size_t length) = 0;
// Dispatches an execution request.
// The request may execute overlapped with any other transfer operation or
// dispatch made within the same barrier-defined sequence.
//
// The executable specified must be registered for use with the device driver
// owning this queue. It must not be unregistered until all requests that use
// it have completed.
//
// Fails if the queue does not support dispatch operations (as indicated by
// can_dispatch).
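  //
  // For illustration, dispatching the request sketched above in
  // DispatchRequest:
  //   RETURN_IF_ERROR(command_buffer->Dispatch(request));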
  virtual Status Dispatch(const DispatchRequest& dispatch_request) = 0;

 protected:
CommandBuffer(Allocator* allocator, CommandBufferModeBitfield mode,
CommandCategoryBitfield command_categories)
: allocator_(allocator),
mode_(mode),
command_categories_(command_categories) {}

 private:
Allocator* const allocator_;
const CommandBufferModeBitfield mode_;
const CommandCategoryBitfield command_categories_;
};

}  // namespace hal
}  // namespace iree

#endif  // IREE_HAL_COMMAND_BUFFER_H_