// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef IREE_HAL_COMMAND_BUFFER_H_
#define IREE_HAL_COMMAND_BUFFER_H_

#include <array>
#include <cstdint>
#include <string>

#include "absl/types/span.h"
#include "base/bitfield.h"
#include "base/shape.h"
#include "base/status.h"
#include "hal/allocator.h"
#include "hal/buffer.h"
#include "hal/buffer_view.h"
#include "hal/event.h"
#include "hal/executable.h"
#include "hal/resource.h"

namespace iree {
namespace hal {

// A bitfield specifying the mode of operation for a command buffer.
enum class CommandBufferMode : uint32_t {
// Command buffer will be submitted once and never used again.
  // This may enable in-place patching of command buffers, reducing overhead
  // when it's known that command buffers will not be reused.
kOneShot = 1 << 0,
};
IREE_BITFIELD(CommandBufferMode);
using CommandBufferModeBitfield = CommandBufferMode;
std::string CommandBufferModeString(CommandBufferModeBitfield mode);

// A bitfield specifying the category of commands in a command queue.
enum class CommandCategory : uint32_t {
// Command is considered a transfer operation (memcpy, etc).
kTransfer = 1 << 0,
// Command is considered a dispatch operation (dispatch/execute).
kDispatch = 1 << 1,
};
IREE_BITFIELD(CommandCategory);
using CommandCategoryBitfield = CommandCategory;
std::string CommandCategoryString(CommandCategoryBitfield categories);
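
// For illustration, a sketch of combining and testing these bitfield values
// (IREE_BITFIELD defines the usual bitwise operators on the enum class):
//   CommandCategoryBitfield categories =
//       CommandCategory::kTransfer | CommandCategory::kDispatch;
//   bool has_dispatch = (categories & CommandCategory::kDispatch) ==
//                       CommandCategory::kDispatch;
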
// Bitfield specifying which execution stage a barrier should start/end at.
//
// Maps to VkPipelineStageFlagBits.
enum class ExecutionStage : uint32_t {
// Top of the pipeline when commands are initially issued by the device.
kCommandIssue = 1 << 0,
// Stage of the pipeline when dispatch parameter data is consumed.
kCommandProcess = 1 << 1,
// Stage where dispatch commands execute.
kDispatch = 1 << 2,
// Stage where transfer (copy/clear/fill/etc) commands execute.
kTransfer = 1 << 3,
// Final stage in the pipeline when commands are retired on the device.
kCommandRetire = 1 << 4,
  // Pseudo-stage for reads/writes by the host. Not executed on device.
kHost = 1 << 5,
};
IREE_BITFIELD(ExecutionStage);
using ExecutionStageBitfield = ExecutionStage;

// Bitfield specifying which scopes will access memory and how.
//
// Maps to VkAccessFlagBits.
enum class AccessScope : uint32_t {
// Read access to indirect command data as part of an indirect dispatch.
kIndirectCommandRead = 1 << 0,
// Constant uniform buffer reads by the device.
kConstantRead = 1 << 1,
// Storage buffer reads by dispatch commands.
kDispatchRead = 1 << 2,
// Storage buffer writes by dispatch commands.
kDispatchWrite = 1 << 3,
// Source of a transfer operation.
kTransferRead = 1 << 4,
// Target of a transfer operation.
kTransferWrite = 1 << 5,
// Read operation by the host through mapped memory.
kHostRead = 1 << 6,
// Write operation by the host through mapped memory.
kHostWrite = 1 << 7,
// External/non-specific read.
kMemoryRead = 1 << 8,
// External/non-specific write.
kMemoryWrite = 1 << 9,
};
IREE_BITFIELD(AccessScope);
using AccessScopeBitfield = AccessScope;

// Defines a global memory barrier.
// These are cheaper to encode than buffer-specific barriers but may cause
// stalls and bubbles in device pipelines if applied too broadly. Prefer them
// over equivalently large sets of buffer-specific barriers (such as when
// completely changing execution contexts).
//
// Maps to VkMemoryBarrier.
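//
// For illustration, a sketch making dispatch writes visible to subsequent
// transfer reads:
//   MemoryBarrier barrier;
//   barrier.source_scope = AccessScope::kDispatchWrite;
//   barrier.target_scope = AccessScope::kTransferRead;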
struct MemoryBarrier {
  // All access scopes prior to the barrier (inclusive).
  AccessScopeBitfield source_scope;
  // All access scopes following the barrier (inclusive).
  AccessScopeBitfield target_scope;
};

// Defines a memory barrier that applies to a range of a specific buffer.
// Use of these (vs. global memory barriers) provides fine-grained execution
// ordering to device command processors and allows for more aggressive
// reordering.
//
// Maps to VkBufferMemoryBarrier.
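//
// For illustration, a sketch restricting the same dependency to a single
// buffer (|source_buffer| here is a hypothetical Buffer*):
//   BufferBarrier barrier;
//   barrier.source_scope = AccessScope::kDispatchWrite;
//   barrier.target_scope = AccessScope::kTransferRead;
//   barrier.buffer = source_buffer;
//   barrier.offset = 0;
//   barrier.length = kWholeBuffer;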
struct BufferBarrier {
  // All access scopes prior to the barrier (inclusive).
  AccessScopeBitfield source_scope;
  // All access scopes following the barrier (inclusive).
  AccessScopeBitfield target_scope;
// Buffer the barrier is restricted to.
// The barrier will apply to the entire physical device allocation.
Buffer* buffer = nullptr;
// Relative offset/length within |buffer| (which may itself be mapped into the
// device allocation at an offset).
device_size_t offset = 0;
device_size_t length = kWholeBuffer;
};

// Represents a binding to a buffer with a set of attributes.
// This may be used by drivers to validate alignment.
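//
// For illustration, a sketch of constructing bindings (|input_view| and
// |output_buffer| are hypothetical):
//   BufferBinding input(MemoryAccess::kRead, input_view);
//   BufferBinding output(MemoryAccess::kWrite, output_buffer);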
struct BufferBinding {
// Access rights of the buffer contents by the executable.
MemoryAccessBitfield access = MemoryAccess::kAll;
// The buffer this binding references.
// The buffer is not retained by the binding and must be kept alive externally
// for the duration it is in use by the queue.
Buffer* buffer = nullptr;
// Shape of the buffer contents.
Shape shape;
// Size of each element within the buffer, in bytes.
int8_t element_size = 0;
BufferBinding() = default;
BufferBinding(MemoryAccessBitfield access, Buffer* buffer)
: access(access), buffer(buffer) {}
BufferBinding(MemoryAccessBitfield access, Buffer* buffer, Shape shape,
int8_t element_size)
: access(access),
buffer(buffer),
shape(shape),
element_size(element_size) {}
BufferBinding(MemoryAccessBitfield access, const BufferView& buffer_view)
: access(access),
buffer(buffer_view.buffer.get()),
shape(buffer_view.shape),
element_size(buffer_view.element_size) {}
};

// Wraps parameters for a Dispatch request.
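//
// For illustration, a sketch of a filled request (|executable| and |bindings|
// are hypothetical):
//   DispatchRequest request;
//   request.executable = executable;
//   request.entry_point = 0;
//   request.workload = {{128, 1, 1}};
//   request.bindings = bindings;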
struct DispatchRequest {
// Executable prepared for use on the device.
// The executable must remain alive until all in-flight dispatch requests
// that use it have completed.
Executable* executable = nullptr;
// Executable entry point ordinal.
int entry_point = 0;
// TODO(benvanik): predication.
// Static workload parameters defining the X, Y, and Z workgroup counts.
std::array<int32_t, 3> workload;
// An optional buffer containing the dynamic workload to dispatch.
// The contents need not be available at the time of recording but must be
// made visible prior to execution of the dispatch command.
//
// Buffer contents are expected to be 3 int32 values defining the X, Y, and Z
// workgroup counts.
//
// The buffer must have been allocated with BufferUsage::kDispatch and be
// of MemoryType::kDeviceVisible.
Buffer* workload_buffer = nullptr;
// A list of buffers that contain the execution inputs/outputs.
// Order is dependent on executable arg layout.
//
// Buffers must have been allocated with BufferUsage::kDispatch and be
// of MemoryType::kDeviceVisible.
absl::Span<const BufferBinding> bindings;
// TODO(benvanik): push-constant equivalent (uniforms, etc).
};

// Asynchronous command buffer recording interface.
// Commands are recorded by the implementation for later submission to command
// queues.
//
// Buffers and synchronization objects referenced must remain valid and not be
// modified or read while there are commands in-flight. The usual flow is to
// populate input buffers, Dispatch using those buffers, wait on a Fence until
// the buffers are guaranteed to no longer be in use, and then reuse or release
// the buffers.
//
// Errors that can be recognized when operations are enqueued will be returned
// immediately, such as invalid argument errors. Errors that can only be
// determined at execution time will be returned on fences. Once a failure
// occurs the device queue will enter an error state that invalidates all
// operations on the device queue (as ordering is not strict and any may still
// be in-flight). In this case the user of the device queue should treat all
// in-flight operations as cancelled and fully reset themselves. Other device
// queues that may be waiting on events from the device queue will also enter
// error states. Only once a user has acknowledged and cleared the error state
// with a Reset will the queue become usable again; until then all operations
// will return errors.
//
// Command buffers are thread-compatible. Use multiple command buffers if trying
// to record commands from multiple threads. Command buffers must not be mutated
// between when they are submitted for execution on a queue and when the
// fence fires indicating the completion of their execution.
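//
// For illustration, a sketch of the usual recording flow (queue submission
// and fence APIs live elsewhere in the HAL; a RETURN_IF_ERROR-style status
// macro is assumed):
//   RETURN_IF_ERROR(command_buffer->Begin());
//   ... record transfer and dispatch commands ...
//   RETURN_IF_ERROR(command_buffer->End());
//   // Submit on a queue and wait on a fence before reusing the buffers.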
class CommandBuffer : public Resource {
public:
virtual CommandBuffer* impl() { return this; }
// Device allocator that commands encoded into the buffer share compatibility
// with.
Allocator* allocator() const { return allocator_; }
// Command buffer operation mode.
CommandBufferModeBitfield mode() const { return mode_; }
// Command categories that may be recorded into the buffer.
CommandCategoryBitfield command_categories() const {
return command_categories_;
}
// True if the command buffer is between a Begin/End recording block.
virtual bool is_recording() const = 0;
// Resets and begins recording into the command buffer, clearing all
// previously recorded contents.
// The command buffer must not be in-flight.
virtual Status Begin() = 0;
// Ends recording into the command buffer.
// This must be called prior to submitting the command buffer for execution.
virtual Status End() = 0;
// TODO(benvanik): annotations for debugging and tracing:
// enter/exit
// stack frame manipulation
// explicit timers? or profiling buffer?
// TODO(b/138719910): cross-queue and external acquire/release.
// virtual Status AcquireBuffer() = 0;
// virtual Status ReleaseBuffer() = 0;
// Defines a memory dependency between commands recorded before and after the
// barrier. One or more memory or buffer barriers can be specified to indicate
// between which stages or buffers the dependencies exist.
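  //
  // For illustration, a sketch ordering dispatch writes before subsequent
  // transfer reads with a single global memory barrier:
  //   MemoryBarrier barrier;
  //   barrier.source_scope = AccessScope::kDispatchWrite;
  //   barrier.target_scope = AccessScope::kTransferRead;
  //   RETURN_IF_ERROR(command_buffer->ExecutionBarrier(
  //       ExecutionStage::kDispatch, ExecutionStage::kTransfer,
  //       {&barrier, 1}, /*buffer_barriers=*/{}));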
virtual Status ExecutionBarrier(
ExecutionStageBitfield source_stage_mask,
ExecutionStageBitfield target_stage_mask,
absl::Span<const MemoryBarrier> memory_barriers,
absl::Span<const BufferBarrier> buffer_barriers) = 0;
// Sets an event to the signaled state.
// |source_stage_mask| specifies when the event is signaled.
//
// Events are only valid within a single command buffer. Events can only be
// used on non-transfer queues.
virtual Status SignalEvent(Event* event,
ExecutionStageBitfield source_stage_mask) = 0;
// Resets an event to the non-signaled state.
// |source_stage_mask| specifies when the event is unsignaled.
//
// Events are only valid within a single command buffer. Events can only be
// used on non-transfer queues.
virtual Status ResetEvent(Event* event,
ExecutionStageBitfield source_stage_mask) = 0;
// Waits for one or more events to be signaled and defines a memory dependency
// between the synchronization scope of the signal operations and the commands
// following the wait.
//
// |source_stage_mask| must include ExecutionStage::kHost for Event::Signal to
// be visible.
//
// Events are only valid within a single command buffer. Events remain
// signaled even after waiting and must be reset to be reused. Events can only
// be used on non-transfer queues.
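  //
  // For illustration, a sketch pairing SignalEvent with WaitEvents (|event|
  // is a hypothetical Event*):
  //   RETURN_IF_ERROR(command_buffer->SignalEvent(
  //       event, ExecutionStage::kDispatch));
  //   ... commands that need not wait on the event ...
  //   RETURN_IF_ERROR(command_buffer->WaitEvents(
  //       absl::MakeSpan(&event, 1), ExecutionStage::kDispatch,
  //       ExecutionStage::kDispatch, /*memory_barriers=*/{},
  //       /*buffer_barriers=*/{}));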
virtual Status WaitEvents(
absl::Span<Event*> events, ExecutionStageBitfield source_stage_mask,
ExecutionStageBitfield target_stage_mask,
absl::Span<const MemoryBarrier> memory_barriers,
absl::Span<const BufferBarrier> buffer_barriers) = 0;
// Fills the target buffer with the given repeating value.
  // Expects that |pattern_length| is one of 1, 2, or 4 and that the offset and
  // length are aligned to the natural alignment of the value.
// The target buffer must be compatible with the devices owned by this
// device queue and be allocated with BufferUsage::kTransfer.
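  //
  // For illustration, a sketch zero-filling an entire buffer (assuming a
  // Buffer::byte_length accessor from hal/buffer.h):
  //   uint32_t zero = 0;
  //   RETURN_IF_ERROR(command_buffer->FillBuffer(
  //       target_buffer, /*target_offset=*/0, target_buffer->byte_length(),
  //       &zero, sizeof(zero)));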
virtual Status FillBuffer(Buffer* target_buffer, device_size_t target_offset,
device_size_t length, const void* pattern,
size_t pattern_length) = 0;
// Hints to the device queue that the given buffer will not be used again.
  // After encoding a discard the buffer contents will be considered undefined,
  // as the discard may be used to elide write-backs to host memory or to
  // aggressively reuse the allocation for other purposes.
//
// For buffers allocated with MemoryType::kTransient this may allow
// the device queue to reclaim the memory used by the buffer earlier than
// otherwise possible.
virtual Status DiscardBuffer(Buffer* buffer) = 0;
// Updates a range of the given target buffer from the source host memory.
// The source host memory is copied immediately into the command buffer and
// occupies command buffer space. It is strongly recommended that large buffer
// updates are performed via CopyBuffer where there is the possibility of a
// zero-copy path.
  // The |source_buffer| may be released by the caller immediately after this
// call returns.
// The |target_buffer| must be compatible with the devices owned by this
// device queue and be allocated with BufferUsage::kTransfer.
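  //
  // For illustration, a sketch uploading a small host constant (|host_data|
  // is a hypothetical host array):
  //   RETURN_IF_ERROR(command_buffer->UpdateBuffer(
  //       host_data, /*source_offset=*/0, target_buffer, /*target_offset=*/0,
  //       sizeof(host_data)));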
virtual Status UpdateBuffer(const void* source_buffer,
device_size_t source_offset,
Buffer* target_buffer,
device_size_t target_offset,
device_size_t length) = 0;
// Copies a range of one buffer to another.
// Both buffers must be compatible with the devices owned by this device
  // queue and be allocated with BufferUsage::kTransfer. Though the source and
  // target buffer may be the same, the ranges must not overlap (as with
  // memcpy).
//
// This can be used to perform device->host, host->device, and device->device
// copies.
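  //
  // For illustration, a sketch staging host-visible data into device-local
  // memory (buffer names and the byte_length accessor are assumptions):
  //   RETURN_IF_ERROR(command_buffer->CopyBuffer(
  //       staging_buffer, /*source_offset=*/0, device_buffer,
  //       /*target_offset=*/0, staging_buffer->byte_length()));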
virtual Status CopyBuffer(Buffer* source_buffer, device_size_t source_offset,
Buffer* target_buffer, device_size_t target_offset,
device_size_t length) = 0;
// Dispatches an execution request.
// The request may execute overlapped with any other transfer operation or
// dispatch made within the same barrier-defined sequence.
//
// The executable specified must be registered for use with the device driver
// owning this queue. It must not be unregistered until all requests that use
// it have completed.
//
// Fails if the queue does not support dispatch operations (as indicated by
// can_dispatch).
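  //
  // For illustration, dispatching the request sketched above in
  // DispatchRequest:
  //   RETURN_IF_ERROR(command_buffer->Dispatch(request));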
  virtual Status Dispatch(const DispatchRequest& dispatch_request) = 0;

 protected:
CommandBuffer(Allocator* allocator, CommandBufferModeBitfield mode,
CommandCategoryBitfield command_categories)
: allocator_(allocator),
mode_(mode),
command_categories_(command_categories) {}

 private:
Allocator* const allocator_;
const CommandBufferModeBitfield mode_;
const CommandCategoryBitfield command_categories_;
};

}  // namespace hal
}  // namespace iree

#endif  // IREE_HAL_COMMAND_BUFFER_H_