blob: ea66a5d715daeb50c908d6cafe5344a341fa9558 [file] [log] [blame]
// Copyright 2023 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Polyfill Metal kernels for buffer copies without 4-byte aligned offsets / lengths.
struct CopySpec {
uint64_t src_buffer_offset; // Source buffer offset (in bytes)
uint64_t dst_buffer_offset; // Destination buffer offset (in bytes)
uint64_t length; // Buffer length to fill (in bytes)
};
// Copies data from |src_buffer| to |dst_buffer| with the given |spec|ification.
//
// No alignment requirement on source/destination buffer offset and length.
// Each thread copies over one byte. This won't be good for perf; so just a fallback.
kernel void copy_buffer_1byte(
device uint8_t *src_buffer [[buffer(0)]],
device uint8_t *dst_buffer [[buffer(1)]],
constant CopySpec &spec [[buffer(2)]],
uint id [[thread_position_in_grid]]
) {
if (id >= spec.length) return;
dst_buffer[spec.dst_buffer_offset + id] = src_buffer[spec.src_buffer_offset + id];
}