[Codegen] Add PCF bufferization interfaces (#22805)

This adds bufferization interfaces for PCF ops. Tensor operands are
bufferized while `pcf.sref` operands are preserved, now ready for
conversion at a later point in the pipeline.
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/BUILD.bazel
new file mode 100644
index 0000000..a186ed0
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/BUILD.bazel
@@ -0,0 +1,44 @@
+# Copyright 2025 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/bazel:build_defs.oss.bzl", "iree_compiler_cc_library")
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+iree_compiler_cc_library(
+    name = "ExternalModels",
+    srcs = [
+        "BufferizationExternalModels.cpp",
+        "Interfaces.cpp",
+    ],
+    hdrs = [
+        "BufferizationExternalModels.h",
+        "Interfaces.h",
+    ],
+    deps = [
+        "//compiler/src/iree/compiler/Codegen/Dialect/CPU/IR:IREECPUDialect",
+        "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect",
+        "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/Utils",
+        "//compiler/src/iree/compiler/Codegen/Dialect/GPU/IR:IREEGPUDialect",
+        "//compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils:KnownTargets",
+        "//compiler/src/iree/compiler/Codegen/Dialect/PCF/IR",
+        "//compiler/src/iree/compiler/Codegen/Utils",
+        "//compiler/src/iree/compiler/Dialect/Encoding/IR",
+        "//compiler/src/iree/compiler/Dialect/Encoding/Utils",
+        "//compiler/src/iree/compiler/Dialect/TensorExt/IR",
+        "@llvm-project//llvm:Support",
+        "@llvm-project//mlir:ArithDialect",
+        "@llvm-project//mlir:BufferizationInterfaces",
+        "@llvm-project//mlir:DialectUtils",
+        "@llvm-project//mlir:IR",
+        "@llvm-project//mlir:LinalgDialect",
+        "@llvm-project//mlir:TensorDialect",
+    ],
+)
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/BufferizationExternalModels.cpp b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/BufferizationExternalModels.cpp
new file mode 100644
index 0000000..9a03faa
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/BufferizationExternalModels.cpp
@@ -0,0 +1,326 @@
+// Copyright 2025 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//===- BufferizationExternalModels.cpp -----------------------------------===//
+//
+// This file implements bufferization interfaces for PCF ops.
+//
+//===---------------------------------------------------------------------===//
+
+#include "iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/BufferizationExternalModels.h"
+
+#include "iree/compiler/Codegen/Dialect/PCF/IR/PCFAttrs.h"
+#include "iree/compiler/Codegen/Dialect/PCF/IR/PCFOps.h"
+#include "iree/compiler/Codegen/Dialect/PCF/IR/PCFTypes.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
+#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/BuiltinTypeInterfaces.h"
+
+namespace mlir::iree_compiler::IREE::PCF {
+
+using namespace mlir::bufferization;
+
+namespace {
+
+/// Bufferization model for `pcf.generic`. Tensor inits are swapped for
+/// buffers while `pcf.sref` operands and block arguments are left untouched
+/// for resolution later in the pipeline.
+struct GenericOpInterface
+    : BufferizableOpInterface::ExternalModel<GenericOpInterface,
+                                             PCF::GenericOp> {
+  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                              const AnalysisState &state) const {
+    // Parallel ops can be treated as though they never read.
+    return false;
+  }
+
+  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                               const AnalysisState &state) const {
+    // Generic ops must always be assumed to write to a tensor (init) operand.
+    return true;
+  }
+
+  AliasingValueList getAliasingValues(Operation *op, OpOperand &opOperand,
+                                      const AnalysisState &state) const {
+    auto genericOp = cast<PCF::GenericOp>(op);
+    // Only an init operand tied to a result aliases anything; untied
+    // operands alias no result.
+    OpResult tiedResult = genericOp.getTiedResult(opOperand);
+    if (!tiedResult) {
+      return {};
+    }
+
+    // A tied init bufferizes to exactly the same buffer as its result.
+    return {{tiedResult, BufferRelation::Equivalent,
+             /*isDefinite=*/true}};
+  }
+
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                          const BufferizationOptions &options,
+                          BufferizationState &state) const {
+    auto genericOp = cast<PCF::GenericOp>(op);
+    Location loc = genericOp.getLoc();
+
+    // Swap tensor inits for their buffers. Non-tensor inits (e.g. memrefs
+    // when replaying bufferization on partially bufferized IR) pass through.
+    SmallVector<Value> newInits;
+    newInits.reserve(genericOp.getInits().size());
+    for (Value init : genericOp.getInits()) {
+      if (isa<RankedTensorType>(init.getType())) {
+        FailureOr<Value> newInit = getBuffer(rewriter, init, options, state);
+        if (failed(newInit)) {
+          return op->emitOpError("failed to get init buffer");
+        }
+        newInits.push_back(*newInit);
+      } else {
+        newInits.push_back(init);
+      }
+    }
+
+    // Compute the bufferized result types; non-tensor results keep their
+    // type unchanged.
+    SmallVector<Type> newResultTypes;
+    for (Value result : genericOp.getResults()) {
+      if (isa<TensorType>(result.getType())) {
+        FailureOr<BufferLikeType> resultType =
+            bufferization::getBufferType(result, options, state);
+        if (failed(resultType)) {
+          return failure();
+        }
+        newResultTypes.push_back(*resultType);
+      } else {
+        newResultTypes.push_back(result.getType());
+      }
+    }
+
+    // Recreate the op with bufferized inits/results and move both regions
+    // over unchanged (region block arguments are srefs, not tensors).
+    auto newGenericOp = PCF::GenericOp::create(
+        rewriter, loc, newResultTypes, genericOp.getScope(), newInits,
+        genericOp.getDynamicSizes(), genericOp.getIsTied(),
+        genericOp.getNumIterators(), genericOp.getSyncOnReturn());
+    newGenericOp.getRegion().takeBody(genericOp.getRegion());
+    newGenericOp.getInitializer().takeBody(genericOp.getInitializer());
+    replaceOpWithBufferizedValues(rewriter, op, newGenericOp.getResults());
+    return success();
+  }
+
+  FailureOr<BufferLikeType>
+  getBufferType(Operation *op, Value value, const BufferizationOptions &options,
+                const BufferizationState &state,
+                SmallVector<Value> &invocationStack) const {
+    auto genericOp = cast<PCF::GenericOp>(op);
+
+    // Block arguments are `pcf.sref`, so this must always be an opresult.
+    auto result = cast<OpResult>(value);
+    assert(result.getOwner() == op && "invalid value");
+
+    // If the result has a tied init, use that as the buffer type.
+    OpOperand *tiedInit = genericOp.getTiedInit(result.getResultNumber());
+    if (tiedInit) {
+      return bufferization::detail::asMemRefType(bufferization::getBufferType(
+          tiedInit->get(), options, state, invocationStack));
+    }
+
+    auto resultType = cast<RankedTensorType>(result.getType());
+
+    // Else query the scope for the memory space to allocate for.
+    FailureOr<Attribute> memSpace =
+        genericOp.getScope().getAllocMemSpace(op->getContext());
+    if (failed(memSpace)) {
+      return failure();
+    }
+    return cast<BufferLikeType>(
+        getMemRefTypeWithStaticIdentityLayout(resultType, *memSpace));
+  }
+};
+
+/// Bufferization model for `pcf.loop`. Mirrors the `pcf.generic` model:
+/// tensor inits become buffers, `pcf.sref` values are preserved.
+struct LoopOpInterface
+    : BufferizableOpInterface::ExternalModel<LoopOpInterface, PCF::LoopOp> {
+  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                              const AnalysisState &state) const {
+    // Parallel ops can be treated as though they never read.
+    return false;
+  }
+
+  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                               const AnalysisState &state) const {
+    // Loop ops must always be assumed to write to a tensor (init) operand.
+    return true;
+  }
+
+  AliasingValueList getAliasingValues(Operation *op, OpOperand &opOperand,
+                                      const AnalysisState &state) const {
+    auto loopOp = cast<PCF::LoopOp>(op);
+    // Only an init operand tied to a result aliases anything.
+    OpResult tiedResult = loopOp.getTiedResult(opOperand);
+    if (!tiedResult) {
+      return {};
+    }
+
+    // A tied init bufferizes to exactly the same buffer as its result.
+    return {{tiedResult, BufferRelation::Equivalent,
+             /*isDefinite=*/true}};
+  }
+
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                          const BufferizationOptions &options,
+                          BufferizationState &state) const {
+    auto loopOp = cast<PCF::LoopOp>(op);
+    Location loc = loopOp.getLoc();
+
+    // Swap tensor inits for their buffers; non-tensor inits pass through
+    // (supports replaying bufferization on partially bufferized IR).
+    SmallVector<Value> newInits;
+    newInits.reserve(loopOp.getInits().size());
+    for (Value init : loopOp.getInits()) {
+      if (isa<RankedTensorType>(init.getType())) {
+        FailureOr<Value> newInit = getBuffer(rewriter, init, options, state);
+        if (failed(newInit)) {
+          return op->emitOpError("failed to get init buffer");
+        }
+        newInits.push_back(*newInit);
+      } else {
+        newInits.push_back(init);
+      }
+    }
+
+    // Compute bufferized result types; non-tensor results are unchanged.
+    SmallVector<Type> newResultTypes;
+    for (Value result : loopOp.getResults()) {
+      if (isa<TensorType>(result.getType())) {
+        FailureOr<BufferLikeType> resultType =
+            bufferization::getBufferType(result, options, state);
+        if (failed(resultType)) {
+          return failure();
+        }
+        newResultTypes.push_back(*resultType);
+      } else {
+        newResultTypes.push_back(result.getType());
+      }
+    }
+
+    // Recreate the op with bufferized inits/results and move the body over
+    // unchanged (block arguments are srefs, not tensors).
+    auto newLoopOp = PCF::LoopOp::create(
+        rewriter, loc, newResultTypes, loopOp.getScope(), loopOp.getCount(),
+        newInits, loopOp.getDynamicSizes(), loopOp.getIsTied(),
+        loopOp.getSyncOnReturn());
+    newLoopOp.getRegion().takeBody(loopOp.getRegion());
+    replaceOpWithBufferizedValues(rewriter, op, newLoopOp.getResults());
+    return success();
+  }
+
+  FailureOr<BufferLikeType>
+  getBufferType(Operation *op, Value value, const BufferizationOptions &options,
+                const BufferizationState &state,
+                SmallVector<Value> &invocationStack) const {
+    auto loopOp = cast<PCF::LoopOp>(op);
+
+    // Block arguments are `pcf.sref`, so this must always be an opresult.
+    auto result = cast<OpResult>(value);
+    assert(result.getOwner() == op && "invalid value");
+
+    // If the result has a tied init, use that as the buffer type.
+    OpOperand *tiedInit = loopOp.getTiedInit(result.getResultNumber());
+    if (tiedInit) {
+      return bufferization::detail::asMemRefType(bufferization::getBufferType(
+          tiedInit->get(), options, state, invocationStack));
+    }
+
+    auto resultType = cast<RankedTensorType>(result.getType());
+
+    // Else query the scope for the memory space to allocate for.
+    FailureOr<Attribute> memSpace =
+        loopOp.getScope().getAllocMemSpace(op->getContext());
+    if (failed(memSpace)) {
+      return failure();
+    }
+    return cast<BufferLikeType>(
+        getMemRefTypeWithStaticIdentityLayout(resultType, *memSpace));
+  }
+};
+
+/// Bufferization model for `pcf.write_slice`. The tensor source is read-only
+/// and is replaced with its buffer; the `pcf.sref` destination is preserved
+/// for later conversion.
+struct WriteSliceOpInterface
+    : BufferizableOpInterface::ExternalModel<WriteSliceOpInterface,
+                                             PCF::WriteSliceOp> {
+  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                              const AnalysisState &state) const {
+    // The only valid tensor operand is the source which is always read.
+    return true;
+  }
+
+  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                               const AnalysisState &state) const {
+    // The only valid tensor operand is the source which is only read.
+    return false;
+  }
+
+  AliasingValueList getAliasingValues(Operation *op, OpOperand &opOperand,
+                                      const AnalysisState &state) const {
+    // The op has no results, so no operand can alias a value.
+    return {};
+  }
+
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                          const BufferizationOptions &options,
+                          BufferizationState &state) const {
+    auto writeOp = cast<PCF::WriteSliceOp>(op);
+
+    // Nothing to do if the source is already a memref (e.g. when replaying
+    // bufferization on partially bufferized IR).
+    if (!isa<RankedTensorType>(writeOp.getSourceType())) {
+      return success();
+    }
+    FailureOr<Value> newSrc =
+        getBuffer(rewriter, writeOp.getSource(), options, state);
+    if (failed(newSrc)) {
+      return failure();
+    }
+    // Wrap the in-place operand update so the rewriter (and any attached
+    // listeners) are notified of the modification.
+    rewriter.modifyOpInPlace(
+        writeOp, [&] { writeOp.getSourceMutable().assign(*newSrc); });
+    return success();
+  }
+};
+
+/// Bufferization model for `pcf.read_slice`. Tensor results are bufferized
+/// to `pcf.get_memref` with a maximally dynamic layout; the layout and
+/// memory space are filled in when sref types are resolved later.
+struct ReadSliceOpInterface
+    : BufferizableOpInterface::ExternalModel<ReadSliceOpInterface,
+                                             PCF::ReadSliceOp> {
+  static MemRefType
+  getMaximallyDynamicBufferType(MLIRContext *context,
+                                PCF::ShapedRefType sourceType) {
+    // Create result type with maximally dynamic layout and no memory space.
+    // Layout and memory space aren't known until resolving sref types, after
+    // which we will propagate both to this operation's users.
+    SmallVector<int64_t> strides(sourceType.getRank(), ShapedType::kDynamic);
+    auto layout =
+        StridedLayoutAttr::get(context, ShapedType::kDynamic, strides);
+    return MemRefType::get(sourceType.getShape(), sourceType.getElementType(),
+                           layout,
+                           /*memorySpace=*/nullptr);
+  }
+  // NOTE(review): the sibling models return FailureOr<BufferLikeType> here;
+  // this declaration relies on the BaseMemRefType -> BufferLikeType
+  // conversion. Consider aligning the return type for consistency.
+  FailureOr<BaseMemRefType>
+  getBufferType(Operation *op, Value value, const BufferizationOptions &options,
+                const BufferizationState &state,
+                SmallVector<Value> &invocationStack) const {
+    // The op has a single result, so `value` is necessarily that result.
+    auto readOp = cast<PCF::ReadSliceOp>(op);
+    return getMaximallyDynamicBufferType(op->getContext(),
+                                         readOp.getSourceType());
+  }
+
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                          const BufferizationOptions &options,
+                          BufferizationState &state) const {
+    auto readOp = cast<PCF::ReadSliceOp>(op);
+
+    // Skip vector results.
+    if (!isa<RankedTensorType>(readOp.getResultType())) {
+      return success();
+    }
+
+    // Create result type with maximally dynamic layout and no memory space.
+    auto resultType =
+        getMaximallyDynamicBufferType(op->getContext(), readOp.getSourceType());
+
+    // GetMemrefOp lets us get a memref out of a read_slice. Accesses to srefs
+    // are allowed to ignore accesses to this memref.
+    auto getMemrefOp = PCF::GetMemrefOp::create(
+        rewriter, readOp.getLoc(), resultType, readOp.getSource(),
+        readOp.getMixedOffsets(), readOp.getMixedSizes(),
+        readOp.getMixedStrides());
+    replaceOpWithBufferizedValues(rewriter, op, getMemrefOp.getResult());
+    return success();
+  }
+};
+
+} // namespace
+
+/// Attaches the bufferization external models to the PCF ops when the PCF
+/// dialect is loaded into a context.
+void registerBufferizationExternalModels(DialectRegistry &registry) {
+  registry.addExtension(
+      +[](MLIRContext *context, PCF::PCFDialect *pcfDialect) {
+        PCF::GenericOp::attachInterface<GenericOpInterface>(*context);
+        PCF::LoopOp::attachInterface<LoopOpInterface>(*context);
+        PCF::ReadSliceOp::attachInterface<ReadSliceOpInterface>(*context);
+        PCF::WriteSliceOp::attachInterface<WriteSliceOpInterface>(*context);
+      });
+}
+
+} // namespace mlir::iree_compiler::IREE::PCF
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/BufferizationExternalModels.h b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/BufferizationExternalModels.h
new file mode 100644
index 0000000..ce1814c
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/BufferizationExternalModels.h
@@ -0,0 +1,20 @@
+// Copyright 2025 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_COMPILER_CODEGEN_DIALECT_PCF_EXTERNALINTERFACES_BUFFERIZATIONEXTERNALMODELS_H_
+#define IREE_COMPILER_CODEGEN_DIALECT_PCF_EXTERNALINTERFACES_BUFFERIZATIONEXTERNALMODELS_H_
+
+namespace mlir {
+class DialectRegistry;
+} // namespace mlir
+
+namespace mlir::iree_compiler::IREE::PCF {
+
+void registerBufferizationExternalModels(DialectRegistry &registry);
+
+} // namespace mlir::iree_compiler::IREE::PCF
+
+#endif // IREE_COMPILER_CODEGEN_DIALECT_PCF_EXTERNALINTERFACES_BUFFERIZATIONEXTERNALMODELS_H_
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/CMakeLists.txt
new file mode 100644
index 0000000..628593e
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/CMakeLists.txt
@@ -0,0 +1,41 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/BUILD.bazel#
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+  NAME
+    ExternalModels
+  HDRS
+    "BufferizationExternalModels.h"
+    "Interfaces.h"
+  SRCS
+    "BufferizationExternalModels.cpp"
+    "Interfaces.cpp"
+  DEPS
+    LLVMSupport
+    MLIRArithDialect
+    MLIRIR
+    MLIRLinalgDialect
+    MLIRTensorDialect
+    iree::compiler::Codegen::Dialect::CPU::IR::IREECPUDialect
+    iree::compiler::Codegen::Dialect::Codegen::IR::IREECodegenDialect
+    iree::compiler::Codegen::Dialect::Codegen::Utils
+    iree::compiler::Codegen::Dialect::GPU::IR::IREEGPUDialect
+    iree::compiler::Codegen::Dialect::GPU::TargetUtils::KnownTargets
+    iree::compiler::Codegen::Dialect::PCF::IR
+    iree::compiler::Codegen::Utils
+    iree::compiler::Dialect::Encoding::IR
+    iree::compiler::Dialect::Encoding::Utils
+    iree::compiler::Dialect::TensorExt::IR
+  PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/Interfaces.cpp b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/Interfaces.cpp
new file mode 100644
index 0000000..3e1d535
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/Interfaces.cpp
@@ -0,0 +1,17 @@
+// Copyright 2025 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/Interfaces.h"
+
+#include "iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/BufferizationExternalModels.h"
+
+namespace mlir::iree_compiler {
+
+void registerPCFExternalInterfaces(DialectRegistry &registry) {
+  IREE::PCF::registerBufferizationExternalModels(registry);
+}
+
+} // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/Interfaces.h b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/Interfaces.h
new file mode 100644
index 0000000..bdadc53
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/Interfaces.h
@@ -0,0 +1,19 @@
+// Copyright 2025 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_COMPILER_CODEGEN_DIALECT_PCF_EXTERNALINTERFACES_INTERFACES_H_
+#define IREE_COMPILER_CODEGEN_DIALECT_PCF_EXTERNALINTERFACES_INTERFACES_H_
+
+#include "mlir/IR/Dialect.h"
+
+namespace mlir::iree_compiler {
+
+/// Registers all external interfaces implemented on PCF ops.
+void registerPCFExternalInterfaces(DialectRegistry &registry);
+
+} // namespace mlir::iree_compiler
+
+#endif // IREE_COMPILER_CODEGEN_DIALECT_PCF_EXTERNALINTERFACES_INTERFACES_H_
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/test/BUILD.bazel
new file mode 100644
index 0000000..63fd7bf
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/test/BUILD.bazel
@@ -0,0 +1,30 @@
+# Copyright 2025 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Lit tests for the PCF bufferization external interface implementations.
+
+load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob")
+load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
+
+package(
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+iree_lit_test_suite(
+    name = "lit",
+    srcs = enforce_glob(
+        [
+            "bufferize.mlir",
+        ],
+        include = ["*.mlir"],
+    ),
+    cfg = "//compiler:lit.cfg.py",
+    tools = [
+        "//tools:iree-opt",
+        "@llvm-project//llvm:FileCheck",
+    ],
+)
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/test/CMakeLists.txt
new file mode 100644
index 0000000..238a8ce
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/test/CMakeLists.txt
@@ -0,0 +1,23 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/test/BUILD.bazel#
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_lit_test_suite(
+  NAME
+    lit
+  SRCS
+    "bufferize.mlir"
+  TOOLS
+    FileCheck
+    iree-opt
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/test/bufferize.mlir b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/test/bufferize.mlir
new file mode 100644
index 0000000..a4bb9f5
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/test/bufferize.mlir
@@ -0,0 +1,200 @@
+// RUN: iree-opt %s --one-shot-bufferize --split-input-file | FileCheck %s
+
+util.func private @bufferize_generic(%d0: index, %d1: index, %d2: index, %d3: index) {
+  %0 = bufferization.alloc_tensor(%d0) : tensor<?xi32>
+  %1 = bufferization.alloc_tensor(%d3) {memory_space = "foo"} : tensor<?xi32>
+  %2:4 = pcf.generic scope(#pcf.test_scope)
+    execute(%ref = %0, %ref_1, %ref_2, %ref_3 = %1)[%id: index, %n: index]
+         : (!pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>)
+        -> (tensor<?xi32>, tensor<?xi32>{%d1}, tensor<?xi32>{%d2}, tensor<?xi32>) {
+    util.optimization_barrier %id, %n, %ref, %ref_1, %ref_2, %ref_3 : index, index, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>
+    pcf.return
+  }
+  util.return
+}
+
+// CHECK-LABEL: @bufferize_generic(
+//  CHECK-SAME:   %[[D0:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[D1:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[D2:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[D3:[A-Za-z0-9]+]]: index
+
+//   CHECK-DAG:   %[[ALLOC:.+]] = memref.alloc(%[[D0]]) {alignment = 64 : i64} : memref<?xi32>
+//   CHECK-DAG:   %[[ALLOC1:.+]] = memref.alloc(%[[D3]]) {alignment = 64 : i64} : memref<?xi32, "foo">
+//       CHECK:   pcf.generic scope(#pcf.test_scope)
+//  CHECK-NEXT:     execute(%[[REF:[A-Za-z0-9_]+]] = %[[ALLOC]],
+//  CHECK-SAME:             %[[REF1:[A-Za-z0-9_]+]],
+//  CHECK-SAME:             %[[REF2:[A-Za-z0-9_]+]],
+//  CHECK-SAME:             %[[REF3:[A-Za-z0-9_]+]] = %[[ALLOC1]]
+//  CHECK-SAME:             [%{{.*}}: index, %{{.*}}: index]
+//  CHECK-NEXT:          : (!pcf.sref<?xi32, #pcf.test_scope>,
+//  CHECK-SAME:             !pcf.sref<?xi32, #pcf.test_scope>,
+//  CHECK-SAME:             !pcf.sref<?xi32, #pcf.test_scope>,
+//  CHECK-SAME:             !pcf.sref<?xi32, #pcf.test_scope>)
+//  CHECK-NEXT:         -> (memref<?xi32>, memref<?xi32>{%[[D1]]}, memref<?xi32>{%[[D2]]}, memref<?xi32, "foo">) {
+//       CHECK:       pcf.return
+//  CHECK-NEXT:     }
+
+// -----
+
+util.func private @replay_bufferize_generic(%0: memref<?xi32>, %1: memref<?xi32>, %d0: index, %d1: index) {
+  %2:4 = pcf.generic scope(#pcf.test_scope)
+    execute(%ref = %0, %ref_1, %ref_2, %ref_3 = %1)[%id: index, %n: index]
+         : (!pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>)
+        -> (memref<?xi32>, memref<?xi32>{%d0}, memref<?xi32>{%d1}, memref<?xi32>) {
+    util.optimization_barrier %id, %n, %ref, %ref_1, %ref_2, %ref_3 : index, index, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>
+    pcf.return
+  }
+  util.optimization_barrier %2#0, %2#1, %2#2, %2#3 : memref<?xi32>, memref<?xi32>, memref<?xi32>, memref<?xi32>
+  util.return
+}
+
+// Verify that replaying bufferization works.
+// CHECK-LABEL: @replay_bufferize_generic(
+//       CHECK:   pcf.generic scope(#pcf.test_scope)
+//       CHECK:            -> (memref<?xi32>, memref<?xi32>{%{{.*}}}, memref<?xi32>{%{{.*}}}, memref<?xi32>) {
+
+// -----
+
+util.func private @bufferize_generic_mixed(%d0: index, %d1: index, %d2: index, %1: memref<?xi32, "foo">) {
+  %0 = bufferization.alloc_tensor(%d0) : tensor<?xi32>
+  %2:4 = pcf.generic scope(#pcf.test_scope)
+    execute(%ref = %0, %ref_1, %ref_2, %ref_3 = %1)[%id: index, %n: index]
+         : (!pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>)
+        -> (tensor<?xi32>, memref<?xi32>{%d1}, tensor<?xi32>{%d2}, memref<?xi32, "foo">) {
+    util.optimization_barrier %id, %n, %ref, %ref_1, %ref_2, %ref_3 : index, index, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>
+    pcf.return
+  }
+  util.return
+}
+
+// CHECK-LABEL: @bufferize_generic_mixed(
+//  CHECK-SAME:   %[[D0:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[D1:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[D2:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[INIT1:[A-Za-z0-9]+]]: memref<?xi32, "foo">
+
+//       CHECK:   %[[ALLOC:.+]] = memref.alloc(%[[D0]]) {alignment = 64 : i64} : memref<?xi32>
+//       CHECK:   pcf.generic scope(#pcf.test_scope)
+//  CHECK-NEXT:     execute(%[[REF:[A-Za-z0-9_]+]] = %[[ALLOC]],
+//  CHECK-SAME:             %[[REF1:[A-Za-z0-9_]+]],
+//  CHECK-SAME:             %[[REF2:[A-Za-z0-9_]+]],
+//  CHECK-SAME:             %[[REF3:[A-Za-z0-9_]+]] = %[[INIT1]]
+//  CHECK-SAME:             [%{{.*}}: index, %{{.*}}: index]
+//  CHECK-NEXT:          : (!pcf.sref<?xi32, #pcf.test_scope>,
+//  CHECK-SAME:             !pcf.sref<?xi32, #pcf.test_scope>,
+//  CHECK-SAME:             !pcf.sref<?xi32, #pcf.test_scope>,
+//  CHECK-SAME:             !pcf.sref<?xi32, #pcf.test_scope>)
+//  CHECK-NEXT:         -> (memref<?xi32>, memref<?xi32>{%[[D1]]}, memref<?xi32>{%[[D2]]}, memref<?xi32, "foo">) {
+//       CHECK:       pcf.return
+//  CHECK-NEXT:     }
+
+// -----
+
+util.func private @bufferize_loop(%d0: index, %d1: index, %d2: index, %d3: index, %n: index) {
+  %0 = bufferization.alloc_tensor(%d0) : tensor<?xi32>
+  %1 = bufferization.alloc_tensor(%d3) {memory_space = "foo"} : tensor<?xi32>
+  %2:4 = pcf.loop scope(#pcf.test_scope) count(%n)
+    execute(%ref = %0, %ref_1, %ref_2, %ref_3 = %1)[%id: index]
+         : (!pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>)
+        -> (tensor<?xi32>, tensor<?xi32>{%d1}, tensor<?xi32>{%d2}, tensor<?xi32>) {
+    util.optimization_barrier %id, %ref, %ref_1, %ref_2, %ref_3 : index, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>
+    pcf.return
+  }
+  util.return
+}
+
+// CHECK-LABEL: @bufferize_loop(
+//  CHECK-SAME:   %[[D0:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[D1:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[D2:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[D3:[A-Za-z0-9]+]]: index
+
+//   CHECK-DAG:   %[[ALLOC:.+]] = memref.alloc(%[[D0]]) {alignment = 64 : i64} : memref<?xi32>
+//   CHECK-DAG:   %[[ALLOC1:.+]] = memref.alloc(%[[D3]]) {alignment = 64 : i64} : memref<?xi32, "foo">
+//       CHECK:   pcf.loop scope(#pcf.test_scope) count
+//  CHECK-NEXT:     execute(%[[REF:[A-Za-z0-9_]+]] = %[[ALLOC]],
+//  CHECK-SAME:             %[[REF1:[A-Za-z0-9_]+]],
+//  CHECK-SAME:             %[[REF2:[A-Za-z0-9_]+]],
+//  CHECK-SAME:             %[[REF3:[A-Za-z0-9_]+]] = %[[ALLOC1]]
+//  CHECK-SAME:             [%{{.*}}: index]
+//  CHECK-NEXT:          : (!pcf.sref<?xi32, #pcf.test_scope>,
+//  CHECK-SAME:             !pcf.sref<?xi32, #pcf.test_scope>,
+//  CHECK-SAME:             !pcf.sref<?xi32, #pcf.test_scope>,
+//  CHECK-SAME:             !pcf.sref<?xi32, #pcf.test_scope>)
+//  CHECK-NEXT:         -> (memref<?xi32>, memref<?xi32>{%[[D1]]}, memref<?xi32>{%[[D2]]}, memref<?xi32, "foo">) {
+//       CHECK:       pcf.return
+//  CHECK-NEXT:     }
+
+// -----
+
+util.func private @bufferize_loop_mixed(%d0: index, %d1: index, %d2: index, %1: memref<?xi32, "foo">, %n: index) {
+  %0 = bufferization.alloc_tensor(%d0) : tensor<?xi32>
+  %2:4 = pcf.loop sync true scope(#pcf.test_scope) count(%n)
+    execute(%ref = %0, %ref_1, %ref_2, %ref_3 = %1)[%id: index]
+         : (!pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>)
+        -> (tensor<?xi32>, memref<?xi32>{%d1}, tensor<?xi32>{%d2}, memref<?xi32, "foo">) {
+    util.optimization_barrier %id, %ref, %ref_1, %ref_2, %ref_3 : index, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>, !pcf.sref<?xi32, #pcf.test_scope>
+    pcf.return
+  }
+  util.return
+}
+
+// CHECK-LABEL: @bufferize_loop_mixed(
+//  CHECK-SAME:   %[[D0:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[D1:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[D2:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[INIT1:[A-Za-z0-9]+]]: memref<?xi32, "foo">
+
+//       CHECK:   %[[ALLOC:.+]] = memref.alloc(%[[D0]]) {alignment = 64 : i64} : memref<?xi32>
+//       CHECK:   pcf.loop sync true scope(#pcf.test_scope) count
+//  CHECK-NEXT:     execute(%[[REF:[A-Za-z0-9_]+]] = %[[ALLOC]],
+//  CHECK-SAME:             %[[REF1:[A-Za-z0-9_]+]],
+//  CHECK-SAME:             %[[REF2:[A-Za-z0-9_]+]],
+//  CHECK-SAME:             %[[REF3:[A-Za-z0-9_]+]] = %[[INIT1]]
+//  CHECK-SAME:             [%{{.*}}: index]
+//  CHECK-NEXT:          : (!pcf.sref<?xi32, #pcf.test_scope>,
+//  CHECK-SAME:             !pcf.sref<?xi32, #pcf.test_scope>,
+//  CHECK-SAME:             !pcf.sref<?xi32, #pcf.test_scope>,
+//  CHECK-SAME:             !pcf.sref<?xi32, #pcf.test_scope>)
+//  CHECK-NEXT:         -> (memref<?xi32>, memref<?xi32>{%[[D1]]}, memref<?xi32>{%[[D2]]}, memref<?xi32, "foo">) {
+//       CHECK:       pcf.return
+//  CHECK-NEXT:     }
+
+// -----
+
+util.func private @write_tensor(%dst: !pcf.sref<?xi32, #pcf.test_scope>) {
+  %src = bufferization.alloc_tensor() : tensor<2xi32>
+  pcf.write_slice %src into %dst[1] [2] [1] : tensor<2xi32> into !pcf.sref<?xi32, #pcf.test_scope>
+  util.return
+}
+
+// CHECK-LABEL: @write_tensor
+//  CHECK-SAME:   %[[DST:[A-Za-z0-9]+]]: !pcf.sref<?xi32, #pcf.test_scope>
+//       CHECK:   %[[SRC:.+]] = memref.alloc() {alignment = 64 : i64} : memref<2xi32>
+//  CHECK-NEXT:   pcf.write_slice %[[SRC]] into %[[DST]][1] [2] [1] : memref<2xi32> into !pcf.sref<?xi32, #pcf.test_scope>
+
+// -----
+
+util.func private @replay_write_tensor_bufferize(%src: memref<2xi32>, %dst: !pcf.sref<?xi32, #pcf.test_scope>) {
+  pcf.write_slice %src into %dst[1] [2] [1] : memref<2xi32> into !pcf.sref<?xi32, #pcf.test_scope>
+  util.return
+}
+
+// CHECK-LABEL: @replay_write_tensor_bufferize
+//  CHECK-NEXT:   pcf.write_slice %{{.*}} into %{{.*}}[1] [2] [1] : memref<2xi32> into !pcf.sref<?xi32, #pcf.test_scope>
+
+// -----
+
+util.func private @read_tensor(%src: !pcf.sref<?x?xi32, #pcf.test_scope>, %s0: index, %s1: index) -> tensor<?x?xi32> {
+  %result = pcf.read_slice %src[0, 1] [%s0, %s1] [1, 1] : !pcf.sref<?x?xi32, #pcf.test_scope> to tensor<?x?xi32>
+  util.return %result : tensor<?x?xi32>
+}
+
+// CHECK-LABEL: @read_tensor
+//  CHECK-SAME:   %[[SRC:[A-Za-z0-9]+]]: !pcf.sref<?x?xi32, #pcf.test_scope>
+//  CHECK-SAME:   %[[S0:[A-Za-z0-9]+]]: index
+//  CHECK-SAME:   %[[S1:[A-Za-z0-9]+]]: index
+//  CHECK-NEXT:   %[[MEMREF:.+]] = pcf.get_memref %[[SRC]][0, 1] [%[[S0]], %[[S1]]] [1, 1] : !pcf.sref<?x?xi32, #pcf.test_scope> to memref<?x?xi32, strided<[?, ?], offset: ?>>
+//  CHECK-NEXT:   %[[RESULT:.+]] = bufferization.to_tensor %[[MEMREF]] : memref<?x?xi32, strided<[?, ?], offset: ?>> to tensor<?x?xi32>
+//  CHECK-NEXT:   util.return %[[RESULT]] : tensor<?x?xi32>
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Interfaces/BUILD.bazel
index 8a83a45..1501c78 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/BUILD.bazel
@@ -49,6 +49,7 @@
         ":ProcessorOpInterfaces",
         ":UKernelOpInterface",
         ":TensorMaskingOpInterface",
+        "//compiler/src/iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces:ExternalModels",
         "//compiler/src/iree/compiler/Codegen/ExternalInterfaces:ExternalModels",
         "//compiler/src/iree/compiler/Dialect/LinalgExt/TransformExtensions:LinalgExtExtensions",
         "//llvm-external-projects/iree-dialects:IREELinalgTransformDialect",
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Interfaces/CMakeLists.txt
index 5589757..6170487 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/CMakeLists.txt
@@ -48,6 +48,7 @@
     MLIRVectorTransforms
     iree::compiler::Codegen::Common::TransformExtensions::CommonExtensions
     iree::compiler::Codegen::Dialect::GPU::TransformExtensions::IREEGPUExtensions
+    iree::compiler::Codegen::Dialect::PCF::ExternalInterfaces::ExternalModels
     iree::compiler::Codegen::ExternalInterfaces::ExternalModels
     iree::compiler::Codegen::LLVMCPU::TransformExtensions::LLVMCPUExtensions
     iree::compiler::Codegen::LLVMGPU::TransformExtensions::LLVMGPUExtensions
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.cpp
index 00fcf89..fe51909 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.cpp
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.cpp
@@ -7,6 +7,7 @@
 #include "iree/compiler/Codegen/Interfaces/Interfaces.h"
 
 #include "iree/compiler/Codegen/Dialect/GPU/TransformExtensions/IREEGPUExtensions.h"
+#include "iree/compiler/Codegen/Dialect/PCF/ExternalInterfaces/Interfaces.h"
 #include "iree/compiler/Codegen/ExternalInterfaces/Interfaces.h"
 #include "iree/compiler/Codegen/Interfaces/BufferizationInterfaces.h"
 #include "iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.h"
@@ -47,6 +48,7 @@
 void registerCodegenInterfaces(DialectRegistry &registry) {
   registerProcessorOpInterfaceExternalModels(registry);
   registerCodegenExternalInterfaces(registry);
+  registerPCFExternalInterfaces(registry);
   registerBufferizationInterfaces(registry);
   registerTensorMaskingOpInterface(registry);
   // TODO: Remove this dependency once the transform dialect extensions