[experimental][regression] Add ROCM Regression test. (#14861)

diff --git a/.github/workflows/pkgci.yml b/.github/workflows/pkgci.yml
index 9dbb325..8f2da75 100644
--- a/.github/workflows/pkgci.yml
+++ b/.github/workflows/pkgci.yml
@@ -35,7 +35,12 @@
     uses: ./.github/workflows/pkgci_regression_test_cpu.yml
     needs: [build_packages]
 
-  regression_test_amdgpu:
-    name: Regression Test AMDGPU
-    uses: ./.github/workflows/pkgci_regression_test_amdgpu.yml
+  regression_test_amdgpu_vulkan:
+    name: Regression Test AMDGPU-Vulkan
+    uses: ./.github/workflows/pkgci_regression_test_amdgpu_vulkan.yml
+    needs: [build_packages]
+
+  regression_test_amdgpu_rocm:
+    name: Regression Test AMDGPU-ROCm
+    uses: ./.github/workflows/pkgci_regression_test_amdgpu_rocm.yml
     needs: [build_packages]
diff --git a/.github/workflows/pkgci_regression_test_amdgpu_rocm.yml b/.github/workflows/pkgci_regression_test_amdgpu_rocm.yml
new file mode 100644
index 0000000..0554a16
--- /dev/null
+++ b/.github/workflows/pkgci_regression_test_amdgpu_rocm.yml
@@ -0,0 +1,58 @@
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Downloads the `linux_x86_64_release_packages` artifact, sets up a venv via
+# setup_venv.py, and runs the `plat_rdna3_rocm and presubmit` regression tests.
+
+name: PkgCI Regression Test (AMDGPU-ROCm)
+on:
+  workflow_call:
+    inputs:
+      artifact_run_id:
+        type: string
+        default: ""
+  workflow_dispatch:
+    inputs:
+      artifact_run_id:
+        type: string
+        default: ""
+
+jobs:
+  linux_x86_64:
+    name: Linux (x86_64)
+    runs-on: nodai-amdgpu-w7900-x86-64
+    env:
+      PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
+      IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts
+      VENV_DIR: ${{ github.workspace }}/venv
+    steps:
+      - name: Checking out repository
+        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
+        with:
+          submodules: false
+      - uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1  # v4.7.0
+        with:
+          # Must match the subset of versions built in pkgci_build_packages.
+          python-version: '3.11'
+      - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a  # v3.0.2
+        with:
+          name: linux_x86_64_release_packages
+          path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
+      - name: Setup venv
+        run: |
+          ./build_tools/pkgci/setup_venv.py $VENV_DIR \
+            --artifact-path=${PACKAGE_DOWNLOAD_DIR} \
+            --fetch-gh-workflow=${{ inputs.artifact_run_id }}
+      - name: Run Tests
+        run: |
+          source $VENV_DIR/bin/activate
+          # Append the ROCm library directories. The `:+` expansion avoids a
+          # leading empty entry (which the loader treats as the current
+          # directory) when LD_LIBRARY_PATH is unset or empty.
+          export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/opt/rocm/lib:/opt/rocm/hip/lib
+          pytest \
+            -s -m "plat_rdna3_rocm and presubmit" \
+            experimental/regression_suite
diff --git a/.github/workflows/pkgci_regression_test_amdgpu.yml b/.github/workflows/pkgci_regression_test_amdgpu_vulkan.yml
similarity index 100%
rename from .github/workflows/pkgci_regression_test_amdgpu.yml
rename to .github/workflows/pkgci_regression_test_amdgpu_vulkan.yml
diff --git a/.gitignore b/.gitignore
index c555e39..c8c0b25 100644
--- a/.gitignore
+++ b/.gitignore
@@ -68,6 +68,7 @@
 
 # Temporary files
 iree/builtins/**/bin/*.ll
+artifacts/
 
 # Archive files
 *.tar
diff --git a/build_tools/pkgci/build_linux_packages.sh b/build_tools/pkgci/build_linux_packages.sh
index 30f89f5..f97af7e 100755
--- a/build_tools/pkgci/build_linux_packages.sh
+++ b/build_tools/pkgci/build_linux_packages.sh
@@ -188,10 +188,12 @@
 function build_iree_runtime() {
   # We install the needed build deps below for the tools.
   IREE_RUNTIME_BUILD_TRACY=ON IREE_RUNTIME_BUILD_TRACY_TOOLS=ON \
+  IREE_EXTERNAL_HAL_DRIVERS="rocm" \
   build_wheel runtime/
 }
 
 function build_iree_compiler() {
+  IREE_TARGET_BACKEND_ROCM=ON IREE_ENABLE_LLD=ON \
   build_wheel compiler/
 }
 
diff --git a/compiler/setup.py b/compiler/setup.py
index 87afbc5..01a347d 100644
--- a/compiler/setup.py
+++ b/compiler/setup.py
@@ -260,6 +260,8 @@
             "-DCMAKE_BUILD_TYPE={}".format(cfg),
             # TODO(scotttodd): include IREE_TARGET_BACKEND_WEBGPU here (and in env)
             get_env_cmake_option("IREE_ENABLE_CPUINFO", "ON"),
+            get_env_cmake_option("IREE_TARGET_BACKEND_ROCM", "ON"),
+            get_env_cmake_option("IREE_ENABLE_LLD", "OFF"),
         ]
         cmake_args.extend(get_cmake_version_info_args())
 
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTargetUtils.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTargetUtils.cpp
index da6814d..eaca113 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTargetUtils.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTargetUtils.cpp
@@ -128,7 +128,7 @@
 //===========Link LLVM Module to ROCDL End===================/
 
 //=====================Create HSACO Begin=============//
-// Link object file using ld.lld lnker to generate code object
+// Link object file using lld linker to generate code object
 // Inspiration from this section comes from LLVM-PROJECT-MLIR by
 // ROCmSoftwarePlatform
 // https://github.com/ROCmSoftwarePlatform/llvm-project-mlir/blob/miopen-dialect/mlir/lib/ExecutionEngine/ROCm/BackendUtils.cpp
diff --git a/experimental/regression_suite/pyproject.toml b/experimental/regression_suite/pyproject.toml
index 6b53197..15c207c 100644
--- a/experimental/regression_suite/pyproject.toml
+++ b/experimental/regression_suite/pyproject.toml
@@ -7,6 +7,7 @@
     "plat_host_cpu: mark tests as running on the host CPU",
     "plat_rdna3_vulkan: mark tests as running on AMD RDNA3 Vulkan device",
     "plat_nvidia_a100: mark tests as running on NVIDIA A100 device",
+    "plat_rdna3_rocm: mark tests as running on AMD RDNA3 ROCm device",
     "presubmit: mark test as running on presubmit",
     "postsubmit: mark test as running on postsubmit",
     "unstable_linalg: mark test as depending on unstable, serialized linalg IR",
diff --git a/experimental/regression_suite/tests/pregenerated/test_llama2.py b/experimental/regression_suite/tests/pregenerated/test_llama2.py
index fff7bd5..a685829 100644
--- a/experimental/regression_suite/tests/pregenerated/test_llama2.py
+++ b/experimental/regression_suite/tests/pregenerated/test_llama2.py
@@ -95,6 +95,20 @@
 
 
 @pytest.fixture
+def llama2_7b_f16qi4_stripped_rdna3_rocm_vmfb(llama2_7b_f16qi4_stripped_source):
+    return iree_compile(
+        llama2_7b_f16qi4_stripped_source,
+        "rdna3_rocm",
+        flags=COMMON_FLAGS
+        + [
+            "--iree-hal-target-backends=rocm",
+            "--iree-rocm-target-chip=gfx1100",
+            "--iree-rocm-link-bc=true",
+        ],
+    )
+
+
+@pytest.fixture
 def llama2_7b_f16qi4_sm80_cuda_vmfb(llama2_7b_f16qi4_source):
     return iree_compile(
         llama2_7b_f16qi4_source,
@@ -204,6 +218,29 @@
     )
 
 
+@pytest.mark.presubmit
+@pytest.mark.unstable_linalg
+@pytest.mark.plat_rdna3_rocm
+def test_step_rdna3_rocm_stripped(llama2_7b_f16qi4_stripped_rdna3_rocm_vmfb):
+    iree_benchmark_module(
+        llama2_7b_f16qi4_stripped_rdna3_rocm_vmfb,
+        device="rocm",
+        function="first_vicuna_forward",
+        args=[
+            "--input=1x1xi64",
+        ],
+    )
+    iree_benchmark_module(
+        llama2_7b_f16qi4_stripped_rdna3_rocm_vmfb,
+        device="rocm",
+        function="second_vicuna_forward",
+        args=[
+            "--input=1x1xi64",
+        ]
+        + (["--input=1x32x1x128xf16"] * 64),
+    )
+
+
 ###############################################################################
 # Correctness
 ###############################################################################