Add initial ROCm HAL backend to experimental (#5943)

Initial pass at integrating ROCm into IREE so that we can generate code for and run on AMD GPUs. This follows steps similar to thomasraoux's CUDA backend. Since ROCm does not have graphs or command buffers by default, we implement ROCm's command buffer using the stream API on the default stream. Tested; it passes most CTS tests except:

semaphore_submission_test + semaphore_test -> some functionality is not yet implemented for the ROCm backend
command_buffer_test -> CommandBufferTest.CopySubBuffer

In the next patch:
- Complete semaphore functionality
- Squash CommandBuffer bugs
diff --git a/experimental/rocm/native_executable.h b/experimental/rocm/native_executable.h
new file mode 100644
index 0000000..d1ff352
--- /dev/null
+++ b/experimental/rocm/native_executable.h
@@ -0,0 +1,45 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef IREE_HAL_ROCM_NATIVE_EXECUTABLE_H_
+#define IREE_HAL_ROCM_NATIVE_EXECUTABLE_H_
+
+#include "experimental/rocm/context_wrapper.h"
+#include "experimental/rocm/rocm_headers.h"
+#include "iree/hal/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Creates an executable from an HSACO module, returned via |out_executable|.
+// The module may contain several kernels; each has an associated block size.
+iree_status_t iree_hal_rocm_native_executable_create(
+    iree_hal_rocm_context_wrapper_t *context,
+    const iree_hal_executable_spec_t *executable_spec,
+    iree_hal_executable_t **out_executable);
+
+hipFunction_t iree_hal_rocm_native_executable_for_entry_point(
+    iree_hal_executable_t *executable, int32_t entry_point);  // Returns the HIP function for |entry_point|.
+
+// Writes the block size of |entry_point| within |executable| to |x|, |y|, |z|.
+iree_status_t iree_hal_rocm_native_executable_block_size(
+    iree_hal_executable_t *executable, int32_t entry_point, uint32_t *x,
+    uint32_t *y, uint32_t *z);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_HAL_ROCM_NATIVE_EXECUTABLE_H_